# Coarse 클래스 정의

이 노트북은 AWS 아이콘의 Coarse 클래스(20개 카테고리)를 정의합니다.

## 개요

- 20개의 Coarse 클래스 정의
- Fine 서비스 목록 정의
- 클래스 구조 검증

**원본 노트북**: `coarse-20-class.ipynb`


In [11]:
import os, re
import pandas as pd

In [12]:
!which python ; which python3 ; which pip ; which pip3

/home/wsm/miniconda3/envs/archlens/bin/python
/home/wsm/miniconda3/envs/archlens/bin/python3
/home/wsm/miniconda3/envs/archlens/bin/pip
/home/wsm/miniconda3/envs/archlens/bin/pip3


In [13]:
# The 20 coarse (Stage-1) categories that every fine-grained AWS service is
# assigned to. The list order is kept as the label order.
COARSE_CLASSES = [
    # Core infrastructure
    "Compute", "Networking", "Storage", "Database",
    # Application platform
    "Containers & Orchestration", "Serverless & Event-driven",
    "Application Integration",
    # Data and intelligence
    "Analytics", "AI & Machine Learning",
    # Operations
    "Security & Identity", "Monitoring & Logging",
    "Management & Governance", "DevOps & Developer Tools",
    "Migration & Transfer",
    # Domain-specific offerings
    "Business Applications", "IoT", "Media Services",
    "Blockchain", "Quantum", "Robotics / AR-VR",
]

In [14]:
# Fine-grained (Stage-2) AWS services, grouped by the coarse class they belong
# to. Each entry is a (canonical_service_name, service_code) pair; the groups
# and the entries inside them are listed in the order they must appear in
# FINE_SERVICES.
_FINE_SERVICES_BY_COARSE = {
    "Compute": [
        ("Amazon EC2", "ec2"),
        ("AWS Batch", "batch"),
        ("AWS Elastic Beanstalk", "elasticbeanstalk"),
        ("Amazon Lightsail", "lightsail"),
    ],
    "Networking": [
        ("Amazon VPC", "ec2"),  # VPC-related APIs live under ec2
        ("Elastic Load Balancing", "elasticloadbalancing"),
        ("Amazon Route 53", "route53"),
        ("AWS Global Accelerator", "globalaccelerator"),
        ("AWS Direct Connect", "directconnect"),
        ("AWS Transit Gateway", "ec2"),  # TGW is also a resource inside ec2
        ("AWS Cloud Map", "servicediscovery"),
    ],
    "Storage": [
        ("Amazon S3", "s3"),
        ("Amazon EFS", "efs"),
        ("Amazon FSx", "fsx"),
        ("AWS Backup", "backup"),
        ("AWS Storage Gateway", "storagegateway"),
    ],
    "Database": [
        ("Amazon RDS", "rds"),
        ("Amazon Aurora", "rds"),
        ("Amazon DynamoDB", "dynamodb"),
        ("Amazon Redshift", "redshift"),
        ("Amazon ElastiCache", "elasticache"),
        ("Amazon DocumentDB", "docdb"),
        ("Amazon Neptune", "neptune"),
    ],
    "Containers & Orchestration": [
        ("Amazon ECS", "ecs"),
        ("Amazon EKS", "eks"),
        ("AWS Fargate", "fargate"),
        ("AWS App Runner", "apprunner"),
        ("Amazon ECR", "ecr"),
    ],
    "Serverless & Event-driven": [
        ("AWS Lambda", "lambda"),
        ("Amazon EventBridge", "events"),
        ("AWS Step Functions", "states"),
        ("Amazon SNS", "sns"),
        ("Amazon SQS", "sqs"),
    ],
    "Application Integration": [
        ("Amazon API Gateway", "apigateway"),
        ("AWS AppSync", "appsync"),
        ("Amazon MQ", "mq"),
        ("Amazon MSK", "kafka"),  # boto3: kafka / kafka-cluster, etc.
        ("AWS App Mesh", "appmesh"),
    ],
    "Analytics": [
        ("Amazon Kinesis Data Streams", "kinesis"),
        ("Amazon Kinesis Data Firehose", "firehose"),
        ("Amazon Kinesis Data Analytics", "kinesisanalytics"),
        ("AWS Glue", "glue"),
        ("Amazon Athena", "athena"),
        ("Amazon EMR", "emr"),
        ("Amazon OpenSearch Service", "opensearch"),  # or "es"
        ("Amazon QuickSight", "quicksight"),
        ("AWS Lake Formation", "lakeformation"),
    ],
    "AI & Machine Learning": [
        ("Amazon SageMaker", "sagemaker"),
        ("Amazon Comprehend", "comprehend"),
        ("Amazon Rekognition", "rekognition"),
        ("Amazon Textract", "textract"),
        ("Amazon Transcribe", "transcribe"),
        ("Amazon Polly", "polly"),
        ("Amazon Translate", "translate"),
        ("Amazon Bedrock", "bedrock"),  # placeholder
    ],
    "Security & Identity": [
        ("AWS Identity and Access Management (IAM)", "iam"),
        ("AWS Key Management Service (KMS)", "kms"),
        ("AWS Secrets Manager", "secretsmanager"),
        ("AWS Shield", "shield"),
        ("AWS WAF", "waf"),
        ("Amazon GuardDuty", "guardduty"),
        ("AWS Security Hub", "securityhub"),
        ("Amazon Macie", "macie"),
    ],
    "Monitoring & Logging": [
        ("Amazon CloudWatch", "cloudwatch"),
        ("AWS CloudTrail", "cloudtrail"),
        ("AWS X-Ray", "xray"),
    ],
    "Management & Governance": [
        ("AWS Config", "config"),
        ("AWS Systems Manager", "ssm"),
        ("AWS Control Tower", "controltower"),
        ("AWS Organizations", "organizations"),
        ("AWS License Manager", "license-manager"),
        ("AWS Trusted Advisor", "trustedadvisor"),
    ],
    "DevOps & Developer Tools": [
        ("AWS CodeCommit", "codecommit"),
        ("AWS CodeBuild", "codebuild"),
        ("AWS CodeDeploy", "codedeploy"),
        ("AWS CodePipeline", "codepipeline"),
        ("AWS CodeArtifact", "codeartifact"),
        ("AWS CloudFormation", "cloudformation"),
    ],
    "Migration & Transfer": [
        ("AWS Database Migration Service", "dms"),
        ("AWS DataSync", "datasync"),
        ("AWS Snowball", "snowball"),
        ("AWS Transfer Family", "transfer"),
    ],
    "Business Applications": [
        ("Amazon WorkSpaces", "workspaces"),
        ("Amazon WorkDocs", "workdocs"),
        ("Amazon WorkMail", "workmail"),
        ("Amazon Connect", "connect"),
        ("Amazon Chime", "chime"),
    ],
    "IoT": [
        ("AWS IoT Core", "iot"),
        ("AWS IoT Greengrass", "greengrass"),
        ("AWS IoT Analytics", "iotanalytics"),
        ("AWS IoT SiteWise", "sitewise"),
        ("AWS IoT Events", "iotevents"),
    ],
    "Media Services": [
        ("AWS Elemental MediaConvert", "mediaconvert"),
        ("AWS Elemental MediaLive", "medialive"),
        ("AWS Elemental MediaPackage", "mediapackage"),
        ("AWS Elemental MediaStore", "mediastore"),
        ("AWS Elemental MediaTailor", "mediatailor"),
    ],
    "Blockchain": [
        ("Amazon Managed Blockchain", "managedblockchain"),
    ],
    "Quantum": [
        ("Amazon Braket", "braket"),
    ],
    "Robotics / AR-VR": [
        ("AWS RoboMaker", "robomaker"),
    ],
}

# Expand the grouped table into one record per service. Every service is
# currently flagged include_fine=True.
FINE_SERVICES = [
    {
        "canonical_service_name": service_name,
        "service_code": service_code,
        "coarse_class": coarse_class,
        "include_fine": True,
    }
    for coarse_class, services in _FINE_SERVICES_BY_COARSE.items()
    for service_name, service_code in services
]


In [15]:
# Tabular views of the coarse classes and of the fine-grained service records.
df_coarse = pd.DataFrame({"coarse_class": COARSE_CLASSES})
df_fine = pd.DataFrame(FINE_SERVICES)

# Sanity checks: every service points at a declared coarse class, and no
# canonical service name is listed twice.
assert set(df_fine["coarse_class"]) <= set(COARSE_CLASSES)
assert not df_fine["canonical_service_name"].duplicated().any()

# canonical service name -> coarse class lookup
# (intended for the later join against unified_taxonomy)
SERVICE_TO_COARSE = dict(
    zip(df_fine["canonical_service_name"], df_fine["coarse_class"])
)


In [16]:
# ----------------------------------------------------------
# 1) Build the combined coarse + fine schema tables
#    (df_fine holds the fine services, df_coarse the coarse classes)
# ----------------------------------------------------------
df_taxonomy = df_fine.copy()

# Stage 2 only keeps the services whose include_fine flag is True.
df_taxonomy_stage2 = (
    df_taxonomy.loc[df_taxonomy["include_fine"].eq(True)]
    .reset_index(drop=True)
)
print("Stage2 fine label 개수:", len(df_taxonomy_stage2))

# One-column frame of coarse classes, ready to import into a labeling tool.
df_taxonomy_stage1 = pd.DataFrame({"coarse_class": COARSE_CLASSES})
print("Stage1 coarse-class 개수:", len(df_taxonomy_stage1))

Stage2 fine label 개수: 100
Stage1 coarse-class 개수: 20


In [17]:
# =====================================
# 2) Service-name normalization (generic)
# =====================================
def normalize_name(text: str) -> str:
    """Return a lower-cased, separator-free form of a service name.

    Steps:
      1. Non-string input (``None``, NaN, numbers, ...) yields ``""``.
      2. Lower-case; ``&`` becomes ``and``; ``/``, ``_`` and ``-`` become
         spaces; the text is split on whitespace.
      3. A leading ``aws`` / ``amazon`` token is dropped when at least one
         more token follows it.
      4. The filler tokens ``service`` / ``services`` are dropped.

    Separators are converted *before* the vendor prefix is stripped, so
    ``"AWS-Lambda"`` and ``"aws_lambda"`` normalize to ``"lambda"`` exactly
    like ``"AWS Lambda"`` does. Stripping first left the prefix in place for
    any name whose prefix was followed by ``-``, ``_`` or ``/``.

    Args:
        text: Raw service / icon name.

    Returns:
        The remaining tokens joined by single spaces (possibly ``""``).
    """
    if not isinstance(text, str):
        return ""

    lowered = text.lower().replace("&", "and")
    for separator in ("/", "_", "-"):
        lowered = lowered.replace(separator, " ")
    tokens = lowered.split()

    # Strip the vendor prefix only when something follows it, so a bare
    # "aws" / "amazon" is preserved.
    if len(tokens) > 1 and tokens[0] in ("aws", "amazon"):
        tokens = tokens[1:]

    drop_words = {"service", "services"}
    return " ".join(token for token in tokens if token not in drop_words)

In [18]:
# ----------------------------------------------------------
# 3) Lookup tables keyed by the lower-cased canonical service name
# ----------------------------------------------------------
SERVICE_TO_COARSE = {
    service_name.lower(): coarse_class
    for service_name, coarse_class in zip(
        df_taxonomy["canonical_service_name"], df_taxonomy["coarse_class"]
    )
}

SERVICE_TO_CODE = {
    service_name.lower(): service_code
    for service_name, service_code in zip(
        df_taxonomy["canonical_service_name"], df_taxonomy["service_code"]
    )
}

# Alphabetically sorted Stage-2 fine label names (original casing).
FINE_LABELS = sorted(df_taxonomy_stage2["canonical_service_name"])

In [19]:
# =====================================
# 4) Rule-based service resolution + coarse mapping helpers
# =====================================
def resolve_service_to_canonical(name: str):
    """Map a raw service / icon name to a key of ``SERVICE_TO_COARSE``.

    First-pass, rule-based resolver for raw names such as those found in
    unified_taxonomy.csv; it runs before any fuzzy matching.

    Matching is done on ``normalize_name`` output, in two passes over the
    keys of ``SERVICE_TO_COARSE`` (in dict order):

      1. exact match of the normalized strings;
      2. the normalized key appears in the normalized input as a run of
         whole tokens.

    Pass 2 compares whole tokens rather than raw substrings. A plain
    substring test lets short keys match inside unrelated words, e.g.
    ``"ecr"`` inside ``"secrets manager"`` or ``"config"`` inside
    ``"appconfig"``, which resolves those names to the wrong service.

    Args:
        name: Raw name to resolve; non-string values resolve to ``None``.

    Returns:
        The matching key exactly as stored in ``SERVICE_TO_COARSE``, or
        ``None`` when nothing matches (left for fuzzy matching or manual
        review).
    """
    norm = normalize_name(name)
    if not norm:
        # Nothing left to match on (non-string or filler-only input).
        return None

    # Normalize every key once per call instead of once per pass.
    candidates = [(key, normalize_name(key)) for key in SERVICE_TO_COARSE]

    # 1) exact match
    for key, key_norm in candidates:
        if key_norm == norm:
            return key

    # 2) partial match on token boundaries. Padding both sides with a space
    #    forces the key to line up with whole tokens of the input; keys that
    #    normalize to "" are skipped because they would match everything.
    padded_norm = f" {norm} "
    for key, key_norm in candidates:
        if key_norm and f" {key_norm} " in padded_norm:
            return key

    return None  # candidate for fuzzy matching or manual review


def get_coarse_from_canonical(canonical: str):
    """Look up the coarse class for a canonical service name.

    The lookup is case-insensitive. ``None`` is passed through as ``None``,
    and a name missing from ``SERVICE_TO_COARSE`` also yields ``None``.
    """
    return None if canonical is None else SERVICE_TO_COARSE.get(canonical.lower())


def get_service_code_from_canonical(canonical: str):
    """Look up the service code for a canonical service name.

    The lookup is case-insensitive. ``None`` is passed through as ``None``,
    and a name missing from ``SERVICE_TO_CODE`` also yields ``None``.
    """
    return None if canonical is None else SERVICE_TO_CODE.get(canonical.lower())


print("스키마 통합/정규화 유틸 준비 완료.")

스키마 통합/정규화 유틸 준비 완료.


In [20]:
# Smoke test: resolve one raw icon file name and show what it maps to.
raw_name = "Amazon EC2_Instance_48.svg"
canonical = resolve_service_to_canonical(raw_name)
print(f"Canonical: {canonical}")
print(f"Coarse: {get_coarse_from_canonical(canonical)}")
print(f"Service Code: {get_service_code_from_canonical(canonical)}")

Canonical: amazon ec2
Coarse: Compute
Service Code: ec2


In [None]:
# Load the unified taxonomy and attach the rule-based mapping columns.
# assign() evaluates its keyword arguments in order, so the two lookups can
# read the freshly created "canonical_mapped" column.
df_unified = pd.read_csv("data/outputs/unified/aws_unified_taxonomy.csv").assign(
    canonical_mapped=lambda frame: frame["original_service_name"].apply(
        resolve_service_to_canonical
    ),
    coarse_mapped=lambda frame: frame["canonical_mapped"].apply(
        get_coarse_from_canonical
    ),
    service_code_mapped=lambda frame: frame["canonical_mapped"].apply(
        get_service_code_from_canonical
    ),
)

df_unified.head()

Unnamed: 0,canonical_service_name,original_service_name,service_code,group,category,type,source,file_path,zip_path,confidence,normalization_method,regions,main_resource_example,secondary_examples,canonical_mapped,coarse_mapped,service_code_mapped
0,IoT_LoRaWAN Protocol_48.svg,AWS IoT_LoRaWAN Protocol_48.svg,iot_lorawanprotocol_48.svg,IoT,,icon,icon_mapping,Resource-Icons_02072025/Res_IoT/Res_AWS-IoT_Lo...,Resource-Icons_02072025/Res_IoT/Res_AWS-IoT_Lo...,1.0,icon_mapping,,,,,,
1,s_IoT_LoRaWAN Protocol_48.svg,s_AWS IoT_LoRaWAN Protocol_48.svg,s_iot_lorawanprotocol_48.svg,IoT,,icon,icon_mapping,__MACOSX/Resource-Icons_02072025/Res_IoT/._Res...,__MACOSX/Resource-Icons_02072025/Res_IoT/._Res...,1.0,icon_mapping,,,,,,
2,IoT Greengrass_Component_48.svg,AWS IoT Greengrass_Component_48.svg,iotgreengrass_component_48.svg,IoT,,icon,icon_mapping,Resource-Icons_02072025/Res_IoT/Res_AWS-IoT-Gr...,Resource-Icons_02072025/Res_IoT/Res_AWS-IoT-Gr...,1.0,icon_mapping,,,,aws iot greengrass,IoT,greengrass
3,s_IoT Greengrass_Component_48.svg,s_AWS IoT Greengrass_Component_48.svg,s_iotgreengrass_component_48.svg,IoT,,icon,icon_mapping,__MACOSX/Resource-Icons_02072025/Res_IoT/._Res...,__MACOSX/Resource-Icons_02072025/Res_IoT/._Res...,1.0,icon_mapping,,,,aws iot greengrass,IoT,greengrass
4,IoT_Thing_Coffee Pot_48.svg,AWS IoT_Thing_Coffee Pot_48.svg,iot_thing_coffeepot_48.svg,IoT,,icon,icon_mapping,Resource-Icons_02072025/Res_IoT/Res_AWS-IoT_Th...,Resource-Icons_02072025/Res_IoT/Res_AWS-IoT_Th...,1.0,icon_mapping,,,,,,


In [None]:
# Label lists for Stage-1 / Stage-2 annotation, taken directly from the
# coarse class list and the sorted fine label list.
STAGE1_CLASSES, STAGE2_CLASSES = COARSE_CLASSES, FINE_LABELS

print(STAGE1_CLASSES)
# Preview only the first ten fine labels.
print(STAGE2_CLASSES[0:10])


['Compute', 'Networking', 'Storage', 'Database', 'Containers & Orchestration', 'Serverless & Event-driven', 'Application Integration', 'Analytics', 'AI & Machine Learning', 'Security & Identity', 'Monitoring & Logging', 'Management & Governance', 'DevOps & Developer Tools', 'Migration & Transfer', 'Business Applications', 'IoT', 'Media Services', 'Blockchain', 'Quantum', 'Robotics / AR-VR']
['AWS App Mesh', 'AWS App Runner', 'AWS AppSync', 'AWS Backup', 'AWS Batch', 'AWS Cloud Map', 'AWS CloudFormation', 'AWS CloudTrail', 'AWS CodeArtifact', 'AWS CodeBuild']


In [25]:
# Persist the label lists, one class per line (no trailing newline).
# The encoding is pinned to UTF-8 so the files do not depend on the
# platform's default text encoding.
with open("stage1_classes.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(STAGE1_CLASSES))

with open("stage2_classes.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(STAGE2_CLASSES))

In [28]:
# https://aws.amazon.com/ko/architecture/icons/
import zipfile

# Collect every zip archive in data/ whose file name contains "Asset-Package".
# os.listdir() returns entries in arbitrary order, so sort for reproducible
# output.
asset_zip_files = sorted(
    fname for fname in os.listdir("data")
    if "Asset-Package" in fname and fname.lower().endswith(".zip")
)

print("Asset-Package zip files in data directory:")
for zip_file in asset_zip_files:
    print(zip_file)

for zip_file in asset_zip_files:
    # Drop only the final extension, whatever its case. The filter above is
    # case-insensitive, so a case-sensitive replace(".zip", "") would leave
    # "X.ZIP" unchanged (extract dir == archive path) and would also strip a
    # ".zip" occurring in the middle of the name.
    extract_dir = os.path.join("data", os.path.splitext(zip_file)[0])
    with zipfile.ZipFile(os.path.join("data", zip_file), "r") as zip_ref:
        zip_ref.extractall(extract_dir)
    print(f"Extracted {zip_file} to {extract_dir}")

Asset-Package zip files in data directory:
Asset-Package_02072025.dee42cd0a6eaacc3da1ad9519579357fb546f803.zip
Extracted Asset-Package_02072025.dee42cd0a6eaacc3da1ad9519579357fb546f803.zip to data/Asset-Package_02072025.dee42cd0a6eaacc3da1ad9519579357fb546f803


In [31]:
!ls -l data/Asset-Package*

-rw-rw-r-- 1 wsm wsm 13456941 11월 28 17:26 data/Asset-Package_02072025.dee42cd0a6eaacc3da1ad9519579357fb546f803.zip

data/Asset-Package_02072025.dee42cd0a6eaacc3da1ad9519579357fb546f803:
합계 20
drwxrwxr-x  2 wsm wsm 4096 11월 28 17:27 Architecture-Group-Icons_02072025
drwxrwxr-x 27 wsm wsm 4096 11월 28 17:27 Architecture-Service-Icons_02072025
drwxrwxr-x  6 wsm wsm 4096 11월 28 17:27 Category-Icons_02072025
drwxrwxr-x 23 wsm wsm 4096 11월 28 17:27 Resource-Icons_02072025
drwxrwxr-x  6 wsm wsm 4096 11월 28 17:27 __MACOSX


In [35]:
!tree -L 3 data/Asset-Package* | tee tree.txt

[01;34mdata/Asset-Package_02072025.dee42cd0a6eaacc3da1ad9519579357fb546f803[0m
├── [01;34mArchitecture-Group-Icons_02072025[0m
│   ├── [01;35mAWS-Account_32.png[0m
│   ├── [01;35mAWS-Account_32.svg[0m
│   ├── [01;35mAWS-Cloud-logo_32.png[0m
│   ├── [01;35mAWS-Cloud-logo_32.svg[0m
│   ├── [01;35mAWS-Cloud-logo_32_Dark.png[0m
│   ├── [01;35mAWS-Cloud-logo_32_Dark.svg[0m
│   ├── [01;35mAWS-Cloud_32.png[0m
│   ├── [01;35mAWS-Cloud_32.svg[0m
│   ├── [01;35mAWS-Cloud_32_Dark.png[0m
│   ├── [01;35mAWS-Cloud_32_Dark.svg[0m
│   ├── [01;35mAWS-IoT-Greengrass-Deployment_32.png[0m
│   ├── [01;35mAWS-IoT-Greengrass-Deployment_32.svg[0m
│   ├── [01;35mAuto-Scaling-group_32.png[0m
│   ├── [01;35mAuto-Scaling-group_32.svg[0m
│   ├── [01;35mCorporate-data-center_32.png[0m
│   ├── [01;35mCorporate-data-center_32.svg[0m
│   ├── [01;35mEC2-instance-contents_32.png[0m
│   ├── [01;35mEC2-instance-contents_32.svg[0m
│   ├── [01;35mPrivate-subnet_32.png[0m
│   ├── [0

In [36]:
# 아이콘 스캔 + 서비스 매핑
from pathlib import Path
import pandas as pd
import re

# Auto-detect the extracted asset root.
# The glob pattern also matches the downloaded "Asset-Package*.zip" archive
# that sits next to the extracted folder, so keep directories only; sorting
# makes the choice deterministic when several packages are present.
data_root = Path("data")
asset_roots = sorted(
    candidate for candidate in data_root.glob("Asset-Package*")
    if candidate.is_dir()
)
assert len(asset_roots) > 0, "No Asset-Package* directory found"
asset_root = asset_roots[0]
print("Asset root:", asset_root)

# Icon folders, named after the real layout of the 02072025 asset package.
SERVICE_ICON_ROOT  = asset_root / "Architecture-Service-Icons_02072025"
RESOURCE_ICON_ROOT = asset_root / "Resource-Icons_02072025"
CATEGORY_ICON_ROOT = asset_root / "Category-Icons_02072025"
GROUP_ICON_ROOT    = asset_root / "Architecture-Group-Icons_02072025"

# Report which of the expected folders are actually present.
print("Service icons:", SERVICE_ICON_ROOT.exists())
print("Resource icons:", RESOURCE_ICON_ROOT.exists())
print("Category icons:", CATEGORY_ICON_ROOT.exists())
print("Group icons:", GROUP_ICON_ROOT.exists())

# ---------------------------------------------
# Utilities
# ---------------------------------------------
# Trailing pixel-size marker of an icon file stem, e.g. "_48" or "_64".
SIZE_SUFFIX_RE = re.compile(r"_(16|24|32|48|64|128)$")


def extract_name_from_stem(stem: str) -> str:
    """Turn an icon file stem into a space-separated candidate name.

    A trailing size marker (``_16`` ... ``_128``) is removed, then every run
    of underscores, hyphens and whitespace is collapsed into a single space
    and the result is trimmed.
    """
    without_size = SIZE_SUFFIX_RE.sub("", stem)
    # Splitting on separator runs and dropping the empty edge pieces is the
    # same as replacing separators with spaces, squeezing and stripping.
    words = [piece for piece in re.split(r"[\s_-]+", without_size) if piece]
    return " ".join(words)

def iter_icons(root: Path, icon_type: str):
    """Yield one record per ``*.svg`` file found anywhere below ``root``.

    Nothing is yielded when ``root`` does not exist. Each record carries the
    given ``icon_type``, the file path relative to the module-level
    ``asset_root``, the file name, its stem, the name of its parent
    directory, and the candidate name derived from the stem.
    """
    if root.exists():
        for svg_path in root.rglob("*.svg"):
            relative_path = svg_path.relative_to(asset_root)
            yield dict(
                icon_type=icon_type,
                file_path=str(relative_path),
                file_name=svg_path.name,
                stem=svg_path.stem,
                dir=svg_path.parent.name,
                candidate_name=extract_name_from_stem(svg_path.stem),
            )

# ---------------------------------------------
# Scan icons following the actual AWS asset folder structure
# ---------------------------------------------
# Roots are visited in this fixed order: service, resource, category, group.
rows = [
    icon_row
    for icon_root, icon_type in (
        (SERVICE_ICON_ROOT, "service"),
        (RESOURCE_ICON_ROOT, "resource"),
        (CATEGORY_ICON_ROOT, "category"),
        (GROUP_ICON_ROOT, "group"),
    )
    for icon_row in iter_icons(icon_root, icon_type)
]

df_icons = pd.DataFrame(rows)
print("Total icons scanned:", len(df_icons))
df_icons.head()


Asset root: data/Asset-Package_02072025.dee42cd0a6eaacc3da1ad9519579357fb546f803
Service icons: True
Resource icons: True
Category icons: True
Group icons: True
Total icons scanned: 1882


Unnamed: 0,icon_type,file_path,file_name,stem,dir,candidate_name
0,service,Architecture-Service-Icons_02072025/Arch_Gener...,Arch_AWS-Marketplace_Dark_64.svg,Arch_AWS-Marketplace_Dark_64,64,Arch AWS Marketplace Dark
1,service,Architecture-Service-Icons_02072025/Arch_Gener...,Arch_AWS-Marketplace_Light_64.svg,Arch_AWS-Marketplace_Light_64,64,Arch AWS Marketplace Light
2,service,Architecture-Service-Icons_02072025/Arch_Gener...,Arch_AWS-Marketplace_Dark_16.svg,Arch_AWS-Marketplace_Dark_16,16,Arch AWS Marketplace Dark
3,service,Architecture-Service-Icons_02072025/Arch_Gener...,Arch_AWS-Marketplace_Light_16.svg,Arch_AWS-Marketplace_Light_16,16,Arch AWS Marketplace Light
4,service,Architecture-Service-Icons_02072025/Arch_Gener...,Arch_AWS-Marketplace_Dark_32.svg,Arch_AWS-Marketplace_Dark_32,32,Arch AWS Marketplace Dark


In [38]:
# Resolve every icon's candidate name, then derive the coarse class and the
# service code from the resolved name.
df_icons["canonical_service_name"] = df_icons["candidate_name"].apply(
    resolve_service_to_canonical
)
for target_column, lookup in (
    ("coarse_class", get_coarse_from_canonical),
    ("service_code_mapped", get_service_code_from_canonical),
):
    df_icons[target_column] = df_icons["canonical_service_name"].apply(lookup)

# An icon counts as matched when a canonical service name was found for it.
df_icons["matched"] = ~df_icons["canonical_service_name"].isna()

# Optional preview of the first 20 rows
display(df_icons.head(20))

# Save the final result as CSV
df_icons.to_csv("coarse20_icons_result.csv", index=False)

Unnamed: 0,icon_type,file_path,file_name,stem,dir,candidate_name,canonical_service_name,coarse_class,service_code_mapped,matched
0,service,Architecture-Service-Icons_02072025/Arch_Gener...,Arch_AWS-Marketplace_Dark_64.svg,Arch_AWS-Marketplace_Dark_64,64,Arch AWS Marketplace Dark,,,,False
1,service,Architecture-Service-Icons_02072025/Arch_Gener...,Arch_AWS-Marketplace_Light_64.svg,Arch_AWS-Marketplace_Light_64,64,Arch AWS Marketplace Light,,,,False
2,service,Architecture-Service-Icons_02072025/Arch_Gener...,Arch_AWS-Marketplace_Dark_16.svg,Arch_AWS-Marketplace_Dark_16,16,Arch AWS Marketplace Dark,,,,False
3,service,Architecture-Service-Icons_02072025/Arch_Gener...,Arch_AWS-Marketplace_Light_16.svg,Arch_AWS-Marketplace_Light_16,16,Arch AWS Marketplace Light,,,,False
4,service,Architecture-Service-Icons_02072025/Arch_Gener...,Arch_AWS-Marketplace_Dark_32.svg,Arch_AWS-Marketplace_Dark_32,32,Arch AWS Marketplace Dark,,,,False
5,service,Architecture-Service-Icons_02072025/Arch_Gener...,Arch_AWS-Marketplace_Light_32.svg,Arch_AWS-Marketplace_Light_32,32,Arch AWS Marketplace Light,,,,False
6,service,Architecture-Service-Icons_02072025/Arch_Gener...,Arch_AWS-Marketplace_Light_48.svg,Arch_AWS-Marketplace_Light_48,48,Arch AWS Marketplace Light,,,,False
7,service,Architecture-Service-Icons_02072025/Arch_Gener...,Arch_AWS-Marketplace_Dark_48.svg,Arch_AWS-Marketplace_Dark_48,48,Arch AWS Marketplace Dark,,,,False
8,service,Architecture-Service-Icons_02072025/Arch_Quant...,Arch_Amazon-Braket_64.svg,Arch_Amazon-Braket_64,64,Arch Amazon Braket,amazon braket,Quantum,braket,True
9,service,Architecture-Service-Icons_02072025/Arch_Quant...,Arch_Amazon-Braket_16.svg,Arch_Amazon-Braket_16,16,Arch Amazon Braket,amazon braket,Quantum,braket,True
