# Label Studio 스키마 생성

Label Studio용 라벨 스키마 XML 생성

**원본 스크립트**: `generate_aws_icon_schemas.py`


AWS icon canonical schema generator

입력
- aws_icons_fine_dataset.yaml  : YOLO용 기존 data.yaml (class index -> name)
- labels_fine.csv              : canonical_service_name, coarse_class 포함 CSV

출력
- label_studio_aws_icons.xml   : Label Studio용 라벨 스키마 (RectangleLabels)
- aws_icons_fine_dataset.out.yaml : YOLO data.yaml (동일 인덱스 유지)
- aws_icons_label_mapping.json : fine/coarse/index 매핑 JSON


In [None]:
import argparse
import json
import os
from collections import defaultdict
from typing import Dict, List
from pathlib import Path

import pandas as pd
import yaml


# Maps each coarse category name to the hex color used as the Label Studio
# label background. Categories missing from this table fall back to
# "#9E9E9E" inside build_label_studio_xml.
COARSE_COLOR_MAP: Dict[str, str] = {
    "Compute": "#FF9800",
    "Storage": "#4CAF50",
    "Database": "#1E88E5",
    "Networking": "#7E57C2",
    "Security & Identity": "#C62828",
    "Application Integration": "#FF7043",
    "Analytics": "#0288D1",
    "DevOps & Developer Tools": "#00897B",
    "Management & Governance": "#00695C",
    "Monitoring & Logging": "#AD1457",
    "Migration & Transfer": "#5D4037",
    "AI & Machine Learning": "#512DA8",
    "Containers & Orchestration": "#EF6C00",
    "Business Applications": "#7CB342",
    "Blockchain": "#5C6BC0",
    "IoT": "#6D4C41",
    "Quantum": "#9C27B0",
    "Robotics / AR-VR": "#FFB300",
    "Serverless & Event-driven": "#F4511E",
}




In [None]:
def check_file_exists(path: str, description: str = ""):
    """Return the resolved path of an existing regular file.

    Args:
        path: Location of the file to verify.
        description: Optional hint appended in parentheses to the error
            message when the file is missing.

    Returns:
        The resolved ``Path`` of the file.

    Raises:
        FileNotFoundError: If ``path`` is not an existing regular file.
    """
    candidate = Path(path)
    if candidate.is_file():
        return candidate.resolve()

    # Only mention the description when the caller supplied one.
    hint = f"  ({description})" if description else ""
    raise FileNotFoundError(
        f"[ERROR] 파일이 존재하지 않습니다: {candidate}" + hint
    )


def load_yaml(path: str) -> dict:
    """Read the UTF-8 file at ``path`` and return what ``yaml.safe_load`` parses from it."""
    with Path(path).open(mode="r", encoding="utf-8") as stream:
        document = yaml.safe_load(stream)
    return document


def build_label_studio_xml(
    fine_names: List[str],
    fine_to_coarse: Dict[str, str],
    coarse_color_map: Dict[str, str],
) -> str:
    """Build the Label Studio labeling config (Image + RectangleLabels).

    Labels are grouped by coarse category (categories in sorted order, label
    names sorted within each category). Each group is preceded by an XML
    comment carrying the category name and followed by a blank line.

    Args:
        fine_names: Fine-grained label names to emit.
        fine_to_coarse: Maps every fine label name to its coarse category.
        coarse_color_map: Maps a coarse category to the ``background`` color
            of its labels; unknown categories use ``"#9E9E9E"``.

    Returns:
        The XML document as a single newline-joined string.

    Raises:
        KeyError: If a name in ``fine_names`` is missing from
            ``fine_to_coarse``.
    """
    # Imported locally so this function does not depend on a module-level import.
    from xml.sax.saxutils import escape

    by_coarse: Dict[str, List[str]] = defaultdict(list)
    for name in fine_names:
        by_coarse[fine_to_coarse[name]].append(name)

    lines: List[str] = [
        "<View>",
        '  <Image name="image" value="$image" zoom="true"/>',
        "",
        '  <RectangleLabels name="label" toName="image">',
    ]

    for coarse in sorted(by_coarse):
        # The color depends only on the category, so look it up once per group.
        color = coarse_color_map.get(coarse, "#9E9E9E")
        lines.append(f"    <!-- {coarse} -->")
        for name in sorted(by_coarse[coarse]):
            # Escape &, < and > as well as the double quote; escaping only the
            # quote would produce malformed XML for names such as "A & B".
            value = escape(name, {'"': "&quot;"})
            lines.append(f'    <Label value="{value}" background="{color}"/>')
        lines.append("")

    lines.extend(["  </RectangleLabels>", "", "</View>"])

    return "\n".join(lines)




In [None]:
def build_yolo_yaml(orig_yaml: dict) -> dict:
    """Rebuild a YOLO data.yaml dict, keeping class indices and names as-is.

    Args:
        orig_yaml: Parsed data.yaml. Must contain ``train``, ``val`` and
            ``names``; ``path`` and ``test`` are optional. ``names`` may be
            an index -> name mapping or a plain sequence of names, in which
            case the position is used as the class index.

    Returns:
        A new dict with keys in the order ``path``, ``train``, ``val``,
        (``test``), ``names``; ``names`` is a dict ordered by class index.

    Raises:
        KeyError: If ``train``, ``val`` or ``names`` is missing.
    """
    names = orig_yaml["names"]
    if isinstance(names, dict):
        names_ordered = {i: names[i] for i in sorted(names)}
    else:
        # List-style names: the list position is the class index.
        names_ordered = dict(enumerate(names))

    out = {
        "path": orig_yaml.get("path", "."),
        "train": orig_yaml["train"],
        "val": orig_yaml["val"],
    }
    if "test" in orig_yaml:
        out["test"] = orig_yaml["test"]
    # Added last so that "names" stays the final key in the dumped YAML.
    out["names"] = names_ordered
    return out


def build_mapping_json(
    yolo_yaml: dict,
    fine_to_coarse: Dict[str, str],
) -> dict:
    """Assemble the fine/coarse/index lookup tables written to the mapping JSON.

    Args:
        yolo_yaml: Dict whose ``names`` entry maps class index -> fine name.
        fine_to_coarse: Maps each fine name to its coarse category.

    Returns:
        A dict with four tables: ``fine_to_id``, ``id_to_fine`` (indices as
        strings), ``fine_to_coarse`` (the argument itself) and
        ``coarse_to_fine`` (fine names sorted within each category).
    """
    names_by_index: Dict[int, str] = yolo_yaml["names"]

    # Group fine names under their coarse category, then order each group.
    grouped: Dict[str, List[str]] = defaultdict(list)
    for fine_name, coarse_name in fine_to_coarse.items():
        grouped[coarse_name].append(fine_name)
    for members in grouped.values():
        members.sort()

    index_by_name: Dict[str, int] = {
        fine_name: index for index, fine_name in names_by_index.items()
    }
    name_by_index_str = {
        str(index): fine_name for index, fine_name in names_by_index.items()
    }

    return {
        "fine_to_id": index_by_name,
        "id_to_fine": name_by_index_str,
        "fine_to_coarse": fine_to_coarse,
        "coarse_to_fine": grouped,
    }




In [None]:
def main(argv=None):
    """Generate the Label Studio XML, YOLO data.yaml and mapping JSON files.

    Reads the YOLO data.yaml and the fine-label CSV, checks that both list
    exactly the same fine label names, and writes the three output files.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``; passing a list allows the
            function to be called programmatically, e.g. from a notebook
            where ``sys.argv`` holds unrelated kernel arguments.

    Raises:
        FileNotFoundError: If an input file does not exist.
        RuntimeError: If the CSV and the YAML disagree on the fine label set.
    """
    parser = argparse.ArgumentParser()
    # Defaults point at the actual file locations.
    parser.add_argument(
        "--yaml",
        default="dataset/icons/aws_icons_fine_dataset.yaml",
        help="YOLO data.yaml 파일 경로",
    )
    parser.add_argument(
        "--labels-fine",
        default="labels_fine.csv",
        help="canonical_service_name, coarse_class 포함 CSV",
    )
    parser.add_argument(
        "--out-xml",
        default="label_studio_aws_icons.xml",
        help="출력 Label Studio XML 경로",
    )
    parser.add_argument(
        "--out-yaml",
        default="aws_icons_fine_dataset.out.yaml",
        help="출력 YOLO data.yaml 경로",
    )
    parser.add_argument(
        "--out-json",
        default="aws_icons_label_mapping.json",
        help="출력 coarse/fine 매핑 JSON 경로",
    )
    args = parser.parse_args(argv)

    # ===== Verify input paths =====
    yaml_path = check_file_exists(args.yaml, "YOLO data.yaml")
    labels_fine_path = check_file_exists(args.labels_fine, "labels_fine.csv")

    # Make sure every output directory exists (create it if missing).
    for out_path in [args.out_xml, args.out_yaml, args.out_json]:
        out_dir = Path(out_path).parent
        # A Path object is always truthy, so only the existence test matters.
        if not out_dir.exists():
            print(f"[INFO] 출력 디렉터리 생성: {out_dir}")
            out_dir.mkdir(parents=True, exist_ok=True)

    # 1) Load inputs
    yolo_in = load_yaml(str(yaml_path))
    df_fine = pd.read_csv(str(labels_fine_path))

    # YOLO data.yaml files may store `names` as a plain list instead of an
    # index -> name mapping; normalize to a mapping so the code below can
    # rely on dict methods.
    raw_names = yolo_in["names"]
    if isinstance(raw_names, (list, tuple)):
        yolo_in = {**yolo_in, "names": dict(enumerate(raw_names))}

    # canonical_service_name -> coarse_class mapping (first row wins on duplicates)
    df_unique = df_fine.drop_duplicates("canonical_service_name")
    fine_to_coarse = (
        df_unique.set_index("canonical_service_name")["coarse_class"].to_dict()
    )

    # Schema consistency check: CSV and YAML must list the same fine labels.
    fine_from_csv = set(fine_to_coarse.keys())
    fine_from_yaml = set(yolo_in["names"].values())
    if fine_from_csv != fine_from_yaml:
        missing_in_yaml = fine_from_csv - fine_from_yaml
        missing_in_csv = fine_from_yaml - fine_from_csv
        raise RuntimeError(
            f"fine label 불일치 발생\n"
            f"- CSV에만 존재: {sorted(missing_in_yaml)}\n"
            f"- YAML에만 존재: {sorted(missing_in_csv)}\n"
            f"입력 YAML 경로: {yaml_path}\n"
            f"입력 CSV 경로: {labels_fine_path}"
        )

    # 2) Write the Label Studio XML
    fine_names_sorted = sorted(fine_from_yaml)
    xml_str = build_label_studio_xml(
        fine_names_sorted,
        fine_to_coarse,
        COARSE_COLOR_MAP,
    )
    with open(args.out_xml, "w", encoding="utf-8") as f:
        f.write(xml_str)

    # 3) Write the YOLO data.yaml
    yolo_out = build_yolo_yaml(yolo_in)
    with open(args.out_yaml, "w", encoding="utf-8") as f:
        yaml.safe_dump(yolo_out, f, sort_keys=False, allow_unicode=True)

    # 4) Write the coarse/fine/index mapping JSON
    mapping = build_mapping_json(yolo_out, fine_to_coarse)
    with open(args.out_json, "w", encoding="utf-8") as f:
        json.dump(mapping, f, indent=2, ensure_ascii=False)

    print(f"[OK] Label Studio XML  : {os.path.abspath(args.out_xml)}")
    print(f"[OK] YOLO data.yaml    : {os.path.abspath(args.out_yaml)}")
    print(f"[OK] Mapping JSON      : {os.path.abspath(args.out_json)}")


# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
