In [1]:
import os
import json

In [2]:
# Two-level style lookup: style category -> {style name -> integer code}.
# Codes run from 1 to 23; 0 is left unused so that encode_style() can return it
# for any (category, style) pair that is not listed here.
# NOTE(review): "핑크" (pink) under "서브컬처" looks like a typo for "펑크" (punk);
# confirm against the labels that actually occur in the dataset.
style_mapping = {
    "트래디셔널": {
        "클래식": 1,
        "프레피": 2,
    },
    "매니시": {
        "매니시": 3,
        "톰보이": 4,
    },
    "페미닌": {
        "페미닌": 5,
        "로맨틱": 6,
        "섹시": 7,
    },
    "에스닉": {
        "히피": 8,
        "웨스턴": 9,
        "오리엔탈": 10,
    },
    "컨템포러리": {
        "모던": 11,
        "소피스트케이티드": 12,
        "아방가르드": 13,
    },
    "내추럴": {
        "컨트리": 14,
        "리조트": 15,
    },
    "젠더플루이드": {
        "젠더리스": 16,
    },
    "스포티": {
        "스포티": 17,
    },
    "서브컬처": {
        "레트로": 18,
        "키치/키덜트": 19,
        "힙합": 20,
        "핑크": 21,
    },
    "캐주얼": {
        "밀리터리": 22,
        "스트리트": 23,
    },
}

In [3]:
def encode_style(style_category, style_name):
    """Return the integer code for a (category, style) pair.

    Args:
        style_category: Outer key of ``style_mapping``.
        style_name: Inner key within that category's sub-dictionary.

    Returns:
        The code stored in ``style_mapping``, or 0 when either the category
        or the style name is not present.
    """
    try:
        return style_mapping[style_category][style_name]
    except KeyError:
        # Unknown category or unknown style inside a known category.
        return 0


In [8]:
# Integer codes for garment sub-categories, colors and materials.
# Codes start at 1; encode_attributes() falls back to 0 for labels that are
# missing from a table.
# Only top_category_mapping, color_mapping and material_mapping are read by
# encode_attributes(); the bottom/outer/dress tables are not referenced
# anywhere else in the visible part of this notebook.
top_category_mapping = {
    "탑": 1,
    "블라우스": 2,
    "티셔츠": 3,
    "니트웨어": 4,
    "셔츠": 5,
    "브라탑": 6,
    "후드티": 7,
}
bottom_category_mapping = {
    "청바지": 1,
    "팬츠": 2,
    "스커트": 3,
    "레깅스": 4,
    "조거팬츠": 5,
}
outer_category_mapping = {
    "코트": 1,
    "재킷": 2,
    "점퍼": 3,
    "패딩": 4,
    "베스트": 5,
    "가디건": 6,
    "집업": 7,
}
dress_category_mapping = {
    "드레스": 1,
    "점프수트": 2,
}
color_mapping = {
    "블랙": 1,
    "화이트": 2,
    "그레이": 3,
    "레드": 4,
    "핑크": 5,
    "오렌지": 6,
    "베이지": 7,
    "브라운": 8,
    "옐로우": 9,
    "그린": 10,
    "카키": 11,
    "실버": 12,
}
material_mapping = {
    "우븐": 1,
    "저지": 2,
    "스웨이드": 3,
    "퍼": 4,
    "무스탕": 5,
}
def encode_attributes(attributes):
    """Translate one labelling record into integer codes.

    Args:
        attributes: Mapping that may contain "카테고리", "색상" and "소재"
            (the latter is iterated, so it is expected to be a list of labels).

    Returns:
        A dict with keys "상위 카테고리", "색상" and "소재"; unknown or missing
        labels are encoded as 0, and "소재" is a list with one code per label.

    NOTE(review): the category is always looked up in ``top_category_mapping``,
    whatever garment type the record belongs to — confirm this is intended.
    """
    category_code = top_category_mapping.get(attributes.get("카테고리", ""), 0)
    color_code = color_mapping.get(attributes.get("색상", ""), 0)

    material_codes = []
    for material_label in attributes.get("소재", []):
        material_codes.append(material_mapping.get(material_label, 0))

    # Extend this dict when more attributes need to be encoded.
    return {
        "상위 카테고리": category_code,
        "색상": color_code,
        "소재": material_codes,
    }


In [None]:
# Directory that receives the processed dataset; change it to match your machine.
output_path = "/Users/dennycheong/YBIGTA/evenT/KFASHION_DATASET/processed"
# exist_ok=True creates the directory (and any missing parents) only when needed,
# without a separate exists() check that could race with another process.
os.makedirs(output_path, exist_ok=True)

# Converts raw K-Fashion label records into one COCO-style dictionary.
def _polygon_to_segmentation(polygon):
    """Flatten one polygon record into COCO's ``[x1, y1, x2, y2, ...]`` list.

    Keys containing "X좌표" / "Y좌표" are grouped by the text that follows the
    axis label (assumed to be the vertex number, e.g. "X좌표12" — TODO confirm
    against the raw labels). Vertices are emitted in numeric order of that
    suffix, and only suffixes that have both an X and a Y value are kept.
    """
    xs, ys = {}, {}
    for key, value in polygon.items():
        if "X좌표" in key:
            xs[key.split("X좌표", 1)[1]] = value
        elif "Y좌표" in key:
            ys[key.split("Y좌표", 1)[1]] = value

    def vertex_order(suffix):
        # Numeric suffixes sort by value so that vertex 2 precedes vertex 10;
        # anything non-numeric is placed after them in plain string order.
        try:
            return (0, int(suffix), suffix)
        except ValueError:
            return (1, 0, suffix)

    flattened = []
    for suffix in sorted(xs.keys() & ys.keys(), key=vertex_order):
        flattened.extend((xs[suffix], ys[suffix]))
    return flattened


def convert_to_coco_format(input_data):
    """Build a COCO-style dictionary from a list of raw label records.

    Args:
        input_data: Iterable of dicts. Each record may carry "이미지 정보"
            (image metadata) and "데이터셋 정보" (rectangles, polygons,
            labelling attributes and style).

    Returns:
        A dict with "info", "licenses", "images", "annotations" and
        "categories". One annotation is produced per non-empty rectangle;
        annotation ids start at 1 and are unique within the returned dict.
        Each annotation also carries the non-standard fields "style" and,
        when labelling data exists for its category, "attributes".

    Notes:
        * Every non-empty polygon of a category is attached to each annotation
          of that category, flattened as interleaved x/y pairs.
        * Attributes are always taken from the first labelling entry of the
          category.
        * Missing "이미지 정보", "데이터셋 상세설명" or an empty "스타일" list
          fall back to defaults instead of raising.
    """
    coco_format = {
        "info": {
            "description": "K-Fashion Dataset to COCO format",
            "version": "1.0",
            "year": 2024,
            "contributor": "evenT",
            "date_created": "2024-11-15"
        },
        "licenses": [],
        "images": [],
        "annotations": [],
        "categories": [
            {"id": 1, "name": "outer", "supercategory": "clothing"},
            {"id": 2, "name": "pants", "supercategory": "clothing"},
            {"id": 3, "name": "dress", "supercategory": "clothing"},
            {"id": 4, "name": "top", "supercategory": "clothing"}
        ]
    }

    # Raw garment-type key -> COCO category id declared above.
    category_mapping = {
        "아우터": 1,  # outer
        "하의": 2,   # pants
        "원피스": 3, # dress
        "상의": 4    # top
    }

    annotation_id = 1
    for item in input_data:
        image_meta = item.get("이미지 정보") or {}
        image_id = image_meta.get("이미지 식별자", -1)  # -1 when the identifier is absent

        if "이미지 정보" in item:
            coco_format["images"].append({
                "id": image_id,
                "file_name": image_meta.get("이미지 파일명", ""),
                "width": image_meta.get("이미지 너비", 0),
                "height": image_meta.get("이미지 높이", 0),
            })

        if "데이터셋 정보" not in item:
            continue

        details = item["데이터셋 정보"].get("데이터셋 상세설명") or {}
        rect_data = details.get("렉트좌표", {})
        polygon_data = details.get("폴리곤좌표", {})
        attributes_data = details.get("라벨링", {})

        # An empty or missing style list is treated like a single empty entry.
        style_entries = details.get("스타일") or [{}]
        style_data = style_entries[0]
        style_encoded = encode_style(
            style_data.get("스타일", ""),
            style_data.get("서브스타일", ""),
        )

        for category_name, category_id in category_mapping.items():
            rects = rect_data.get(category_name)
            if not rects:
                continue

            # Polygons and attributes depend only on the category, so they are
            # prepared once and reused for each rectangle of that category.
            category_segments = []
            for polygon in polygon_data.get(category_name) or []:
                if polygon:
                    segment = _polygon_to_segmentation(polygon)
                    if segment:  # a polygon without coordinate keys carries no shape
                        category_segments.append(segment)

            labelled = attributes_data.get(category_name)

            for rect in rects:
                if not rect:
                    continue

                width = rect.get("가로", 0)
                height = rect.get("세로", 0)
                annotation = {
                    "id": annotation_id,
                    "image_id": image_id,
                    "category_id": category_id,
                    "bbox": [rect.get("X좌표", 0), rect.get("Y좌표", 0), width, height],
                    "area": width * height,
                    # Copy so that annotations never share list objects.
                    "segmentation": [list(segment) for segment in category_segments],
                    "iscrowd": 0,
                    "style": style_encoded
                }

                if labelled:
                    annotation["attributes"] = encode_attributes(labelled[0])

                coco_format["annotations"].append(annotation)
                annotation_id += 1
    return coco_format

In [None]:
# Input directory with the raw test labels and output directory for the
# converted per-image files.
test_path = "/root/evenT/Labels/Test"
output_path = "/root/evenT/p_labels/test"  # Path of where I store the processed samples

# exist_ok=True creates the directory (and any missing parents) only when needed,
# without a separate exists() check that could race with another process.
os.makedirs(output_path, exist_ok=True)

In [None]:
# Convert every raw JSON label file into its own COCO-format JSON file.
# Sorting makes the processing (and log) order reproducible, since
# os.listdir() returns entries in arbitrary order.
for file_name in sorted(os.listdir(test_path)):
    if not file_name.endswith(".json"):  # Only process JSON files
        continue

    input_file = os.path.join(test_path, file_name)
    # Explicit UTF-8 keeps decoding independent of the machine's locale and
    # matches the encoding used for the output below
    # (assumes the label files are UTF-8 — TODO confirm).
    with open(input_file, 'r', encoding='utf-8') as f:
        input_data = json.load(f)

    # The converter expects a list of records, so wrap the single record.
    converted_coco_data = convert_to_coco_format([input_data])

    # Base name without extension, reused in the output file name.
    file_number = os.path.splitext(file_name)[0]

    output_file = os.path.join(output_path, f"modified_{file_number}.json")
    with open(output_file, 'w', encoding='utf-8') as f:
        # ensure_ascii=False keeps any non-ASCII text readable in the output.
        json.dump(converted_coco_data, f, ensure_ascii=False, indent=4)

    print(f"Converted COCO format data saved at: {output_file}")

Converted COCO format data saved at: /root/evenT/p_labels/test/modified_1016530.json
Converted COCO format data saved at: /root/evenT/p_labels/test/modified_1016553.json
Converted COCO format data saved at: /root/evenT/p_labels/test/modified_1016618.json
Converted COCO format data saved at: /root/evenT/p_labels/test/modified_1028690.json
Converted COCO format data saved at: /root/evenT/p_labels/test/modified_1029079.json
Converted COCO format data saved at: /root/evenT/p_labels/test/modified_1029101.json
Converted COCO format data saved at: /root/evenT/p_labels/test/modified_1029473.json
Converted COCO format data saved at: /root/evenT/p_labels/test/modified_1029756.json
Converted COCO format data saved at: /root/evenT/p_labels/test/modified_1030283.json
Converted COCO format data saved at: /root/evenT/p_labels/test/modified_1032387.json
Converted COCO format data saved at: /root/evenT/p_labels/test/modified_1040980.json
Converted COCO format data saved at: /root/evenT/p_labels/test/mo

In [12]:
put_path = "/root/evenT/p_labels/validation"

# Sanity check: number of entries in the processed validation directory.
print(len(os.listdir(put_path)))

1000


In [None]:
# This cell converts every per-image JSON label file and merges the results into a single COCO annotation file
import re

# Where the raw per-image label files live and where the merged file is written.
input_dir = '/root/evenT/Labels/Test'
output_file = '/root/evenT/test_labels.json'

# Skeleton of the merged COCO document: "images" and "annotations" start empty
# and are filled while the label files are processed.
combined_coco = dict(
    info=dict(
        description="K-Fashion Dataset to COCO format",
        version="1.0",
        year=2024,
        contributor="evenT",
        date_created="2024-11-15",
    ),
    licenses=[],
    images=[],
    annotations=[],
    categories=[
        {"id": category_id, "name": category_name, "supercategory": "clothing"}
        for category_id, category_name in enumerate(("outer", "pants", "dress", "top"), start=1)
    ],
)

# Running counter used to give every merged annotation a unique id.
annotation_id = 1

def extract_image_id(file_name):
    """Return the first run of digits in ``file_name`` as an int.

    For a name such as ``1016530.jpg`` this yields ``1016530``. When the name
    contains no digits at all, -1 is returned.
    """
    first_digits = re.search(r'(\d+)', file_name)
    if first_digits is None:
        return -1
    return int(first_digits.group(1))

# Convert each raw label file and merge the result into combined_coco.
# Sorting makes the assigned annotation ids reproducible, since os.listdir()
# returns entries in arbitrary order.
for file_name in sorted(os.listdir(input_dir)):
    if not file_name.endswith('.json'):
        continue

    input_path = os.path.join(input_dir, file_name)
    # Explicit UTF-8 keeps decoding independent of the machine's locale and
    # matches the encoding used for the merged output
    # (assumes the label files are UTF-8 — TODO confirm).
    with open(input_path, 'r', encoding='utf-8') as f:
        input_data = json.load(f)

    # The converter expects a list of records, so wrap the single record.
    converted_data = convert_to_coco_format([input_data])

    # Image ids are replaced by the number found in the image file name.
    # Remember old -> new so the annotations keep pointing at their image.
    image_id_remap = {}
    for image in converted_data["images"]:
        new_image_id = extract_image_id(image["file_name"])
        image_id_remap[image["id"]] = new_image_id
        image["id"] = new_image_id
        combined_coco["images"].append(image)

    for annotation in converted_data["annotations"]:
        # Follow the image id rewrite above; ids without a mapping are kept.
        annotation["image_id"] = image_id_remap.get(
            annotation["image_id"], annotation["image_id"]
        )
        # Per-file annotation ids all start at 1, so renumber them globally.
        annotation["id"] = annotation_id
        combined_coco["annotations"].append(annotation)
        annotation_id += 1

# Save the combined COCO JSON file
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(combined_coco, f, ensure_ascii=False, indent=4)

print(f"Combined COCO-style annotation file created at: {output_file}")

Combined COCO-style annotation file created at: /root/evenT/test_labels.json
