In [2]:
from pycocotools.coco import COCO
import os, sys
import json, random
import shutil
from datetime import datetime
sys.path.append("/home/knuvi/Desktop/song/occlusion-mask-generation/")
from coco_json import initialize_coco_json, save_coco_json
from config import INPUT_PATHS, OUTPUT_PATHS, HYPERPARAMETERS

In [2]:
def split_coco_json(coco_json, train_ratio=0.8, seed=None):
    """Split a COCO-style dataset dict into train and test dataset dicts.

    Images are shuffled and cut at ``int(len(images) * train_ratio)``; every
    annotation follows the image named by its ``image_id``.

    Args:
        coco_json: Dict with ``"images"``, ``"annotations"`` and
            ``"categories"`` entries; ``"info"`` and ``"licenses"`` are optional.
        train_ratio: Fraction of images placed in the train split, in ``[0, 1]``.
        seed: Optional seed. ``None`` (the default) shuffles with the
            module-level ``random`` state; any other value makes the split
            reproducible and leaves the module-level state untouched.

    Returns:
        ``(train_json, test_json)``, each built on ``initialize_coco_json()``
        (assumed to return a dict holding ``"images"`` and ``"annotations"``
        lists -- TODO confirm against ``coco_json.py``).

    Raises:
        ValueError: If ``train_ratio`` is outside ``[0, 1]``.
        KeyError: If an annotation's ``image_id`` matches no image.

    Note:
        ``info``, ``licenses`` and ``categories`` are shared with ``coco_json``
        by reference, not copied.
    """
    # A negative ratio would otherwise slice from the end of the list and a
    # ratio above 1 would silently produce an empty test split.
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be within [0, 1], got {train_ratio!r}")

    # Group annotations by owning image so they travel with it.
    image_to_annotations = {img["id"]: [] for img in coco_json["images"]}
    for ann in coco_json["annotations"]:
        image_to_annotations[ann["image_id"]].append(ann)

    # Shuffle a copy so the caller's image list keeps its order.
    shuffled_images = list(coco_json["images"])
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(shuffled_images)

    split_idx = int(len(shuffled_images) * train_ratio)

    # Build the train split first, then the test split, with identical steps.
    built = []
    for subset in (shuffled_images[:split_idx], shuffled_images[split_idx:]):
        split_json = initialize_coco_json()
        split_json["info"] = coco_json.get("info", {})
        split_json["licenses"] = coco_json.get("licenses", None)
        for img in subset:
            split_json["images"].append(img)
            split_json["annotations"].extend(image_to_annotations[img["id"]])
        split_json["categories"] = coco_json["categories"]
        built.append(split_json)

    train_json, test_json = built
    return train_json, test_json


In [3]:
def split_images_and_annotations(coco_json, train_ratio=0.8, seed=None):
    """Shuffle the images of a COCO-style dict and split them with their annotations.

    Args:
        coco_json: Dict with ``"images"`` (each having an ``"id"``) and
            ``"annotations"`` (each having an ``"image_id"``).
        train_ratio: Fraction of images placed in the train split, in ``[0, 1]``.
            The cut index is ``int(len(images) * train_ratio)``.
        seed: Optional seed. ``None`` (the default) shuffles with the
            module-level ``random`` state; any other value makes the split
            reproducible and leaves the module-level state untouched.

    Returns:
        ``(train_images, test_images, train_annotations, test_annotations)``.
        Annotations are listed in the order of the images they belong to.

    Raises:
        ValueError: If ``train_ratio`` is outside ``[0, 1]``.
        KeyError: If an annotation's ``image_id`` matches no image.
    """
    # A negative ratio would otherwise slice from the end of the list and a
    # ratio above 1 would silently produce an empty test split.
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be within [0, 1], got {train_ratio!r}")

    # Group annotations by owning image so they travel with it.
    image_to_annotations = {img["id"]: [] for img in coco_json["images"]}
    for ann in coco_json["annotations"]:
        image_to_annotations[ann["image_id"]].append(ann)

    # Shuffle a copy so the caller's image list keeps its order.
    shuffled_images = list(coco_json["images"])
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(shuffled_images)

    split_idx = int(len(shuffled_images) * train_ratio)
    train_images = shuffled_images[:split_idx]
    test_images = shuffled_images[split_idx:]

    train_annotations = [ann for img in train_images for ann in image_to_annotations[img["id"]]]
    test_annotations = [ann for img in test_images for ann in image_to_annotations[img["id"]]]

    return train_images, test_images, train_annotations, test_annotations


def save_split_data(coco_json, train_images, test_images, train_annotations, test_annotations, output_paths):
    """Write the train/test COCO JSON files and copy each split's images.

    Every location is taken from ``output_paths`` (previously the directories
    came from the global ``OUTPUT_PATHS`` and the argument was only used as
    the copy source, so passing a different mapping had almost no effect).

    Args:
        coco_json: Source dataset dict; only ``"categories"`` is read here.
        train_images, test_images: Image entries of each split.
        train_annotations, test_annotations: Annotation entries of each split.
        output_paths: Mapping with ``"save_dir"`` (directory holding the source
            images; ``train/`` and ``test/`` are created inside it) and
            ``"json_dir"`` (directory receiving ``train.json`` / ``test.json``).
    """
    save_dir = output_paths["save_dir"]
    json_dir = output_paths["json_dir"]

    # Output locations
    train_image_dir = os.path.join(save_dir, "train")
    test_image_dir = os.path.join(save_dir, "test")
    train_json_path = os.path.join(json_dir, "train.json")
    test_json_path = os.path.join(json_dir, "test.json")
    os.makedirs(train_image_dir, exist_ok=True)
    os.makedirs(test_image_dir, exist_ok=True)
    # An empty json_dir means "current directory"; makedirs("") would raise.
    if json_dir:
        os.makedirs(json_dir, exist_ok=True)

    # Build the two COCO structures
    train_json = initialize_coco_json()
    test_json = initialize_coco_json()

    train_json["images"] = train_images
    train_json["annotations"] = train_annotations
    test_json["images"] = test_images
    test_json["annotations"] = test_annotations
    # Both splits share the full category list of the source dataset.
    train_json["categories"] = test_json["categories"] = coco_json["categories"]

    # Save the JSON files
    save_coco_json(train_json, train_json_path)
    save_coco_json(test_json, test_json_path)

    # Copy the images of each split from save_dir into its sub-directory
    copy_images_to_split_dirs(train_images, train_image_dir, save_dir)
    copy_images_to_split_dirs(test_images, test_image_dir, save_dir)


def copy_images_to_split_dirs(images, target_dir, source_dir):
    """Copy the files named by ``images`` from ``source_dir`` into ``target_dir``.

    Args:
        images: Iterable of dicts, each with a ``"file_name"`` entry that is a
            path relative to ``source_dir`` (it may contain sub-directories).
        target_dir: Destination root; the relative ``file_name`` is preserved
            beneath it and missing parent directories are created.
        source_dir: Directory the images are read from.

    Entries whose source is not an existing regular file are skipped; a single
    summary line is printed so that a split JSON referencing images that were
    never copied does not go unnoticed.
    """
    skipped = []
    for img in images:
        file_name = img["file_name"]
        src_path = os.path.join(source_dir, file_name)
        dest_path = os.path.join(target_dir, file_name)
        if not os.path.isfile(src_path):
            skipped.append(file_name)
            continue
        # file_name may include sub-directories that do not exist under
        # target_dir yet; shutil.copy would fail without them.
        dest_parent = os.path.dirname(dest_path)
        if dest_parent:
            os.makedirs(dest_parent, exist_ok=True)
        shutil.copy(src_path, dest_path)

    if skipped:
        print(f"Warning: {len(skipped)} image(s) missing from {source_dir}; skipped (first: {skipped[0]})")


In [None]:
# Load the source COCO annotation file that the following cells split and check.
file_path = os.path.join("/home/knuvi/Desktop/song/occlusion-mask-generation/data/synthesis/condition3_info_valid", "dataset.json")
# JSON text is UTF-8; pass the encoding explicitly instead of relying on the
# platform default used by open().
with open(file_path, "r", encoding="utf-8") as json_file:
    coco_json = json.load(json_file)
print(f"COCO JSON 파일 로드됨: {file_path}")


COCO JSON 파일 로드됨: /home/knuvi/Desktop/song/occlusion-mask-generation/data/synthesis/amodal_info6/dataset.json


In [5]:
# 중복 확인 코드
image_ids = [image['id'] for image in coco_json['images']]
if len(image_ids) != len(set(image_ids)):
    print("중복된 image_id가 있습니다!")

In [6]:
# Seed the module-level RNG so that re-running the notebook reproduces the same
# train/test split (the split function shuffles with the `random` module).
SPLIT_SEED = 42
random.seed(SPLIT_SEED)

train_images, test_images, train_annotations, test_annotations = split_images_and_annotations(
    coco_json,
    train_ratio=0.8,
)

# Sanity check: the two splits together must account for every image exactly once.
assert len(train_images) + len(test_images) == len(coco_json["images"])

save_split_data(coco_json, train_images, test_images, train_annotations, test_annotations, OUTPUT_PATHS)


COCO JSON 파일 저장됨: /home/knuvi/Desktop/song/occlusion-mask-generation/data/synthesis/amodal_info6/train.json
COCO JSON 파일 저장됨: /home/knuvi/Desktop/song/occlusion-mask-generation/data/synthesis/amodal_info6/test.json


In [10]:
# Smoke test: load a train split with pycocotools. Reaching the print below
# only shows that COCO(...) did not raise while loading and indexing the file.
# NOTE(review): this path is hard-coded and not derived from OUTPUT_PATHS, so it
# may not be the file written by the split cell -- confirm it is the intended one.
train_json_path = "/home/knuvi/Desktop/song/occlusion-mask-generation/data/synthesis/amodal_info5/train.json"
coco = COCO(train_json_path)
print("Valid COCO Format!")

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
Valid COCO Format!


In [6]:
import json

def validate_coco_format(json_path):
    """Check that a JSON file carries the keys this notebook expects of a COCO dataset.

    The check is structural only: the top-level value must be an object with
    ``info``, ``images``, ``annotations`` and ``categories``; the last three
    must be lists of objects that each contain the required keys listed below.
    Validation stops at the first problem, which is printed.

    Args:
        json_path: Path of the JSON file to inspect (read as UTF-8).

    Returns:
        ``True`` when every check passes, ``False`` otherwise.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(json_path, "r", encoding="utf-8") as f:
        coco_data = json.load(f)

    # `key in <str>` is a substring test and `key in <int>` raises TypeError,
    # so the container types must be checked before any membership test.
    if not isinstance(coco_data, dict):
        print(f"Top-level JSON value must be an object, got {type(coco_data).__name__}")
        return False

    required_keys = ["info", "images", "annotations", "categories"]
    missing_keys = [key for key in required_keys if key not in coco_data]
    if missing_keys:
        print(f"Missing keys in COCO JSON: {missing_keys}")
        return False

    # (section, label used in the message, keys every entry must contain),
    # checked in this order.
    entry_rules = [
        ("images", "image", ["id", "width", "height", "file_name"]),
        ("annotations", "annotation",
         ["id", "image_id", "category_id", "bbox", "area", "segmentation", "iscrowd"]),
        ("categories", "category", ["id", "name", "supercategory"]),
    ]
    for section, label, entry_keys in entry_rules:
        entries = coco_data[section]
        if not isinstance(entries, list):
            print(f"'{section}' must be a list, got {type(entries).__name__}")
            return False
        for entry in entries:
            if not isinstance(entry, dict) or not all(key in entry for key in entry_keys):
                print(f"Invalid {label} entry: {entry}")
                return False

    print("COCO JSON format is valid!")
    return True

# Point this at the COCO JSON file you want to check. The call is kept as the
# last expression so the cell displays the returned boolean.
dataset_json_path = "/home/knuvi/Desktop/song/cucumber-image/data/oi_seg/amodal_info4/dataset.json"
validate_coco_format(dataset_json_path)


COCO JSON format is valid!


True

In [16]:
# images의 ID 목록
image_ids = set(img["id"] for img in coco_json["images"])
# annotations의 image_id 목록
annotation_image_ids = set(ann["image_id"] for ann in coco_json["annotations"])

# 차집합 확인
missing_ids = annotation_image_ids - image_ids
if missing_ids:
    print(f"Missing image IDs in 'images': {missing_ids}")
else:
    print("All image IDs in 'annotations' are present in 'images'.")


All image IDs in 'annotations' are present in 'images'.
