In [1]:
# Convert images + annotations.json into YOLO training format

import json
from pathlib import Path
from shutil import copyfile

# Paths
# data_path is the source dataset root (images/ directory plus annotations.json);
# train_path is the destination root that receives images/ and labels/.
data_path = Path("/home/jupyter/novice/cv")
train_path = Path("/home/jupyter/tilai-bjjsql/cv/src/train")
images_in = data_path / "images"
images_out = train_path / "images"
labels_out = train_path / "labels"

# Create output directories if they do not exist yet
images_out.mkdir(parents=True, exist_ok=True)
labels_out.mkdir(parents=True, exist_ok=True)

# Load COCO annotations.
# The file is decoded explicitly as UTF-8 so parsing does not depend on the
# platform's default locale encoding.
with open(data_path / "annotations.json", encoding="utf-8") as f:
    coco = json.load(f)

# Index image records by their id so each annotation can look up its image
img_info = {img["id"]: img for img in coco["images"]}

# Track which images we've already copied (also used for the final count)
copied_images = set()

# Convert annotations to YOLO format and copy images.
#
# Label lines are collected per label file first and each file is then written
# exactly once in "w" mode. Appending one line per annotation ("a" mode) would
# duplicate every label whenever this cell is re-run against an existing
# labels directory, and reopens the same file once per bounding box.
#
# NOTE(review): category_id is written unchanged as the class index. YOLO
# trainers usually expect zero-based class indices -- confirm the ids in
# annotations.json already follow that convention.
labels_by_path = {}
for ann in coco["annotations"]:
    image = img_info[ann["image_id"]]
    file_name = image["file_name"]
    width, height = image["width"], image["height"]

    # bbox is unpacked as (x, y, w, h); x/y are treated as the box's
    # top-left corner when computing the centre below.
    x, y, w, h = ann["bbox"]

    # Convert bbox to YOLO format: centre point and size, each normalised
    # by the image dimensions.
    x_center = (x + w / 2) / width
    y_center = (y + h / 2) / height
    w_norm = w / width
    h_norm = h / height

    # Queue the label line under the label file it belongs to
    label_path = labels_out / (Path(file_name).stem + ".txt")
    labels_by_path.setdefault(label_path, []).append(
        f"{ann['category_id']} {x_center:.6f} {y_center:.6f} {w_norm:.6f} {h_norm:.6f}\n"
    )

    # Copy image if not already done
    if file_name not in copied_images:
        copyfile(images_in / file_name, images_out / file_name)
        copied_images.add(file_name)

# Write every label file once, replacing any output left by a previous run
for label_path, lines in labels_by_path.items():
    with open(label_path, "w") as f:
        f.writelines(lines)

print(f"✅ Extracted {len(copied_images)} images and labels to {train_path}")

✅ Extracted 19253 images and labels to /home/jupyter/tilai-bjjsql/cv/src/train
