# Reorganize COCO Dataset into Category Folders

This notebook reads a COCO-format annotations JSON and reorganizes all images into subfolders named after each category.

**Instructions:**
- Update the `dataset_dir` to point to your `rc40cocodataset` folder.
- (Optional) Set `out_root` to a separate location, otherwise images will be copied into subfolders of `dataset_dir`.


In [None]:
import os
import json
import shutil
import argparse


: 

In [None]:
# Path to the root of your COCO dataset
# e.g., "/home/user/data/rc40cocodataset"
dataset_dir = ".././rc40cocodataset"

# Name of the annotation file, looked up under <dataset_dir>/annotations/
coco_file = "fixed_coco.json"

# Optional: separate output directory
# If left as None, category folders will be created under dataset_dir
out_root = ".././rc40-pet-can-v1"

# Apply the fallback described above. Without it, a None value would reach
# os.path.join() in the later cells and raise a TypeError.
if out_root is None:
    out_root = dataset_dir

In [None]:
# Resolve the two locations this notebook reads from inside the dataset root:
# the image folder and the annotation JSON.
img_dir = os.path.join(dataset_dir, "images")
ann_path = os.path.join(dataset_dir, "annotations", coco_file)

# Fail fast with a descriptive error when the dataset layout is incomplete.
# The annotation file is checked first, then the image folder.
if not os.path.isfile(ann_path):
    raise FileNotFoundError(f"Annotation file not found: {ann_path}")

if not os.path.isdir(img_dir):
    raise NotADirectoryError(f"Images folder not found: {img_dir}")

# Parse the annotation JSON into a plain Python object.
with open(ann_path, encoding='utf-8') as ann_file:
    coco = json.load(ann_file)

In [None]:
# Build mappings: category_id -> name, image_id -> filename
cat_map = {c['id']: c['name'] for c in coco.get('categories', [])}
img_map = {img['id']: img['file_name'] for img in coco.get('images', [])}

# %%
# Create subfolders for each category
for cat_id, cat_name in cat_map.items():
    folder = os.path.join(out_root, cat_name)
    os.makedirs(folder, exist_ok=True)


In [None]:
# %%
# Copy images into their category folders.
#
# Every annotation links one image to one category, so an image annotated with
# several categories is copied into each matching folder. Files that already
# exist at the destination are never overwritten, so the cell can be re-run.
copied = 0
missing = 0   # (file, category) pairs whose source image was not found
seen = set()  # (file_name, cat_name) pairs already handled

for ann in coco.get('annotations', []):
    img_id = ann['image_id']
    cat_id = ann['category_id']
    file_name = img_map.get(img_id)
    cat_name = cat_map.get(cat_id)

    # Skip annotations that point at an unknown image or category.
    if not file_name or not cat_name:
        continue

    # An image usually has many annotations of the same category; handle each
    # (file, category) pair once instead of repeating the filesystem checks.
    pair = (file_name, cat_name)
    if pair in seen:
        continue
    seen.add(pair)

    src = os.path.join(img_dir, file_name)
    dst = os.path.join(out_root, cat_name, file_name)

    if not os.path.isfile(src):
        missing += 1
        continue
    if os.path.exists(dst):
        continue

    # If file_name contains a subdirectory, the parent of dst does not exist
    # yet and copy2 would fail; create it first.
    os.makedirs(os.path.dirname(dst), exist_ok=True)
    shutil.copy2(src, dst)
    copied += 1

print(f"✅ Completed: {copied} images copied into {len(cat_map)} category folders.")
if missing:
    # Report skipped files instead of dropping them silently.
    print(f"⚠️ Skipped {missing} image/category pairs: source file not found under {img_dir}.")
