In [1]:
import os
import json
import glob
from PIL import Image
from tqdm import tqdm  # optional

In [None]:
# --- Configuration ---
# Pick the dataset split to convert; every path below is derived from it,
# so switching split is a one-word edit instead of (un)commenting path pairs.
SPLIT           = "test"   # one of: "train", "val", "test"

# Folder that holds one sub-directory per split (machine-specific: adjust as needed).
DATA_BASE       = "/home/shardul.junagade/my-work/domain-adaptation-brick-kilns/rfdetr/data"

# Output file prefix for each split; note that "val" is written as "valid_...".
OUTPUT_PREFIX   = {"train": "train", "val": "valid", "test": "test"}

if SPLIT not in OUTPUT_PREFIX:
    raise ValueError(f"Unknown SPLIT {SPLIT!r}; expected one of {sorted(OUTPUT_PREFIX)}.")

DATA_ROOT       = f"{DATA_BASE}/{SPLIT}"
OUTPUT_JSON     = f"../coco_data/{OUTPUT_PREFIX[SPLIT]}_annotations.coco.json"


IMAGE_DIR       = os.path.join(DATA_ROOT, "images")
ANNOT_DIR       = os.path.join(DATA_ROOT, "annfiles")
CATEGORIES      = [  # "name" is matched against the class label of each annotation row
    {"id": 0,  "name": "CFCBK",  "supercategory": "object"},
    {"id": 1,  "name": "FCBK",   "supercategory": "object"},
    {"id": 2,  "name": "Zigzag", "supercategory": "object"},
]

# Fail fast if the inputs are missing. isdir (rather than exists) also rejects
# a regular file that happens to sit at the expected directory path.
if not os.path.isdir(IMAGE_DIR):
    raise FileNotFoundError(f"Image directory {IMAGE_DIR} does not exist.")
if not os.path.isdir(ANNOT_DIR):
    raise FileNotFoundError(f"Annotation directory {ANNOT_DIR} does not exist.")

In [9]:
# --- Helper to map category name to ID ---
cat_name_to_id = {c["name"]: c["id"] for c in CATEGORIES}

# Set to True to drop objects whose difficulty flag is 1.
SKIP_DIFFICULT = False

# Metadata rows that DOTA-style annotation files may carry before the object rows.
_DOTA_HEADER_PREFIXES = ("imagesource:", "gsd:")


def _parse_dota_line(line):
    """Parse one row of a DOTA oriented-bounding-box annotation file.

    Expected layout: ``x1 y1 x2 y2 x3 y3 x4 y4 category [difficulty]``.

    Args:
        line: Raw text line read from the annotation file.

    Returns:
        ``None`` for blank lines and metadata header rows, otherwise a tuple
        ``(coords, category, difficulty)`` where ``coords`` is the list of the
        eight coordinates as floats in file order. ``difficulty`` is 0 when
        the optional tenth field is absent.

    Raises:
        ValueError: If the row has fewer than nine fields, or a coordinate or
            the difficulty flag is not numeric.
    """
    text = line.strip()
    if not text or text.lower().startswith(_DOTA_HEADER_PREFIXES):
        return None
    parts = text.split()
    if len(parts) < 9:
        raise ValueError(f"expected at least 9 fields, got {len(parts)}: {text!r}")
    coords = [float(value) for value in parts[:8]]  # x1,y1,...,x4,y4
    difficulty = int(parts[9]) if len(parts) > 9 else 0
    return coords, parts[8], difficulty


# --- Initialize COCO structure ---
coco = {
    "info": {
        "description": "DOTA to COCO conversion",
        "version": "1.0",
        "year": 2025
    },
    "licenses": [],
    "images": [],
    "annotations": [],
    "categories": CATEGORIES
}

# Running IDs; after the loop they equal the number of annotations / images written.
ann_id = 0
img_id = 0

# --- Process each image and, when present, its annotation file ---

for img_path in tqdm(sorted(glob.glob(os.path.join(IMAGE_DIR, "*.jpg")))):
    file_name = os.path.basename(img_path)
    base = os.path.splitext(file_name)[0]
    ann_path = os.path.join(ANNOT_DIR, base + ".txt")

    # Load image to get size
    with Image.open(img_path) as img:
        width, height = img.size

    # Add image entry (also for images that have no annotation file)
    coco["images"].append({
        "id": img_id,
        "file_name": file_name,
        "width": width,
        "height": height
    })

    if os.path.isfile(ann_path):
        # Read DOTA OBB annotations
        with open(ann_path, 'r') as f:
            for line_no, line in enumerate(f, start=1):
                try:
                    parsed = _parse_dota_line(line)
                except ValueError as err:
                    # Re-raise with the location so a bad row can be found quickly.
                    raise ValueError(f"{ann_path}:{line_no}: {err}") from err
                if parsed is None:
                    continue

                coords, category, difficulty = parsed
                if SKIP_DIFFICULT and difficulty == 1:
                    continue
                if category not in cat_name_to_id:
                    raise KeyError(f"{ann_path}:{line_no}: unknown category {category!r}")

                # Axis-aligned box enclosing the four corners of the oriented box.
                xs = coords[0::2]
                ys = coords[1::2]
                x_min, y_min = min(xs), min(ys)
                x_max, y_max = max(xs), max(ys)
                w = x_max - x_min
                h = y_max - y_min

                coco["annotations"].append({
                    "id": ann_id,
                    "image_id": img_id,
                    "category_id": cat_name_to_id[category],
                    "bbox": [x_min, y_min, w, h],
                    "area": w * h,  # area of the enclosing box, not of the polygon
                    "iscrowd": 0,
                    "segmentation": [coords],  # single polygon
                })
                ann_id += 1

    img_id += 1


  0%|          | 0/15590 [00:00<?, ?it/s]

100%|██████████| 15590/15590 [00:01<00:00, 11109.11it/s]


In [10]:
# --- Save to JSON ---
# Create the destination folder first so the write does not fail merely
# because the folder is missing (e.g. on a fresh checkout).
out_dir = os.path.dirname(OUTPUT_JSON)
if out_dir:  # dirname is "" when OUTPUT_JSON is a bare file name
    os.makedirs(out_dir, exist_ok=True)

with open(OUTPUT_JSON, 'w') as f:
    json.dump(coco, f, indent=2)

print(f"Saved COCO annotations to {OUTPUT_JSON}")

Saved COCO annotations to ../coco_data/test_annotations.coco.json
