In [1]:
import json

def update_category_ids(json_path, output_path, new_category_id):
    """
    Force one category id onto every category and annotation of a COCO-style file.

    Reads ``json_path``, overwrites ``id`` on each entry of ``categories`` and
    ``category_id`` on each entry of ``annotations`` with ``new_category_id``,
    writes the result to ``output_path`` as 4-space-indented JSON, and prints a
    one-line summary. A missing ``categories`` or ``annotations`` key is
    treated as an empty list. The input file itself is not modified unless
    ``output_path`` points at it.

    Args:
        json_path (str): Input JSON annotation file path.
        output_path (str): Output JSON file path to save the updated version.
        new_category_id (int): Category id to assign everywhere (e.g. 0 or 1).

    Warns:
        UserWarning: If the file declares more than one category. All of them
            receive ``new_category_id``, so the output contains duplicate
            category ids.
    """
    import warnings  # local import: only needed for the duplicate-id warning

    # Explicit UTF-8 so loading does not depend on the platform's locale encoding.
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    categories = data.get("categories", [])
    if len(categories) > 1:
        # Behaviour is unchanged (every category still gets the same id), but
        # the resulting duplicate ids are no longer produced silently.
        warnings.warn(
            f"{json_path} declares {len(categories)} categories; all of them "
            f"will be assigned id {new_category_id}, producing duplicate category ids.",
            UserWarning,
            stacklevel=2,
        )

    # Update category ids in "categories"
    for cat in categories:
        cat["id"] = new_category_id

    # Update category ids in "annotations"
    for ann in data.get("annotations", []):
        ann["category_id"] = new_category_id

    # Save updated JSON
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=4)

    print(f"âœ… Updated {json_path} â†’ Saved to {output_path} (category_id = {new_category_id})")


# ---- Example Usage ----
# Non-surgical file -> every category_id becomes 0
update_category_ids(
    json_path="/Users/nebula/Desktop/Projects/Sanket/Data/non surgical 212 zip/annotations/person_keypoints_default.json",
    output_path="/Users/nebula/Desktop/Projects/Sanket/Data/non surgical 212 zip/annotations/person_keypoints_updated.json",
    new_category_id=0,
)

# Surgical file -> every category_id becomes 1
# update_category_ids("surgical.json", "surgical_updated.json", 1)


âœ… Updated /Users/nebula/Desktop/Projects/Sanket/Data/non surgical 212 zip/annotations/person_keypoints_default.json â†’ Saved to /Users/nebula/Desktop/Projects/Sanket/Data/non surgical 212 zip/annotations/person_keypoints_updated.json (category_id = 0)


In [2]:
import json
import os
from tqdm import tqdm

def merge_annotations_from_folder(
    original_json_path,
    new_image_dir,
    output_json_path
):
    """
    Append COCO-style images and annotations from one JSON file into another,
    restricted to images that physically exist in ``new_image_dir``.

    Behaviour:
      * ``output_json_path`` is loaded if it already exists; otherwise a new
        structure is started, seeded with the source's ``licenses`` / ``info``.
      * Categories from the source are appended only when their ``name`` is not
        already present in the destination.
      * A source image is copied when its base file name (compared
        case-insensitively) is a ``.jpg`` / ``.jpeg`` / ``.png`` file in
        ``new_image_dir`` and is not already listed in the destination. Copied
        images get a fresh sequential ``id`` and a ``file_name`` of
        ``<last component of new_image_dir>/<base file name>``.
      * Annotations whose ``image_id`` refers to a copied image are appended
        with a fresh sequential ``id`` and the remapped ``image_id``;
        ``category_id`` is copied unchanged.
      * The result is written to ``output_json_path`` (4-space indent) and a
        short summary is printed.

    Note:
        Categories are de-duplicated by name only, and category ids are never
        reconciled between source and destination. If the two files use
        different ids for the same name, or the destination already contains
        duplicate ids, appended annotations may reference a ``category_id``
        that has no matching entry in ``categories``.

    Args:
        original_json_path (str): Source COCO-style JSON to copy from.
        new_image_dir (str): Folder whose image files decide what is copied.
            A trailing path separator is tolerated.
        output_json_path (str): Destination JSON (created or extended). May be
            a bare file name, in which case it is written to the current
            working directory.

    Raises:
        KeyError: If the source JSON has no ``images`` or ``annotations`` key.
        OSError: If ``new_image_dir`` cannot be listed or a file cannot be
            read or written.
    """
    # Load source JSON (explicit UTF-8: do not depend on the locale encoding)
    with open(original_json_path, 'r', encoding='utf-8') as f:
        orig_data = json.load(f)

    # Load or initialize destination JSON
    if os.path.exists(output_json_path):
        print(f"ðŸ“‚ Loading existing file: {output_json_path}")
        with open(output_json_path, 'r', encoding='utf-8') as f:
            merged_data = json.load(f)
    else:
        print(f"ðŸ†• Creating new file: {output_json_path}")
        merged_data = {
            "licenses": orig_data.get("licenses", []),
            "info": orig_data.get("info", {}),
            "categories": [],
            "images": [],
            "annotations": []
        }

    # A pre-existing destination may lack a section; make sure all three lists
    # exist so the appends below cannot fail with KeyError.
    for section in ("categories", "images", "annotations"):
        merged_data.setdefault(section, [])

    # Merge unique categories (de-duplicated by name only; ids are untouched)
    existing_cat_names = {cat["name"] for cat in merged_data["categories"]}
    for cat in orig_data.get("categories", []):
        if cat["name"] not in existing_cat_names:
            merged_data["categories"].append(cat)
            existing_cat_names.add(cat["name"])

    # Lower-cased base names already present in the destination
    existing_filenames = {os.path.basename(img["file_name"]).lower() for img in merged_data["images"]}

    # Lower-cased names of the image files available in the folder
    available_images = {
        f.lower() for f in os.listdir(new_image_dir)
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    }

    # normpath drops a trailing separator, which would otherwise make basename
    # return '' and silently lose the folder prefix in file_name.
    folder_name = os.path.basename(os.path.normpath(new_image_dir))

    # Continue numbering after the highest ids already in the destination
    next_image_id = max((img["id"] for img in merged_data["images"]), default=0) + 1
    next_ann_id = max((ann["id"] for ann in merged_data["annotations"]), default=0) + 1

    # source image id -> newly assigned image id, for images copied in this call
    image_id_map = {}

    # --- Copy matching images ---
    for img in tqdm(orig_data["images"], desc="Processing new images"):
        original_name = os.path.basename(img["file_name"])
        base_name = original_name.lower()
        if base_name in available_images and base_name not in existing_filenames:
            new_img = img.copy()
            new_img["file_name"] = os.path.join(folder_name, original_name)
            new_img["id"] = next_image_id
            image_id_map[img["id"]] = next_image_id
            merged_data["images"].append(new_img)

            # Also guards against the same name appearing twice in the source
            existing_filenames.add(base_name)
            next_image_id += 1

    # --- Copy matching annotations ---
    for ann in tqdm(orig_data["annotations"], desc="Processing new annotations"):
        old_img_id = ann["image_id"]
        if old_img_id in image_id_map:
            new_ann = ann.copy()
            new_ann["image_id"] = image_id_map[old_img_id]
            new_ann["id"] = next_ann_id

            # category_id is kept exactly as in the source
            # (e.g., 0 for non-surgical, 1 for surgical)
            merged_data["annotations"].append(new_ann)
            next_ann_id += 1

    # Save final JSON. dirname is '' for a bare file name, and os.makedirs('')
    # raises, so only create the directory when there is one to create.
    output_dir = os.path.dirname(output_json_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_json_path, 'w', encoding='utf-8') as f:
        json.dump(merged_data, f, indent=4)

    print(f"\nâœ… Merged and saved: {output_json_path}")
    print(f"ðŸ“Š Total images: {len(merged_data['images'])}")
    print(f"ðŸ“Š Total annotations: {len(merged_data['annotations'])}")
    print(f"ðŸ“Š Categories: {[cat['name'] for cat in merged_data['categories']]}")


In [5]:
# --- Example usage ---
# Pull the non-surgical annotations for the images present in the val folder.
non_surgical_json = "/Users/nebula/Desktop/Projects/Sanket/Data/non surgical 212 zip/annotations/person_keypoints_updated.json"
val_image_dir = "/Users/nebula/Desktop/Projects/Sanket/Data/Recent_data/val/images"
val_merged_json = "/Users/nebula/Desktop/Projects/Sanket/Data/Recent_data/val/images/annotation/n-person_keypoints_default.json"

merge_annotations_from_folder(
    original_json_path=non_surgical_json,
    new_image_dir=val_image_dir,
    output_json_path=val_merged_json,
)

ðŸ†• Creating new file: /Users/nebula/Desktop/Projects/Sanket/Data/Recent_data/val/images/annotation/n-person_keypoints_default.json


Processing new images: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 213/213 [00:00<00:00, 245368.51it/s]
Processing new annotations: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 212/212 [00:00<00:00, 410902.24it/s]


âœ… Merged and saved: /Users/nebula/Desktop/Projects/Sanket/Data/Recent_data/val/images/annotation/n-person_keypoints_default.json
ðŸ“Š Total images: 44
ðŸ“Š Total annotations: 44
ðŸ“Š Categories: ['surgical', 'non surgical']





In [6]:
# Append the surgical annotations for the same val folder to the same output file.
surgical_json = "/Users/nebula/Desktop/Projects/Sanket/Data/surgical 204/annotations/person_keypoints_updated.json"
val_image_dir = "/Users/nebula/Desktop/Projects/Sanket/Data/Recent_data/val/images"
val_merged_json = "/Users/nebula/Desktop/Projects/Sanket/Data/Recent_data/val/images/annotation/n-person_keypoints_default.json"

merge_annotations_from_folder(
    original_json_path=surgical_json,
    new_image_dir=val_image_dir,
    output_json_path=val_merged_json,
)

ðŸ“‚ Loading existing file: /Users/nebula/Desktop/Projects/Sanket/Data/Recent_data/val/images/annotation/n-person_keypoints_default.json


Processing new images: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 203/203 [00:00<00:00, 288722.86it/s]
Processing new annotations: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 201/201 [00:00<00:00, 443713.21it/s]


âœ… Merged and saved: /Users/nebula/Desktop/Projects/Sanket/Data/Recent_data/val/images/annotation/n-person_keypoints_default.json
ðŸ“Š Total images: 84
ðŸ“Š Total annotations: 82
ðŸ“Š Categories: ['surgical', 'non surgical']





In [7]:
import json
import os

def fix_image_paths(json_path):
    """
    Rewrite a COCO-style JSON file in place so that every image ``file_name``
    is reduced to its base name.

    Args:
        json_path (str): Annotation file to read and then overwrite. It must
            contain an ``images`` list whose entries each have a ``file_name``.
    """
    with open(json_path, 'r') as reader:
        coco = json.load(reader)

    # os.path.basename drops every leading directory component, so prefixes
    # such as "train/", "validation/" or "val/" disappear.
    for entry in coco['images']:
        bare_name = os.path.basename(entry['file_name'])
        entry['file_name'] = bare_name

    # Overwrite the same file with the updated structure.
    with open(json_path, 'w') as writer:
        json.dump(coco, writer, indent=4)

    print(f"âœ… Fixed image paths in: {json_path}")

# Run for both splits: train first, then val
for annotation_json in (
    "/Users/nebula/Desktop/Projects/Sanket/Data/Recent_data/train/images/annotation/n-person_keypoints_default.json",
    "/Users/nebula/Desktop/Projects/Sanket/Data/Recent_data/val/images/annotation/n-person_keypoints_default.json",
):
    fix_image_paths(annotation_json)

âœ… Fixed image paths in: /Users/nebula/Desktop/Projects/Sanket/Data/Recent_data/train/images/annotation/n-person_keypoints_default.json
âœ… Fixed image paths in: /Users/nebula/Desktop/Projects/Sanket/Data/Recent_data/val/images/annotation/n-person_keypoints_default.json
