In [3]:
import json
import os
from tqdm import tqdm  # Optional: for progress display (pip install tqdm)

def create_filtered_annotations(
    original_json_path,
    original_image_dir,
    new_image_dir,
    output_json_path
):
    """
    Create a new COCO-style JSON containing only annotations for images
    that exist in the new directory, with updated image paths.

    Images are matched by base file name only (the directory part of the
    original ``file_name`` is ignored). Kept images and their annotations are
    renumbered consecutively starting at 1, and each kept image's
    ``file_name`` is rewritten to ``<name of new_image_dir>/<base name>``.

    Args:
        original_json_path (str): Path to the original JSON file.
        original_image_dir (str): Folder where original images are stored.
            Accepted for interface compatibility; it is not read.
        new_image_dir (str): Folder containing new filtered images. A
            trailing path separator is tolerated.
        output_json_path (str): Output path for new JSON file. Missing parent
            directories are created; a bare file name writes to the current
            working directory.

    Raises:
        OSError: If the input JSON or ``new_image_dir`` cannot be read, or
            the output cannot be written.
        KeyError: If the original JSON has no "images" or "annotations" key.
    """
    # Load original annotations
    with open(original_json_path, 'r', encoding='utf-8') as f:
        original_data = json.load(f)

    # Names are lower-cased before the suffix test, so upper-case variants
    # such as ".JPG" are already covered by these three entries.
    image_suffixes = ('.jpg', '.jpeg', '.png')
    available_images = {
        name for name in os.listdir(new_image_dir)
        if name.lower().endswith(image_suffixes)
    }

    # normpath strips a trailing separator; without it basename("dir/") is ""
    # and the folder prefix would silently vanish from every file_name.
    new_dir_name = os.path.basename(os.path.normpath(new_image_dir))

    # Prepare new JSON structure
    new_data = {
        "licenses": original_data.get("licenses", []),
        "info": original_data.get("info", {}),
        "categories": original_data.get("categories", []),
        "images": [],
        "annotations": []
    }

    # Mapping from old image_id to new image_id
    image_id_map = {}

    # Filter and copy relevant images, renumbering from 1
    for img in tqdm(original_data["images"], desc="Processing images"):
        image_name = os.path.basename(img["file_name"])
        if image_name not in available_images:
            continue

        new_image_id = len(new_data["images"]) + 1
        new_img_entry = img.copy()
        new_img_entry["file_name"] = os.path.join(new_dir_name, image_name)
        new_img_entry["id"] = new_image_id

        image_id_map[img["id"]] = new_image_id
        new_data["images"].append(new_img_entry)

    # Copy annotations for matched images, renumbering from 1
    for ann in tqdm(original_data["annotations"], desc="Processing annotations"):
        old_image_id = ann["image_id"]
        if old_image_id not in image_id_map:
            continue

        new_ann_entry = ann.copy()
        new_ann_entry["image_id"] = image_id_map[old_image_id]
        new_ann_entry["id"] = len(new_data["annotations"]) + 1
        new_data["annotations"].append(new_ann_entry)

    # Save the filtered JSON. os.makedirs("") raises FileNotFoundError, so
    # only create the parent when the path actually has one.
    output_dir = os.path.dirname(output_json_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_json_path, "w", encoding='utf-8') as f:
        json.dump(new_data, f, indent=4)

    print(f"âœ… New annotation file saved: {output_json_path}")
    print(f"ðŸ“Š Total images included: {len(new_data['images'])}")
    print(f"ðŸ“Š Total annotations included: {len(new_data['annotations'])}")


# Example usage:
# create_filtered_annotations(
#     original_json_path="COMPREESED-2/annotations/person_keypoints_default.json",
#     original_image_dir="COMPREESED-2",
#     new_image_dir="Dataset/train",
#     output_json_path="Dataset/train/annotation/n-person_keypoints_default.json"
# )


In [6]:
# Build the annotation file for ./Dataset/train from the "COMPRESSED 2" export.
# All paths are relative, so this depends on the notebook's working directory.
# original_image_dir is passed for completeness; the function does not read it.
create_filtered_annotations(
    original_json_path="./COMPRESSED 2/annotations/person_keypoints_default.json",
    original_image_dir="./COMPRESSED 2",
    new_image_dir="./Dataset/train",
    output_json_path="Dataset/train/annotation/n-person_keypoints_default.json"
)

Processing images: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 90/90 [00:00<00:00, 70545.20it/s]
Processing annotations: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 88/88 [00:00<00:00, 95844.91it/s]

âœ… New annotation file saved: Dataset/train/annotation/n-person_keypoints_default.json
ðŸ“Š Total images included: 90
ðŸ“Š Total annotations included: 88





In [33]:
import json
import os
from tqdm import tqdm  # Optional progress bar

def merge_annotations_from_folder(
    original_json_path,
    new_image_dir,
    output_json_path
):
    """
    Merge COCO-style annotations from an original JSON into an existing or new JSON,
    keeping only images that exist in the specified image directory.

    If ``output_json_path`` already exists it is loaded and extended;
    otherwise a new structure is seeded with the original file's licenses,
    info and categories. An image is added only when its base file name is
    present in ``new_image_dir`` and not already listed in the target.
    Added images and annotations receive IDs continuing after the largest ID
    already in the target. Categories of an existing target are left as they
    are; the original file's categories are not merged into it.

    Args:
        original_json_path (str): Path to the original COCO JSON file.
        new_image_dir (str): Directory with images to include. A trailing
            path separator is tolerated.
        output_json_path (str): Path to the merged output JSON file. Missing
            parent directories are created; a bare file name writes to the
            current working directory.

    Raises:
        OSError: If an input cannot be read or the output cannot be written.
        KeyError: If the original JSON has no "images" or "annotations" key.
    """
    # Load original JSON
    with open(original_json_path, 'r', encoding='utf-8') as f:
        orig_data = json.load(f)

    # Load or create target JSON
    if os.path.exists(output_json_path):
        print(f"ðŸ“‚ Loading existing file: {output_json_path}")
        with open(output_json_path, 'r', encoding='utf-8') as f:
            merged_data = json.load(f)
        # Tolerate a target that lacks one of the two lists we append to.
        merged_data.setdefault("images", [])
        merged_data.setdefault("annotations", [])
    else:
        print(f"ðŸ†• Creating new file: {output_json_path}")
        merged_data = {
            "licenses": orig_data.get("licenses", []),
            "info": orig_data.get("info", {}),
            "categories": orig_data.get("categories", []),
            "images": [],
            "annotations": []
        }

    # Track which image names already exist in merged data
    existing_filenames = {
        os.path.basename(img["file_name"]) for img in merged_data["images"]
    }

    # Names are lower-cased before the suffix test, so upper-case variants
    # such as ".JPG" / ".JPEG" are already covered by these three entries.
    image_suffixes = ('.jpg', '.jpeg', '.png')
    available_images = {
        name for name in os.listdir(new_image_dir)
        if name.lower().endswith(image_suffixes)
    }

    # normpath strips a trailing separator; without it basename("dir/") is ""
    # and the folder prefix would silently vanish from every file_name.
    new_dir_name = os.path.basename(os.path.normpath(new_image_dir))

    # Next available IDs (continue after whatever the target already holds)
    next_image_id = max((img["id"] for img in merged_data["images"]), default=0) + 1
    next_ann_id = max((ann["id"] for ann in merged_data["annotations"]), default=0) + 1

    # Map old to new image IDs; only images added by this call appear here,
    # so annotations of skipped or duplicate images are not copied again.
    image_id_map = {}

    # --- Copy matching images ---
    for img in tqdm(orig_data["images"], desc="Processing new images"):
        base_name = os.path.basename(img["file_name"])
        if base_name not in available_images or base_name in existing_filenames:
            continue

        new_img = img.copy()
        new_img["file_name"] = os.path.join(new_dir_name, base_name)
        new_img["id"] = next_image_id
        image_id_map[img["id"]] = next_image_id
        merged_data["images"].append(new_img)

        existing_filenames.add(base_name)
        next_image_id += 1

    # --- Copy matching annotations ---
    for ann in tqdm(orig_data["annotations"], desc="Processing new annotations"):
        old_img_id = ann["image_id"]
        if old_img_id not in image_id_map:
            continue

        new_ann = ann.copy()
        new_ann["image_id"] = image_id_map[old_img_id]
        new_ann["id"] = next_ann_id
        merged_data["annotations"].append(new_ann)
        next_ann_id += 1

    # Save merged JSON. os.makedirs("") raises FileNotFoundError, so only
    # create the parent when the path actually has one.
    output_dir = os.path.dirname(output_json_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_json_path, 'w', encoding='utf-8') as f:
        json.dump(merged_data, f, indent=4)

    print(f"âœ… Merged and saved: {output_json_path}")
    print(f"ðŸ“Š Total images: {len(merged_data['images'])}")
    print(f"ðŸ“Š Total annotations: {len(merged_data['annotations'])}")


# --- Example usage ---
# merge_annotations_from_folder(
#     original_json_path="COMPREESED-2/annotations/person_keypoints_default.json",
#     new_image_dir="Dataset/train",
#     output_json_path="Dataset/train/annotation/n-person_keypoints_default.json"
# )

# To merge another folder later:
# merge_annotations_from_folder(
#     original_json_path="COMPREESED-3/annotations/person_keypoints_default.json",
#     new_image_dir="Dataset/train",
#     output_json_path="Dataset/train/annotation/n-person_keypoints_default.json"
# )


In [39]:
# --- Example usage ---
# Merge the annotations from COMPREESED-2 into the train annotation file.
# Paths are relative, so this depends on the notebook's working directory.
# Only images whose base name exists in Dataset/train and is not already
# listed in the output file are added.
merge_annotations_from_folder(
    original_json_path="COMPREESED-2/annotations/person_keypoints_default.json",
    new_image_dir="Dataset/train",
    output_json_path="Dataset/train/annotation/n-person_keypoints_default.json"
)

ðŸ†• Creating new file: Dataset/train/annotation/n-person_keypoints_default.json


Processing new images: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 11/11 [00:00<00:00, 32173.88it/s]
Processing new annotations: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 11/11 [00:00<00:00, 10692.32it/s]

âœ… Merged and saved: Dataset/train/annotation/n-person_keypoints_default.json
ðŸ“Š Total images: 11
ðŸ“Š Total annotations: 11





In [40]:
# To merge another folder later:
# Same output file as the other merge cells, so new images are appended to it.
# NOTE(review): absolute /Users/... paths tie this cell to one machine —
# consider deriving them from a configurable base directory.
merge_annotations_from_folder(
    original_json_path="/Users/nebula/Desktop/Projects/Sanket/COMPRESSED 2/annotations/person_keypoints_default.json",
    new_image_dir="/Users/nebula/Desktop/Projects/Sanket/Dataset/train",
    output_json_path="/Users/nebula/Desktop/Projects/Sanket/Dataset/train/annotation/n-person_keypoints_default.json"
)

ðŸ“‚ Loading existing file: /Users/nebula/Desktop/Projects/Sanket/Dataset/train/annotation/n-person_keypoints_default.json


Processing new images: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 90/90 [00:00<00:00, 83165.31it/s]
Processing new annotations: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 88/88 [00:00<00:00, 161743.54it/s]

âœ… Merged and saved: /Users/nebula/Desktop/Projects/Sanket/Dataset/train/annotation/n-person_keypoints_default.json
ðŸ“Š Total images: 93
ðŸ“Š Total annotations: 92





In [41]:
# Merge the "edited surgical " export into the shared train annotation file.
# NOTE(review): the source folder name ends with a space ("edited surgical /")
# — confirm this matches the directory name on disk.
merge_annotations_from_folder(
    original_json_path="/Users/nebula/Desktop/Projects/Sanket/edited surgical /annotations/person_keypoints_default.json",
    new_image_dir="/Users/nebula/Desktop/Projects/Sanket/Dataset/train",
    output_json_path="/Users/nebula/Desktop/Projects/Sanket/Dataset/train/annotation/n-person_keypoints_default.json"
)

ðŸ“‚ Loading existing file: /Users/nebula/Desktop/Projects/Sanket/Dataset/train/annotation/n-person_keypoints_default.json


Processing new images: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 99/99 [00:00<00:00, 85580.40it/s]
Processing new annotations: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 99/99 [00:00<00:00, 211531.38it/s]

âœ… Merged and saved: /Users/nebula/Desktop/Projects/Sanket/Dataset/train/annotation/n-person_keypoints_default.json
ðŸ“Š Total images: 138
ðŸ“Š Total annotations: 137





In [42]:
# Merge the "task 4" export into the shared train annotation file; images
# already listed in the output (matched by base file name) are skipped.
merge_annotations_from_folder(
    original_json_path="/Users/nebula/Desktop/Projects/Sanket/task 4/annotations/person_keypoints_default.json",
    new_image_dir="/Users/nebula/Desktop/Projects/Sanket/Dataset/train",
    output_json_path="/Users/nebula/Desktop/Projects/Sanket/Dataset/train/annotation/n-person_keypoints_default.json"
)

ðŸ“‚ Loading existing file: /Users/nebula/Desktop/Projects/Sanket/Dataset/train/annotation/n-person_keypoints_default.json


Processing new images: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 100/100 [00:00<00:00, 140230.83it/s]
Processing new annotations: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 100/100 [00:00<00:00, 195721.14it/s]

âœ… Merged and saved: /Users/nebula/Desktop/Projects/Sanket/Dataset/train/annotation/n-person_keypoints_default.json
ðŸ“Š Total images: 188
ðŸ“Š Total annotations: 187





In [43]:
# Merge the "task 5 " export into the shared train annotation file.
# NOTE(review): the source folder name ends with a space ("task 5 /") —
# confirm this matches the directory name on disk.
merge_annotations_from_folder(
    original_json_path="/Users/nebula/Desktop/Projects/Sanket/task 5 /annotations/person_keypoints_default.json",
    new_image_dir="/Users/nebula/Desktop/Projects/Sanket/Dataset/train",
    output_json_path="/Users/nebula/Desktop/Projects/Sanket/Dataset/train/annotation/n-person_keypoints_default.json"
)

ðŸ“‚ Loading existing file: /Users/nebula/Desktop/Projects/Sanket/Dataset/train/annotation/n-person_keypoints_default.json


Processing new images: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 102/102 [00:00<00:00, 235583.15it/s]
Processing new annotations: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 102/102 [00:00<00:00, 269917.36it/s]

âœ… Merged and saved: /Users/nebula/Desktop/Projects/Sanket/Dataset/train/annotation/n-person_keypoints_default.json
ðŸ“Š Total images: 199
ðŸ“Š Total annotations: 198





In [22]:
import json
import os

def normalize_yolo_categories(json_path):
    """
    Rewrite category IDs in a COCO-style JSON file, in place.

    Category 1 becomes ID 0 named "non surgical" and category 18 becomes
    ID 1 named "surgical"; the same ID remapping is applied to every
    annotation's ``category_id``. Other categories and annotations are left
    unchanged.

    The remapping is not safe to apply twice: target ID 1 is also a source
    ID, so a second pass would turn "surgical" (1) into "non surgical" (0).
    To make re-running the cell harmless, the file is left untouched when it
    already contains a category whose (id, name) equals one of the targets.

    Args:
        json_path (str): Path to the JSON file to normalize; it is
            overwritten with the result.

    Raises:
        OSError: If the file cannot be read or written.
    """
    # Single source of truth: old category id -> (new id, new name)
    category_remap = {
        1: (0, "non surgical"),
        18: (1, "surgical"),
    }

    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    categories = data.get("categories", [])
    annotations = data.get("annotations", [])

    # Idempotence guard (heuristic): a category that already carries a target
    # (id, name) pair means this file went through the remapping before.
    normalized_pairs = set(category_remap.values())
    if any((cat.get("id"), cat.get("name")) in normalized_pairs for cat in categories):
        print(f"Category IDs already normalized, skipping: {json_path}")
        return

    # Update annotations
    for ann in annotations:
        old_id = ann.get("category_id")
        if old_id in category_remap:
            ann["category_id"] = category_remap[old_id][0]

    # Update categories section (entries are modified in place)
    for cat in categories:
        old_id = cat.get("id")
        if old_id in category_remap:
            cat["id"], cat["name"] = category_remap[old_id]

    # Save updated JSON
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=4)

    print(f"âœ… Normalized category IDs in: {json_path}")

# Example usage:
# Normalize both the train and validation annotation files; each file is
# overwritten in place.
# NOTE(review): absolute /Users/... paths tie this cell to one machine.
normalize_yolo_categories("/Users/nebula/Desktop/Projects/Sanket/Dataset/train/annotation/n-person_keypoints_default.json")
normalize_yolo_categories("/Users/nebula/Desktop/Projects/Sanket/Dataset/validation/annotation/n-person_keypoints_default.json")


âœ… Normalized category IDs in: /Users/nebula/Desktop/Projects/Sanket/Dataset/train/annotation/n-person_keypoints_default.json
âœ… Normalized category IDs in: /Users/nebula/Desktop/Projects/Sanket/Dataset/validation/annotation/n-person_keypoints_default.json
