In [10]:
import json
import os
import shutil
from PIL import Image

In [11]:
def filter_coco_dataset(input_coco_dir, output_coco_dir, target_resolution):
    """
    Copy a COCO dataset, keeping only the images (and their annotations) that match a resolution.

    The input directory must contain ``result.json`` (the COCO annotation file) and an ``images``
    sub-directory. The whole ``images`` tree is copied into the output directory, the files of
    images whose resolution differs are deleted from that copy, and a filtered ``result.json``
    is written next to it. Nothing inside the input directory is modified.

    The function can be re-run against the same output directory: existing files are overwritten
    by the fresh copy instead of raising ``FileExistsError``.

    :param input_coco_dir: Directory holding the source ``result.json`` and ``images`` folder.
    :param output_coco_dir: Directory that receives the filtered ``result.json`` and ``images``
        folder. It is created if it does not exist.
    :param target_resolution: The desired resolution as a ``(width, height)`` pair; a tuple or a
        list is accepted.
    :raises FileNotFoundError: If ``result.json`` or the ``images`` folder is missing, or if a
        rejected image has no corresponding file in the copied ``images`` folder.
    :raises json.JSONDecodeError: If ``result.json`` is not valid JSON.
    :raises KeyError: If the annotation file lacks the ``images``/``annotations`` lists or an
        entry lacks one of the fields read here.
    :return: None
    """
    # Normalise to a tuple: comparing a (w, h) tuple with a list is always False in Python,
    # which would silently reject every image.
    wanted_resolution = tuple(target_resolution)

    # Load the annotations first so a missing or malformed file aborts before anything is copied.
    with open(os.path.join(input_coco_dir, "result.json"), "r", encoding="utf-8") as file:
        coco_data = json.load(file)

    # Split the image entries in one pass into those to keep and those to drop.
    filtered_images = []
    rejected_images = []
    for img in coco_data['images']:
        if (img['width'], img['height']) == wanted_resolution:
            filtered_images.append(img)
        else:
            rejected_images.append(img)

    # Copy the full image tree; dirs_exist_ok lets a re-run overwrite a previous output.
    os.makedirs(output_coco_dir, exist_ok=True)
    output_images_dir = os.path.join(output_coco_dir, "images")
    shutil.copytree(os.path.join(input_coco_dir, "images"), output_images_dir, dirs_exist_ok=True)

    # Delete the rejected images from the copy (never from the input).
    # NOTE(review): the first two characters of "file_name" are dropped before joining with the
    # images folder — presumably a fixed two-character prefix in this export; confirm against
    # the dataset. dict.fromkeys de-duplicates while keeping order, so two entries that point at
    # the same file do not trigger a second, failing os.remove.
    for relative_name in dict.fromkeys(img["file_name"][2:] for img in rejected_images):
        os.remove(os.path.join(output_images_dir, relative_name))

    # Keep only the annotations that belong to a surviving image.
    kept_image_ids = {img['id'] for img in filtered_images}
    filtered_annotations = [anno for anno in coco_data['annotations'] if anno['image_id'] in kept_image_ids]

    # Every other top-level key of the COCO document is written back unchanged.
    coco_data['images'] = filtered_images
    coco_data['annotations'] = filtered_annotations

    output_file_path = os.path.join(output_coco_dir, "result.json")

    # Write the updated data to the new file in the output directory
    with open(output_file_path, 'w', encoding="utf-8") as file:
        json.dump(coco_data, file, indent=4)

In [12]:
# Example configuration for filter_coco_dataset
target_resolution = (1920, 1080)  # (width, height) that an image must have to be kept
out_dir = '/mnt/nis_lab_research/data/coco_files/raw/shah_b2_704_22-24_cln'  # output directory; replace with your own
in_dir = '/mnt/nis_lab_research/data/coco_files/raw/shah_b2_704_22-24'  # input dataset directory (result.json + images/); replace with your own

In [13]:
# Build the filtered copy of the dataset using the configuration defined above
filter_coco_dataset(
    input_coco_dir=in_dir,
    output_coco_dir=out_dir,
    target_resolution=target_resolution,
)