# Sampling CoCo image dataset to train and use DPM on

In [1]:
import json
import random
from pycocotools.coco import COCO
import requests
import os
import shutil
from tqdm import tqdm

from xml.etree.ElementTree import Element, SubElement, ElementTree, tostring 
from xml.dom.minidom import parseString 

We clear the folders beforehand

In [2]:
def clear_folder(folder_path):
    """
    Clears all files and subdirectories in the specified folder.

    Args:
        folder_path (str): Path to the folder to clear.
    """
    if os.path.exists(folder_path):
        # Remove all contents of the folder
        for filename in os.listdir(folder_path):
            file_path = os.path.join(folder_path, filename)
            try:
                if os.path.isfile(file_path) or os.path.islink(file_path):
                    os.unlink(file_path)  # Remove file or symbolic link
                elif os.path.isdir(file_path):
                    shutil.rmtree(file_path)  # Remove directory
            except Exception as e:
                print(f"Failed to delete {file_path}. Reason: {e}")
    else:
        # If folder doesn't exist, create it
        os.makedirs(folder_path)


Helper function to make PASCAL VOC-style XML annotations.

In [64]:
# Function to create Pascal-style XML annotations
def create_pascal_xml(img_info, annotations, output_annotation_dir, categories):
    """
    Generates a Pascal VOC-style XML file for a given image and its annotations.

    Args:
    - img_info (dict): Image metadata from COCO.
    - annotations (list): List of annotations for the image.
    - output_annotation_dir (str): Directory to save the XML file.
    - categories (list): List of category names to include in the annotations.
    """
    from xml.etree.ElementTree import Element, SubElement, tostring
    from xml.dom.minidom import parseString
    import os

    xml_root = Element('annotation')
    folder = SubElement(xml_root, 'folder')
    folder.text = 'VOC_COCO'

    filename = SubElement(xml_root, 'filename')
    filename.text = img_info['file_name']

    size = SubElement(xml_root, 'size')
    SubElement(size, 'width').text = str(img_info['width'])
    SubElement(size, 'height').text = str(img_info['height'])
    SubElement(size, 'depth').text = '3'  # Assuming RGB images

    for ann in annotations:
        # Include only objects in the specified categories
        cat_id = ann['category_id']
        if cat_id not in id_categories.keys():
            continue

        obj = SubElement(xml_root, 'object')
        name = SubElement(obj, 'name')
        cat_name = id_categories[cat_id]
        name.text = cat_name  # Category name from COCO annotation

        # Default pose and truncated values
        pose = SubElement(obj, 'pose')
        pose.text = 'Unspecified'

        truncated = SubElement(obj, 'truncated')
        bbox = ann['bbox']  # COCO format: [xmin, ymin, width, height]
        x_min = bbox[0]
        y_min = bbox[1]
        x_max = bbox[0] + bbox[2]
        y_max = bbox[1] + bbox[3]
        is_truncated = (
            x_min < 0 or y_min < 0 or x_max > img_info['width'] or y_max > img_info['height']
        )
        truncated.text = '1' if is_truncated else '0'

        # Bounding box
        bndbox = SubElement(obj, 'bndbox')
        SubElement(bndbox, 'xmin').text = str(max(0, int(x_min)))  # Clip to image boundaries
        SubElement(bndbox, 'ymin').text = str(max(0, int(y_min)))
        SubElement(bndbox, 'xmax').text = str(min(img_info['width'], int(x_max)))
        SubElement(bndbox, 'ymax').text = str(min(img_info['height'], int(y_max)))

    # If no relevant annotations, skip saving the XML
    if not any(cat_name in categories for ann in annotations):
        print(f"Skipping XML generation for {img_info['file_name']} - no relevant annotations.")
        return

    # Pretty format 
    asstring = tostring(xml_root, 'utf-8')
    parsed_xml = parseString(asstring)
    pretty_xml = parsed_xml.toprettyxml(indent="    ")

    # Save XML
    output_file = os.path.join(output_annotation_dir, f"{os.path.splitext(img_info['file_name'])[0]}.xml")
    with open(output_file, 'w') as f:
        f.write(pretty_xml)


In [48]:
# downloading 2000 images of training data for category person
# Paths
keypoints_annotation_file = 'annotations/person_keypoints_train2017.json'  # Update with your COCO annotation file path
annotation_file = 'annotations/instances_train2017.json'  # Update with your COCO annotation file path
output_dir = 'coco_output'  # Folder to save downloaded images
annotations_dir = os.path.join(output_dir, 'Annotations')
sets_dir = os.path.join(output_dir, 'ImageSets', 'Main')
images_dir = os.path.join(output_dir, 'JPEGImages')
# os.makedirs(output_dir, exist_ok=True)

# Number of images to sample
num_images = 10

In [5]:
# Load COCO annotations
coco = COCO(annotation_file)

loading annotations into memory...
Done (t=12.51s)
creating index...
index created!


In [6]:
categories = [
    'airplane', 
    'bicycle', 
    'bird', 
    'boat', 
    'bottle', 
    'bus',
    'car',
    'cat',
    'chair',
    'cow',
    'dining table',
    'dog', 
    'horse', 
    'motorcycle', 
    'person', 
    'potted plant',
    'sheep',
    'couch',
    'train', 
    'TV',
]

In [54]:
category_ids = {cat['name']: cat['id'] for cat in coco.loadCats(coco.getCatIds()) if cat['name'] in categories}
id_categories = {v:k for (k,v) in category_ids.items()}
# category_ids

In [67]:
# Sampling for non-person categories
num_images_per_category = 5
person_instance_target = 10

## Clean all

In [70]:
# To freshly download (erase existing data) 
clear_folder(annotations_dir)
clear_folder(sets_dir)
clear_folder(images_dir)

## Sampling

In [71]:
all_images = set()
# Track globally selected image IDs to avoid duplicates
globally_selected_image_ids = set()
random.seed(429)

# Prepare data structures
train_txt_path = os.path.join(sets_dir, "train.txt")
category_txt_files = {category: open(os.path.join(sets_dir, f"{category}_train.txt"), "w") for category in categories}
positive_samples = {category: set() for category in categories}
annotations_by_image = {}

# Process each category
for category, category_id in tqdm(category_ids.items(), desc="Processing categories"):
    print(f"Processing category: {category}")

    # Handle 'person' category separately for 4100 instances
    if category == 'person':
        instance_target = person_instance_target
        instance_count = 0
        selected_image_ids = []
        image_ids = coco.getImgIds(catIds=[category_id])
        random.shuffle(image_ids)

        # Collect images until the instance target is reached
        for img_id in image_ids:
            if img_id in globally_selected_image_ids:
                continue  # Skip already selected image IDs

            ann_ids = coco.getAnnIds(imgIds=[img_id], catIds=[category_id])
            person_count = len(ann_ids)
            if instance_count + person_count <= instance_target:
                selected_image_ids.append(img_id)
                globally_selected_image_ids.add(img_id)  # Mark as globally selected
                instance_count += person_count
            if len(selected_image_ids) >= instance_target:  # Ensure no oversampling
                break

        print(f"Selected {len(selected_image_ids)} images to meet {instance_target} 'person' instances.")
    else:
        # Default behavior for other categories
        image_ids = coco.getImgIds(catIds=[category_id])
        random.shuffle(image_ids)

        selected_image_ids = []
        for img_id in image_ids:
            if img_id in globally_selected_image_ids:
                continue  # Skip already selected image IDs

            selected_image_ids.append(img_id)
            globally_selected_image_ids.add(img_id)  # Mark as globally selected

            if len(selected_image_ids) >= num_images_per_category:  # Stop once we have enough
                break

        print(f"Selected {len(selected_image_ids)} unique images for category {category}.")


    # Collect annotations for each selected image
    for img_id in selected_image_ids:
        in_this_image = coco.loadAnns(coco.getAnnIds(imgIds=[img_id]))
        annotations_by_image[img_id] = in_this_image

        # Update positive samples
        img_filename = os.path.splitext(coco.loadImgs(img_id)[0]['file_name'])[0]
        for ann in in_this_image: 
            cat_id = ann['category_id']
            if cat_id in id_categories.keys():
                positive_samples[id_categories[cat_id]].add(img_filename)

print(f"Annotations collected for {len(annotations_by_image)} images.")

# Download images and create XML annotations
for img_id, annotations in tqdm(annotations_by_image.items(), desc="Processing images"):
    img_info = coco.loadImgs(img_id)[0]
    img_filename = os.path.splitext(img_info['file_name'])[0]
    img_filepath = os.path.join(images_dir, img_info['file_name'])
    all_images.add(img_filename)

    # Download image if it doesn't already exist
    if not os.path.exists(img_filepath):
        try:
            response = requests.get(img_info['coco_url'], stream=True, timeout=10)
            response.raise_for_status()
            with open(img_filepath, 'wb') as f:
                for chunk in response.iter_content(1024):
                    f.write(chunk)
            # Create Pascal VOC XML annotation
            create_pascal_xml(img_info, annotations, annotations_dir, categories)
        except requests.exceptions.RequestException as e:
            print(f"Failed to download {img_info['file_name']}: {e}")



# Write ImageSets files
sorted_imgs = sorted(all_images)
for img_filename in tqdm(sorted_imgs, desc="Writing ImageSets"):
    for category in categories:
        # Label is 1 if image is in positive samples for the category, otherwise -1
        label = "1" if img_filename in positive_samples[category] else "-1"
        category_txt_files[category].write(f"{img_filename} {label}\n")

# Write train.txt
with open(train_txt_path, "w") as train_file:
    for img_filename in tqdm(sorted_imgs, desc="Writing train.txt"):
        train_file.write(f"{img_filename}\n")

# Close category-specific text files
for file in category_txt_files.values():
    file.close()

print(f"ImageSets folder created at {sets_dir}.")

Processing categories:   0%|                                                           | 0/19 [00:00<?, ?it/s]

Processing category: person


Processing categories: 100%|██████████████████████████████████████████████████| 19/19 [00:00<00:00, 31.08it/s]


Selected 5 images to meet 10 'person' instances.
Processing category: bicycle
Selected 5 unique images for category bicycle.
Processing category: car
Selected 5 unique images for category car.
Processing category: motorcycle
Selected 5 unique images for category motorcycle.
Processing category: airplane
Selected 5 unique images for category airplane.
Processing category: bus
Selected 5 unique images for category bus.
Processing category: train
Selected 5 unique images for category train.
Processing category: boat
Selected 5 unique images for category boat.
Processing category: bird
Selected 5 unique images for category bird.
Processing category: cat
Selected 5 unique images for category cat.
Processing category: dog
Selected 5 unique images for category dog.
Processing category: horse
Selected 5 unique images for category horse.
Processing category: sheep
Selected 5 unique images for category sheep.
Processing category: cow
Selected 5 unique images for category cow.
Processing category

Processing images: 100%|██████████████████████████████████████████████████████| 95/95 [00:15<00:00,  6.20it/s]
Writing ImageSets: 100%|███████████████████████████████████████████████████| 95/95 [00:00<00:00, 43642.81it/s]
Writing train.txt: 100%|██████████████████████████████████████████████████| 95/95 [00:00<00:00, 539186.58it/s]

ImageSets folder created at coco_output/ImageSets/Main.





### Playground

In [42]:
coco.loadImgs(ids=[10948])

[{'license': 6,
  'file_name': '000000010948.jpg',
  'coco_url': 'http://images.cocodataset.org/train2017/000000010948.jpg',
  'height': 376,
  'width': 500,
  'date_captured': '2013-11-22 00:28:09',
  'flickr_url': 'http://farm1.staticflickr.com/121/305482790_d063783500_z.jpg',
  'id': 10948}]

In [43]:
coco.getAnnIds(imgIds=[10948])

[47931,
 99444,
 200424,
 235251,
 1334779,
 1510594,
 1629956,
 1973648,
 1974781,
 1974896,
 1991653]

In [53]:
[a['category_id'] for a in coco.loadAnns([
     47931,
     99444,
     200424,
     235251,
     1334779,
     1510594,
     1629956,
     1973648,
     1974781,
     1974896,
     1991653
])]

[17, 63, 1, 1, 2, 47, 75, 75, 75, 75, 85]

In [45]:
category_ids

{'person': 1,
 'bicycle': 2,
 'car': 3,
 'motorcycle': 4,
 'airplane': 5,
 'bus': 6,
 'train': 7,
 'boat': 9,
 'bird': 16,
 'cat': 17,
 'dog': 18,
 'horse': 19,
 'sheep': 20,
 'cow': 21,
 'bottle': 44,
 'chair': 62,
 'couch': 63,
 'potted plant': 64,
 'dining table': 67}

In [60]:
coco.loadAnns(47931)

[{'segmentation': [[321.99,
    258.37,
    317.94,
    252.85,
    318.68,
    248.07,
    320.89,
    242.18,
    324.2,
    239.98,
    330.82,
    238.5,
    335.6,
    233.72,
    342.22,
    233.72,
    348.1,
    234.09,
    351.04,
    235.19,
    348.47,
    229.68,
    356.56,
    229.68,
    359.5,
    231.52,
    366.49,
    230.05,
    370.17,
    230.41,
    372.74,
    234.09,
    371.27,
    239.24,
    373.11,
    244.76,
    372.38,
    246.96,
    371.64,
    248.8,
    372.01,
    252.85,
    372.01,
    256.16,
    375.32,
    259.1,
    375.69,
    262.04,
    372.38,
    263.15,
    368.33,
    263.88,
    365.02,
    265.72,
    362.08,
    264.99,
    360.97,
    263.15,
    356.93,
    263.88,
    348.47,
    265.35,
    342.59,
    266.09,
    340.01,
    267.56,
    334.86,
    267.93,
    330.82,
    266.82,
    326.04,
    264.99,
    324.2,
    263.51,
    322.36,
    260.2,
    320.15,
    258.0]],
  'area': 1648.8485,
  'iscrowd': 0,
  'image_id': 10948