# Capstone Project 

### Install the required packages with pip:

pip install torch torchvision matplotlib pycocotools

### Import necessary libraries

In [None]:
import os
import json
import random
import shutil
from pycocotools.coco import COCO


### Initialize COCO API for instance annotations

In [None]:
# Local COCO layout: images are read from dataDir, annotation JSON files
# from annDir.
dataDir = r'C:\Users\muhri\Untitled Folder 1\ML'  # adjust this to your data directory
dataType = 'val2017'
annDir = r'C:\Users\muhri\Untitled Folder 1\ML\annotations'

# Build the path with os.path.join rather than embedding a backslash in a
# non-raw string: '\i' is an invalid escape sequence that newer Python
# versions warn about and that is slated to become an error.
annFile = os.path.join(annDir, 'instances_{}.json'.format(dataType))

# Load and index the instance annotations for the chosen split.
coco = COCO(annFile)


### Get all image ids and filter them



In [None]:
# Category names used to select images and annotations; they are passed to
# coco.getCatIds(catNms=...) to be resolved into category ids.
object_categories = [
    # vehicles and street objects
    'bicycle', 'car', 'motorcycle', 'stop sign',
    # animals
    'cat', 'dog',
    # carried items
    'backpack', 'umbrella', 'handbag',
    # kitchen and food
    'bottle', 'cup', 'bowl', 'orange',
    # electronics and appliances
    'tv', 'laptop', 'microwave', 'oven',
    # small household items
    'scissors', 'toothbrush', 'hair drier',
]

In [None]:
# Resolve the selected category names into COCO category ids.
object_cat_ids = coco.getCatIds(catNms=object_categories)

# COCO.getImgIds(catIds=[...]) intersects the per-category image sets, i.e. it
# returns only images that contain *every* listed category. With this many
# categories that usually leaves few or no images. Query one category at a
# time and union the results so an image qualifies when it contains *any*
# selected category. Sorting gives a deterministic list (still shuffleable).
object_img_ids = sorted({
    img_id
    for cat_id in object_cat_ids
    for img_id in coco.getImgIds(catIds=[cat_id])
})

# Sizes of the dataset split.
total_images = 700
num_train = 500
num_test = 200

assert num_train + num_test == total_images, "The sum of training and test images should equal the total number of images"

# Fail loudly here instead of silently producing undersized splits later,
# because list slicing does not complain when there are too few ids.
if len(object_img_ids) < total_images:
    raise ValueError(
        "Only {} images contain the selected categories, but {} are required".format(
            len(object_img_ids), total_images
        )
    )


In [None]:
# Randomise the candidate order in place so the slices below form a random split.
random.shuffle(object_img_ids)

# Training split: the first num_train ids.
# Test split: the num_test ids that immediately follow it.
split_end = num_train + num_test
train_ids, test_ids = (
    object_img_ids[:num_train],
    object_img_ids[num_train:split_end],
)


### Copy selected images to a new directory

In [None]:
train_img_dir = 'train_images'
test_img_dir = 'test_images'

# Create both destination folders up front; existing folders are left alone.
for out_dir in (train_img_dir, test_img_dir):
    os.makedirs(out_dir, exist_ok=True)

# Copy each selected image into the folder of its split (train first, then
# test). The source path is dataDir joined directly with the image's
# file_name, without a dataType sub-folder.
for split_ids, out_dir in ((train_ids, train_img_dir), (test_ids, test_img_dir)):
    for img_id in split_ids:
        img_info = coco.loadImgs(img_id)[0]
        img_path = os.path.join(dataDir, img_info['file_name'])
        shutil.copy(img_path, out_dir)


### Extract corresponding annotations and save to new json files

In [None]:
def filter_annotations(coco, img_ids, save_path, cat_ids=None):
    """Write a COCO-style JSON file limited to the given images and categories.

    The output has three keys: "images" (metadata for ``img_ids``),
    "annotations" (annotations on those images belonging to ``cat_ids``, each
    with an added ``category_name`` field) and "categories" (the category
    records for ``cat_ids``).

    Args:
        coco: Object exposing the pycocotools ``COCO`` query methods used
            here (``getAnnIds``, ``loadAnns``, ``loadImgs``, ``loadCats``).
        img_ids: Ids of the images to keep.
        save_path: Path of the JSON file to write.
        cat_ids: Category ids to keep. Defaults to the module-level
            ``object_cat_ids`` so existing three-argument calls are unchanged.
    """
    if cat_ids is None:
        cat_ids = object_cat_ids

    # Category records and an id -> name lookup for labelling annotations.
    cats = coco.loadCats(cat_ids)
    cat_names = {cat['id']: cat['name'] for cat in cats}

    # pycocotools treats an empty imgIds list as "no image filter" and would
    # return annotations for every image, so short-circuit that case.
    no_images = hasattr(img_ids, '__len__') and len(img_ids) == 0
    ann_ids = [] if no_images else coco.getAnnIds(imgIds=img_ids, catIds=cat_ids)

    # Build labelled copies: loadAnns hands back the dicts held by the COCO
    # index itself, so adding a key in place would alter that shared state.
    anns = [
        dict(ann, category_name=cat_names[ann['category_id']])
        for ann in coco.loadAnns(ann_ids)
    ]

    data = {
        "images": coco.loadImgs(img_ids),
        "annotations": anns,
        "categories": cats,
    }

    with open(save_path, 'w') as f:
        json.dump(data, f)

# Write one annotation file per split into the current working directory.
annotation_outputs = (
    (train_ids, 'train_annotations.json'),
    (test_ids, 'test_annotations.json'),
)
for split_ids, annotation_file in annotation_outputs:
    filter_annotations(coco, split_ids, os.path.join(os.getcwd(), annotation_file))