# 1. Extract Multi-instance images from COCO

### 1.1 3x3 grid image with annotations

In [17]:
from PIL import Image
import os
from pycocotools.coco import COCO
import json


# Loads an image, resizes it to 256x256, scales its bounding boxes, and saves the resized image. Returns the updated annotations.
def save_image(img_info, anns, img_dir, output_dir):
    """Resize one image to 256x256, save it, and return rescaled annotations.

    Args:
        img_info: COCO image record; only ``img_info['file_name']`` is read.
        anns: Iterable of annotation dicts, each with a ``'bbox'`` entry in
            ``[x, y, w, h]`` form. The dicts are NOT modified.
        img_dir: Directory that contains the source image.
        output_dir: Directory the resized image is written to (created if
            it does not exist).

    Returns:
        A list of annotation copies whose ``'bbox'`` is scaled to the resized
        image and whose ``'area'`` is the scaled box area ``w * h``, or
        ``None`` when the source image file does not exist.
    """
    target_size = 256

    # normpath handles mixed path separators in the joined path
    img_path = os.path.normpath(os.path.join(img_dir, img_info['file_name']))
    os.makedirs(output_dir, exist_ok=True)

    # Check if the file exists before opening
    if not os.path.exists(img_path):
        print(f"Skipping {img_path} - File not found.")
        return None

    # The context manager closes the underlying file handle; resize() returns
    # an independent in-memory image that stays valid afterwards.
    with Image.open(img_path) as img:
        width, height = img.size
        resized = img.resize((target_size, target_size))
    resized.save(os.path.join(output_dir, img_info['file_name']))

    # Scaling factors used to map the bounding boxes onto the resized image
    scale_w = target_size / width
    scale_h = target_size / height

    new_anns = []
    for ann in anns:
        x, y, w, h = ann['bbox']
        scaled_w = w * scale_w
        scaled_h = h * scale_h

        # Work on a copy: the incoming dicts may be shared with the caller
        # (e.g. the COCO index), and scaling them in place would make a
        # second pass over the same image scale the boxes twice.
        scaled_ann = dict(ann)
        scaled_ann['bbox'] = [x * scale_w, y * scale_h, scaled_w, scaled_h]
        scaled_ann['area'] = scaled_w * scaled_h
        new_anns.append(scaled_ann)

    return new_anns



def update_annotation_file(new_images, updated_anns, original_ann_file):
    """Replace the images and annotations stored in a COCO annotation file.

    The file is loaded, its ``'images'`` and ``'annotations'`` entries are
    replaced, and the result is written back to the same path. Every other
    top-level entry (e.g. the categories) is kept unchanged.

    Args:
        new_images: List of image records to store under ``'images'``.
        updated_anns: List of annotation records to store under
            ``'annotations'``.
        original_ann_file: Path of the JSON annotation file to rewrite.

    Raises:
        TypeError: If the new content is not JSON serializable. The existing
            file is left untouched in that case.
    """
    # Load the original annotation file to retrieve the categories information
    with open(original_ann_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Update the images and annotations fields
    data['images'] = new_images
    data['annotations'] = updated_anns

    # Write to a sibling temp file first and swap it in afterwards: opening
    # the real file with 'w' truncates it immediately, so a failure during
    # json.dump would otherwise destroy the existing annotations.
    tmp_path = f"{original_ann_file}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(data, f)
        os.replace(tmp_path, original_ann_file)
    finally:
        # After a successful replace the temp file is gone; this only
        # cleans up the partial file left behind by a failed write.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)



def extract_images(cat_names, img_dir, output_dir, original_ann_file):
    """Extract, resize and re-annotate the images that hold foreground objects.

    Requirements implemented here:
      1. An image is kept only if it contains at least one foreground object,
         i.e. an object from one of the categories 'pizza', 'cat', 'bus'.
      2. A foreground object must have an area larger than 200x200 = 40000
         pixels. An image may hold several foreground objects (multi-instance
         localization); an image with none is discarded.
      3. Kept images are saved resized to 256x256 and their bounding boxes
         are scaled accordingly (done by ``save_image``).

    The function reads the module-level ``coco`` object, which must already be
    loaded for the split being processed.

    Args:
        cat_names: Category names used to select candidate images. An image
            is a candidate if it contains ANY of these categories.
        img_dir: Directory that contains the source images.
        output_dir: Directory the resized images are written to.
        original_ann_file: Annotation file that is rewritten with the kept
            images and their scaled annotations.
    """
    min_area = 40000  # 200x200
    target_category = ["pizza", "cat", "bus"]

    cat_ids = coco.getCatIds(catNms=cat_names)

    # coco.getImgIds(catIds=[a, b]) returns only images containing BOTH a and
    # b, and returns every image when the list is empty. Query one category
    # at a time and take the union so "any of cat_names" is what is selected.
    img_ids = set()
    for cat_id in cat_ids:
        img_ids.update(coco.getImgIds(catIds=[cat_id]))

    # Resolve the foreground category ids once instead of loading the
    # category record for every single annotation.
    target_cat_ids = set(coco.getCatIds(catNms=target_category))

    valid_images = []
    updated_anns_total = []

    # Sorted for a reproducible processing and output order
    for img_id in sorted(img_ids):
        img_info = coco.loadImgs(img_id)[0]  # image record, stored with the kept images

        # Non-crowd annotations of this image (bbox, category id, area, ...)
        ann_ids = coco.getAnnIds(imgIds=img_id, iscrowd=False)

        # Keep only large-enough foreground objects
        valid_anns = [
            ann
            for ann in coco.loadAnns(ann_ids)
            if ann['category_id'] in target_cat_ids and ann['area'] > min_area
        ]
        if not valid_anns:
            continue

        # save_image returns nothing when the image file is missing
        new_anns = save_image(img_info, valid_anns, img_dir, output_dir)
        if new_anns:
            valid_images.append(img_info)
            updated_anns_total.extend(new_anns)

    update_annotation_file(valid_images, updated_anns_total, original_ann_file)



# Set COCO dataset paths
# data_dir = os.getcwd()

#  mac users
# ann_file = os.path.join(data_dir, "annotations/instances_train2014.json") 
# image_dir = os.path.join(data_dir, "train2014/train2014")  

# windows users
ann_file_train = "./../HW6/annotations/instances_train2014.json"
image_dir_train = "./../HW6/train2014/train2014" 
# NOTE(review): unlike the training paths this one is not under ./../HW6 - confirm it is intended
image_dir_val = "./val2014/val2014"

ann_file_val = "./../HW6/annotations/instances_val2014.json"
output_dir_train = "./../data/Multi-instance_images_from_COCO(HW7)/train"
output_dir_val = "./../data/Multi-instance_images_from_COCO(HW7)/val"

original_ann_file = "./../HW6/annotations/instances_train2014.json"


# Ensure output directories exist
os.makedirs(output_dir_train, exist_ok=True)
os.makedirs(output_dir_val, exist_ok=True)


# NOTE: this rewrites the COCO annotation files in place, so the cell is not
# re-runnable without restoring the original annotation files first.
for split_ann_file, split_img_dir, split_output_dir in (
    (ann_file_train, image_dir_train, output_dir_train),
    (ann_file_val, image_dir_val, output_dir_val),
):
    # Load COCO dataset (extract_images reads this module-level object)
    coco = COCO(split_ann_file)

    # extract_images rewrites the annotation file on every call, so running
    # it once per category would leave only the last category's result on
    # disk. Collect each call's output and write the union once at the end.
    merged_images = {}
    merged_anns = {}
    for cat_name in ("pizza", "cat", "bus"):
        extract_images([cat_name], img_dir=split_img_dir, output_dir=split_output_dir,
                       original_ann_file=split_ann_file)

        with open(split_ann_file, 'r') as f:
            extracted = json.load(f)

        # Keep the first version seen of every record: an image containing
        # several of the categories is returned by more than one call.
        for image_record in extracted['images']:
            merged_images.setdefault(image_record['id'], image_record)
        for ann_record in extracted['annotations']:
            merged_anns.setdefault(ann_record['id'], ann_record)

    update_annotation_file(list(merged_images.values()), list(merged_anns.values()), split_ann_file)

loading annotations into memory...
Done (t=29.82s)
creating index...
index created!
loading annotations into memory...
Done (t=15.14s)
creating index...
index created!


In [None]:
import matplotlib.pyplot as plt
from skimage import io, color
import numpy as np
import cv2

# this code is borrowed from HW7.pdf
# Draws the bounding boxes and class names of one randomly chosen image.
local_image_dir = "./../data/Multi-instance_images_from_COCO(HW7)/train"
ann_file_train = "./../HW6/annotations/instances_train2014.json"


class_list = ['pizza', 'cat', 'bus']


###########################
# Mapping from COCO label to Class indices
coco_labels_inverse = {}
coco = COCO(ann_file_train)
catIds = coco.getCatIds(catNms=class_list)
categories = coco.loadCats(catIds)
categories.sort(key=lambda x: x['id'])
print(categories)

for idx, in_class in enumerate(class_list):
    for c in categories:
        if c['name'] == in_class:
            coco_labels_inverse[c['id']] = idx
print(coco_labels_inverse)

#############################
# Retrieve Image list
# coco.getImgIds(catIds=[a, b, c]) keeps only images that contain ALL of the
# categories, which is usually an empty list here. Query each category on its
# own and take the union so any image with at least one class can be drawn.
imgIds = sorted({
    img_id
    for cat_id in catIds
    for img_id in coco.getImgIds(catIds=[cat_id])
})
if not imgIds:
    raise ValueError(f"No images found for classes {class_list} in {ann_file_train}")

#############################
# Display one random image with annotation
idx = np.random.randint(0, len(imgIds))
img = coco.loadImgs(imgIds[idx])[0]


# I = io.imread(img['coco_url']) # not using URL
local_img_path = os.path.join(local_image_dir, img['file_name'])
I = io.imread(local_img_path)

# change from grayscale to color
if len(I.shape) == 2:
    I = color.gray2rgb(I)

# pay attention to the flag, iscrowd being set to False
annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=False)
anns = coco.loadAnns(annIds)

fig, ax = plt.subplots(1, 1)
image = np.uint8(I)

for ann in anns:
    [x, y, w, h] = ann['bbox']
    label = coco_labels_inverse[ann['category_id']]
    # box outline, then the class name just above the top-left corner
    image = cv2.rectangle(image, (int(x), int(y)), (int(x + w), int(y + h)), (36, 255, 12), 2)
    image = cv2.putText(image, class_list[label], (int(x), int(y - 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (36, 255, 12), 2)

ax.imshow(image)
ax.set_axis_off()
plt.axis('tight')
plt.show()


AttributeError: 'numpy.ufunc' object has no attribute '__qualname__'

# 2. Dataloader

### 2.1 Code block showing how all parameters for the YOLO vector are generated

### 2.2 Explanation of how all parameters for the YOLO vector are generated

# 3. Training

### 3.1 Code block showing how yolo tensor is built

### 3.2 Explanation of how the YOLO tensor is built

### 3.3 BCE, CE, MSE loss curves

# 4. Evaluation

### 4.1 Code block translating the YOLO tensor to bounding-box predictions and class labels

### 4.2 Explanation of translating the YOLO tensor to bounding-box predictions and class labels

### 4.3 24 images

# 5. Bonus IoU loss with DIoU

### 5.1 BCE, CE, DIoU loss curves

### 5.2 24 images