### Mount Drive & Directory Setting

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# dataset folder used by the following cells is reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
# Make the OpenImages folder the working directory: later cells read and write
# relative paths such as "data.csv", "train.csv" and "trainval/images".
os.chdir("/content/drive/MyDrive/OpenImages")

### Import required libraries

In [None]:
!pip install detecto

In [None]:
from pycocotools.coco import COCO
from Sushree_Detecto_Custom.detecto import core, utils, visualize
from Sushree_Detecto_Custom.detecto.visualize import show_labeled_image, plot_prediction_grid
from torchvision import transforms
import matplotlib.pyplot as plt, numpy as np, pandas as pd, cv2, os, torch, shutil, random, pickle, json, tarfile, os
import torchvision
from pathlib import Path
from sklearn.utils import shuffle
%load_ext autoreload
%autoreload 2
from detecto.utils import reverse_normalize, normalize_transform, _is_iterable
import matplotlib.patches as patches
from torchvision.transforms.transforms import RandomHorizontalFlip

### Read the data and Prepare the data
#### Convert COCO format to Pascal VOC format

In [None]:
# Convert the COCO format (json) into a CSV

# Training annotation COCO path (absolute path inside the mounted Drive)
train_annot_path = '/content/drive/MyDrive/OpenImages/trainval/annotations/bbox-annotations.json'

# Load the COCO for the training set; pycocotools parses the JSON and builds
# its lookup index here (see the "creating index..." output of this cell).
train_coco = COCO(train_annot_path)

# Helper for the JSON -> CSV conversion (Detecto needs either Pascal VOC or CSV input)
def get_meta(coco):
  """Yield one record per image held in a COCO index.

  Args:
    coco: object exposing ``imgs`` (mapping of image id -> metadata dict with
      'file_name', 'width' and 'height'), ``getAnnIds(imgIds=...)`` and
      ``loadAnns(ids)``.

  Yields:
    list: ``[image_id, file_name, width, height, annotations]`` where
    ``annotations`` is whatever ``coco.loadAnns`` returns for that image.
  """
  # Snapshot the ids first so iteration is independent of later changes to coco.imgs
  for image_id in list(coco.imgs.keys()):
    record = coco.imgs[image_id]
    annotation_ids = coco.getAnnIds(imgIds=image_id)
    # Basic image properties
    file_name, width, height = record['file_name'], record['width'], record['height']
    # All annotations attached to this image
    yield [image_id, file_name, width, height, coco.loadAnns(annotation_ids)]

def convert_to_df(coco):
  """Build two DataFrames (images, annotations) from a COCO index.

  Args:
    coco: COCO-style object accepted by ``get_meta``.

  Returns:
    tuple: ``(images_df, persons_df)``, both indexed by ``image_id``.
    ``images_df`` has columns 'filename', 'width', 'height' (one row per
    image); ``persons_df`` has columns 'class' (the annotation's
    ``category_id``) and 'bbox' (one row per annotation).
  """
  image_rows = []
  annotation_rows = []
  for image_id, file_name, width, height, annotations in get_meta(coco):
    # One row per image
    image_rows.append(dict(
      filename=str(file_name),
      image_id=int(image_id),
      width=int(width),
      height=int(height),
    ))
    # One row per annotation of that image
    annotation_rows.extend(
      {'class': ann['category_id'], 'image_id': ann['image_id'], 'bbox': ann['bbox']}
      for ann in annotations
    )
  images_df = pd.DataFrame(image_rows).set_index('image_id')
  persons_df = pd.DataFrame(annotation_rows).set_index('image_id')
  return images_df, persons_df

loading annotations into memory...
Done (t=1.58s)
creating index...
index created!


In [None]:
# Get classes: look up the category ids registered for the names 'person' and
# 'car' (the recorded output of this cell is [1, 2]).
train_coco.getCatIds(['person','car'])

[1, 2]

In [None]:
images_df, persons_df = convert_to_df(train_coco)
# One row per annotation, with the image's filename/width/height joined on image_id
train_coco_df = pd.merge(images_df, persons_df, right_index=True, left_index=True)

# Convert COCO "bbox" (xmin, ymin, width, height) to Pascal VOC corners
# (xmin, ymin, xmax, ymax) in a single vectorised step. The previous per-row
# `df[col].iloc[i] = value` was chained assignment: it raises
# SettingWithCopyWarning and silently does nothing under pandas copy-on-write.
# astype(int) truncates each component toward zero before adding, like int().
bbox_xywh = np.array(train_coco_df['bbox'].tolist()).reshape(-1, 4).astype(int)
train_coco_df['xmin'] = bbox_xywh[:, 0]
train_coco_df['ymin'] = bbox_xywh[:, 1]
train_coco_df['xmax'] = bbox_xywh[:, 0] + bbox_xywh[:, 2]
train_coco_df['ymax'] = bbox_xywh[:, 1] + bbox_xywh[:, 3]

# Keep the image id as a regular column and switch to a plain 0..n-1 row index
train_coco_df['image_id'] = train_coco_df.index
train_coco_df.reset_index(drop=True, inplace=True)

# Convert "class" from the integer category id to its name:
# id 1 -> 'person', any other id -> 'car' (same rule as before).
train_coco_df['class'] = np.where(train_coco_df['class'] == 1, 'person', 'car')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [None]:
# Summary statistics of the numeric columns (image size, box corners, image id)
train_coco_df.describe()

Unnamed: 0,width,height,xmin,ymin,xmax,ymax,image_id
count,16772.0,16772.0,16772.0,16772.0,16772.0,16772.0,16772.0
mean,999.08222,742.922371,422.889339,270.059027,569.586692,435.526353,1090.303363
std,102.926163,125.908542,300.326034,174.522806,301.863908,206.517234,642.705912
min,575.0,282.0,0.0,0.0,4.0,3.0,0.0
25%,1024.0,683.0,152.75,133.0,316.0,282.0,523.0
50%,1024.0,696.0,407.5,261.0,571.0,422.0,1082.0
75%,1024.0,768.0,669.0,376.0,825.0,570.0,1630.0
max,4496.0,3000.0,3883.0,998.0,4493.0,2999.0,2238.0


In [None]:
# Remove the 'bbox' column now that the corner columns exist.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this
# cell idempotent: re-running it no longer raises KeyError for a missing column.
train_coco_df = train_coco_df.drop(columns=['bbox'], errors='ignore')

In [None]:
# Save the data into the drive. The path is relative, so the file is written
# to the current working directory; the row index is not written.
train_coco_df.to_csv("data.csv", index=False)

In [None]:
# Shuffle all annotation rows. A fixed random_state makes the shuffle, and
# therefore the train/val/test split built from it, reproducible across runs.
data = train_coco_df.sample(frac=1, random_state=42).reset_index(drop=True)

### Train-Validation-Test split
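- 80% of the data is used for training + validation; the remaining 20% is held out for testing.
- The training + validation portion is split again: 80% for training and 20% for validation.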

In [None]:
# Split by IMAGE, not by row. Each row of `data` is a single annotation, so a
# row-wise slice scatters the boxes of one image across train/val/test: the
# test images leak into training and every split sees only part of an image's
# ground truth. `unique()` keeps first-appearance order, so the shuffle done
# on `data` still determines which images land in which split.
image_files = data['filename'].unique()
n_train_val_images = int(len(image_files)*0.80)
n_train_images = int(n_train_val_images*0.80)

train_val = data[data['filename'].isin(image_files[:n_train_val_images])]
test = data[data['filename'].isin(image_files[n_train_val_images:])]
train = train_val[train_val['filename'].isin(image_files[:n_train_images])]
val = train_val[train_val['filename'].isin(image_files[n_train_images:n_train_val_images])]

In [None]:
# Persist the three splits as CSV files in the working directory (no index
# column); 'train.csv' and 'val.csv' are read back when the datasets are built.
train.to_csv("train.csv", index=False)
val.to_csv("val.csv", index=False)
test.to_csv("test.csv", index=False)

In [None]:
# Number of rows (annotations, not images) in each split
len(train), len(val), len(test)

(10733, 2684, 3355)

In [None]:

# Image transformations: convert to PIL, resize, apply random flips and
# saturation jitter, then convert back to a tensor and normalize.
# NOTE(review): torchvision's RandomVerticalFlip only transforms the image it
# is given. Confirm the custom core.Dataset also flips the bounding boxes for
# it; otherwise vertically flipped samples are trained with wrong boxes.

custom_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(1024),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ColorJitter(saturation=0.3),
    transforms.ToTensor(),
    utils.normalize_transform(),
])

In [None]:
# Training data: augmented samples, served in shuffled batches of 4.
# NOTE(review): the stock detecto CSV dataset looks rows up by `image_id == idx`
# and therefore expects ids 0..N-1 in each CSV. Confirm the custom core.Dataset
# copes with the COCO image ids written to train.csv / val.csv.
train_dataset = core.Dataset('train.csv', 'trainval/images', transform=custom_transforms)
train_loader = core.DataLoader(train_dataset, batch_size=4, shuffle=True)

# Validation data must be deterministic: same resize/normalisation as training
# but WITHOUT the random flips and colour jitter, so validation metrics are
# comparable from epoch to epoch.
val_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(1024),
    transforms.ToTensor(),
    utils.normalize_transform(),
])
val_dataset = core.Dataset('val.csv', 'trainval/images', transform=val_transforms)

### Model building and training

In [None]:
# Search space for the training grid: every model is trained with every
# optimizer and every learning rate (3 x 3 x 3 = 27 runs).
models = ['fasterrcnn_resnet50_fpn', 'fasterrcnn_mobilenet_v3_large_fpn', 'fasterrcnn_mobilenet_v3_large_320_fpn']
optimizers = ["RMSprop", "Adamax", "AdamW"]
learning_rate = [0.001, 0.01, 0.1]
# Number of epochs for each individual run
epoch = 100
# Train on the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth


  0%|          | 0.00/160M [00:00<?, ?B/s]

In [None]:
# SummaryWriter is not imported in the import cell, so the loop used to fail
# with NameError on a fresh kernel.
# NOTE(review): assumes the custom `fit(writer=...)` expects the PyTorch
# TensorBoard writer - confirm.
from torch.utils.tensorboard import SummaryWriter

# Grid search: train one model per (architecture, optimizer, learning rate)
for model_name in models:
    for optimizer_name in optimizers:
        for lr in learning_rate:

            # One folder per run (folder name: model_optimizer_learning_rate)
            # holding the TensorBoard events, a readme, the losses and the model
            path_tensorboard = "runs/trial_detecto/"+model_name+"_"+optimizer_name+"_"+str(lr)
            os.makedirs(path_tensorboard, exist_ok=True)
            with open(os.path.join(path_tensorboard, "readme.txt"), 'w') as text:
                # writelines() adds no separators, so the three entries used to
                # be glued together on a single line
                text.write("\n".join([
                    "Model Name: "+model_name,
                    "Optimizer: "+optimizer_name,
                    "Learning Rate : "+str(lr),
                ]) + "\n")

            writer = SummaryWriter(path_tensorboard)
            try:
                model = core.Model(['person', 'car'], device=device, model_name=model_name)
                losses = model.fit(train_loader, writer=writer, epochs=epoch, learning_rate=lr,
                                   verbose=False, optimizer=optimizer_name)
                # Dump the losses INSIDE the run folder (the "/" was missing, so
                # the file landed next to the folder as "<run>losses.pkl")
                with open(os.path.join(path_tensorboard, "losses.pkl"), 'wb') as loss:
                    pickle.dump(losses, loss)
                # Save the trained network in the same run folder
                torch.save(model._model, os.path.join(path_tensorboard, "model.pth"))
            finally:
                # close() flushes pending events and releases the event file,
                # even when a run fails part-way through
                writer.close()

### Model Inference

In [None]:
# Load a trained person/car detector from the working directory.
# NOTE(review): the training cell saves to "runs/trial_detecto/<run>/model.pth";
# "person_car_detecto_model.pth" is not written anywhere in the visible
# notebook - confirm where this checkpoint comes from.
model_load = core.Model.load("person_car_detecto_model.pth", ['person', 'car'])

##### Helper functions for inference

In [None]:
def show_labeled_image(image, boxes, labels=None, scores=None):
    """Display an image with bounding boxes and optional labels and scores.

    Args:
        image: image to draw; a ``torch.Tensor`` is reverse-normalized and
            converted to a PIL image first, anything else goes straight to
            ``imshow``.
        boxes: tensor of shape (N, 4), or (4,) for a single box, holding
            (xmin, ymin, xmax, ymax).
        labels: optional single label or sequence with one label per box,
            drawn in red next to each box.
        scores: optional sequence with one score per box, drawn in blue and
            rounded to one decimal.
    """
    fig, ax = plt.subplots(1)
    # If the image is already a tensor, convert it back to a PILImage
    # and reverse normalize it
    if isinstance(image, torch.Tensor):
        image = reverse_normalize(image)
        image = transforms.ToPILImage()(image)
    ax.imshow(image)

    # Show a single box or multiple if provided
    if boxes.ndim == 1:
        boxes = boxes.view(1, 4)

    # A lone string label belongs to the single box; without this, indexing it
    # would pick out individual characters.
    if isinstance(labels, str):
        labels = [labels]
    # `if labels:` raises for multi-element arrays/tensors; test explicitly.
    has_labels = labels is not None and len(labels) > 0

    # Plot each box
    for i in range(boxes.shape[0]):
        # Plain floats keep tensor objects out of matplotlib
        x_min, y_min, x_max, y_max = (float(v) for v in boxes[i])
        rect = patches.Rectangle((x_min, y_min), x_max - x_min, y_max - y_min,
                                 linewidth=1, edgecolor='r', facecolor='none')
        if has_labels:
            ax.text(x_min + 5, y_min - 5, '{}'.format(labels[i]), color='red')
        if scores is not None:
            ax.text(x_min + 15, y_min - 15, '{}'.format(round(float(scores[i]), 1)), color='blue')

        ax.add_patch(rect)

    plt.show()

In [None]:
# Inference

def inference(image, model, thresh):
    """Run the detector on one image file, plot and return confident detections.

    Args:
        image: path handed to ``utils.read_image``.
        model: object whose ``predict(img)`` returns ``(labels, boxes, scores)``.
        thresh: detections with a score strictly above this value are kept.

    Returns:
        tuple: ``(filtered_labels, filtered_boxes, filtered_scores)`` for the
        kept detections; labels come back as a list.
    """
    img = utils.read_image(image)
    labels, boxes, scores = model.predict(img)

    # Positions of the detections that clear the threshold
    keep = np.where(scores > thresh)
    kept_positions = keep[0].tolist()

    filtered_labels = [labels[pos] for pos in kept_positions]
    filtered_boxes = boxes[keep]
    filtered_scores = scores[keep]

    # Side effect: draw the kept detections on top of the image
    show_labeled_image(img, filtered_boxes, filtered_labels, filtered_scores)
    return filtered_labels, filtered_boxes, filtered_scores

#### IoU Calculation on Inference

In [None]:
def intersection_over_union(gt_box, pred_box, box_format="xyxy"):
    """Compute the intersection-over-union of two axis-aligned boxes.

    The boxes passed in this notebook are corner boxes (xmin, ymin, xmax,
    ymax), so that is the default. The previous implementation read them as
    (x, y, width, height), which inflated every box, and it returned a
    positive "intersection" for boxes that were disjoint on both axes.

    Args:
        gt_box: ground-truth box, a sequence of 4 numbers (or 0-d tensors).
        pred_box: predicted box in the same format.
        box_format: "xyxy" for (xmin, ymin, xmax, ymax) (default) or "xywh"
            for (xmin, ymin, width, height).

    Returns:
        tuple: ``(iou, intersection, union)`` as floats. ``iou`` is 0.0 when
        the boxes do not overlap or the union area is zero.

    Raises:
        ValueError: if ``box_format`` is neither "xyxy" nor "xywh".
    """
    if box_format not in ("xyxy", "xywh"):
        raise ValueError("box_format must be 'xyxy' or 'xywh', got {!r}".format(box_format))

    def _corners(box):
        # float() accepts Python numbers as well as 0-d tensors / numpy scalars
        x0, y0, a, b = (float(v) for v in box[:4])
        if box_format == "xyxy":
            return x0, y0, a, b
        return x0, y0, x0 + a, y0 + b

    gt_x1, gt_y1, gt_x2, gt_y2 = _corners(gt_box)
    pr_x1, pr_y1, pr_x2, pr_y2 = _corners(pred_box)

    # Clamp at zero: disjoint boxes have no overlap along that axis
    inter_box_w = max(0.0, min(gt_x2, pr_x2) - max(gt_x1, pr_x1))
    inter_box_h = max(0.0, min(gt_y2, pr_y2) - max(gt_y1, pr_y1))

    intersection = inter_box_w * inter_box_h
    gt_area = (gt_x2 - gt_x1) * (gt_y2 - gt_y1)
    pred_area = (pr_x2 - pr_x1) * (pr_y2 - pr_y1)
    union = gt_area + pred_area - intersection

    # Two degenerate (zero-area) boxes would otherwise divide by zero
    iou = intersection / union if union > 0 else 0.0

    return iou, intersection, union

In [None]:
# Mean IoU per test image: filename -> mean of the best IoU values
image_iou_dictionary = {}

# `test` holds one row per annotation, so a filename appears once per box.
# Iterate over unique filenames to run inference only once per image.
for filename in test['filename'].unique():

    filtered_labels, filtered_boxes, filtered_scores = inference("/content/drive/MyDrive/OpenImages/trainval/images/"+filename, model_load, thresh=0.2)
    print(filename)
    # All ground-truth boxes of this image as (xmin, ymin, xmax, ymax)
    ground_truth = data[data['filename']==filename][['xmin','ymin','xmax','ymax']].values.tolist()

    # IoU of every (prediction, ground truth) pair, stored as plain floats
    iou = []
    for pred_box in filtered_boxes:
        for gt_box in ground_truth:
            ious, intersection, union = intersection_over_union(gt_box, pred_box)
            iou.append(float(ious))

    if iou:
        # Average the len(ground_truth) highest pairwise IoUs
        image_iou_dictionary[filename] = np.mean(np.sort(iou)[::-1][:len(ground_truth)])
    else:
        # No detection above the threshold: score the image 0 instead of the
        # NaN (plus RuntimeWarning) that np.mean returns for an empty list
        image_iou_dictionary[filename] = 0.0

In [None]:
# Display the per-image IoU scores (filename -> mean IoU)
image_iou_dictionary