In [2]:
import os
import sys
import os
os.chdir('/scratch/nhl256/dl_project/code/')
from PIL import Image

import random

import numpy as np
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
matplotlib.rcParams['figure.figsize'] = [5, 5]
matplotlib.rcParams['figure.dpi'] = 200

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

from data_helper import *
from nhung_data_helper import FastRCNNLabeledDataset
from helper import *


import math
import pickle
import time
import copy

### 05/04/20

- Concat 6 images along 1 dim before passing through the model
- Saving the feature extractor and the fasterRCNN model
- ResNet50 for feature extraction using the weights from self-supervised task (/scratch/bva212/dl_project/jigsaw_best_model_fe.pth)
- optimizer = torch.optim.Adam(params, lr=0.0001)


In [3]:
# All the images are saved in image_folder
# All the labels are saved in the annotation_csv file

# image_folder = '../data'
# annotation_csv = '../data/annotation.csv'

image_folder = 'data/data'
annotation_csv = 'data/data/annotation.csv'


cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if cuda else "cpu")
print(device)

# image_folder = '/Users/nhungle/Downloads/dl20_data'
# annotation_csv = '/Users/nhungle/Downloads/dl20_data/annotation.csv'

cuda:0


In [4]:
print(cuda)

True


In [5]:
labeled_scene_index = np.arange(106, 134)
random.seed(1008)
random.shuffle(labeled_scene_index)
train_labeled_scene_index = labeled_scene_index[:22]
val_labeled_scene_index = labeled_scene_index[22:26]
test_labeled_scene_index = labeled_scene_index[26:]
BATCH_SIZE=1


transform = torchvision.transforms.ToTensor()
fasterRCNN_trainset = FastRCNNLabeledDataset(image_folder=image_folder,
                                  annotation_file=annotation_csv,
                                  scene_index=train_labeled_scene_index,
                                  transform=transform,
                                  extra_info=True
                                 )
train_loader = torch.utils.data.DataLoader(fasterRCNN_trainset,
                                          batch_size=1,
                                          shuffle=True,
                                          num_workers=2, collate_fn=collate_fn)


fasterRCNN_valset = FastRCNNLabeledDataset(image_folder=image_folder,
                                  annotation_file=annotation_csv,
                                  scene_index=val_labeled_scene_index,
                                  transform=transform,
                                  extra_info=True
                                 )
val_loader = torch.utils.data.DataLoader(fasterRCNN_valset,
                                          batch_size=1,
                                          shuffle=True,
                                          num_workers=2, collate_fn=collate_fn)


fasterRCNN_testset = FastRCNNLabeledDataset(image_folder=image_folder,
                                  annotation_file=annotation_csv,
                                  scene_index=test_labeled_scene_index,
                                  transform=transform,
                                  extra_info=True
                                 )
test_loader = torch.utils.data.DataLoader(fasterRCNN_testset,
                                          batch_size=1,
                                          shuffle=True,
                                          num_workers=2, collate_fn=collate_fn)

In [6]:
train_loader.__len__()

2772

### Get Feature Extractor

In [9]:
feature_extractor = torchvision.models.resnet50(pretrained=False)
feature_extractor = nn.Sequential(*list(feature_extractor.children())[:-2])
feature_extractor.load_state_dict(torch.load('/scratch/bva212/dl_project/jigsaw_best_model_fe.pth'))
feature_extractor.to(device)
for param in feature_extractor.parameters():
    param.requires_grad = True

In [10]:
def concat_features(features, dim = 2):
    #dim 0 ==> stacking the images in the channel dimension
    #dim 1 ==> stacking the images in row dimension
    #dim 2 ==> stacking the images in column dimension
    tensor_tuples = torch.unbind(features, dim=0)
    concatenated_fm = torch.cat(tensor_tuples, dim=dim)
    return concatenated_fm 

### Inspect if it works on a pretrained FasterRCNN

In [12]:
sample, targets = next(iter(train_loader))
sample = torch.stack(sample)
sample = sample.to(device)
batchsize = sample.shape[0]
fe_batch = []
for i in range(batchsize):
    image_tensor = sample[i]
    features = feature_extractor(image_tensor)
    #print(features.shape)
    features = concat_features(features)
    features = features.view(3, 2048, 160)
    #print(features.shape)
    fe_batch.append(features)

images = list(image.to(device) for image in fe_batch)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

In [13]:
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model = model.to(device)
output1 = model(images, targets)
output1

{'loss_classifier': tensor(0.2564, device='cuda:0', grad_fn=<NllLossBackward>),
 'loss_box_reg': tensor(0.0036, device='cuda:0', grad_fn=<DivBackward0>),
 'loss_objectness': tensor(4.5261, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>),
 'loss_rpn_box_reg': tensor(1.9608, device='cuda:0', grad_fn=<DivBackward0>)}

## Train One Epoch

In [14]:
import math
import sys
import time
import torch

import utils

In [16]:
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model = model.to(device)
# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(params, lr=0.0001)
# optimizer = torch.optim.SGD(params, lr=0.005,
#                             momentum=0.9, weight_decay=0.0005)
# and a learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=3,
                                                gamma=0.1)

# let's train it for 10 epochs
num_epochs = 1
epoch = 0
print_freq = 20

In [19]:
def train_one_epoch(feature_extractor, model, optimizer, data_loader, device, epoch, print_freq):
    feature_extractor.train()
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    if epoch == 0:
        warmup_factor = 1. / 1000
        warmup_iters = min(1000, len(data_loader) - 1)

        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    for sample, targets in metric_logger.log_every(data_loader, print_freq, header):
        sample = torch.stack(sample)
        sample = sample.to(device)
        batchsize = sample.shape[0]
        fe_batch = []
        for i in range(batchsize):
            image_tensor = sample[i]
            features = feature_extractor(image_tensor)
            #print(features.shape)
            features = concat_features(features)
            features = features.view(3, 2048, 160)
            #print(features.shape)
            fe_batch.append(features)

        images = list(image.to(device) for image in fe_batch)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        #print(loss_dict)

        losses = sum(loss for loss in loss_dict.values())
        #print(losses)

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss_value = losses_reduced.item()

        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

    return losses

### Inspect train_one_epoch

In [20]:
train_one_epoch(feature_extractor, model, optimizer, test_loader, device, epoch, print_freq)

Epoch: [0]  [  0/252]  eta: 0:04:00  lr: 0.000000  loss: 8.4684 (8.4684)  loss_classifier: 0.2124 (0.2124)  loss_box_reg: 0.0096 (0.0096)  loss_objectness: 5.2578 (5.2578)  loss_rpn_box_reg: 2.9887 (2.9887)  time: 0.9557  data: 0.2916  max mem: 3323
Epoch: [0]  [ 20/252]  eta: 0:00:54  lr: 0.000008  loss: 4.9731 (5.2540)  loss_classifier: 0.1493 (0.1541)  loss_box_reg: 0.0048 (0.0063)  loss_objectness: 2.8656 (3.1229)  loss_rpn_box_reg: 1.7637 (1.9708)  time: 0.2006  data: 0.0038  max mem: 3889
Epoch: [0]  [ 40/252]  eta: 0:00:46  lr: 0.000016  loss: 1.0549 (3.3331)  loss_classifier: 0.0761 (0.1229)  loss_box_reg: 0.0018 (0.0043)  loss_objectness: 0.1014 (1.6905)  loss_rpn_box_reg: 0.8280 (1.5153)  time: 0.2015  data: 0.0042  max mem: 3889
Epoch: [0]  [ 60/252]  eta: 0:00:40  lr: 0.000024  loss: 0.8383 (2.5197)  loss_classifier: 0.0880 (0.1098)  loss_box_reg: 0.0014 (0.0035)  loss_objectness: 0.0224 (1.1616)  loss_rpn_box_reg: 0.7036 (1.2448)  time: 0.1957  data: 0.0036  max mem: 3889


tensor(0.5374, device='cuda:0', grad_fn=<AddBackward0>)

In [22]:
sample, targets = next(iter(train_loader))
sample = torch.stack(sample)
sample = sample.to(device)
batchsize = sample.shape[0]
fe_batch = []
for i in range(batchsize):
    image_tensor = sample[i]
    features = feature_extractor(image_tensor)
    #print(features.shape)
    features = concat_features(features)
    features = features.view(3, 2048, 160)
    #print(features.shape)
    fe_batch.append(features)

images = list(image.to(device) for image in fe_batch)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

In [23]:
model.eval()
predictions = model(images)
print(predictions)

[{'boxes': tensor([[ 2.5249, 12.3105,  8.4614, 16.3655],
        [ 2.8026, 15.6608,  8.3191, 19.5138],
        [ 0.2205, 11.7770, 11.2676, 21.1414],
        [ 2.3730, 10.1673,  8.3150, 14.2311],
        [ 4.2386, 17.2535, 10.2578, 21.1323],
        [ 4.0106, 21.9735,  9.0826, 25.4802],
        [ 4.5081, 31.6906,  9.4630, 34.7576],
        [ 4.6023, 27.9308,  9.6325, 31.2239],
        [ 5.4734, 15.9906, 11.2388, 19.9430],
        [ 5.5933, 32.4770, 10.3822, 35.6822],
        [ 2.7287, 15.6712,  8.3585, 19.4111],
        [ 4.1645, 17.2663, 10.3018, 21.0387],
        [ 2.8582,  7.2621,  8.4121, 10.6865],
        [ 3.9470, 21.9863,  9.1202, 25.3956],
        [ 5.4094, 16.0035, 11.2775, 19.8541],
        [ 4.8536, 23.1879, 10.6860, 27.1027],
        [ 2.4670, 16.6693,  8.8344, 20.6607],
        [ 0.2549, 10.0921, 16.7064, 26.0459],
        [ 6.9108, 26.4959, 12.0508, 30.1776],
        [ 2.4453, 12.3143,  8.4999, 16.2561],
        [ 6.5598, 24.2177, 12.2212, 27.9088]], device='cuda:0',
     

In [None]:
# model_test = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
# model_test = model_test.to(device)
# model_test.eval()
# x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
# x = list(image.to(device) for image in x)
# predictions = model_test(x)
# predictions

## Evaluate One Epoch

In [24]:
sample, targets = next(iter(val_loader))
sample = torch.stack(sample)
sample = sample.to(device)

In [31]:
def reorder_coord(pred_bboxes):
    xmin, ymin, xmax, ymax = pred_bboxes.unbind(1)
    return torch.stack((xmax, xmax, xmin, xmin, ymax, ymin, ymax, ymin), dim=1).view(-1, 2, 4)

def get_bounding_boxes(samples):
    # samples is a cuda tensor with size [batch_size, 6, 3, 256, 306]
    # You need to return a tuple with size 'batch_size' and each element is a cuda tensor [N, 2, 4]
    # where N is the number of object

    #Preparing inputs
    batchsize = samples.shape[0]
    fe_batch = []
    for i in range(batchsize):
        image_tensor = sample[i]
        features = feature_extractor(image_tensor)
        #print(features.shape)
        features = concat_features(features)
        features = features.view(3, 2048, 160)
        #print(features.shape)
        fe_batch.append(features)

    images = list(image.to(device) for image in fe_batch)
    predictions = model(images)
    res = []
    for i in range(len(predictions)):
        prediction = predictions[i]
        pred_bboxes = prediction['boxes']
        reorder_pred_bboxes = reorder_coord(pred_bboxes)
        res.append(reorder_pred_bboxes)

    return res

In [32]:
def eval_one_epoch(feature_extractor, model, dataloader):
    model.eval()
    feature_extractor.eval()
    total = 0
    total_ats_bounding_boxes = 0
    for i, data in enumerate(dataloader):
        total += 1
        sample, target = data
        sample = torch.stack(sample)
        sample = sample.cuda()

        predicted_bounding_boxes = get_bounding_boxes(sample)[0].cpu()
        

        ats_bounding_boxes = compute_ats_bounding_boxes(predicted_bounding_boxes,
                                                        target[0]['bounding_box'])
#         print('Number of pred bboxes {}'.format(predicted_bounding_boxes.shape))
#         print('ats_bounding_boxes {}'.format(ats_bounding_boxes))

        total_ats_bounding_boxes += ats_bounding_boxes
    return total_ats_bounding_boxes

In [33]:
total_ats_bounding_boxes = eval_one_epoch(feature_extractor, model, test_loader)


In [34]:
total_ats_bounding_boxes

tensor(0.0043)

## Train and Eval for multiple epochs

In [35]:
# Get model
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model = model.to(device)

# Get feature extractor
feature_extractor = torchvision.models.resnet50(pretrained=False)
feature_extractor = nn.Sequential(*list(feature_extractor.children())[:-2])
feature_extractor.load_state_dict(torch.load('/scratch/bva212/dl_project/jigsaw_best_model_fe.pth'))
feature_extractor.to(device)
for param in feature_extractor.parameters():
    param.requires_grad = True


# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(params, lr=0.0001)
# optimizer = torch.optim.SGD(params, lr=0.005,
#                             momentum=0.9, weight_decay=0.0005)
# and a learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=3,
                                                gamma=0.1)

# let's train it for 10 epochs
num_epochs = 10
epoch = 0
print_freq = 500

In [36]:
import pickle

In [37]:
def train_val(feature_extractor, model, train_loader, val_loader, num_epochs=10):
    best_model_wts = {'feature_extractor': copy.deepcopy(feature_extractor.state_dict()),
                       'fasterRCNN': copy.deepcopy(model.state_dict())
                                      }
    losses = []
    total_ats = []
    best_total_ats = -1
    
    for epoch in range(num_epochs):
        loss = train_one_epoch(feature_extractor, model, optimizer, train_loader, device, epoch, print_freq)
        total_ats_bounding_boxes = eval_one_epoch(feature_extractor, model, val_loader)
        losses.append(loss)
        total_ats.append(total_ats_bounding_boxes)
        print('epoch {} loss {} total_ats {}'.format(epoch, loss, total_ats))

        if total_ats_bounding_boxes > best_total_ats:
            best_total_ats = total_ats_bounding_boxes
            best_model_wts = {'feature_extractor': copy.deepcopy(feature_extractor.state_dict()),
                               'fasterRCNN': copy.deepcopy(model.state_dict())
                                     }
            torch.save(best_model_wts, '/scratch/nhl256/dl_project/model/object_detection_resnet50_0504_epoch{}.pth'.format(epoch))

    return losses, total_ats, best_model_wts

In [38]:
losses, total_ats, best_model_wts = train_val(feature_extractor, model,
                                              train_loader, val_loader,
                                              num_epochs=10)

Epoch: [0]  [   0/2772]  eta: 0:31:18  lr: 0.000000  loss: 8.1537 (8.1537)  loss_classifier: 0.4193 (0.4193)  loss_box_reg: 0.0219 (0.0219)  loss_objectness: 5.4766 (5.4766)  loss_rpn_box_reg: 2.2359 (2.2359)  time: 0.6775  data: 0.2160  max mem: 6691
Epoch: [0]  [ 500/2772]  eta: 0:07:35  lr: 0.000050  loss: 0.6420 (1.1176)  loss_classifier: 0.0852 (0.1452)  loss_box_reg: 0.0005 (0.0052)  loss_objectness: 0.0040 (0.2946)  loss_rpn_box_reg: 0.5094 (0.6726)  time: 0.1987  data: 0.0038  max mem: 6691
Epoch: [0]  [1000/2772]  eta: 0:05:52  lr: 0.000100  loss: 0.5096 (0.8550)  loss_classifier: 0.0404 (0.1076)  loss_box_reg: 0.0001 (0.0046)  loss_objectness: 0.0048 (0.1535)  loss_rpn_box_reg: 0.4521 (0.5893)  time: 0.1979  data: 0.0038  max mem: 6691
Epoch: [0]  [1500/2772]  eta: 0:04:12  lr: 0.000100  loss: 0.4705 (0.7575)  loss_classifier: 0.0387 (0.0901)  loss_box_reg: 0.0001 (0.0045)  loss_objectness: 0.0080 (0.1058)  loss_rpn_box_reg: 0.3839 (0.5571)  time: 0.1974  data: 0.0038  max me

In [41]:
import pickle
pickle.dump(losses, open('/scratch/nhl256/dl_project/model/object_detection_resnet50_0504_trainlosses.pickle', "wb"))
pickle.dump(total_ats, open('/scratch/nhl256/dl_project/model/object_detection_resnet40_0504_evalTotalAts.pickle', "wb"))

## Evaluate - IoU by Class

In [None]:
def prepare_pred_results(predictions):
    pred_boxes = []
    pred_labels = []
    pred_scores = []
    for prediction in predictions:
        #print(prediction)
        if len(prediction) == 0:
            continue
        boxes = prediction["boxes"]
        boxes = reorder_coord(boxes).tolist()
        scores = prediction["scores"].tolist()
        labels = prediction["labels"].tolist()

        pred_boxes.append(boxes)
        pred_labels.append(labels)
        pred_scores.append(scores)

    return pred_boxes, pred_labels, pred_scores

def reorder_coord(boxes):
    xmin, ymin, xmax, ymax = boxes.unbind(1)
    return torch.stack((ymin, xmin, ymax, xmax), dim=1)

def prepare_gt(targets):
    gt_boxes = []
    gt_labels = []
    for target in targets:
        boxes = target['boxes']
        boxes = reorder_coord(boxes).tolist()
        labels = target["labels"].tolist()
        gt_boxes.append(boxes)
        gt_labels.append(labels)
    return gt_boxes, gt_labels

In [None]:
# Make sure that bbox_a, bbox_b = np.array

def bbox_iou(bbox_a, bbox_b):
    #print(type(bbox_a), type(bbox_b))
    bbox_a = np.array(bbox_a)
    bbox_b = np.array(bbox_b)

    # print(type(bbox_a), type(bbox_b))
    # print(bbox_a.shape, bbox_b.shape)
    if bbox_a.shape[1] != 4 or bbox_b.shape[1] != 4:
        raise IndexError

    # top left (i.e., ymin, xmin)
    tl = np.maximum(bbox_a[:, None, :2], bbox_b[:, :2])
    # bottom right (i.e., ymax, xmax)
    br = np.minimum(bbox_a[:, None, 2:], bbox_b[:, 2:])

    # Area of intersection: (tl < br) = bool, (br-tl) = (ymax-ymin) 
    area_i = np.prod(br - tl, axis=2) * (tl < br).all(axis=2)
    area_a = np.prod(bbox_a[:, 2:] - bbox_a[:, :2], axis=1)
    area_b = np.prod(bbox_b[:, 2:] - bbox_b[:, :2], axis=1)

    return area_i / (area_a[:, None] + area_b - area_i)

In [None]:
def cal_TP_FP_iou(pred_bbox_c, gt_bbox_c, iou_thres=0.5):
    iou_table = bbox_iou(pred_bbox_c, gt_bbox_c)
    num_pred_bboxes = iou_table.shape[0]
    num_gt_bboxes = iou_table.shape[1]
    TP = np.zeros(num_pred_bboxes)
    FP = np.zeros(num_pred_bboxes)
    # For each pred_bounding box:
      # Find the most relevant gt_bbox (i.e., the gt_bbox with max IoU)
      # If IoU < threshold, then flag it as FP
      # If IoU >= threshold, then:
        # If that gt_bbox already has already matched with another pred_bbox:
          # Flag it as FP
        # Else:
          # Flag it as TP

    # TP only happens if the pred_bbox mathes with a gt_bbox
    for i in range(num_pred_bboxes):
        gt_bbox_index = np.argmax(iou_table[i])
        best_pred_bbox_index_for_selected_gt_bbox = np.argmax(iou_table[:,gt_bbox_index])
        if iou_table[i, gt_bbox_index] > iou_thres \
            and gt_bbox_index == best_pred_bbox_index_for_selected_gt_bbox:
            TP[i] = 1
        else:
            FP[i] = 1

    TP_cum = np.sum(TP)
    FP_cum = np.sum(FP)

    if (TP_cum + FP_cum) != num_pred_bboxes:
        print("WRONG CALCULATION OF FP")
    return TP_cum, FP_cum

In [None]:
# Test for cal_TP_FP_iou

def inspect_call_TP_FP_iou(test_images, test_targets):
    test_images = torch.stack(test_images)
    #print(test_images.shape)
    test_images = prepare_inputs(test_images)
    #print(test_images[0].shape)

    test_images = list(image.to(device) for image in test_images)
    test_targets = [{k: v.to(device) for k, v in t.items()} for t in test_targets]

    model.eval()
    predictions = model(test_images)

    pred_bboxes, pred_labels, pred_scores = prepare_pred_results(predictions)
    gt_bboxes, gt_labels = prepare_gt(test_targets)

    for pred_bbox, pred_label, pred_score, gt_bbox, gt_label in \
        zip(pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels):
        pred_bbox = np.array(pred_bbox)
        pred_score = np.array(pred_score)
        pred_label = np.array(pred_label)
        gt_bbox = np.array(gt_bbox)
        gt_label = np.array(gt_label)
        unique_share_classes = (np.unique(np.concatenate((pred_label, gt_label))))
        
        for c in unique_share_classes:
            pred_class_c_index = np.where(pred_label == c)[0]
            pred_bbox_c = pred_bbox[pred_class_c_index]
            gt_class_c_index = np.where(gt_label == c)[0]
            #print(gt_class_c_index)
            gt_bbox_c = gt_bbox[gt_class_c_index]
            num_gt_bboxes = len(gt_class_c_index)
            num_pred_bboxes = len(pred_class_c_index)
            print('class {} with {} gt_bboxes and {} pred_bboxes'.format(c, num_gt_bboxes, num_pred_bboxes))
            # print(num_gt_bboxes)
            # print(num_pred_bboxes)
            if num_pred_bboxes == 0:
                class_TP = 0
                class_FP = 0
                class_FN = num_gt_bboxes
            elif num_gt_bboxes == 0:
                class_TP = 0
                class_FP = num_pred_bboxes
                class_FN = 0
            else:
                class_TP, class_FP = cal_TP_FP_iou(pred_bbox_c, gt_bbox_c, iou_thres)
                class_FN = num_gt_bboxes - class_TP
                print(class_TP + class_FP == num_pred_bboxes)

In [None]:
# for i in range(3):
#     print('Iter {}'.format(i))
#     test_images, test_targets = next(iter(test_loader))
#     inspect_call_TP_FP_iou(test_images, test_targets)

In [None]:
def evaluate_one_batch(predictions, test_targets, res, iou_thres=0.5):

    pred_bboxes, pred_labels, pred_scores = prepare_pred_results(predictions)
    gt_bboxes, gt_labels = prepare_gt(test_targets)
    # res stores the TP_FP dict for each class
    # Each TP_FP dict stores the TP_FP for each class 
    
    batch_total_TP = 0
    batch_total_FP = 0
    batch_total_FN = 0
    batch_total_num_object = 0
    batch_res = {c: {'TP':0, 'FP': 0, 'FN': 0} for c in range(9)}

    for pred_bbox, pred_label, pred_score, gt_bbox, gt_label in \
        zip(pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels):

        pred_bbox = np.array(pred_bbox)
        pred_score = np.array(pred_score)
        pred_label = np.array(pred_label)
        gt_bbox = np.array(gt_bbox)
        gt_label = np.array(gt_label)
        unique_share_classes = (np.unique(np.concatenate((pred_label, gt_label))))
        
        for c in unique_share_classes:
            pred_class_c_index = np.where(pred_label == c)[0]
            pred_bbox_c = pred_bbox[pred_class_c_index]
            gt_class_c_index = np.where(gt_label == c)[0]
            #print(gt_class_c_index)
            gt_bbox_c = gt_bbox[gt_class_c_index]
            num_gt_bboxes = len(gt_class_c_index)
            num_pred_bboxes = len(pred_class_c_index)
            #print('class {} with {} gt_bboxes and {} pred_bboxes'.format(c, num_gt_bboxes, num_pred_bboxes))
            if num_pred_bboxes == 0:
                class_TP = 0
                class_FP = 0
                class_FN = num_gt_bboxes
            elif num_gt_bboxes == 0:
                class_TP = 0
                class_FP = num_pred_bboxes
                class_FN = 0
            else:
                class_TP, class_FP = cal_TP_FP_iou(pred_bbox_c, gt_bbox_c, iou_thres)
                class_FN = num_gt_bboxes - class_TP
                #print(class_TP + class_FP == num_pred_bboxes)

            batch_total_TP += class_TP
            batch_total_FP += class_FP
            batch_total_FN += class_FN
            batch_total_num_object += num_gt_bboxes

            batch_res[c]['TP'] += class_TP
            batch_res[c]['FP'] += class_FP
            batch_res[c]['FN'] += class_FN

            res[c]['TP'] += class_TP
            res[c]['FP'] += class_FP
            res[c]['FN'] += class_FN
            
    return res, batch_res, batch_total_TP, batch_total_FP, batch_total_FN, batch_total_num_object

In [None]:
# Inspect evaluate_one_batch
def inspect_evaluate_one_batch(test_images, test_targets, final_res):
    test_images = torch.stack(test_images)
    #print(test_images.shape)
    test_images = prepare_inputs(test_images)
    #print(test_images[0].shape)

    test_images = list(image.to(device) for image in test_images)
    test_targets = [{k: v.to(device) for k, v in t.items()} for t in test_targets]

    model.eval()
    predictions = model(test_images)

    final_res, batch_res, batch_total_TP, batch_total_FP, batch_total_FN, batch_total_num_object \
    = evaluate_one_batch(predictions, test_targets, final_res, iou_thres=0.5)

    return final_res, batch_res, batch_total_TP, batch_total_FP, batch_total_FN, batch_total_num_object

In [None]:

final_res = {c: {'TP':0, 'FP': 0, 'FN': 0} for c in range(9)}
final_TP = 0
final_FP = 0
final_FN = 0
final_num_objects = 0

# test for 2 batches
for i in range(2):
    test_images, test_targets = next(iter(test_loader))
    final_res, batch_res, batch_total_TP, batch_total_FP, batch_total_FN, batch_total_num_object \
    = inspect_evaluate_one_batch(test_images, test_targets, final_res)

    print('batch {}'.format(i))

    
    print(batch_total_TP, batch_total_FN, batch_total_num_object)
    print('cur batch res:', batch_res)
    print('final res after this batch:', final_res)

batch 0
0 20 20
cur batch res: {0: {'TP': 0, 'FP': 0, 'FN': 1}, 1: {'TP': 0, 'FP': 0, 'FN': 0}, 2: {'TP': 0, 'FP': 0, 'FN': 16}, 3: {'TP': 0, 'FP': 0, 'FN': 2}, 4: {'TP': 0, 'FP': 0, 'FN': 1}, 5: {'TP': 0, 'FP': 0, 'FN': 0}, 6: {'TP': 0, 'FP': 0, 'FN': 0}, 7: {'TP': 0, 'FP': 0, 'FN': 0}, 8: {'TP': 0, 'FP': 0, 'FN': 0}}
final res after this batch: {0: {'TP': 0, 'FP': 0, 'FN': 1}, 1: {'TP': 0, 'FP': 0, 'FN': 0}, 2: {'TP': 0, 'FP': 0, 'FN': 16}, 3: {'TP': 0, 'FP': 0, 'FN': 2}, 4: {'TP': 0, 'FP': 0, 'FN': 1}, 5: {'TP': 0, 'FP': 0, 'FN': 0}, 6: {'TP': 0, 'FP': 0, 'FN': 0}, 7: {'TP': 0, 'FP': 0, 'FN': 0}, 8: {'TP': 0, 'FP': 0, 'FN': 0}}
batch 1
0 20 20
cur batch res: {0: {'TP': 0, 'FP': 0, 'FN': 1}, 1: {'TP': 0, 'FP': 0, 'FN': 0}, 2: {'TP': 0, 'FP': 0, 'FN': 19}, 3: {'TP': 0, 'FP': 0, 'FN': 0}, 4: {'TP': 0, 'FP': 0, 'FN': 0}, 5: {'TP': 0, 'FP': 0, 'FN': 0}, 6: {'TP': 0, 'FP': 0, 'FN': 0}, 7: {'TP': 0, 'FP': 0, 'FN': 0}, 8: {'TP': 0, 'FP': 0, 'FN': 0}}
final res after this batch: {0: {'TP': 0

In [None]:
def evaluate_one_epoch(test_loader, iou_thres=0.5):
    # Evaluate. for all data point in the evaluaton set
    final_res = {c: {'TP':0, 'FP': 0, 'FN': 0} for c in range(9)}
    final_TP = 0
    final_FP = 0
    final_FN = 0
    final_num_objects = 0

    for iter_, (test_images, test_targets) in enumerate(test_loader):
        # if iter_ % 50 == 0:
        #     print('iter', iter_)
        #print('iter', iter_)
        test_images = torch.stack(test_images)
        #print(test_images.shape)
        test_images = prepare_inputs(test_images)
        #print(test_images[0].shape)

        test_images = list(image.to(device) for image in test_images)
        test_targets = [{k: v.to(device) for k, v in t.items()} for t in test_targets]

        model.eval()
        predictions = model(test_images)

        # Evaluate for one batch
        final_res, batch_res, batch_total_TP, batch_total_FP, batch_total_FN, batch_total_num_object \
                    = evaluate_one_batch(predictions, test_targets, final_res, iou_thres=0.5)

        
        final_TP += batch_total_TP
        final_FP += batch_total_FP
        final_FN += batch_total_FN
        final_num_objects += batch_total_num_object

    return final_res, final_TP, final_FP, final_FN, final_num_objects

In [None]:
final_res, final_TP, final_FP, final_FN, final_num_objects = evaluate_one_epoch(test_loader, iou_thres=0.5)

iter 0
iter 50
iter 100


In [None]:
def evaluate_threst_score(TP, FP, FN):
    return (TP / (TP + FP + FN))

In [None]:
final_res

{0: {'FN': 158, 'FP': 0, 'TP': 0},
 1: {'FN': 0, 'FP': 0, 'TP': 0},
 2: {'FN': 3074, 'FP': 0, 'TP': 0},
 3: {'FN': 390, 'FP': 0, 'TP': 0},
 4: {'FN': 27, 'FP': 0, 'TP': 0},
 5: {'FN': 0, 'FP': 0, 'TP': 0},
 6: {'FN': 20, 'FP': 0, 'TP': 0},
 7: {'FN': 0, 'FP': 0, 'TP': 0},
 8: {'FN': 0, 'FP': 0, 'TP': 0}}

## Train and Evaluate for Multiple Epochs

In [None]:
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model = model.to(device)
# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(params, lr=0.0001)
# optimizer = torch.optim.SGD(params, lr=0.005,
#                             momentum=0.9, weight_decay=0.0005)
# and a learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=3,
                                                gamma=0.1)

# let's train it for 10 epochs
num_epochs = 10
epoch = 0
print_freq = 20

In [None]:
def train_eval(model, train_loader, test_loader, iou_thres=0.5, num_epochs=10):
    train_losses = []
    eval_threatscores = []
    eval_final_res = []
    best_eval_ts = 0

    best_model_wts = copy.deepcopy(model.state_dict())
    for epoch in range(num_epochs):
        loss = train_one_epoch(model, optimizer, train_loader, device, epoch, print_freq)
        train_losses.append(loss)
        
        final_res, final_TP, final_FP, final_FN, final_num_objects = evaluate_one_epoch(test_loader, iou_thres=0.5)

        print("epoch: {}".format(epoch))
        print(final_TP, final_FP, final_FN, final_num_objects)
        eval_final_res.append(final_res)
        eval_ts = evaluate_threst_score(final_TP, final_FP, final_FN)
        eval_threatscores.append(eval_ts)
        if epoch % 2 == 0:
            print("epoch: {} eval_ts {}".format(epoch, eval_ts))

        if eval_ts > best_eval_ts:
            best_eval_ts = eval_ts 
            best_model_wts = copy.deepcopy(model.state_dict())

    return model, best_model_wts, train_losses, eval_final_res

In [None]:
model, best_model_wts, train_losses, eval_final_res = train_eval(model, train_loader,
                                                                 test_loader, iou_thres=0.5,
                                                                 num_epochs=10)

In [None]:
eval_final_res

[{0: {'FN': 158, 'FP': 0, 'TP': 0},
  1: {'FN': 0, 'FP': 0, 'TP': 0},
  2: {'FN': 3073.0, 'FP': 12599.0, 'TP': 1.0},
  3: {'FN': 390, 'FP': 0, 'TP': 0},
  4: {'FN': 27, 'FP': 0, 'TP': 0},
  5: {'FN': 0, 'FP': 0, 'TP': 0},
  6: {'FN': 20, 'FP': 0, 'TP': 0},
  7: {'FN': 0, 'FP': 0, 'TP': 0},
  8: {'FN': 0, 'FP': 0, 'TP': 0}},
 {0: {'FN': 158, 'FP': 0, 'TP': 0},
  1: {'FN': 0, 'FP': 0, 'TP': 0},
  2: {'FN': 3070.0, 'FP': 846.0, 'TP': 4.0},
  3: {'FN': 390, 'FP': 0, 'TP': 0},
  4: {'FN': 27, 'FP': 0, 'TP': 0},
  5: {'FN': 0, 'FP': 0, 'TP': 0},
  6: {'FN': 20, 'FP': 0, 'TP': 0},
  7: {'FN': 0, 'FP': 0, 'TP': 0},
  8: {'FN': 0, 'FP': 0, 'TP': 0}}]

## Evaluate the trained model on the test set

## Customize Fast RCNN

In [None]:
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

#### 1. Mobilenet_v2

In [None]:
backbone = torchvision.models.mobilenet_v2(pretrained=True).features
backbone.out_channels = 1280
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                    aspect_ratios=((0.5, 1.0, 2.0),))
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                output_size=7,
                                                sampling_ratio=2)
model = torchvision.models.detection.faster_rcnn.FasterRCNN(backbone,
                    num_classes=21,
                    rpn_anchor_generator=anchor_generator,
                    box_roi_pool=roi_pooler)

NameError: ignored

In [None]:
model.eval()
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
predictions = model(x)

#### 2. CustomVGG16

In [None]:
def customize_VGG16():
    model = torchvision.models.vgg16(pretrained=True)
    
    features = list(model.features)[:30]
    classifier = model.classifier
    
    classifier = list(classifier)
    # delete the Linear layer
    del classifier[6]
    classifier = nn.Sequential(*classifier)

    #freeze top4 conv layer
    for layer in features[:10]:
        for p in layer.parameters():
            p.requires_grad = False
    features = nn.Sequential(*features)
        
    return features, classifier
backbone, box_head = customize_VGG16()
backbone.out_channels = 512
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                           aspect_ratios=((0.5, 1.0, 2.0),))
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                output_size=7,
                                                sampling_ratio=2)
