In [19]:
import os
import sys
import os
os.chdir('/scratch/nhl256/dl_project/code/')
from PIL import Image

import random

import numpy as np
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
matplotlib.rcParams['figure.figsize'] = [5, 5]
matplotlib.rcParams['figure.dpi'] = 200

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

from data_helper import *
from helper import collate_fn, draw_box


import math
import pickle
import time
import copy

In [6]:
# !mkdir data
# !ls


coco_utils.py  data_helper.py  objectDectionFastRCNN.ipynb  transforms.py
data	       helper.py       __pycache__		    utils.py


In [2]:
#!unzip '/scratch/jz3224/DLSP20Dataset/student_data.zip' -d data

In [7]:
#!ls '/scratch/jz3224/DLSP20Dataset/student_data.zip'

/scratch/jz3224/DLSP20Dataset/student_data.zip


# Test the example code

In [0]:
# model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
# images, boxes = torch.rand(4, 3, 600, 1200), torch.rand(4, 11, 4)
# labels = torch.randint(1, 91, (4, 11))
# images = list(image for image in images)
# targets = []
# for i in range(len(images)):
#     d = {}
#     d['boxes'] = boxes[i]
#     d['labels'] = labels[i]
#     targets.append(d)
# output = model(images, targets)
#output

In [20]:
# All the images are saved in image_folder
# All the labels are saved in the annotation_csv file

# image_folder = '../data'
# annotation_csv = '../data/annotation.csv'

# Dataset locations (relative to the current working directory).
image_folder = 'data/data'
annotation_csv = 'data/data/annotation.csv'

# Use the first CUDA device when one is available, otherwise the CPU.
cuda = torch.cuda.is_available()
if cuda:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

# image_folder = '/Users/nhungle/Downloads/dl20_data'
# annotation_csv = '/Users/nhungle/Downloads/dl20_data/annotation.csv'

cuda:0


In [4]:
!#ls 'data/data'

In [5]:
#!pip install --upgrade pandas

# Labeled dataset

In [21]:
def inspect_target(index, labeled_scene_index):
    """Load the six camera images and the annotation rows of one labeled sample.

    Relies on the notebook-level globals ``image_folder``, ``annotation_csv``
    and ``transform``.

    Args:
        index: Flat sample index. ``index // 126`` selects the position inside
            ``labeled_scene_index`` and ``index % 126`` the sample of that scene.
        labeled_scene_index: Sequence of scene ids that ``index`` refers to.

    Returns:
        A tuple ``(data_entries, image_tensor)``: the rows of the annotation
        CSV whose ``scene`` and ``sample`` columns match the selected sample,
        and the six transformed camera images stacked with ``torch.stack``.
    """
    NUM_SAMPLE_PER_SCENE = 126
    image_names = [
        'CAM_FRONT_LEFT.jpeg',
        'CAM_FRONT.jpeg',
        'CAM_FRONT_RIGHT.jpeg',
        'CAM_BACK_LEFT.jpeg',
        'CAM_BACK.jpeg',
        'CAM_BACK_RIGHT.jpeg',
        ]
    scene_id = labeled_scene_index[index // NUM_SAMPLE_PER_SCENE]
    sample_id = index % NUM_SAMPLE_PER_SCENE
    sample_path = os.path.join(image_folder, f'scene_{scene_id}', f'sample_{sample_id}')

    images = []
    for image_name in image_names:
        # The context manager closes the file handle once the pixels have been
        # converted by `transform`.
        with Image.open(os.path.join(sample_path, image_name)) as image:
            images.append(transform(image))
    image_tensor = torch.stack(images)

    annotation_dataframe = pd.read_csv(annotation_csv)
    data_entries = annotation_dataframe[(annotation_dataframe['scene'] == scene_id) & (annotation_dataframe['sample'] == sample_id)]
    return data_entries, image_tensor

In [22]:
# Scene ids assigned to the train / validation / test splits.
train_labeled_scene_index = np.arange(106, 125)
val_labeled_scene_index = np.arange(125, 131)
test_labeled_scene_index = np.arange(131, 134)

transform = torchvision.transforms.ToTensor()

# --- training split ---
fasterRCNN_trainset = FastRCNNLabeledDataset(
    image_folder=image_folder,
    annotation_file=annotation_csv,
    scene_index=train_labeled_scene_index,
    transform=transform,
    extra_info=True,
)
train_loader = torch.utils.data.DataLoader(
    fasterRCNN_trainset,
    batch_size=1,
    shuffle=True,
    num_workers=2,
    collate_fn=collate_fn,
)

# --- validation split ---
fasterRCNN_valset = FastRCNNLabeledDataset(
    image_folder=image_folder,
    annotation_file=annotation_csv,
    scene_index=val_labeled_scene_index,
    transform=transform,
    extra_info=True,
)
val_loader = torch.utils.data.DataLoader(
    fasterRCNN_valset,
    batch_size=1,
    shuffle=True,
    num_workers=2,
    collate_fn=collate_fn,
)

# --- test split ---
fasterRCNN_testset = FastRCNNLabeledDataset(
    image_folder=image_folder,
    annotation_file=annotation_csv,
    scene_index=test_labeled_scene_index,
    transform=transform,
    extra_info=True,
)
test_loader = torch.utils.data.DataLoader(
    fasterRCNN_testset,
    batch_size=1,
    shuffle=True,
    num_workers=2,
    collate_fn=collate_fn,
)

In [23]:
# Number of batches in the training loader (the value is not displayed).
len(train_loader)
# Fetch one batch with the built-in next(); the iterator's Python-2-style
# `.next()` method is not available on recent PyTorch DataLoader iterators.
sample, targets = next(iter(train_loader))


In [24]:
# Sanity check: re-read the annotations of the first target's sample straight
# from the CSV and compare its category ids element-wise with the labels the
# data loader produced for the same sample.
index = targets[0]['image_id'].item()
data_entries, idx_tensor = inspect_target(index, train_labeled_scene_index)
data_entries["category_id"].values == targets[0]['labels'].data.numpy()

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True], dtype=bool)

## Prepare inputs for the model

In [25]:
def extract_features(one_sample):
    """Run ``one_sample`` through a truncated ResNet-18 and return the result.

    The extractor is a ResNet-18 (``pretrained=False``) with its last two child
    modules removed. It is built once, on first use, and cached on the function
    object: building a fresh randomly initialised network on every call would
    map the same input to different features each time and repeat the
    construction cost for every sample.

    Args:
        one_sample: Batch of images accepted by ResNet-18
            (assumed ``[num_images, 3, H, W]`` -- TODO confirm against caller).

    Returns:
        The output tensor of the truncated network for ``one_sample``.
    """
    feature_extractor = getattr(extract_features, "_feature_extractor", None)
    if feature_extractor is None:
        backbone = torchvision.models.resnet18(pretrained=False)
        feature_extractor = nn.Sequential(*list(backbone.children())[:-2])
        extract_features._feature_extractor = feature_extractor
    return feature_extractor(one_sample)


def concat_features(features, dim = 2):
    """Split ``features`` along its first dimension and join the pieces along ``dim``.

    ``dim`` indexes the dimensions of the individual pieces (one rank lower
    than ``features``). For pieces laid out as (channel, row, column):
      dim 0 ==> pieces are joined along the channel dimension
      dim 1 ==> pieces are joined along the row dimension
      dim 2 ==> pieces are joined along the column dimension
    """
    pieces = features.unbind(0)
    return torch.cat(pieces, dim=dim)

def prepare_inputs(sample):
    """Convert a batch of multi-camera samples into 3-channel feature "images".

    For every sample the camera images are passed through ``extract_features``,
    the per-camera feature maps are joined by ``concat_features`` and the
    result is reshaped to three leading channels.

    Input: sample is a tensor with size [batch_size, 6, 3, 256, 306]
    Output: a list of batch_size tensors; for the input size above each tensor
        has size [3, 512, 160]
    """
    fe_batch = []
    for image_tensor in sample:
        features = concat_features(extract_features(image_tensor))
        # Reinterpret the concatenated map as a 3-channel tensor. The last
        # dimension is inferred instead of being hard-coded to 160, so other
        # image sizes work too; for a 512-channel map with 3 * 512 * 160
        # elements this is exactly the former view(3, 512, 160).
        features = features.reshape(3, features.shape[0], -1)
        fe_batch.append(features)

    return fe_batch

In [26]:
# sample = torch.stack(sample)
# images = prepare_inputs(sample)
# images = list(image.to(device) for image in images)
# targets = [{k: v.to(device) for k, v in t.items()} for t in targets]


## Train and Evaluate for 1 Scene

In [0]:
# Refer to: https://github.com/pytorch/vision/blob/master/references/detection/engine.py

In [16]:
# train_labeled_scene_index = np.arange(131, 132)
# test_labeled_scene_index = np.arange(132, 133)
# fasterRCNN_trainset = FastRCNNLabeledDataset(image_folder=image_folder,
#                                   annotation_file=annotation_csv,
#                                   scene_index=train_labeled_scene_index,
#                                   transform=transform,
#                                   extra_info=True
#                                  )
# train_loader = torch.utils.data.DataLoader(fasterRCNN_trainset,
#                                           batch_size=1,
#                                           shuffle=True,
#                                           num_workers=2, collate_fn=collate_fn)
# fasterRCNN_testset = FastRCNNLabeledDataset(image_folder=image_folder,
#                                   annotation_file=annotation_csv,
#                                   scene_index=test_labeled_scene_index,
#                                   transform=transform,
#                                   extra_info=True
#                                  )
# test_loader = torch.utils.data.DataLoader(fasterRCNN_testset,
#                                           batch_size=1,
#                                           shuffle=True,
#                                           num_workers=2, collate_fn=collate_fn)

In [27]:
import math
import sys
import time
import torch

import utils


In [18]:
# model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
# model = model.to(device)
# # construct an optimizer
# params = [p for p in model.parameters() if p.requires_grad]
# optimizer = torch.optim.SGD(params, lr=0.005,
#                             momentum=0.9, weight_decay=0.0005)
# # and a learning rate scheduler
# lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
#                                                 step_size=3,
#                                                 gamma=0.1)

# # let's train it for 10 epochs
# num_epochs = 1
# epoch = 0
# print_freq = 20

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /home/nhl256/.cache/torch/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:03<00:00, 53.0MB/s] 


In [28]:
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
    """Train ``model`` for one pass over ``data_loader``.

    Each batch is stacked, converted by ``prepare_inputs``, moved to ``device``
    and fed to the model together with its targets; the summed loss is
    back-propagated and the optimizer stepped. During epoch 0 the scheduler
    returned by ``utils.warmup_lr_scheduler`` is stepped after every batch.
    Progress is logged every ``print_freq`` iterations through
    ``utils.MetricLogger``. A non-finite loss prints the loss and terminates
    via ``sys.exit(1)``.

    Returns:
        The summed loss tensor of the last processed batch.
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    # A per-iteration scheduler is only used during the very first epoch.
    if epoch == 0:
        warmup_iters = min(1000, len(data_loader) - 1)
        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, 1. / 1000)
    else:
        lr_scheduler = None

    for images, targets in metric_logger.log_every(data_loader, print_freq, header):
        feature_maps = prepare_inputs(torch.stack(images))
        images = [feature_map.to(device) for feature_map in feature_maps]
        targets = [
            {key: value.to(device) for key, value in target.items()}
            for target in targets
        ]

        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss_dict_reduced.values())
        loss_value = losses_reduced.item()

        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

    return losses

In [22]:
#train_one_epoch(model, optimizer, train_loader, device, epoch, print_freq)

Epoch: [0]  [  0/126]  eta: 0:03:42  lr: 0.000045  loss: 0.6737 (0.6737)  loss_classifier: 0.1618 (0.1618)  loss_box_reg: 0.0234 (0.0234)  loss_objectness: 0.0381 (0.0381)  loss_rpn_box_reg: 0.4504 (0.4504)  time: 1.7663  data: 0.2147  max mem: 1302
Epoch: [0]  [ 20/126]  eta: 0:02:24  lr: 0.000844  loss: 0.6608 (0.6817)  loss_classifier: 0.2318 (0.2304)  loss_box_reg: 0.0281 (0.0323)  loss_objectness: 0.0456 (0.0450)  loss_rpn_box_reg: 0.3588 (0.3739)  time: 1.3432  data: 0.0035  max mem: 1302
Epoch: [0]  [ 40/126]  eta: 0:01:55  lr: 0.001643  loss: 0.7009 (0.6976)  loss_classifier: 0.2472 (0.2370)  loss_box_reg: 0.0404 (0.0385)  loss_objectness: 0.0333 (0.0437)  loss_rpn_box_reg: 0.3607 (0.3784)  time: 1.3328  data: 0.0037  max mem: 1302
Epoch: [0]  [ 60/126]  eta: 0:01:28  lr: 0.002443  loss: 0.6858 (0.6956)  loss_classifier: 0.2119 (0.2279)  loss_box_reg: 0.0345 (0.0371)  loss_objectness: 0.0378 (0.0430)  loss_rpn_box_reg: 0.3973 (0.3876)  time: 1.3332  data: 0.0037  max mem: 1302


tensor(0.7010, device='cuda:0', grad_fn=<AddBackward0>)

## Evaluate

In [29]:
def prepare_pred_results(predictions):
    """Convert model predictions into plain Python lists, one entry per image.

    Args:
        predictions: Iterable of per-image dicts with the keys ``"boxes"``,
            ``"labels"`` and ``"scores"`` (tensors).

    Returns:
        ``(pred_boxes, pred_labels, pred_scores)``: three lists with exactly one
        entry per prediction. Boxes are passed through ``reorder_coord`` before
        being converted with ``tolist()``.

    An empty prediction dict contributes empty lists instead of being dropped.
    Dropping it would shift every later image by one position, so predictions
    and ground truth would be paired incorrectly when callers ``zip`` them.
    """
    pred_boxes = []
    pred_labels = []
    pred_scores = []
    for prediction in predictions:
        if len(prediction) == 0:
            # Keep a placeholder so the output stays aligned with the targets.
            pred_boxes.append([])
            pred_labels.append([])
            pred_scores.append([])
            continue

        pred_boxes.append(reorder_coord(prediction["boxes"]).tolist())
        pred_labels.append(prediction["labels"].tolist())
        pred_scores.append(prediction["scores"].tolist())

    return pred_boxes, pred_labels, pred_scores

def reorder_coord(boxes):
    """Swap each box from (xmin, ymin, xmax, ymax) to (ymin, xmin, ymax, xmax) order."""
    x_low, y_low, x_high, y_high = torch.unbind(boxes, dim=1)
    swapped = (y_low, x_low, y_high, x_high)
    return torch.stack(swapped, dim=1)

def prepare_gt(targets):
    """Convert ground-truth targets into plain Python lists, one entry per image.

    Each target's ``'boxes'`` tensor is passed through ``reorder_coord`` and
    converted with ``tolist()``; its ``"labels"`` tensor is converted with
    ``tolist()``.

    Returns:
        ``(gt_boxes, gt_labels)``: two lists aligned with ``targets``.
    """
    converted = [
        (reorder_coord(target['boxes']).tolist(), target["labels"].tolist())
        for target in targets
    ]
    gt_boxes = [boxes for boxes, _ in converted]
    gt_labels = [labels for _, labels in converted]
    return gt_boxes, gt_labels

In [30]:
# bbox_a and bbox_b must each be convertible to an np.array of shape (N, 4)

def bbox_iou(bbox_a, bbox_b):
    """Compute the pairwise intersection-over-union of two sets of boxes.

    Args:
        bbox_a: Array-like of shape (N, 4).
        bbox_b: Array-like of shape (K, 4).
        Each row holds the two minimum coordinates followed by the two maximum
        coordinates (i.e., ymin, xmin, ymax, xmax).

    Returns:
        Array of shape (N, K) whose entry [n, k] is the IoU of ``bbox_a[n]``
        and ``bbox_b[k]``.

    Raises:
        IndexError: If the second dimension of either input is not 4.
    """
    boxes_a = np.array(bbox_a)
    boxes_b = np.array(bbox_b)
    if not (boxes_a.shape[1] == 4 and boxes_b.shape[1] == 4):
        raise IndexError

    min_a, max_a = boxes_a[:, :2], boxes_a[:, 2:]
    min_b, max_b = boxes_b[:, :2], boxes_b[:, 2:]

    # Corners of the candidate intersection rectangle for every (a, b) pair.
    top_left = np.maximum(min_a[:, None], min_b)
    bottom_right = np.minimum(max_a[:, None], max_b)

    # The intersection area only counts when the rectangle is non-degenerate
    # along both axes; otherwise the boolean mask zeroes it out.
    overlaps = (top_left < bottom_right).all(axis=2)
    area_i = np.prod(bottom_right - top_left, axis=2) * overlaps
    area_a = np.prod(max_a - min_a, axis=1)
    area_b = np.prod(max_b - min_b, axis=1)

    return area_i / (area_a[:, None] + area_b - area_i)

In [31]:
def cal_TP_FP_iou(pred_bbox_c, gt_bbox_c, iou_thres=0.5):
    """Count true and false positives among the predicted boxes of one class.

    Args:
        pred_bbox_c: Predicted boxes of the class, shape (P, 4).
        gt_bbox_c: Ground-truth boxes of the class, shape (G, 4).
        iou_thres: A prediction can only be a true positive when its IoU with
            its best ground-truth box is strictly greater than this value.

    Returns:
        ``(TP_cum, FP_cum)``: numbers of true and false positives (numpy
        floats); they always add up to the number of predicted boxes.
    """
    iou_table = bbox_iou(pred_bbox_c, gt_bbox_c)
    num_pred_bboxes = iou_table.shape[0]
    TP = np.zeros(num_pred_bboxes)
    FP = np.zeros(num_pred_bboxes)

    # For each predicted box:
    #   find the ground-truth box it overlaps most (max IoU in its row);
    #   it is a TP only if that IoU exceeds the threshold AND this prediction
    #   is the best-overlapping prediction for that ground-truth box;
    #   otherwise (low IoU, or the ground-truth box is claimed by a better
    #   prediction) it is an FP.
    for i in range(num_pred_bboxes):
        gt_bbox_index = np.argmax(iou_table[i])
        best_pred_bbox_index_for_selected_gt_bbox = np.argmax(iou_table[:, gt_bbox_index])
        # Compare the prediction index `i` (not the ground-truth index) with the
        # best prediction index: both sides must index rows of the IoU table.
        if iou_table[i, gt_bbox_index] > iou_thres \
            and i == best_pred_bbox_index_for_selected_gt_bbox:
            TP[i] = 1
        else:
            FP[i] = 1

    TP_cum = np.sum(TP)
    FP_cum = np.sum(FP)

    # Sanity check: every prediction is flagged exactly once.
    if (TP_cum + FP_cum) != num_pred_bboxes:
        print("WRONG CALCULATION OF FP")
    return TP_cum, FP_cum

In [32]:
# Test for cal_TP_FP_iou

def inspect_call_TP_FP_iou(test_images, test_targets, iou_thres=0.5):
    """Print per-class box counts and TP/FP consistency checks for one batch.

    Runs the notebook-level ``model`` in eval mode on the batch and, for every
    class present in the predictions or the ground truth of an image, prints
    the number of ground-truth and predicted boxes. When both are non-zero it
    also prints whether TP + FP equals the number of predictions.

    Args:
        test_images: Sequence of image tensors of one batch (stacked internally).
        test_targets: Sequence of target dicts for the batch.
        iou_thres: IoU threshold forwarded to ``cal_TP_FP_iou``. It used to be
            read from an undefined global name; it is now an explicit
            parameter with the notebook's usual default of 0.5.
    """
    test_images = torch.stack(test_images)
    test_images = prepare_inputs(test_images)

    test_images = list(image.to(device) for image in test_images)
    test_targets = [{k: v.to(device) for k, v in t.items()} for t in test_targets]

    model.eval()
    predictions = model(test_images)

    pred_bboxes, pred_labels, pred_scores = prepare_pred_results(predictions)
    gt_bboxes, gt_labels = prepare_gt(test_targets)

    for pred_bbox, pred_label, gt_bbox, gt_label in \
        zip(pred_bboxes, pred_labels, gt_bboxes, gt_labels):
        pred_bbox = np.array(pred_bbox)
        pred_label = np.array(pred_label)
        gt_bbox = np.array(gt_bbox)
        gt_label = np.array(gt_label)
        # Every class that occurs in either the predictions or the ground truth.
        unique_share_classes = (np.unique(np.concatenate((pred_label, gt_label))))

        for c in unique_share_classes:
            pred_class_c_index = np.where(pred_label == c)[0]
            pred_bbox_c = pred_bbox[pred_class_c_index]
            gt_class_c_index = np.where(gt_label == c)[0]
            gt_bbox_c = gt_bbox[gt_class_c_index]
            num_gt_bboxes = len(gt_class_c_index)
            num_pred_bboxes = len(pred_class_c_index)
            print('class {} with {} gt_bboxes and {} pred_bboxes'.format(c, num_gt_bboxes, num_pred_bboxes))
            if num_pred_bboxes == 0:
                # Nothing predicted: every ground-truth box is missed.
                class_TP = 0
                class_FP = 0
                class_FN = num_gt_bboxes
            elif num_gt_bboxes == 0:
                # Nothing to find: every prediction is a false positive.
                class_TP = 0
                class_FP = num_pred_bboxes
                class_FN = 0
            else:
                class_TP, class_FP = cal_TP_FP_iou(pred_bbox_c, gt_bbox_c, iou_thres)
                class_FN = num_gt_bboxes - class_TP
                print(class_TP + class_FP == num_pred_bboxes)

In [0]:
# for i in range(3):
#     print('Iter {}'.format(i))
#     test_images, test_targets = next(iter(test_loader))
#     inspect_call_TP_FP_iou(test_images, test_targets)

In [33]:
def evaluate_one_batch(predictions, test_targets, res, iou_thres=0.5):
    """Accumulate per-class TP / FP / FN counts for one batch.

    Args:
        predictions: Per-image prediction dicts as consumed by
            ``prepare_pred_results``.
        test_targets: Per-image target dicts as consumed by ``prepare_gt``.
        res: Running ``{class_id: {'TP': .., 'FP': .., 'FN': ..}}`` dict. It is
            updated in place and also returned. Classes missing from it are
            added on first sight.
        iou_thres: IoU threshold forwarded to ``cal_TP_FP_iou``.

    Returns:
        ``(res, batch_res, batch_total_TP, batch_total_FP, batch_total_FN,
        batch_total_num_object)`` where ``batch_res`` has the same layout as
        ``res`` but only covers this batch, and ``batch_total_num_object`` is
        the number of ground-truth boxes in the batch.
    """
    pred_bboxes, pred_labels, _ = prepare_pred_results(predictions)
    gt_bboxes, gt_labels = prepare_gt(test_targets)

    batch_total_TP = 0
    batch_total_FP = 0
    batch_total_FN = 0
    batch_total_num_object = 0
    batch_res = {c: {'TP':0, 'FP': 0, 'FN': 0} for c in range(9)}

    for pred_bbox, pred_label, gt_bbox, gt_label in \
        zip(pred_bboxes, pred_labels, gt_bboxes, gt_labels):

        pred_bbox = np.array(pred_bbox)
        pred_label = np.array(pred_label)
        gt_bbox = np.array(gt_bbox)
        gt_label = np.array(gt_label)
        # Every class that occurs in either the predictions or the ground truth.
        unique_share_classes = (np.unique(np.concatenate((pred_label, gt_label))))

        for c in unique_share_classes:
            pred_class_c_index = np.where(pred_label == c)[0]
            pred_bbox_c = pred_bbox[pred_class_c_index]
            gt_class_c_index = np.where(gt_label == c)[0]
            gt_bbox_c = gt_bbox[gt_class_c_index]
            num_gt_bboxes = len(gt_class_c_index)
            num_pred_bboxes = len(pred_class_c_index)
            if num_pred_bboxes == 0:
                # Nothing predicted: every ground-truth box is missed.
                class_TP = 0
                class_FP = 0
                class_FN = num_gt_bboxes
            elif num_gt_bboxes == 0:
                # Nothing to find: every prediction is a false positive.
                class_TP = 0
                class_FP = num_pred_bboxes
                class_FN = 0
            else:
                class_TP, class_FP = cal_TP_FP_iou(pred_bbox_c, gt_bbox_c, iou_thres)
                class_FN = num_gt_bboxes - class_TP

            batch_total_TP += class_TP
            batch_total_FP += class_FP
            batch_total_FN += class_FN
            batch_total_num_object += num_gt_bboxes

            # The model may predict class ids that are not pre-registered in
            # the result dicts; create their counters on demand instead of
            # failing with a KeyError. int() normalises numpy scalar labels.
            class_key = int(c)
            batch_counts = batch_res.setdefault(class_key, {'TP': 0, 'FP': 0, 'FN': 0})
            total_counts = res.setdefault(class_key, {'TP': 0, 'FP': 0, 'FN': 0})

            batch_counts['TP'] += class_TP
            batch_counts['FP'] += class_FP
            batch_counts['FN'] += class_FN

            total_counts['TP'] += class_TP
            total_counts['FP'] += class_FP
            total_counts['FN'] += class_FN

    return res, batch_res, batch_total_TP, batch_total_FP, batch_total_FN, batch_total_num_object

In [34]:
# Inspect evaluate_one_batch
def inspect_evaluate_one_batch(test_images, test_targets, final_res):
    """Run the notebook-level ``model`` on one batch and score it.

    The images are stacked, converted by ``prepare_inputs`` and moved to
    ``device`` together with the targets; the model is switched to eval mode
    and its predictions are scored by ``evaluate_one_batch`` with an IoU
    threshold of 0.5.

    Returns:
        The six-element tuple returned by ``evaluate_one_batch``.
    """
    feature_maps = prepare_inputs(torch.stack(test_images))
    test_images = [feature_map.to(device) for feature_map in feature_maps]
    test_targets = [
        {key: value.to(device) for key, value in target.items()}
        for target in test_targets
    ]

    model.eval()
    predictions = model(test_images)

    return evaluate_one_batch(predictions, test_targets, final_res, iou_thres=0.5)

In [29]:

# Running per-class counters and overall totals for the smoke test below.
final_res = {c: {'TP':0, 'FP': 0, 'FN': 0} for c in range(9)}
final_TP = final_FP = final_FN = final_num_objects = 0

# test for 2 batches
for i in range(2):
    # A fresh iterator is created each time, so each pass draws the first
    # batch of a newly shuffled loader.
    test_images, test_targets = next(iter(test_loader))
    (final_res, batch_res, batch_total_TP, batch_total_FP,
     batch_total_FN, batch_total_num_object) = inspect_evaluate_one_batch(
        test_images, test_targets, final_res)

    print('batch {}'.format(i))
    print(batch_total_TP, batch_total_FN, batch_total_num_object)
    print('cur batch res:', batch_res)
    print('final res after this batch:', final_res)

batch 0
0.0 31.0 31
cur batch res: {0: {'TP': 0, 'FP': 0, 'FN': 1}, 1: {'TP': 0, 'FP': 0, 'FN': 0}, 2: {'TP': 0.0, 'FP': 100.0, 'FN': 23.0}, 3: {'TP': 0, 'FP': 0, 'FN': 7}, 4: {'TP': 0, 'FP': 0, 'FN': 0}, 5: {'TP': 0, 'FP': 0, 'FN': 0}, 6: {'TP': 0, 'FP': 0, 'FN': 0}, 7: {'TP': 0, 'FP': 0, 'FN': 0}, 8: {'TP': 0, 'FP': 0, 'FN': 0}}
final res after this batch: {0: {'TP': 0, 'FP': 0, 'FN': 1}, 1: {'TP': 0, 'FP': 0, 'FN': 0}, 2: {'TP': 0.0, 'FP': 100.0, 'FN': 23.0}, 3: {'TP': 0, 'FP': 0, 'FN': 7}, 4: {'TP': 0, 'FP': 0, 'FN': 0}, 5: {'TP': 0, 'FP': 0, 'FN': 0}, 6: {'TP': 0, 'FP': 0, 'FN': 0}, 7: {'TP': 0, 'FP': 0, 'FN': 0}, 8: {'TP': 0, 'FP': 0, 'FN': 0}}
batch 1
0.0 29.0 29
cur batch res: {0: {'TP': 0, 'FP': 0, 'FN': 1}, 1: {'TP': 0, 'FP': 0, 'FN': 0}, 2: {'TP': 0.0, 'FP': 100.0, 'FN': 25.0}, 3: {'TP': 0, 'FP': 0, 'FN': 3}, 4: {'TP': 0, 'FP': 0, 'FN': 0}, 5: {'TP': 0, 'FP': 0, 'FN': 0}, 6: {'TP': 0, 'FP': 0, 'FN': 0}, 7: {'TP': 0, 'FP': 0, 'FN': 0}, 8: {'TP': 0, 'FP': 0, 'FN': 0}}
final re

In [35]:
def evaluate_one_epoch(test_loader, iou_thres=0.5):
    """Evaluate the notebook-level ``model`` on every batch of ``test_loader``.

    Args:
        test_loader: Data loader yielding ``(images, targets)`` batches.
        iou_thres: IoU threshold forwarded to ``evaluate_one_batch``.

    Returns:
        ``(final_res, final_TP, final_FP, final_FN, final_num_objects)``:
        the per-class ``{'TP', 'FP', 'FN'}`` counters and the totals of true
        positives, false positives, false negatives and ground-truth objects
        over the whole loader.
    """
    final_res = {c: {'TP':0, 'FP': 0, 'FN': 0} for c in range(9)}
    final_TP = 0
    final_FP = 0
    final_FN = 0
    final_num_objects = 0

    # Switching to eval mode is loop-invariant, and no gradients are needed
    # for scoring, so autograd bookkeeping is disabled for the whole pass.
    model.eval()
    with torch.no_grad():
        for test_images, test_targets in test_loader:
            test_images = prepare_inputs(torch.stack(test_images))
            test_images = list(image.to(device) for image in test_images)
            test_targets = [{k: v.to(device) for k, v in t.items()} for t in test_targets]

            predictions = model(test_images)

            # Score this batch with the caller's threshold (it used to be
            # hard-coded to 0.5, silently ignoring the argument).
            final_res, _, batch_total_TP, batch_total_FP, batch_total_FN, batch_total_num_object \
                = evaluate_one_batch(predictions, test_targets, final_res, iou_thres=iou_thres)

            final_TP += batch_total_TP
            final_FP += batch_total_FP
            final_FN += batch_total_FN
            final_num_objects += batch_total_num_object

    return final_res, final_TP, final_FP, final_FN, final_num_objects

In [31]:
final_res, final_TP, final_FP, final_FN, final_num_objects = evaluate_one_epoch(test_loader, iou_thres=0.5)

In [36]:
def evaluate_threst_score(TP, FP, FN):
    """Return the threat score ``TP / (TP + FP + FN)``.

    Args:
        TP: Number of true positives.
        FP: Number of false positives.
        FN: Number of false negatives.

    Returns:
        The threat score, or ``0.0`` when all three counts are zero (there is
        nothing to score, and dividing would fail or yield NaN).
    """
    denominator = TP + FP + FN
    if denominator == 0:
        return 0.0
    return TP / denominator

In [33]:
evaluate_threst_score(final_TP, final_FP, final_FN)

0.0

In [34]:
#final_res

## Train and Evaluate for Multiple Epochs

In [37]:
# Re-create the splits, datasets and loaders for the multi-epoch run.
train_labeled_scene_index = np.arange(106, 125)
val_labeled_scene_index = np.arange(125, 131)
test_labeled_scene_index = np.arange(131, 134)

transform = torchvision.transforms.ToTensor()

# Training data
fasterRCNN_trainset = FastRCNNLabeledDataset(
    image_folder=image_folder,
    annotation_file=annotation_csv,
    scene_index=train_labeled_scene_index,
    transform=transform,
    extra_info=True,
)
train_loader = torch.utils.data.DataLoader(
    fasterRCNN_trainset,
    batch_size=1,
    shuffle=True,
    num_workers=2,
    collate_fn=collate_fn,
)

# Validation data
fasterRCNN_valset = FastRCNNLabeledDataset(
    image_folder=image_folder,
    annotation_file=annotation_csv,
    scene_index=val_labeled_scene_index,
    transform=transform,
    extra_info=True,
)
val_loader = torch.utils.data.DataLoader(
    fasterRCNN_valset,
    batch_size=1,
    shuffle=True,
    num_workers=2,
    collate_fn=collate_fn,
)

# Test data
fasterRCNN_testset = FastRCNNLabeledDataset(
    image_folder=image_folder,
    annotation_file=annotation_csv,
    scene_index=test_labeled_scene_index,
    transform=transform,
    extra_info=True,
)
test_loader = torch.utils.data.DataLoader(
    fasterRCNN_testset,
    batch_size=1,
    shuffle=True,
    num_workers=2,
    collate_fn=collate_fn,
)

In [47]:
# Pretrained torchvision Faster R-CNN (ResNet-50 FPN), moved to the selected device.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True).to(device)

# Optimise only the parameters that require gradients.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.Adam(params, lr=0.0001)

# Step scheduler: scale the learning rate by 0.1 every 3 scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

# Training-loop settings.
num_epochs = 10
epoch = 0
print_freq = 500

In [48]:
# Preview the checkpoint file names produced for the first two epochs.
for epoch in (0, 1):
    print('/scratch/nhl256/dl_project/model/object_detection_resnet18_epoch{}.pth'.format(epoch))

/scratch/nhl256/dl_project/model/object_detection_resnet18_epoch0.pth
/scratch/nhl256/dl_project/model/object_detection_resnet18_epoch1.pth


In [49]:
def train_eval(model, train_loader, val_loader, iou_thres=0.5, num_epochs=10):
    """Train for ``num_epochs`` epochs, validating after each one.

    Uses the notebook-level ``optimizer``, ``device`` and ``print_freq``.
    Whenever the validation threat score improves on the best one so far, the
    model weights are copied and written to a per-epoch checkpoint file.

    Args:
        model: Detection model to train.
        train_loader: Loader passed to ``train_one_epoch``.
        val_loader: Loader passed to ``evaluate_one_epoch``.
        iou_thres: IoU threshold used for validation.
        num_epochs: Number of epochs to run.

    Returns:
        ``(model, best_model_wts, train_losses, eval_final_res)``: the model,
        the state dict with the best validation threat score (the initial
        weights if no epoch scored above 0), the per-epoch values returned by
        ``train_one_epoch`` and the per-epoch per-class result dicts.
    """
    train_losses = []
    eval_final_res = []
    best_eval_ts = 0

    best_model_wts = copy.deepcopy(model.state_dict())
    for epoch in range(num_epochs):
        loss = train_one_epoch(model, optimizer, train_loader, device, epoch, print_freq)
        train_losses.append(loss)

        # Validate with the caller's threshold (it used to be hard-coded to
        # 0.5, silently ignoring the argument).
        final_res, final_TP, final_FP, final_FN, final_num_objects \
        = evaluate_one_epoch(val_loader, iou_thres=iou_thres)

        print("epoch: {}".format(epoch))
        print("final_TP {}, final FP {}, final FN {}".format(final_TP, final_FP, final_FN))

        eval_final_res.append(final_res)
        eval_ts = evaluate_threst_score(final_TP, final_FP, final_FN)
        if epoch % 2 == 0:
            print("epoch: {} eval_ts {}".format(epoch, eval_ts))

        # Keep (and checkpoint) the weights of the best epoch seen so far.
        if eval_ts > best_eval_ts:
            best_eval_ts = eval_ts
            best_model_wts = copy.deepcopy(model.state_dict())
            torch.save(best_model_wts, '/scratch/nhl256/dl_project/model/object_detection_resnet18_epoch{}.pth'.format(epoch))

    return model, best_model_wts, train_losses, eval_final_res

In [None]:
# Full training run: 10 epochs, validating at an IoU threshold of 0.5.
model, best_model_wts, train_losses, eval_final_res = train_eval(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    iou_thres=0.5,
    num_epochs=10,
)

Epoch: [0]  [   0/2394]  eta: 1:22:31  lr: 0.000000  loss: 1.6030 (1.6030)  loss_classifier: 0.0847 (0.0847)  loss_box_reg: 0.0013 (0.0013)  loss_objectness: 0.8603 (0.8603)  loss_rpn_box_reg: 0.6566 (0.6566)  time: 2.0682  data: 0.3301  max mem: 2009
Epoch: [0]  [ 500/2394]  eta: 0:43:39  lr: 0.000050  loss: 0.6638 (1.0159)  loss_classifier: 0.1096 (0.1735)  loss_box_reg: 0.0065 (0.0196)  loss_objectness: 0.0306 (0.2391)  loss_rpn_box_reg: 0.4105 (0.5837)  time: 1.4023  data: 0.0037  max mem: 2688


In [None]:
eval_final_res

In [None]:
train_losses


In [None]:
# Persist the best weights returned by train_eval under a fixed file name.
torch.save(best_model_wts, '/scratch/nhl256/dl_project/model/object_detection_resnet18.pth')

In [None]:
print('a')

## Customize Faster R-CNN

In [0]:
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

#### 1. Mobilenet_v2

In [0]:
# Use the feature layers of a pretrained MobileNetV2 as the detection backbone
# and record its number of output channels on the module.
backbone = torchvision.models.mobilenet_v2(pretrained=True).features
backbone.out_channels = 1280

# One feature map with five anchor sizes and three aspect ratios.
anchor_generator = AnchorGenerator(
    sizes=((32, 64, 128, 256, 512),),
    aspect_ratios=((0.5, 1.0, 2.0),),
)

roi_pooler = torchvision.ops.MultiScaleRoIAlign(
    featmap_names=[0],
    output_size=7,
    sampling_ratio=2,
)

model = torchvision.models.detection.faster_rcnn.FasterRCNN(
    backbone,
    num_classes=21,
    rpn_anchor_generator=anchor_generator,
    box_roi_pool=roi_pooler,
)

NameError: ignored

In [0]:
# Smoke test: run the detector in eval mode on two random images of
# different heights.
model.eval()
x = [torch.rand(3, height, 400) for height in (300, 500)]
predictions = model(x)

#### 2. CustomVGG16

In [0]:
def customize_VGG16():
    """Split a pretrained VGG-16 into a feature backbone and a classifier head.

    Returns:
        ``(features, classifier)``: an ``nn.Sequential`` of the first 30
        feature layers, with the parameters of the first ten of them frozen,
        and an ``nn.Sequential`` of the VGG classifier without its layer at
        index 6 (the Linear layer).
    """
    vgg = torchvision.models.vgg16(pretrained=True)

    feature_layers = list(vgg.features)[:30]
    # freeze top4 conv layer
    for layer in feature_layers[:10]:
        for param in layer.parameters():
            param.requires_grad = False

    # delete the Linear layer
    head_layers = list(vgg.classifier)
    head_layers.pop(6)

    return nn.Sequential(*feature_layers), nn.Sequential(*head_layers)
# Build the VGG-16 backbone / box head and record the backbone's number of
# output channels on the module.
backbone, box_head = customize_VGG16()
backbone.out_channels = 512
# One feature map with five anchor sizes and three aspect ratios.
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                           aspect_ratios=((0.5, 1.0, 2.0),))
# NOTE(review): the expected type of the featmap_names entries (int vs. str)
# may depend on the installed torchvision version -- confirm.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                output_size=7,
                                                sampling_ratio=2)
