In [1]:
import os
import numpy as np 
import torch
from torch import nn
from torch.nn import functional as F 
import torch.utils.data as td
import torchvision as tv
import pandas as pd
from PIL import Image
from matplotlib import pyplot as plt
import sys
import skimage.io as io
import cv2

In [2]:
# Extend the import search path so the project modules imported below resolve.
# The appends must run before the imports that follow.
# NOTE(review): assumes sys.path[0] is the project folder containing src/ — confirm for your launch setup.
sys.path.append(sys.path[0]+'/src/lib') # Add library folder
sys.path.append(sys.path[0]+'/src/lib/models/networks/DCNv2')
from models.model import create_model, load_model, save_model
from trains.ctdet import CtdetTrainer

In [3]:
from opts import opts
from datasets.dataset_factory import get_dataset
from datasets.dataset.coco import COCO
from datasets.sample.ctdet import CTDetDataset
from trains.ctdet import CtdetTrainer
from trains.train_factory import train_factory

In [4]:
# src/project_tools has to be on sys.path before fcn_opts can be imported.
sys.path.append(sys.path[0]+'/src/project_tools')
from fcn_opts import fcn_opts
# Dataset class looked up for the 'coco' dataset and 'ctdet' task; fcn_opts builds the option object from it.
Dataset = get_dataset('coco', 'ctdet')
opt = fcn_opts(Dataset)

List of object categories

In [5]:
# Category names, 80 entries. The drawing loops pair index c_id of this list
# with the detector results stored under key c_id + 1.
coco_class_name = [
    # people and vehicles
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
    # street objects
    'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench',
    # animals
    'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
    # accessories
    'backpack', 'umbrella', 'handbag', 'tie', 'suitcase',
    # sports
    'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
    'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    # kitchenware
    'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    # food
    'banana', 'apple', 'sandwich', 'orange', 'broccoli',
    'carrot', 'hot dog', 'pizza', 'donut', 'cake',
    # furniture
    'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet',
    # electronics
    'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    # appliances
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator',
    # indoor objects
    'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',
]

## Demo 1 (Pre-Trained vs Our Baseline model with 2048 training samples)

In [6]:
# Folder with the demo pictures, built from sys.path[0].
img_dir = sys.path[0]+'/images/'
# First demo picture: f_name is the path given to the detector, I is the array the boxes get drawn on.
f_name = img_dir + '1.jpg'
I = io.imread(f_name)

In [7]:
# NOTE(review): this same DCNv2 folder is already appended in an earlier cell; the repeat only adds a duplicate sys.path entry.
sys.path.append(sys.path[0]+'/src/lib/models/networks/DCNv2')
from detectors.ctdet import CtdetDetector

Setting model parameters using opt

In [8]:
# Options read by the detector and by the drawing loops further down.
opt.dataset = 'coco'          # type of dataset
opt.load_model = sys.path[0]+'/models/ctdet_coco_resdcn18.pth'  # base model path
opt.K = 100                   # maximum number of detections
opt.nms = False               # non-maximum suppression
opt.flip_test = False
opt.vis_thresh = 0.3          # visualization threshold: lower-scoring boxes are not drawn
opt.debugger_theme = 'white'

In [9]:
# nntools is imported from src/project_tools (the same folder is also appended before the fcn_opts import).
sys.path.append(sys.path[0]+"/src/project_tools")
import nntools as nt

CenterNet model class, built on the NeuralNetwork base class used in the course assignments

In [10]:
class Centernet_model(nt.NeuralNetwork,CtdetTrainer):
    """Wrap a CenterNet network as an ``nt.NeuralNetwork`` that also carries the ``CtdetTrainer`` loss.

    Parameters
    ----------
    opt
        Option object; forwarded to ``CtdetTrainer`` and stored as ``self.opt``.
    model
        Network to wrap; ``forward`` delegates to it.
    optimizer
        Optional optimizer, forwarded to ``CtdetTrainer``.
    FineTune
        When true, every parameter whose name starts with ``hm``, ``re`` or ``wh``
        is re-initialised with Gaussian noise of standard deviation 0.1, and all
        other parameters get ``requires_grad = False``.
        (presumably these prefixes are the output heads — confirm against the model.)
    """
    def __init__(self,opt,model,optimizer=None,FineTune=True):
        nt.NeuralNetwork.__init__(self)
        # Pass the caller's optimizer through; it used to be replaced by None here.
        CtdetTrainer.__init__(self,opt,model,optimizer=optimizer)
        ## Partial training of the network
        if FineTune:
            for name,param in model.named_parameters():
                if name.startswith(('hm','re','wh')):
                    # Random initialization, created with the parameter's own dtype and
                    # device so it also works when the model is no longer on the CPU.
                    param.data = 0.1*torch.randn(param.size(), dtype=param.dtype, device=param.device)
                else:
                    param.requires_grad=False
        self.model=model
        self.opt=opt
    def forward(self,x):
        """Return the wrapped model's output for ``x``."""
        return self.model(x)
    def criterion(self, y, d):
        """Evaluate ``self.loss`` on ``(y, d)``; ``self.loss`` is not defined in this class."""
        return self.loss(y,d)

In [None]:
# Build the network described by opt and fill it from the pre-trained checkpoint.
model = load_model(
    create_model(opt.arch, opt.heads, opt.head_conv),
    sys.path[0]+'/models/ctdet_coco_resdcn18.pth',
)
# net is this model moved to the configured device.
net = model.to(opt.device)
net.eval(); # eval mode for testing; the trailing ';' keeps the cell from echoing the result

=> loading pretrained model https://download.pytorch.org/models/resnet18-5c106cde.pth
=> init deconv weights from normal distribution
loaded /datasets/home/home-01/30/230/psarangi/Final_proj/MoDL_CenterNet/models/ctdet_coco_resdcn18.pth, epoch 140


In [None]:
# Run the pre-trained network on the first demo image and draw its detections onto I.
detector = CtdetDetector(opt) #Detector class for performing object detection
detector.model=net
ret = detector.run(f_name)
show_txt = True
#Drawing Bounding Boxes
for c_id, class_name in enumerate(coco_class_name):
    # Results are stored under key c_id+1; columns 0-3 are used as box corners, column 4 as the score.
    for bbox in ret['results'][c_id+1]:
        if bbox[4]>=opt.vis_thresh:
            # OpenCV drawing calls need integer pixel coordinates; cast explicitly.
            x1, y1, x2, y2 = (int(v) for v in bbox[:4])
            cv2.rectangle(I, (x1, y1), (x2, y2), (0,255,0), 2)
            if show_txt:
                txt = '{}{:.1f}'.format(class_name, bbox[4]) # text+confidence
                font = cv2.FONT_HERSHEY_SIMPLEX
                cat_size = cv2.getTextSize(txt, font, 0.5, 2)[0]
                # Filled label background just above the box, then the label text on top of it.
                cv2.rectangle(I, (x1, int(bbox[1] - cat_size[1] - 2)),(int(bbox[0] + cat_size[0]), int(bbox[1] - 2)), (0,255,0), -1)
                cv2.putText(I, txt, (x1, int(bbox[1] - 2)), font, 0.5, (0, 0, 0),thickness=1, lineType=cv2.LINE_AA)

In [None]:
# Create the network, load the pre-trained weights, and wrap it in Centernet_model.
model2 = load_model(
    create_model(opt.arch, opt.heads, opt.head_conv),
    sys.path[0]+'/models/ctdet_coco_resdcn18.pth',
)
net2 = Centernet_model(opt,model2).to(opt.device)
# Baseline model trained with 2048 training points (ResNet18 backbone);
# the weights are stored under the checkpoint's 'Net' key.
checkpoint2 = torch.load(sys.path[0]+'/models/baseline_resnet_2048.pth.tar')
net2.load_state_dict(checkpoint2['Net'])
net2.eval();

In [None]:
#opt.load_model = sys.path[0]+'/Experiment_upsamp2/checkpoint.pth.tar'
I2 = io.imread(f_name)
detector2 = CtdetDetector(opt)
detector2.model=net2
ret = detector2.run(f_name)
show_txt = True
#Drawing Bounding Boxes
for c_id in range(80):
    for j in range(ret['results'][c_id+1].shape[0]):
        if ret['results'][c_id+1][j][4]>=opt.vis_thresh:
            bbox = ret['results'][c_id+1][j]
            cv2.rectangle(I2, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0,255,0), 2)
            if show_txt:
                txt = '{}{:.1f}'.format(coco_class_name[c_id], bbox[4]) # text+confidence
                font = cv2.FONT_HERSHEY_SIMPLEX
                cat_size = cv2.getTextSize(txt, font, 0.5, 2)[0]
                cv2.rectangle(I2, (bbox[0], int(bbox[1] - cat_size[1] - 2)),(int(bbox[0] + cat_size[0]), int(bbox[1] - 2)), (0,255,0), -1)
                cv2.putText(I2, txt, (bbox[0], int(bbox[1] - 2)), font, 0.5, (0, 0, 0),thickness=1, lineType=cv2.LINE_AA)


#fig, axes = plt.subplots(figsize=(13,13))
fig, axes = plt.subplots(nrows=1, ncols=2,figsize=(20,20))
#fig, axes = plt.subplots(nrows=1, ncols=2)
axes[0].imshow(I)
axes[0].axis('off')
axes[1].imshow(I2)
axes[1].axis('off')
plt.show

## Demo 2: Architecture Demo (DLA vs Resnet Backbone)

In [None]:
# Image for the backbone comparison; the path is printed for reference.
f_name = img_dir + '2.jpg'
I2 = io.imread(f_name)
print(f_name)

In [None]:
# Detect with detector2 (its model was set to net2 in an earlier cell) and draw onto I2.
ret = detector2.run(f_name)
show_txt = True
#Drawing Bounding Boxes
for c_id, class_name in enumerate(coco_class_name):
    # Results are stored under key c_id+1; columns 0-3 are used as box corners, column 4 as the score.
    for bbox in ret['results'][c_id+1]:
        if bbox[4]>=opt.vis_thresh:
            # OpenCV drawing calls need integer pixel coordinates; cast explicitly.
            x1, y1, x2, y2 = (int(v) for v in bbox[:4])
            cv2.rectangle(I2, (x1, y1), (x2, y2), (0,255,0), 2)
            if show_txt:
                txt = '{}{:.1f}'.format(class_name, bbox[4]) # text+confidence
                font = cv2.FONT_HERSHEY_SIMPLEX
                cat_size = cv2.getTextSize(txt, font, 0.5, 2)[0]
                cv2.rectangle(I2, (x1, int(bbox[1] - cat_size[1] - 2)),(int(bbox[0] + cat_size[0]), int(bbox[1] - 2)), (0,255,0), -1)
                cv2.putText(I2, txt, (x1, int(bbox[1] - 2)), font, 0.5, (0, 0, 0),thickness=1, lineType=cv2.LINE_AA)



In [None]:
# Separate option object for the DLA backbone so `opt` keeps its own settings.
opt2 = fcn_opts(Dataset)
opt2.arch='dla_34'
opt2.head_conv = 256
model3 = create_model(opt2.arch, opt2.heads, opt2.head_conv)
net3 = Centernet_model(opt2,model3).to(opt2.device)
# Baseline model trained with 2048 training points (DLA backbone);
# the weights are stored under the checkpoint's 'Net' key.
checkpoint = torch.load(sys.path[0]+'/models/baseline_dla_2048.pth.tar')
net3.load_state_dict(checkpoint['Net'])
net3.eval();

In [None]:
#opt.load_model = sys.path[0]+'/Experiment_upsamp2/checkpoint.pth.tar'
opt2.load_model = sys.path[0]+'/models/ctdet_coco_dla_1x.pth'
opt2.dataset = 'coco'
opt2.debugger_theme = 'white'
I3 = io.imread(f_name)
detector3 = CtdetDetector(opt2)
detector3.model=net3
ret = detector3.run(f_name)
show_txt = True
#Drawing Bounding Boxes
for c_id in range(80):
    for j in range(ret['results'][c_id+1].shape[0]):
        if ret['results'][c_id+1][j][4]>=opt2.vis_thresh:
            bbox = ret['results'][c_id+1][j]
            cv2.rectangle(I3, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0,255,0), 2)
            if show_txt:
                txt = '{}{:.1f}'.format(coco_class_name[c_id], bbox[4]) # text+confidence
                font = cv2.FONT_HERSHEY_SIMPLEX
                cat_size = cv2.getTextSize(txt, font, 0.5, 2)[0]
                cv2.rectangle(I3, (bbox[0], int(bbox[1] - cat_size[1] - 2)),(int(bbox[0] + cat_size[0]), int(bbox[1] - 2)), (0,255,0), -1)
                cv2.putText(I3, txt, (bbox[0], int(bbox[1] - 2)), font, 0.5, (0, 0, 0),thickness=1, lineType=cv2.LINE_AA)


#fig, axes = plt.subplots(figsize=(13,13))
fig, axes = plt.subplots(nrows=1, ncols=2,figsize=(20,20))
#fig, axes = plt.subplots(nrows=1, ncols=2)
axes[0].imshow(I2)
axes[0].axis('off')
axes[1].imshow(I3)
axes[1].axis('off')
plt.show
plt.savefig('dla_v_resnet.eps',format='eps',bbox_inches='tight')

## Demo 3 (Our Baseline model with 2048 training samples with and without Augmentation during training)

In [None]:
f_name = img_dir + '12.jpg'
I2 = io.imread(f_name)
h, w = I2.shape[:2]
# Affine matrix about the image centre with angle argument 25 and scale 0.6,
# applied on a canvas of the original width and height.
M = cv2.getRotationMatrix2D((w / 2, h / 2), 25, 0.6)
I2_ = cv2.warpAffine(I2, M, (w, h))

In [None]:
# Rebuild the baseline network (trained with augmentation) and run it on the transformed image I2_.
model2 = create_model(opt.arch, opt.heads, opt.head_conv)
model2 =load_model(model2,sys.path[0]+'/models/ctdet_coco_resdcn18.pth');
net2 = Centernet_model(opt,model2);
net2 = net2.to(opt.device)
#Load the baseline model trained with 2048 Training points-ResNet18 Backbone
checkpoint2 = torch.load(sys.path[0]+'/models/baseline_resnet_2048.pth.tar')
net2.load_state_dict(checkpoint2['Net'])
net2.eval();
detector = CtdetDetector(opt)
detector.model=net2
# NOTE(review): I2_ comes from skimage's imread, while other cells give the detector a file path —
# confirm the detector expects the same channel order for array input.
ret = detector.run(I2_)
show_txt = True
#Drawing Bounding Boxes
for c_id, class_name in enumerate(coco_class_name):
    # Results are stored under key c_id+1; columns 0-3 are used as box corners, column 4 as the score.
    for bbox in ret['results'][c_id+1]:
        if bbox[4]>=opt.vis_thresh:
            # OpenCV drawing calls need integer pixel coordinates; cast explicitly.
            x1, y1, x2, y2 = (int(v) for v in bbox[:4])
            cv2.rectangle(I2_, (x1, y1), (x2, y2), (0,255,0), 2)
            if show_txt:
                txt = '{}{:.1f}'.format(class_name, bbox[4]) # text+confidence
                font = cv2.FONT_HERSHEY_SIMPLEX
                cat_size = cv2.getTextSize(txt, font, 0.5, 2)[0]
                cv2.rectangle(I2_, (x1, int(bbox[1] - cat_size[1] - 2)),(int(bbox[0] + cat_size[0]), int(bbox[1] - 2)), (0,255,0), -1)
                cv2.putText(I2_, txt, (x1, int(bbox[1] - 2)), font, 0.5, (0, 0, 0),thickness=1, lineType=cv2.LINE_AA)



In [None]:
# Same architecture as the baseline, filled from the checkpoint trained without
# augmentation (no flip, rotation or scaling). Weights are under the 'Net' key.
model3 = create_model(opt.arch, opt.heads, opt.head_conv)
net4 = Centernet_model(opt,model3).to(opt.device)
checkpoint = torch.load(sys.path[0]+'/models/no_aug_model.pth.tar')
net4.load_state_dict(checkpoint['Net'])
net4.eval();

Adding a small rotation and scaling transformation leads to misidentification at the same visualization threshold

In [None]:
# Apply the same affine transform (M, w, h from the image-loading cell) and run the no-augmentation model.
I3 = io.imread(f_name)
I3_ = cv2.warpAffine(I3, M, (w, h))
detector2 = CtdetDetector(opt)
detector2.model=net4
ret = detector2.run(I3_)
show_txt = True
#Drawing Bounding Boxes
for c_id, class_name in enumerate(coco_class_name):
    # Results are stored under key c_id+1; columns 0-3 are used as box corners, column 4 as the score.
    for bbox in ret['results'][c_id+1]:
        if bbox[4]>=opt.vis_thresh:
            # OpenCV drawing calls need integer pixel coordinates; cast explicitly.
            x1, y1, x2, y2 = (int(v) for v in bbox[:4])
            cv2.rectangle(I3_, (x1, y1), (x2, y2), (0,255,0), 2)
            if show_txt:
                txt = '{}{:.1f}'.format(class_name, bbox[4]) # text+confidence
                font = cv2.FONT_HERSHEY_SIMPLEX
                cat_size = cv2.getTextSize(txt, font, 0.5, 2)[0]
                cv2.rectangle(I3_, (x1, int(bbox[1] - cat_size[1] - 2)),(int(bbox[0] + cat_size[0]), int(bbox[1] - 2)), (0,255,0), -1)
                cv2.putText(I3_, txt, (x1, int(bbox[1] - 2)), font, 0.5, (0, 0, 0),thickness=1, lineType=cv2.LINE_AA)

# Left: I2_ (drawn in the previous detection cell). Right: I3_ (this cell).
fig, axes = plt.subplots(nrows=1, ncols=2,figsize=(20,20))
axes[0].imshow(I2_)
axes[0].axis('off')
axes[1].imshow(I3_)
axes[1].axis('off')
# Save through the figure handle before showing, so the file is written from
# this figure rather than from whatever is current after plt.show().
fig.savefig('aug.eps',format='eps',bbox_inches='tight')
plt.show()

## Demo 4: Upsampling Architecture

In [None]:
# Image used for the small-object comparison.
f_name = img_dir + '10.jpg'
I2 = io.imread(f_name)

## Illustrating small object detection (Tie) which was not recognized by base network

In [None]:
# Fresh options with a lower visualization threshold, then the baseline network on the new image.
opt = fcn_opts(Dataset)
opt.load_model = sys.path[0]+'/models/ctdet_coco_resdcn18.pth'
opt.vis_thresh=0.2
model = create_model(opt.arch, opt.heads, opt.head_conv)
net2 = Centernet_model(opt,model);
net2 = net2.to(opt.device)
#Load the baseline model trained with 2048 Training points-ResNet18 Backbone
checkpoint = torch.load(sys.path[0]+'/models/baseline_resnet_2048.pth.tar')
net2.load_state_dict(checkpoint['Net'])
net2.eval();
detector = CtdetDetector(opt)
detector.model=net2
ret = detector.run(f_name)
show_txt = True
#Drawing Bounding Boxes for specific class = Tie
c_id=27 # index of 'tie' in coco_class_name
for bbox in ret['results'][c_id+1]:
    if bbox[4]>=opt.vis_thresh:
        # OpenCV drawing calls need integer pixel coordinates; cast explicitly.
        x1, y1, x2, y2 = (int(v) for v in bbox[:4])
        cv2.rectangle(I2, (x1, y1), (x2, y2), (0,255,0), 2)
        if show_txt:
            txt = '{}{:.1f}'.format(coco_class_name[c_id], bbox[4]) # text+confidence
            font = cv2.FONT_HERSHEY_SIMPLEX
            cat_size = cv2.getTextSize(txt, font, 0.5, 2)[0]
            cv2.rectangle(I2, (x1, int(bbox[1] - cat_size[1] - 2)),(int(bbox[0] + cat_size[0]), int(bbox[1] - 2)), (0,255,0), -1)
            cv2.putText(I2, txt, (x1, int(bbox[1] - 2)), font, 0.5, (0, 0, 0),thickness=1, lineType=cv2.LINE_AA)

In [None]:
# Give net5 its own network instance. Wrapping the existing `model` would share
# it with net2, so the fine-tune re-initialisation and the checkpoint load below
# would silently overwrite net2's weights as well.
net5 = Centernet_model(opt,create_model(opt.arch, opt.heads, opt.head_conv));
net5 = net5.to(opt.device)
#Load the model trained with 2048 Training and With High-resolution Object detection output
#Objective: Show better detection of smaller scale objects
checkpoint = torch.load(sys.path[0]+'/models/upsample_model.pth.tar')
net5.load_state_dict(checkpoint['Net'])
net5.eval();

In [None]:
# Run the upsampling model on the same image and compare its 'tie' detections with the baseline's.
I3 = io.imread(f_name)
detector2 = CtdetDetector(opt)
detector2.model=net5
ret = detector2.run(f_name)
show_txt = True
#Drawing Bounding Boxes for specific class = Tie
# Set explicitly so this cell does not depend on a c_id left over from another cell.
c_id=27 # index of 'tie' in coco_class_name
for bbox in ret['results'][c_id+1]:
    if bbox[4]>=opt.vis_thresh:
        # OpenCV drawing calls need integer pixel coordinates; cast explicitly.
        x1, y1, x2, y2 = (int(v) for v in bbox[:4])
        cv2.rectangle(I3, (x1, y1), (x2, y2), (0,255,0), 2)
        if show_txt:
            txt = '{}{:.1f}'.format(coco_class_name[c_id], bbox[4]) # text+confidence
            font = cv2.FONT_HERSHEY_SIMPLEX
            cat_size = cv2.getTextSize(txt, font, 0.5, 2)[0]
            cv2.rectangle(I3, (x1, int(bbox[1] - cat_size[1] - 2)),(int(bbox[0] + cat_size[0]), int(bbox[1] - 2)), (0,255,0), -1)
            cv2.putText(I3, txt, (x1, int(bbox[1] - 2)), font, 0.5, (0, 0, 0),thickness=1, lineType=cv2.LINE_AA)

# Left: I2 (drawn in the baseline cell). Right: I3 (this cell).
fig, axes = plt.subplots(nrows=1, ncols=2,figsize=(20,20))
axes[0].imshow(I2)
axes[0].axis('off')
axes[1].imshow(I3)
axes[1].axis('off')
# Save through the figure handle before showing, so the file is written from
# this figure rather than from whatever is current after plt.show().
fig.savefig('small.eps',format='eps',bbox_inches='tight')
plt.show()

In [None]:
# Baseline network again, this time drawing every class onto I2.
# I2 is not reloaded here, so boxes drawn on it by earlier cells remain.
opt = fcn_opts(Dataset)
opt.load_model = sys.path[0]+'/models/ctdet_coco_resdcn18.pth'
opt.vis_thresh=0.2
model = create_model(opt.arch, opt.heads, opt.head_conv)
net2 = Centernet_model(opt,model);
net2 = net2.to(opt.device)
checkpoint = torch.load(sys.path[0]+'/models/baseline_resnet_2048.pth.tar')
net2.load_state_dict(checkpoint['Net'])
net2.eval();
detector = CtdetDetector(opt)
detector.model=net2
ret = detector.run(f_name)
show_txt = True
#Drawing Bounding Boxes
for c_id, class_name in enumerate(coco_class_name):
    # Results are stored under key c_id+1; columns 0-3 are used as box corners, column 4 as the score.
    for bbox in ret['results'][c_id+1]:
        if bbox[4]>=opt.vis_thresh:
            # OpenCV drawing calls need integer pixel coordinates; cast explicitly.
            x1, y1, x2, y2 = (int(v) for v in bbox[:4])
            cv2.rectangle(I2, (x1, y1), (x2, y2), (0,255,0), 2)
            if show_txt:
                txt = '{}{:.1f}'.format(class_name, bbox[4]) # text+confidence
                font = cv2.FONT_HERSHEY_SIMPLEX
                cat_size = cv2.getTextSize(txt, font, 0.5, 2)[0]
                cv2.rectangle(I2, (x1, int(bbox[1] - cat_size[1] - 2)),(int(bbox[0] + cat_size[0]), int(bbox[1] - 2)), (0,255,0), -1)
                cv2.putText(I2, txt, (x1, int(bbox[1] - 2)), font, 0.5, (0, 0, 0),thickness=1, lineType=cv2.LINE_AA)

In [None]:
# Upsampling model on the same image, drawing every class onto a fresh copy.
I3 = io.imread(f_name)
detector2 = CtdetDetector(opt)
detector2.model=net5
ret = detector2.run(f_name)
show_txt = True
#Drawing Bounding Boxes for all classes
for c_id, class_name in enumerate(coco_class_name):
    # Results are stored under key c_id+1; columns 0-3 are used as box corners, column 4 as the score.
    for bbox in ret['results'][c_id+1]:
        if bbox[4]>=opt.vis_thresh:
            # OpenCV drawing calls need integer pixel coordinates; cast explicitly.
            x1, y1, x2, y2 = (int(v) for v in bbox[:4])
            cv2.rectangle(I3, (x1, y1), (x2, y2), (0,255,0), 2)
            if show_txt:
                txt = '{}{:.1f}'.format(class_name, bbox[4]) # text+confidence
                font = cv2.FONT_HERSHEY_SIMPLEX
                cat_size = cv2.getTextSize(txt, font, 0.5, 2)[0]
                cv2.rectangle(I3, (x1, int(bbox[1] - cat_size[1] - 2)),(int(bbox[0] + cat_size[0]), int(bbox[1] - 2)), (0,255,0), -1)
                cv2.putText(I3, txt, (x1, int(bbox[1] - 2)), font, 0.5, (0, 0, 0),thickness=1, lineType=cv2.LINE_AA)

# Left: I2 (drawn in the baseline cell). Right: I3 (this cell).
fig, axes = plt.subplots(nrows=1, ncols=2,figsize=(20,20))
axes[0].imshow(I2)
axes[0].axis('off')
axes[1].imshow(I3)
axes[1].axis('off')
# Save through the figure handle before showing, so the file is written from
# this figure rather than from whatever is current after plt.show().
fig.savefig('small2.eps',format='eps',bbox_inches='tight')
plt.show()