In [1]:
## ALL IMPORTS FOR A NEW NOTEBOOK
__SEED = 0
__N_FOLDS = 5
__NROWS = None

from tqdm.notebook import tqdm
import os, sys, random, math
import matplotlib.pyplot as plt
# %matplotlib inline
plt.style.use('ggplot')

from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
# Use fully-qualified option names: on newer pandas the bare 'max_columns' / 'max_rows'
# patterns also match the 'styler.render.*' options and raise OptionError (ambiguous key).
pd.set_option('display.max_colwidth', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)
import matplotlib.pylab as plt
import seaborn as sns
import itertools as it
import scipy
import glob
import matplotlib
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset,DataLoader
from torch.optim import Optimizer
import torchvision.transforms.transforms as txf
import torch.optim.lr_scheduler as lr_scheduler
from collections import OrderedDict

from sklearn import metrics
from sklearn import preprocessing as pp
from sklearn import model_selection as ms

import ml_utils as mu
import time
import time, datetime, pickle

import torchvision.transforms.functional as FT
# fold1 = ms.StratifiedKFold(n_splits=__N_FOLDS, shuffle=True, random_state=__SEED)
# fold2 = ms.StratifiedKFold(n_splits=__N_FOLDS, shuffle=True, random_state=__SEED+3)
# fold3 = ms.StratifiedKFold(n_splits=__N_FOLDS, shuffle=True, random_state=__SEED+5)
font = {'size'   : 14}
matplotlib.rc('font', **font)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

torch.manual_seed(947)

<torch._C.Generator at 0x7f5747a86d90>

# INPUT PROCESSING and DATASET READING

In [2]:
# Class vocabulary: "background" first, followed by the twenty object categories.
voc_labels = (
    "background",
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
    'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
)

# class name -> integer id (position in voc_labels), and the inverse lookup
label_map = dict(zip(voc_labels, range(len(voc_labels))))
rev_label_map = dict(enumerate(voc_labels))

# One colour per class, in the same order as voc_labels
distinct_colors = [
    "#FFFFFF",
    '#e6194B', '#3cb44b', '#ffe119', '#4363d8', '#f58231',
    '#911eb4', '#42d4f4', '#f032e6', '#bfef45', '#fabebe',
    '#469990', '#e6beff', '#9A6324', '#fffac8', '#800000',
    '#aaffc3', '#808000', '#ffd8b1', '#000075', '#a9a9a9',
]
label_color_map = dict(zip(voc_labels, distinct_colors))

In [3]:
from xml.etree import ElementTree as ET
import json

### FOR PARSING THE XML ANNOTATIONS OF THE DATASETS ###
def parse_annotations(annotation_path, class_to_index=None):
    """Parse one Pascal VOC XML annotation into plain Python lists.

    Args:
        annotation_path: Path (or open file object) accepted by ``ElementTree.parse``.
        class_to_index: Optional mapping from lower-cased, stripped class name to
            integer id. Defaults to the notebook-level ``label_map``.

    Returns:
        dict with keys ``"boxes"`` (list of ``[xmin, ymin, xmax, ymax]`` ints, every
        coordinate shifted by -1), ``"labels"`` (list of class ids) and
        ``"difficulties"`` (list of 0/1 flags) -- one entry per kept object.
        Objects whose class name is not in the mapping are skipped.
    """
    name_to_id = label_map if class_to_index is None else class_to_index

    def _coord(bbox, tag):
        # int(float(...)) also accepts coordinates written as "45.0".
        # The -1 shift presumably converts 1-based VOC pixels to 0-based.
        return int(float(bbox.find(tag).text)) - 1

    root = ET.parse(annotation_path).getroot()

    boxes = []
    labels = []
    difficulties = []

    # `obj`, not `object`: avoid shadowing the builtin
    for obj in root.iter('object'):
        label = obj.find("name").text.lower().strip()
        if label not in name_to_id:
            continue

        # A missing <difficult> tag is treated as "not difficult" instead of crashing
        difficult = int(obj.findtext("difficult", default="0").strip() == "1")

        bbox = obj.find('bndbox')
        boxes.append([_coord(bbox, 'xmin'), _coord(bbox, 'ymin'),
                      _coord(bbox, 'xmax'), _coord(bbox, 'ymax')])
        labels.append(name_to_id[label])
        difficulties.append(difficult)

    return {"boxes": boxes, "labels": labels, "difficulties": difficulties}


### READ XML DESCRIPTIONS AND SAVE AS JSON FOR FASTER ACCESS ###
def _collect_voc_split(voc_roots, image_set):
    """Parse every image listed in ``ImageSets/Main/<image_set>.txt`` under each root.

    Returns ``(image_paths, image_objects, n_objects)``; images whose annotation
    yields no boxes are left out of both lists.
    """
    image_paths = []
    image_objects = []
    n_objects = 0

    for root in voc_roots:
        with open(os.path.join(root, "ImageSets/Main/{}.txt".format(image_set))) as f:
            image_ids = f.read().splitlines()

        for image_id in image_ids:
            if not image_id.strip():
                # tolerate blank lines in the id file
                continue
            objects = parse_annotations(os.path.join(root, "Annotations", image_id + ".xml"))
            if len(objects["boxes"]) == 0:
                continue

            n_objects += len(objects["boxes"])
            image_objects.append(objects)
            image_paths.append(os.path.join(root, "JPEGImages", image_id + '.jpg'))

    return image_paths, image_objects, n_objects


def _dump_json(payload, folder, file_name):
    """Write ``payload`` as JSON to ``folder/file_name``."""
    with open(os.path.join(folder, file_name), 'w') as j:
        json.dump(payload, j)


def create_data_lists(v07, v12, output_folder):
    """Index the VOC annotations once and cache them as JSON for faster access.

    Args:
        v07: Root folder of VOC2007 (its ``trainval`` and ``test`` image sets are read).
        v12: Root folder of VOC2012 (its ``trainval`` image set is read).
        output_folder: Where ``TRAIN_images.json``, ``TRAIN_objects.json``,
            ``label_map.json``, ``TEST_images.json`` and ``TEST_objects.json``
            are written. Created if it does not exist.
    """
    v07_path = os.path.abspath(v07)
    v12_path = os.path.abspath(v12)

    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    ## PARSE TRAIN ##
    train_images, train_objects, n_objects = _collect_voc_split([v07_path, v12_path], "trainval")

    print("TOTAL TRAIN IMAGES: {} == TOTAL TRAIN OBJECTS {}".format(len(train_images), len(train_objects)))
    print("TOTAL OBJECTS IN ALL TRAIN IMAGES: {}\n".format(n_objects))

    _dump_json(train_images, output_folder, "TRAIN_images.json")
    _dump_json(train_objects, output_folder, "TRAIN_objects.json")
    _dump_json(label_map, output_folder, "label_map.json")

    ## PARSE TEST ##
    test_images, test_objects, n_objects = _collect_voc_split([v07_path], "test")

    print("TOTAL TEST IMAGES: {} == TOTAL TEST OBJECTS {}".format(len(test_images), len(test_objects)))
    print("TOTAL OBJECTS IN ALL TEST IMAGES: {}\n".format(n_objects))

    _dump_json(test_images, output_folder, "TEST_images.json")
    _dump_json(test_objects, output_folder, "TEST_objects.json")

In [4]:
%%time
# Build the TRAIN/TEST JSON index files in the working directory;
# PascalVOCDataset later reads "<SPLIT>_images.json" / "<SPLIT>_objects.json" from there.
create_data_lists(v07='./train/VOC2007/',
                      v12='./train/VOC2012',
                      output_folder='./')

TOTAL TRAIN IMAGES: 16551 == TOTAL TRAIN OBJECTS 16551
TOTAL OBJECTS IN ALL TRAIN IMAGES: 47223

TOTAL TEST IMAGES: 4952 == TOTAL TEST OBJECTS 4952
TOTAL OBJECTS IN ALL TEST IMAGES: 14976

CPU times: user 2.9 s, sys: 622 ms, total: 3.52 s
Wall time: 6.56 s


In [7]:
import torchvision.transforms.functional as FT
def transform(image, boxes, labels, difficulties, split, resize_dims=(300,300)):
    """Turn one raw sample into a normalized, fixed-size training/eval sample.

    For ``split == "TRAIN"`` the sample first goes through the augmentation helpers
    from ``mu`` (photometric distortion, an expand step with probability 0.5,
    a random crop, and a flip with probability 0.5). Every split is then resized
    to ``resize_dims``, converted to a tensor and normalized per channel.

    Args:
        image: Input image -- presumably a PIL image, as produced by the dataset's
            collate function (TODO confirm against ``mu`` helpers).
        boxes, labels, difficulties: Per-object annotations for that image.
        split: One of ``"TRAIN"``, ``"TEST"``, ``"VALID"``.
        resize_dims: Target size handed to ``mu.box_resize``.

    Returns:
        ``(image, boxes, labels, difficulties)`` after all transformations.
    """
    assert split in {"TRAIN","TEST", "VALID"}

    # Per-channel statistics used both as expand filler and for normalization
    # (presumably the ImageNet statistics matching the pretrained VGG base).
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    if split == "TRAIN":
        # Augmentations operate on a tensor between distortion and flipping
        image = FT.to_tensor(mu.photometric_distort(image))

        expand = random.random() < 0.5
        if expand:
            image, boxes = mu.box_expand(image, boxes, filler=channel_mean)

        image, boxes, labels, difficulties = mu.box_random_crop(image, boxes, labels, difficulties)

        image = FT.to_pil_image(image)

        flip = random.random() < 0.5
        if flip:
            image, boxes = mu.box_flip(image, boxes)

    image, boxes = mu.box_resize(image, boxes, dims=resize_dims)
    image = FT.normalize(FT.to_tensor(image), channel_mean, channel_std)
    return image, boxes, labels, difficulties

In [8]:
from PIL import Image


class PascalVOCDataset(Dataset):
    """Dataset over the JSON index files ``<SPLIT>_images.json`` / ``<SPLIT>_objects.json``.

    Items are lightweight ``(image_path, objects_dict)`` pairs; image decoding,
    tensor conversion and augmentation all happen in :meth:`collate_fn`, which is
    meant to be passed to the ``DataLoader``.
    """

    _VALID_SPLITS = {"TRAIN", "TEST", "VALID"}

    def __init__(self, data_folder, split, keep_diffcult=False):
        """Load the image-path list and the per-image object list for ``split``.

        Args:
            data_folder: Folder containing the JSON index files.
            split: Split name, case-insensitive (``train`` / ``test`` / ``valid``).
            keep_diffcult: When False, objects flagged as difficult are dropped
                at collate time. (Parameter name kept as-is for callers.)
        """
        self.split = split.upper()
        assert self.split in self._VALID_SPLITS

        self.data_folder = data_folder
        self.keep_difficult = keep_diffcult

        self.images = self._load_index(self.split + "_images.json")
        self.objects = self._load_index(self.split + "_objects.json")
        assert len(self.images) == len(self.objects)

    def _load_index(self, file_name):
        """Read one JSON index file from ``self.data_folder``."""
        with open(os.path.join(self.data_folder, file_name), 'r') as handle:
            return json.load(handle)

    def __getitem__(self, i):
        # (path of the image file, dict with "boxes" / "labels" / "difficulties")
        return (self.images[i], self.objects[i])

    def __len__(self):
        return len(self.images)

    def _objects_to_tensors(self, obj_map):
        """Convert one objects dict to tensors, optionally dropping difficult objects."""
        boxes = torch.FloatTensor(obj_map["boxes"])  # n_obs X 4
        labels = torch.LongTensor(obj_map["labels"])  # n_obs
        difficulties = (torch.ByteTensor(obj_map["difficulties"]) == 1)  # n_obs, bool
        if self.keep_difficult:
            return boxes, labels, difficulties
        easy = ~difficulties
        return boxes[easy], labels[easy], difficulties[easy]

    def collate_fn(self, batch):
        """
        Combine samples whose object counts differ into one batch.

        Images are stacked into a single tensor; boxes, labels and difficulties
        stay as lists with one tensor per image.
        """
        batched_images = []
        batched_boxes = []
        batched_labels = []
        batched_difficulties = []

        for sample in batch:
            pil_image = Image.open(sample[0], mode='r').convert("RGB")
            boxes, labels, difficulties = self._objects_to_tensors(sample[1])
            processed = transform(pil_image, boxes, labels, difficulties, split=self.split)

            batched_images.append(processed[0])
            batched_boxes.append(processed[1])
            batched_labels.append(processed[2])
            batched_difficulties.append(processed[3])

        # tensor (N, 3, H, W) plus three lists of N tensors each
        return torch.stack(batched_images, dim=0), batched_boxes, batched_labels, batched_difficulties

# MODEL and LOSS FUNCTION

In [10]:
import torchvision

class VGGBase(nn.Module):
    """VGG-16 style convolutional base producing the two lowest-level feature maps.

    The layers are declared in network order on purpose: ``load_pretrained_layers``
    copies pretrained tensors positionally, so the declaration order is part of the
    contract. ``conv6`` / ``conv7`` are convolutional stand-ins for VGG's first two
    fully-connected layers.
    """

    def __init__(self):
        super().__init__()

        def conv3x3(in_channels, out_channels):
            return nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)

        def halve(ceil_mode=False):
            if ceil_mode:
                return nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)
            return nn.MaxPool2d(kernel_size=2, stride=2)

        # stage 1
        self.conv1_1 = conv3x3(3, 64)
        self.conv1_2 = conv3x3(64, 64)
        self.pool1 = halve()

        # stage 2
        self.conv2_1 = conv3x3(64, 128)
        self.conv2_2 = conv3x3(128, 128)
        self.pool2 = halve()

        # stage 3 -- ceil_mode rounds odd spatial sizes up instead of down
        self.conv3_1 = conv3x3(128, 256)
        self.conv3_2 = conv3x3(256, 256)
        self.conv3_3 = conv3x3(256, 256)
        self.pool3 = halve(ceil_mode=True)

        # stage 4
        self.conv4_1 = conv3x3(256, 512)
        self.conv4_2 = conv3x3(512, 512)
        self.conv4_3 = conv3x3(512, 512)
        self.pool4 = halve()

        # stage 5 -- pool5 keeps the spatial size (3x3 window, stride 1, padding 1)
        self.conv5_1 = conv3x3(512, 512)
        self.conv5_2 = conv3x3(512, 512)
        self.conv5_3 = conv3x3(512, 512)
        self.pool5 = nn.MaxPool2d(kernel_size=3, padding=1, stride=1)

        # convolutional replacements for fc6 (dilated 3x3) and fc7 (1x1)
        self.conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6)
        self.conv7 = nn.Conv2d(1024, 1024, kernel_size=1)

        self.load_pretrained_layers()

    def _relu_chain(self, x, layer_names):
        """Apply the named conv layers in sequence, each followed by ReLU."""
        for layer_name in layer_names:
            x = F.relu(getattr(self, layer_name)(x))
        return x

    def forward(self, image):
        """Return ``(conv4_3 features, conv7 features)`` for a batch of images."""
        x = self.pool1(self._relu_chain(image, ("conv1_1", "conv1_2")))
        x = self.pool2(self._relu_chain(x, ("conv2_1", "conv2_2")))
        x = self.pool3(self._relu_chain(x, ("conv3_1", "conv3_2", "conv3_3")))

        # conv4_3 output is tapped before pool4
        conv_4_3_feats = self._relu_chain(x, ("conv4_1", "conv4_2", "conv4_3"))

        x = self.pool4(conv_4_3_feats)
        x = self.pool5(self._relu_chain(x, ("conv5_1", "conv5_2", "conv5_3")))

        conv7_feats = self._relu_chain(x, ("conv6", "conv7"))

        return conv_4_3_feats, conv7_feats

    def load_pretrained_layers(self):
        """Initialise all layers from torchvision's pretrained VGG-16 weights."""
        own_state = self.state_dict()
        own_names = list(own_state.keys())

        vgg_state = torchvision.models.vgg16(pretrained=True).state_dict()
        vgg_names = list(vgg_state.keys())

        ## Everything except conv6/conv7 (the last 4 tensors) is copied by position
        for own_name, vgg_name in zip(own_names[:-4], vgg_names):
            own_state[own_name] = vgg_state[vgg_name]

        # fc6 -> conv6: view the FC weight as a conv kernel, then subsample it with
        # mu.decimate (m lists the per-dimension step; None presumably means "keep all")
        fc6_weight = vgg_state['classifier.0.weight'].view(4096, 512, 7, 7)
        fc6_bias = vgg_state['classifier.0.bias']
        own_state['conv6.weight'] = mu.decimate(fc6_weight, m=[4,None, 3, 3])
        own_state['conv6.bias'] = mu.decimate(fc6_bias, m=[4])

        # fc7 -> conv7: same idea with a 1x1 kernel
        fc7_weight = vgg_state['classifier.3.weight'].view(4096,4096,1,1)
        fc7_bias = vgg_state['classifier.3.bias']
        own_state['conv7.weight'] = mu.decimate(fc7_weight, m=[4,4,None,None])
        own_state['conv7.bias'] = mu.decimate(fc7_bias, m=[4])

        self.load_state_dict(own_state)

        print("BASE MODEL LOAD....COMPLETE\n")

In [11]:
class AuxiliaryConvolutions(nn.Module):
    """Extra convolution stages stacked on the conv7 features.

    Each stage is a 1x1 channel-reducing conv followed by a 3x3 conv; the output
    of every 3x3 conv is returned as a feature map.
    """

    def __init__(self):
        super().__init__()

        # stages 8 and 9 halve the spatial size through stride 2
        self.conv8_1 = nn.Conv2d(1024, 256, kernel_size=1, padding=0)
        self.conv8_2 = nn.Conv2d(256, 512, kernel_size=3, padding=1, stride=2)
        self.conv9_1 = nn.Conv2d(512, 128, kernel_size=1, padding=0)
        self.conv9_2 = nn.Conv2d(128, 256, kernel_size=3, padding=1, stride=2)

        # stages 10 and 11 shrink the map via unpadded 3x3 convs
        self.conv10_1 = nn.Conv2d(256, 128, kernel_size=1, padding=0)
        self.conv10_2 = nn.Conv2d(128, 256, kernel_size=3, padding=0)
        self.conv11_1 = nn.Conv2d(256, 128, kernel_size=1, padding=0)
        self.conv11_2 = nn.Conv2d(128, 256, kernel_size=3, padding=0)

        self.init_conv2d()

    def init_conv2d(self):
        """Xavier-uniform weights and zero biases for every direct Conv2d child."""
        for layer in self.children():
            if not isinstance(layer, nn.Conv2d):
                continue
            nn.init.xavier_uniform_(layer.weight)
            nn.init.constant_(layer.bias, 0.0)

    def forward(self, conv7_feats):
        """Return ``(conv8_2, conv9_2, conv10_2, conv11_2)`` feature maps."""
        stage_outputs = []
        x = conv7_feats
        for stage in ("8", "9", "10", "11"):
            reduce_conv = getattr(self, "conv{}_1".format(stage))
            main_conv = getattr(self, "conv{}_2".format(stage))
            x = F.relu(main_conv(F.relu(reduce_conv(x))))
            stage_outputs.append(x)

        conv8_2_feats, conv9_2_feats, conv10_2_feats, conv11_2_feats = stage_outputs
        return conv8_2_feats, conv9_2_feats, conv10_2_feats, conv11_2_feats

In [12]:
class PredictionConvolutions(nn.Module):
    """Per-feature-map prediction heads: box offsets (4 values) and class scores.

    Every feature map gets one ``loc_<map>`` conv and one ``cl_<map>`` conv. All
    ``loc_*`` layers are registered before all ``cl_*`` layers, in feature-map order.
    """

    # (feature map name, input channels, boxes predicted per spatial position)
    _HEAD_SPECS = (
        ('conv4_3', 512, 4),
        ('conv7', 1024, 6),
        ('conv8_2', 512, 6),
        ('conv9_2', 256, 6),
        ('conv10_2', 256, 4),
        ('conv11_2', 256, 4),
    )

    def __init__(self, n_classes):
        super().__init__()

        self.n_classes = n_classes

        # localization heads: 4 offsets per box
        for fmap, channels, boxes_per_cell in self._HEAD_SPECS:
            setattr(self, "loc_" + fmap,
                    nn.Conv2d(channels, 4*boxes_per_cell, kernel_size=3, padding=1))

        # classification heads: n_classes scores per box
        for fmap, channels, boxes_per_cell in self._HEAD_SPECS:
            setattr(self, "cl_" + fmap,
                    nn.Conv2d(channels, self.n_classes*boxes_per_cell, kernel_size=3, padding=1))

        self.init_conv2d()

    def init_conv2d(self):
        """Xavier-uniform weights and zero biases for every direct Conv2d child."""
        for layer in self.children():
            if not isinstance(layer, nn.Conv2d):
                continue
            nn.init.xavier_uniform_(layer.weight)
            nn.init.constant_(layer.bias, 0.0)

    def forward(self, conv4_3_feats, conv7_feats, conv8_2_feats, conv9_2_feats, conv10_2_feats, conv11_2_feats):
        """Return ``(locs, class_scores)`` concatenated over all feature maps.

        ``locs`` has shape (batch, total_boxes, 4) and ``class_scores`` has shape
        (batch, total_boxes, n_classes), as fixed by the ``view`` calls below.
        """
        batch_size = conv4_3_feats.size(0)
        feature_maps = (conv4_3_feats, conv7_feats, conv8_2_feats,
                        conv9_2_feats, conv10_2_feats, conv11_2_feats)

        def run_heads(prefix, values_per_box):
            # channels-last, then flatten (H, W, boxes) into one box axis
            flattened = []
            for (fmap, _, _), feats in zip(self._HEAD_SPECS, feature_maps):
                raw = getattr(self, prefix + fmap)(feats)
                raw = raw.permute(0, 2, 3, 1).contiguous()
                flattened.append(raw.view(batch_size, -1, values_per_box))
            return torch.cat(flattened, dim=1)

        locs = run_heads("loc_", 4)
        class_scores = run_heads("cl_", self.n_classes)

        return locs, class_scores

In [13]:
from math import sqrt

class SSD300(nn.Module):
    """Full detector: VGG base + auxiliary convs + prediction heads + prior boxes."""

    def __init__(self, n_classes):
        super().__init__()
        self.n_classes = n_classes

        self.base = VGGBase()
        self.aux_convs = AuxiliaryConvolutions()
        self.pred_convs = PredictionConvolutions(n_classes)

        # Learnable per-channel scale applied to the L2-normalised conv4_3 features
        self.rescale_factors = nn.Parameter(torch.FloatTensor(1, 512, 1, 1))
        nn.init.constant_(self.rescale_factors, 20)

        # Plain tensor attribute (not a buffer), created on the notebook-level `device`
        self.priors_cxcy = self.create_prior_boxes()

    def create_prior_boxes(self):
        """Build the prior boxes as ``(cx, cy, w, h)`` rows in fractional coordinates.

        Priors are generated feature map by feature map, row by row, and per cell
        one box per aspect ratio plus one extra square box right after the
        ratio-1 box. Values are clamped to [0, 1].
        """
        fmap_dims = {"conv4_3": 38, "conv7": 19, "conv8_2": 10,
                     "conv9_2": 5, "conv10_2": 3, "conv11_2": 1}
        obj_scales = {"conv4_3": 0.1, "conv7": 0.2, "conv8_2": 0.375,
                      "conv9_2": 0.55, "conv10_2": 0.725, "conv11_2": 0.9}
        wide = [1.0, 2.0, 0.5, 3.0, 0.333]
        narrow = [1.0, 2.0, 0.5]
        aspect_ratios = {"conv4_3": narrow, "conv7": wide, "conv8_2": wide,
                         "conv9_2": wide, "conv10_2": narrow, "conv11_2": narrow}

        layer_names = list(fmap_dims.keys())
        priors = []

        for layer_idx, layer in enumerate(layer_names):
            side = fmap_dims[layer]
            scale = obj_scales[layer]

            # Extra square prior: geometric mean of this scale and the next one;
            # the last feature map has no successor and uses 1.0
            if layer_idx + 1 < len(layer_names):
                extra_scale = sqrt(scale * obj_scales[layer_names[layer_idx + 1]])
            else:
                extra_scale = 1.0

            for row in range(side):
                cy = (row + 0.5) / side
                for col in range(side):
                    cx = (col + 0.5) / side
                    for ratio in aspect_ratios[layer]:
                        priors.append([cx, cy, scale * sqrt(ratio), scale / sqrt(ratio)])
                        if ratio == 1.0:
                            priors.append([cx, cy, extra_scale, extra_scale])

        priors = torch.FloatTensor(priors).to(device)
        priors.clamp_(0, 1)

        return priors

    def forward(self, image):
        """Return raw ``(locs, class_scores)`` predictions for a batch of images."""
        conv4_3_feats, conv7_feats = self.base(image)

        # L2-normalise conv4_3 across channels (epsilon avoids division by zero),
        # then rescale with the learnable factors
        channel_norm = conv4_3_feats.norm(dim=1, keepdim=True) + 1e-16
        conv4_3_feats = (conv4_3_feats / channel_norm) * self.rescale_factors

        aux_feats = self.aux_convs(conv7_feats)
        conv8_2_feats, conv9_2_feats, conv10_2_feats, conv11_2_feats = aux_feats

        locs, class_scores = self.pred_convs(
            conv4_3_feats, conv7_feats, conv8_2_feats,
            conv9_2_feats, conv10_2_feats, conv11_2_feats,
        )
        return locs, class_scores

    def detect_objects(self, predicted_locs, predicted_scores, min_score, max_overlap, top_k):
        """Decode raw predictions into per-image detections.

        For every image and every non-background class: keep boxes scoring above
        ``min_score``, sort by score, and greedily suppress boxes whose overlap
        (from ``mu.find_jaccard_overlap``) with a kept, higher-scoring box exceeds
        ``max_overlap``. If nothing survives, a single placeholder detection
        (box ``[0, 0, 1, 1]``, label 0, score 0) is emitted. At most ``top_k``
        detections per image are returned.

        Returns:
            Three lists (boxes, labels, scores) with one tensor per image.
        """
        batch_size = predicted_locs.size(0)
        n_priors = self.priors_cxcy.size(0)

        predicted_scores = F.softmax(predicted_scores, dim=-1)

        assert n_priors == predicted_locs.size(1) == predicted_scores.size(1)

        all_images_boxes = []
        all_images_labels = []
        all_images_scores = []

        for img in range(batch_size):
            # offsets relative to priors -> center-size boxes -> corner boxes (via mu helpers)
            decoded = mu.cxcy_to_xy(mu.gcxgcy_to_cxcy(predicted_locs[img], self.priors_cxcy))

            kept_boxes = []
            kept_labels = []
            kept_scores = []

            # class 0 is skipped (background)
            for cls in range(1, self.n_classes):
                cls_scores = predicted_scores[img][:, cls]
                confident = cls_scores > min_score
                n_confident = confident.sum().item()
                if n_confident == 0:
                    continue

                cls_scores, order = cls_scores[confident].sort(descending=True)
                cls_boxes = decoded[confident][order]

                overlap = mu.find_jaccard_overlap(cls_boxes, cls_boxes)

                # Greedy NMS: walk boxes from best to worst score
                suppress = torch.zeros((n_confident), dtype=torch.bool).to(device)
                for idx in range(cls_boxes.size(0)):
                    if suppress[idx]:
                        continue
                    suppress = suppress | (overlap[idx] > max_overlap)
                    # a box always overlaps itself; un-suppress the current one
                    suppress[idx] = False

                survivors = ~suppress
                kept_boxes.append(cls_boxes[survivors])
                kept_labels.append(torch.LongTensor([cls] * survivors.sum().item()).to(device))
                kept_scores.append(cls_scores[survivors])

            if not kept_boxes:
                # placeholder so that every image yields at least one row
                kept_boxes.append(torch.FloatTensor([[0.0,0.0,1.0,1.0]]).to(device))
                kept_labels.append(torch.LongTensor([0]).to(device))
                kept_scores.append(torch.FloatTensor([0.0]).to(device))

            image_boxes = torch.cat(kept_boxes, dim=0)
            image_labels = torch.cat(kept_labels, dim=0)
            image_scores = torch.cat(kept_scores, dim=0)

            if image_scores.size(0) > top_k:
                image_scores, order = image_scores.sort(dim=0, descending=True)
                best = order[:top_k]
                image_scores = image_scores[:top_k]
                image_boxes = image_boxes[best]
                image_labels = image_labels[best]

            all_images_boxes.append(image_boxes)
            all_images_labels.append(image_labels)
            all_images_scores.append(image_scores)

        return all_images_boxes, all_images_labels, all_images_scores

In [14]:
class MultiBoxLoss(nn.Module):
    """Combined localization + confidence loss against a fixed set of prior boxes.

    Args:
        priors_cxcy: Prior boxes in center-size form, one row per prior.
        threshold: Priors whose best overlap with any ground-truth box is below
            this value are labelled background (class 0).
        neg_pos_ratio: Number of hard negatives kept per positive prior.
        alpha: Weight of the localization term in the total loss.
    """

    def __init__(self, priors_cxcy, threshold=0.5, neg_pos_ratio=3, alpha=1.0):
        super(MultiBoxLoss, self).__init__()

        self.priors_cxcy = priors_cxcy
        self.priors_xy = mu.cxcy_to_xy(priors_cxcy)
        self.threshold = threshold
        self.neg_pos_ratio = neg_pos_ratio
        self.alpha = alpha

        self.smooth_l1 = nn.SmoothL1Loss()
        # per-element losses are needed for hard negative mining
        self.cross_entropy = nn.CrossEntropyLoss(reduction='none')

    def forward(self, predicted_locs, predicted_scores, boxes, labels):
        """Compute the loss for one batch.

        Args:
            predicted_locs: Tensor (batch, n_priors, 4) of predicted offsets.
            predicted_scores: Tensor (batch, n_priors, n_classes) of class logits.
            boxes: List with one (n_objects, 4) tensor of ground-truth boxes per image.
            labels: List with one (n_objects,) tensor of class ids per image.

        Returns:
            Scalar tensor ``conf_loss + alpha * loc_loss``. A batch without any
            positive prior yields 0 instead of NaN.
        """
        batch_size = predicted_locs.size(0)
        n_priors = self.priors_cxcy.size(0)
        n_classes = predicted_scores.size(2)

        # Both prediction tensors must cover every prior
        assert n_priors == predicted_locs.size(1) == predicted_scores.size(1)

        # Follow the predictions' device instead of relying on a notebook global
        target_device = predicted_locs.device

        ## place holders for each image's true labels and boxes
        true_locs = torch.zeros((batch_size, n_priors, 4), dtype=torch.float, device=target_device)
        true_classes = torch.zeros((batch_size, n_priors), dtype=torch.long, device=target_device)

        # for each image, express the ground truth relative to the priors
        for i in range(batch_size):
            n_objects = boxes[i].size(0)
            if n_objects == 0:
                # nothing to match: every prior stays background for this image
                continue

            overlap = mu.find_jaccard_overlap(boxes[i], self.priors_xy)

            overlap_for_each_prior, object_for_each_prior = overlap.max(dim=0)
            _, prior_for_each_object = overlap.max(dim=1)

            # Guarantee that every object owns at least its best-matching prior,
            # even when that overlap is below the threshold
            object_for_each_prior[prior_for_each_object] = torch.arange(
                n_objects, device=object_for_each_prior.device)
            overlap_for_each_prior[prior_for_each_object] = 1.0

            ## assign object for each prior from the true labels
            label_for_each_prior = labels[i][object_for_each_prior]
            ## weakly overlapping priors become background
            label_for_each_prior[overlap_for_each_prior < self.threshold] = 0

            ## save the true classes and locs
            true_classes[i] = label_for_each_prior
            true_locs[i] = mu.cxcy_to_gcxgcy(
                mu.xy_to_cxcy(boxes[i][object_for_each_prior]), self.priors_cxcy)

        # priors matched to a real object, over the whole batch
        positive_priors = true_classes != 0
        n_positives = positive_priors.sum(dim=1)  # per image, shape (batch,)
        total_positives = n_positives.sum()

        ## localization loss (positives only)
        if total_positives.item() == 0:
            # mean over an empty selection would be NaN; keep a zero that stays in the graph
            loc_loss = predicted_locs.sum() * 0.0
        else:
            loc_loss = self.smooth_l1(predicted_locs[positive_priors], true_locs[positive_priors])

        ## confidence loss
        n_hard_negatives = self.neg_pos_ratio * n_positives

        conf_loss_all = self.cross_entropy(
            predicted_scores.view(-1, n_classes),
            true_classes.view(-1)
        ).view(batch_size, n_priors)

        conf_loss_pos = conf_loss_all[positive_priors]

        # Hard negative mining: zero out positives, sort each image's losses in
        # descending order and keep the first n_hard_negatives of them
        conf_loss_neg = conf_loss_all.clone()
        conf_loss_neg[positive_priors] = 0
        conf_loss_neg, _ = conf_loss_neg.sort(dim=1, descending=True)

        hardness_rank = torch.arange(n_priors, device=conf_loss_neg.device).unsqueeze(0).expand_as(conf_loss_neg)
        hard_negatives = hardness_rank < n_hard_negatives.unsqueeze(dim=1)
        conf_loss_hard_neg = conf_loss_neg[hard_negatives]

        # clamp avoids 0/0 when the batch has no positive prior
        conf_loss = (conf_loss_hard_neg.sum() + conf_loss_pos.sum()) / total_positives.float().clamp(min=1.0)

        total_loss = conf_loss + self.alpha * loc_loss
        return total_loss

# TRAINING

In [15]:
# --- Training configuration (read as globals by main() and train_single_epoch()) ---

data_folder = "./"  # folder holding the <SPLIT>_images.json / <SPLIT>_objects.json files
keep_difficult = True  # forwarded to PascalVOCDataset(keep_diffcult=...)

n_classes = len(label_map)  # includes the "background" entry of label_map
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

checkpoint = None  # None -> start from scratch; otherwise passed to torch.load() in main()
batch_size = 16
iterations = 200000  # converted to a number of epochs in main()
workers = 5  # DataLoader num_workers
pin_memory = False

model = None  # assigned in main()

lr = 7e-4  # bias parameters get 2*lr in main()
decay_lr_at = [60000,100000,140000,180000]  # given in iterations; main() converts to epochs
decay_lr_to = 0.4  # passed to mu.adjust_learning_rate (presumably a multiplicative factor -- TODO confirm)
momentum = 0.95  # NOTE(review): not used by the optimizer built in the visible part of main()
weight_decay = 7e-4

grad_clip = 0.5  # passed to mu.clip_gradients in train_single_epoch; None disables clipping

# Let cuDNN benchmark convolution algorithms
torch.backends.cudnn.benchmark = True
mu.clear_cuda()  # presumably frees cached GPU memory -- helper not shown here
history = pd.DataFrame()  # handed to mu.print_epoch_stat after every epoch

In [22]:
import matplotlib.pyplot as plt
def train_single_epoch(epoch, train_loader, model, optimizer, criterion):
    """Run one pass over ``train_loader`` and update ``model`` in place.

    Reads the notebook-level ``device`` and ``grad_clip``. Gradient flow is
    plotted via ``mu.plot_grad_flow`` after every backward pass.

    Args:
        epoch: Index of the current epoch (not used inside the loop).
        train_loader: Iterable yielding ``(images, boxes, labels, difficulties)``.
        model: Network returning ``(predicted_locs, predicted_scores)``.
        optimizer: Optimizer stepping the model parameters.
        criterion: Loss called as ``criterion(locs, scores, boxes, labels)``.

    Returns:
        ``(mean_loss, mean_acc)`` over the batches; the accuracy accumulator is
        never updated, so the second value is always 0.0.
    """
    model = model.train()
    n_batches = len(train_loader)
    loss_sum = 0.0
    acc_sum = 0.0

    batches = tqdm(enumerate(train_loader), total=n_batches)
    for _step, (images, boxes, labels, _difficulties) in batches:
        optimizer.zero_grad()

        # images are one stacked tensor; boxes/labels are per-image lists
        images = images.to(device)
        boxes = [image_boxes.to(device) for image_boxes in boxes]
        labels = [image_labels.to(device) for image_labels in labels]

        predicted_locs, predicted_scores = model(images)
        loss = criterion(predicted_locs, predicted_scores, boxes, labels)
        loss_sum += loss.item()

        loss.backward()
        mu.plot_grad_flow(model.named_parameters())

        if grad_clip is not None:
            mu.clip_gradients(model, grad_clip)

        optimizer.step()

    return loss_sum / n_batches, acc_sum / n_batches

In [17]:
def main():
    """Train SSD300 on the Pascal VOC ``train`` split for ``iterations`` optimiser steps.

    All settings are read from notebook-level globals (``n_classes``,
    ``checkpoint``, ``lr``, ``weight_decay``, ``batch_size``, ``iterations``,
    ``decay_lr_at``, ``decay_lr_to``, ``workers``, ``pin_memory``, ``device``,
    ``data_folder``, ``keep_difficult``, ``history``). The only global this
    function rebinds is ``model``, so the network stays reachable from later
    cells even if the run is interrupted.

    ``decay_lr_at`` and ``checkpoint`` are deliberately NOT overwritten:
    earlier versions replaced ``decay_lr_at`` with epoch indices and
    ``checkpoint`` with the loaded dict, which made a second ``main()`` call in
    the same kernel decay at the wrong epochs or fail inside ``torch.load``.

    When ``checkpoint`` is ``None`` a fresh RAdam optimizer is built, with bias
    parameters at twice the base learning rate. Otherwise ``checkpoint`` is
    loaded and must provide the keys ``'epoch'``, ``'state_dict'`` and
    ``'optimizer'``.

    NOTE(review): this function never writes a checkpoint itself.

    Returns:
        None. Per-epoch statistics are reported through ``mu.print_epoch_stat``.
    """
    global model

    model = SSD300(n_classes)

    if checkpoint is None:
        start_epoch = 0
        biases, not_biases = [], []

        # Split trainable parameters so biases can use a doubled learning rate.
        for param_name, param in model.named_parameters():
            if not param.requires_grad:
                continue
            if param_name.endswith(".bias"):
                biases.append(param)
            else:
                not_biases.append(param)

        optimizer = mu.RAdam(
            params=[{'params': biases, 'lr': 2 * lr}, {'params': not_biases}],
            lr=lr,
            weight_decay=weight_decay
        )
    else:
        # NOTE(review): torch.load unpickles arbitrary objects -- only load
        # checkpoint files from a trusted source.
        # An already-loaded dict (left in the kernel by an older run) is
        # accepted as-is; map_location lets a GPU checkpoint load on any device.
        if isinstance(checkpoint, dict):
            state = checkpoint
        else:
            state = torch.load(checkpoint, map_location=device)
        start_epoch = state['epoch'] + 1
        print("\nResuming Training From Epoch: {}\n".format(start_epoch))
        model.load_state_dict(state["state_dict"])
        # NOTE(review): the optimizer object is taken verbatim from the
        # checkpoint; confirm its parameter groups refer to this model's
        # parameters, otherwise resumed steps will not update `model`.
        optimizer = state["optimizer"]

    model = model.to(device)

    criterion = MultiBoxLoss(priors_cxcy=model.priors_cxcy).to(device)

    train_dataset = PascalVOCDataset(data_folder, split='train', keep_diffcult=keep_difficult)
    train_loader = DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True, collate_fn=train_dataset.collate_fn,
        num_workers=workers, pin_memory=pin_memory
    )

    # Convert the iteration-based schedule into epochs using local names only.
    # max(1, ...) guards against a dataset smaller than one batch.
    steps_per_epoch = max(1, len(train_dataset) // batch_size)
    total_epochs = iterations // steps_per_epoch
    decay_epochs = {step // steps_per_epoch for step in decay_lr_at}

    epoch_started = time.time()

    for epoch in range(start_epoch, total_epochs):
        if epoch in decay_epochs:
            optimizer = mu.adjust_learning_rate(optimizer, decay_lr_to)

        # The second return value (accuracy) is not reported.
        train_loss, _train_acc = train_single_epoch(epoch, train_loader, model, optimizer, criterion)

        mu.print_epoch_stat(epoch, time.time() - epoch_started, history=history, train_loss=train_loss)
        epoch_started = time.time()
        mu.clear_cuda()

In [18]:
# Launch the full training run. The captured log below shows 23 completed
# epochs before the run was stopped by hand (KeyboardInterrupt) during the next one.
main()

BASE MODEL LOAD....COMPLETE



HBox(children=(FloatProgress(value=0.0, max=1035.0), HTML(value='')))




EPOCH 1 Completed, Time Taken: 0:06:50.672544
	Train Loss 	5.86313271


HBox(children=(FloatProgress(value=0.0, max=1035.0), HTML(value='')))




EPOCH 2 Completed, Time Taken: 0:06:49.860741
	Train Loss 	4.69082259


HBox(children=(FloatProgress(value=0.0, max=1035.0), HTML(value='')))




EPOCH 3 Completed, Time Taken: 0:06:50.245378
	Train Loss 	4.40688999


HBox(children=(FloatProgress(value=0.0, max=1035.0), HTML(value='')))




EPOCH 4 Completed, Time Taken: 0:06:48.903173
	Train Loss 	4.22826002


HBox(children=(FloatProgress(value=0.0, max=1035.0), HTML(value='')))




EPOCH 5 Completed, Time Taken: 0:06:49.046901
	Train Loss 	4.13804521


HBox(children=(FloatProgress(value=0.0, max=1035.0), HTML(value='')))




EPOCH 6 Completed, Time Taken: 0:06:51.140551
	Train Loss 	4.02524096


HBox(children=(FloatProgress(value=0.0, max=1035.0), HTML(value='')))




EPOCH 7 Completed, Time Taken: 0:06:50.942126
	Train Loss 	3.93791597


HBox(children=(FloatProgress(value=0.0, max=1035.0), HTML(value='')))




EPOCH 8 Completed, Time Taken: 0:06:46.636186
	Train Loss 	3.89465265


HBox(children=(FloatProgress(value=0.0, max=1035.0), HTML(value='')))




EPOCH 9 Completed, Time Taken: 0:06:47.029177
	Train Loss 	3.84168841


HBox(children=(FloatProgress(value=0.0, max=1035.0), HTML(value='')))




EPOCH 10 Completed, Time Taken: 0:06:53.680433
	Train Loss 	3.79936261


HBox(children=(FloatProgress(value=0.0, max=1035.0), HTML(value='')))




EPOCH 11 Completed, Time Taken: 0:06:56.410275
	Train Loss 	3.77175202


HBox(children=(FloatProgress(value=0.0, max=1035.0), HTML(value='')))




EPOCH 12 Completed, Time Taken: 0:06:47.291773
	Train Loss 	3.76801732


HBox(children=(FloatProgress(value=0.0, max=1035.0), HTML(value='')))




EPOCH 13 Completed, Time Taken: 0:06:47.828612
	Train Loss 	3.74044268


HBox(children=(FloatProgress(value=0.0, max=1035.0), HTML(value='')))




EPOCH 14 Completed, Time Taken: 0:06:47.335701
	Train Loss 	3.71514846


HBox(children=(FloatProgress(value=0.0, max=1035.0), HTML(value='')))




EPOCH 15 Completed, Time Taken: 0:06:48.400446
	Train Loss 	3.7082793


HBox(children=(FloatProgress(value=0.0, max=1035.0), HTML(value='')))




EPOCH 16 Completed, Time Taken: 0:06:47.610862
	Train Loss 	3.68507817


HBox(children=(FloatProgress(value=0.0, max=1035.0), HTML(value='')))




EPOCH 17 Completed, Time Taken: 0:06:47.353243
	Train Loss 	3.67913531


HBox(children=(FloatProgress(value=0.0, max=1035.0), HTML(value='')))




EPOCH 18 Completed, Time Taken: 0:06:48.813681
	Train Loss 	3.68681375


HBox(children=(FloatProgress(value=0.0, max=1035.0), HTML(value='')))




EPOCH 19 Completed, Time Taken: 0:06:46.531604
	Train Loss 	3.68381043


HBox(children=(FloatProgress(value=0.0, max=1035.0), HTML(value='')))




EPOCH 20 Completed, Time Taken: 0:06:45.954384
	Train Loss 	3.66097912


HBox(children=(FloatProgress(value=0.0, max=1035.0), HTML(value='')))




EPOCH 21 Completed, Time Taken: 0:06:56.166275
	Train Loss 	3.67962239


HBox(children=(FloatProgress(value=0.0, max=1035.0), HTML(value='')))




EPOCH 22 Completed, Time Taken: 0:06:47.091582
	Train Loss 	3.66886414


HBox(children=(FloatProgress(value=0.0, max=1035.0), HTML(value='')))




EPOCH 23 Completed, Time Taken: 0:06:47.572990
	Train Loss 	3.6801544


HBox(children=(FloatProgress(value=0.0, max=1035.0), HTML(value='')))

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/numan947/anaconda3/envs/pytorch/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-18-263240bbee7e>", line 1, in <module>
    main()
  File "<ipython-input-17-d34e01bd4a21>", line 51, in main
    train_loss, train_acc = train_single_epoch(epoch, train_loader, model, optimizer, criterion)
  File "<ipython-input-16-d760ea920a51>", line 19, in train_single_epoch
    mu.clip_gradients(model, grad_clip)
  File "/home/numan947/MyHome/AIMLDL/KAGGLE/SSD_MultiBox_Detector/ml_utils.py", line 1295, in clip_gradients
    nn.utils.clip_grad_norm_(model.parameters(), clip_val)
  File "/home/numan947/anaconda3/envs/pytorch/lib/python3.7/site-packages/torch/nn/utils/clip_grad.py", line 33, in clip_grad_norm_
    total_norm += param_norm.item() ** norm_type
KeyboardInterrupt

During handling of the above exception, another exception occurred:



KeyboardInterrupt: 

In [18]:
# Build a fresh SSD300 on `device` and restore its weights from ./MODEL.pt.
# map_location=device remaps the stored tensors onto the device selected above.
# NOTE(review): main() above does not write ./MODEL.pt -- confirm where this file is produced.
model = SSD300(n_classes).to(device)
model.load_state_dict(torch.load("./MODEL.pt", map_location=device))

BASE MODEL LOAD....COMPLETE



<All keys matched successfully>

In [23]:
import matplotlib.pyplot as plt

In [25]:
# Recreate the training dataset and loader at notebook level, using the same
# arguments main() uses, so that single epochs can be run outside main().
train_dataset = PascalVOCDataset(
    data_folder,
    split='train',
    keep_diffcult=keep_difficult,
)
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=train_dataset.collate_fn,
    num_workers=workers,
    pin_memory=pin_memory,
)


In [29]:
# One-off gradient-flow inspection on the reloaded model: run a single epoch
# with a throwaway RAdam optimizer and a fresh loss, then print the returned
# (mean_loss, mean_acc) tuple.
# The figure is opened first; the per-batch mu.plot_grad_flow calls inside
# train_single_epoch presumably draw onto it -- TODO confirm in ml_utils.
plt.figure(figsize=(20, 40))
print(
    train_single_epoch(
        0,
        train_loader,
        model,
        mu.RAdam(model.parameters(), lr=1e-4, weight_decay=weight_decay),  # 1e-4 is the same value as 10e-5
        MultiBoxLoss(priors_cxcy=model.priors_cxcy),
    )
)
plt.show()

HBox(children=(FloatProgress(value=0.0, max=1035.0), HTML(value='')))

KeyboardInterrupt: 

Error in callback <function flush_figures at 0x7f57c1ff33b0> (for post_execute):


KeyboardInterrupt: 