In [131]:
#calculating ssd loss

In [1]:
import os
import json
import time
import copy
from copy import deepcopy
from collections import defaultdict

import numpy as np
import math
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils, models

from skimage import io

import matplotlib.pyplot as plt
from matplotlib import patches, patheffects

import imgaug as ia
from imgaug import augmenters as iaa

from sklearn.model_selection import train_test_split

from tqdm import tqdm

In [2]:
IMG_SIZE = 224
BATCH_SIZE = 16
VAL_SIZE =0.33

In [3]:
DEVICE = torch.device("cpu")

In [5]:
NUM_CLASS = 21
NUM_CLASS_wo_BG = NUM_CLASS - 1 # as background

In [6]:
anc_grid = 4
k = 1

anc_offset = 1/(anc_grid*2)
anc_x = np.repeat(np.linspace(anc_offset, 1-anc_offset, anc_grid), anc_grid)
anc_y = np.tile(np.linspace(anc_offset, 1-anc_offset, anc_grid), anc_grid)

anc_ctrs = np.tile(np.stack([anc_x,anc_y], axis=1), (k,1))
anc_sizes = np.array([[1/anc_grid,1/anc_grid] for i in range(anc_grid*anc_grid)])

anchors = torch.tensor(np.concatenate([anc_ctrs, anc_sizes], axis=1), requires_grad=False).float()

In [11]:
anchors

tensor([[ 0.1250,  0.1250,  0.2500,  0.2500],
        [ 0.1250,  0.3750,  0.2500,  0.2500],
        [ 0.1250,  0.6250,  0.2500,  0.2500],
        [ 0.1250,  0.8750,  0.2500,  0.2500],
        [ 0.3750,  0.1250,  0.2500,  0.2500],
        [ 0.3750,  0.3750,  0.2500,  0.2500],
        [ 0.3750,  0.6250,  0.2500,  0.2500],
        [ 0.3750,  0.8750,  0.2500,  0.2500],
        [ 0.6250,  0.1250,  0.2500,  0.2500],
        [ 0.6250,  0.3750,  0.2500,  0.2500],
        [ 0.6250,  0.6250,  0.2500,  0.2500],
        [ 0.6250,  0.8750,  0.2500,  0.2500],
        [ 0.8750,  0.1250,  0.2500,  0.2500],
        [ 0.8750,  0.3750,  0.2500,  0.2500],
        [ 0.8750,  0.6250,  0.2500,  0.2500],
        [ 0.8750,  0.8750,  0.2500,  0.2500]])

In [7]:
grid_sizes = torch.tensor(np.array([1/anc_grid]), requires_grad=False).float().unsqueeze(1)

In [8]:
grid_sizes

tensor([[ 0.2500]])

In [9]:
def hw2corners(ctr, hw): return torch.cat([ctr-hw/2, ctr+hw/2], dim=1)

In [36]:
anchor_cnr = hw2corners(anchors[:,:2], anchors[:,2:])
anchor_cnr

tensor([[ 0.0000,  0.0000,  0.2500,  0.2500],
        [ 0.0000,  0.2500,  0.2500,  0.5000],
        [ 0.0000,  0.5000,  0.2500,  0.7500],
        [ 0.0000,  0.7500,  0.2500,  1.0000],
        [ 0.2500,  0.0000,  0.5000,  0.2500],
        [ 0.2500,  0.2500,  0.5000,  0.5000],
        [ 0.2500,  0.5000,  0.5000,  0.7500],
        [ 0.2500,  0.7500,  0.5000,  1.0000],
        [ 0.5000,  0.0000,  0.7500,  0.2500],
        [ 0.5000,  0.2500,  0.7500,  0.5000],
        [ 0.5000,  0.5000,  0.7500,  0.7500],
        [ 0.5000,  0.7500,  0.7500,  1.0000],
        [ 0.7500,  0.0000,  1.0000,  0.2500],
        [ 0.7500,  0.2500,  1.0000,  0.5000],
        [ 0.7500,  0.5000,  1.0000,  0.7500],
        [ 0.7500,  0.7500,  1.0000,  1.0000]])

In [155]:
#start experiment on bbox loss

In [37]:
#prep y_true_bboxes
y_true_bbox = np.array([
    (92, 0, 142, 74),
    (150, 81, 207, 206),
    (152, 109, 222, 140),
    (159, 19, 222, 123), 
    (115, 186, 150, 203)
])
y_true_bbox = torch.from_numpy(y_true_bbox).float()
y_true_bbox_tfm = y_true_bbox/IMG_SIZE

y_true_bbox_tfm

tensor([[ 0.4107,  0.0000,  0.6339,  0.3304],
        [ 0.6696,  0.3616,  0.9241,  0.9196],
        [ 0.6786,  0.4866,  0.9911,  0.6250],
        [ 0.7098,  0.0848,  0.9911,  0.5491],
        [ 0.5134,  0.8304,  0.6696,  0.9062]])

In [108]:
#prep y_pred_bboxes
y_pred_bboxes = torch.from_numpy(np.random.randn(anc_grid**2 * k, 4)).float()
y_pred_bboxes = F.tanh(y_pred_bboxes) #squish between -1 to +1
y_pred_bboxes_tfm_center = (y_pred_bboxes[:, :2] / 2 * grid_sizes) + anchors[:, :2]
y_pred_bboxes_tfm_hw = (y_pred_bboxes[:, 2:] / 2 + 1) * anchors[:, 2:]
y_pred_bboxes_tfm = hw2corners(y_pred_bboxes_tfm_center, y_pred_bboxes_tfm_hw)
y_pred_bboxes_tfm #min y,x max y, x

tensor([[ 0.1080, -0.0347,  0.2906,  0.3261],
        [ 0.1417,  0.2421,  0.2922,  0.4785],
        [ 0.0196,  0.5356,  0.2044,  0.8610],
        [ 0.0775,  0.6978,  0.3271,  0.8564],
        [ 0.4078, -0.0155,  0.5699,  0.1291],
        [ 0.2069,  0.2003,  0.4721,  0.4676],
        [ 0.2096,  0.6762,  0.4768,  0.8098],
        [ 0.0888,  0.8108,  0.4627,  1.1688],
        [ 0.5177,  0.0201,  0.8106,  0.3266],
        [ 0.4600,  0.1234,  0.6054,  0.4396],
        [ 0.5480,  0.5559,  0.8999,  0.9239],
        [ 0.5890,  0.7912,  0.8085,  0.9416],
        [ 0.7408,  0.0005,  1.0161,  0.3461],
        [ 0.6670,  0.1893,  0.9918,  0.3675],
        [ 0.5752,  0.6059,  0.9471,  0.8310],
        [ 0.6943,  0.6122,  0.9374,  0.9256]])

In [111]:
def intersect(box_a, box_b):
    """
    box_a & box_b: min-y, min-x, max-y, max-x
    """
    max_xy = torch.min(box_a[:, None, 2:], box_b[None, :, 2:])
    min_xy = torch.max(box_a[:, None, :2], box_b[None, :, :2])
    inter = torch.clamp((max_xy - min_xy), min=0)
    return inter[:, :, 0] * inter[:, :, 1]


def box_sz(b): return ((b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1]))


def jaccard(box_a, box_b):
    """
    :param box_a: y_true_bbox x (min-y, min-x, max-y, max-x)
    :param box_b: anchors x (min-y, min-x, max-y, max-x)
    :return: iou
    """
    inter = intersect(box_a, box_b)
    union = box_sz(box_a).unsqueeze(1) + box_sz(box_b).unsqueeze(0) - inter
    return inter / union

In [109]:
overlaps = jaccard(y_true_bbox_tfm.data, anchor_cnr.data)

In [115]:
def map_to_ground_truth(overlaps):
    """
    for each piror-box/predictions assign ground true with highest iou
    then for each ground true assign its highest iou to piror-box;
    
    another way saying is
    
    for each ground true, assign to highest iou piror box
    then for the rest unassigned piror box, assign ground true base highest iou
    """
    prior_overlap, prior_idx = overlaps.max(1)
    gt_overlap, gt_idx = overlaps.max(0)
    gt_overlap[prior_idx] = 1.99
    for i, o in enumerate(prior_idx): 
        gt_idx[o] = i
    return gt_overlap, gt_idx

In [116]:
gt_overlap, gt_idx = map_to_ground_truth(overlaps) #gt_overlap is iou with assigned ground true, gt_idx is idx of assigned ground true

In [123]:
pos = gt_overlap > 0.4 #only count those piror-box : ground-true mapping with greater than 0.4; pos acts as a mask
pos

tensor([ 0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  1,  0,  1,
         1,  0], dtype=torch.uint8)

In [124]:
pos_idx = torch.nonzero(pos)[:, 0] #index of pos
pos_idx

tensor([  8,  11,  13,  14])

In [126]:
gt_bbox = y_pred_bboxes_tfm[gt_idx]

In [129]:
loc_loss = ((y_pred_bboxes_tfm[pos_idx] - gt_bbox[pos_idx]).abs()).mean()

In [130]:
loc_loss

tensor(0.4171)

In [156]:
#start experiment on clf loss

In [163]:
cate = ['BG', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']
id2cat = {i: j for i, j in enumerate(cate)}
id2cat

{0: 'BG',
 1: 'aeroplane',
 2: 'bicycle',
 3: 'bird',
 4: 'boat',
 5: 'bottle',
 6: 'bus',
 7: 'car',
 8: 'cat',
 9: 'chair',
 10: 'cow',
 11: 'diningtable',
 12: 'dog',
 13: 'horse',
 14: 'motorbike',
 15: 'person',
 16: 'pottedplant',
 17: 'sheep',
 18: 'sofa',
 19: 'train',
 20: 'tvmonitor'}

In [176]:
#prep y_true_cate

y_true_cate = torch.from_numpy(np.random.randint(low = 1, high=20, size = 5))
y_true_cate

tensor([ 18,   2,  16,  15,   8])

In [175]:
#prep y_pred_cate

y_pred_cate = torch.from_numpy(np.random.randn(anc_grid**2 * k, len(NUM_CLASS))).float()
y_pred_cate

tensor([[-0.3536,  1.3111, -0.0026,  0.5725, -0.7467,  0.5713, -0.0483,
         -0.6868, -1.2230,  0.2332,  0.7010,  0.1773,  0.2344,  0.3787,
         -0.8836,  0.0327,  0.8899, -0.3161, -0.9071,  0.4377,  0.1266],
        [-2.4563,  2.9509,  0.6055,  0.4921, -0.5144, -1.7990, -0.6023,
         -0.4690,  1.9904,  2.0677,  0.0736, -1.4805, -0.0607,  2.1589,
         -0.4984,  2.6848,  0.2851,  0.7166,  0.0909, -2.1143,  1.1359],
        [-0.5366,  0.6620,  1.3101, -0.1347, -0.7330,  1.1757, -0.0354,
          0.2799,  0.5398,  0.5108,  0.5611,  1.6749, -0.8562,  0.5403,
         -0.1553,  1.1331,  0.1431,  0.4295, -0.1932,  1.9006,  0.4027],
        [ 0.8039, -0.0098,  0.9533, -0.0087,  1.1316,  0.6404,  0.6883,
         -1.7918,  0.4554, -0.5598,  0.1084,  0.3894, -0.5494, -0.2221,
         -1.0733,  0.5736, -0.0629,  0.4397, -0.4552, -2.2696, -0.6644],
        [-0.1020,  0.5858,  1.6664, -0.1038,  0.8735,  1.3725, -1.4403,
         -0.1914, -0.0473, -0.3023,  0.5078,  0.4402,  0.167

In [177]:
gt_clas = y_true_cate[gt_idx]

In [179]:
gt_idx

tensor([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  4,  3,  3,
         2,  1])

In [180]:
gt_clas[1 - pos] = 0 #assign masked piror box to BG

In [181]:
gt_clas

tensor([  0,   0,   0,   0,   0,   0,   0,   0,  18,   0,   0,   8,
          0,  15,  16,   0])

In [182]:
def one_hot_embedding(labels, num_classes):
    return torch.eye(num_classes)[labels.data.cpu()]

In [209]:
t = one_hot_embedding(gt_clas, num_classes = NUM_CLASS) #one-hot encoded y_true

In [210]:
t

tensor([[ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        

In [211]:
t.shape

torch.Size([16, 21])

In [212]:
t = t[:, 1:] #remove background from ground true as it doesnt count in error
t.shape

torch.Size([16, 20])

In [213]:
x = y_pred_cate[:, 1:] #remove background from prediction as it doesnt count in error
x.shape

torch.Size([16, 20])

In [214]:
w = None #weight for BCE loss

In [217]:
loss_cate = F.binary_cross_entropy_with_logits(x, t, w, size_average=False) / NUM_CLASS_wo_BG
loss_cate

tensor(13.5746)