In [1]:
%matplotlib inline

In [2]:
import numpy as np
import cv2
import os
from collections import namedtuple
from pathlib import Path
import matplotlib.pyplot as plt
import torch
from torch import tensor as T
from torch import nn
from torch.nn import functional as F
from torch.autograd import Variable as V
import torchvision

In [3]:
from val_transforms import detransform
from vis import Vis
from models.resnet import resnet34
from models.ssd import SSDHead, SSDModel
import utils

In [5]:
from losses import BinaryCrossEntropyLoss
from losses import filter_ground_truth
from losses import intersect
from losses import area
from losses import jaccard_overlap
from losses import box_hw_to_corners
from losses import activation_to_bbox_corners
from losses import map_ground_truth
from losses import create_anchors
from losses import SSDLoss

In [None]:
train_ds, val_ds = utils.load_dataset(Path('../data/kitti_2d'))

In [None]:
len(train_ds), len(val_ds)

In [None]:
im, (boxes, labels) = train_ds[0]
print(im.shape)
print(boxes)
print(labels)
print(train_ds.labels[labels[0]])

In [None]:
im[:,0,0]

In [None]:
im.min(), im.max()

In [None]:
im_orig = train_ds.get_original_image(0)
im_denorm = detransform(im)

In [None]:
im_orig.shape, im_denorm.shape

In [None]:
vis = Vis(train_ds.labels)

In [None]:
vis.show_image(im_orig)

In [None]:
vis.show_image(im_denorm)

In [None]:
train_ds.labels

In [None]:
train_ds.get_label_id('Truck')

In [None]:
vis.show_image_with_boxes(detransform(im), boxes, labels)

In [None]:
im_orig.shape

Image sizes vary between files: e.g. 375x1242 or 370x1224 (height x width), among others.  
The dataset therefore does not have a uniform image size.

In [None]:
im_orig.shape[1] / im_orig.shape[0]

Use PyTorch data loaders

In [None]:
# Training loader: batches of 4, reshuffled, collated by the dataset's own collate_fn.
train_dl = torch.utils.data.DataLoader(
    dataset=train_ds,
    batch_size=4,
    shuffle=True,
    num_workers=4,
    collate_fn=train_ds.collate_fn,
)
# Validation loader: same settings, but samples keep their dataset order.
val_dl = torch.utils.data.DataLoader(
    dataset=val_ds,
    batch_size=4,
    shuffle=False,
    num_workers=4,
    collate_fn=val_ds.collate_fn,
)

In [None]:
for i, batch in enumerate(train_dl):
    print(i, batch[0].shape, len(batch[1]))
    break

In [None]:
batch[1][0]

## Visualize the dataset

In [None]:
it = iter(val_dl)

In [None]:
im_batch, y_batch = next(it)

In [None]:
im_batch.size()

In [None]:
im_batch.dtype

In [None]:
y_batch

In [None]:
im = im_batch[0]
y_boxes = y_batch[0][0]
y_classes = y_batch[0][1]

In [None]:
im.shape

In [None]:
im.dtype

In [None]:
y_boxes.shape

In [None]:
y_boxes

In [None]:
y_classes.shape

In [None]:
y_classes

In [None]:
[val_ds.labels[i] for i in y_classes]

In [None]:
detransform(im).shape

In [None]:
detransform(im).dtype

In [None]:
vis.show_image(detransform(im))

In [None]:
val_ds.get_filename(0)

In [None]:
vis.show_image_with_boxes(detransform(im), y_boxes.numpy(), y_classes.numpy())

# Anchors

Create an array of anchor box centers. Shape: (n * k, 4)  
n is the number of locations.  
k is the number of anchor boxes per location.  
The last dimension is (top, left, height, width), where top and left are the coordinates of the center of the box.  

In [None]:
k = 1

In [6]:
# Number of anchor cells along each side of the grid.
anchor_grid_size = 4
# Later cells pass `anchor_size` to activation_to_bbox_corners and SSDLoss, but
# no cell assigned it, so they raised NameError after a kernel restart.
# NOTE(review): 1 / anchor_grid_size is assumed to be the intended value; it
# equals the 0.25 height/width shown in the printed `anchors` tensor. Confirm
# against the implementation in losses.py.
anchor_size = 1. / anchor_grid_size
anchors = create_anchors(anchor_grid_size)

In [7]:
anchors

tensor([[0.1250, 0.1250, 0.2500, 0.2500],
        [0.1250, 0.3750, 0.2500, 0.2500],
        [0.1250, 0.6250, 0.2500, 0.2500],
        [0.1250, 0.8750, 0.2500, 0.2500],
        [0.3750, 0.1250, 0.2500, 0.2500],
        [0.3750, 0.3750, 0.2500, 0.2500],
        [0.3750, 0.6250, 0.2500, 0.2500],
        [0.3750, 0.8750, 0.2500, 0.2500],
        [0.6250, 0.1250, 0.2500, 0.2500],
        [0.6250, 0.3750, 0.2500, 0.2500],
        [0.6250, 0.6250, 0.2500, 0.2500],
        [0.6250, 0.8750, 0.2500, 0.2500],
        [0.8750, 0.1250, 0.2500, 0.2500],
        [0.8750, 0.3750, 0.2500, 0.2500],
        [0.8750, 0.6250, 0.2500, 0.2500],
        [0.8750, 0.8750, 0.2500, 0.2500]])

Create an array of anchor box corners.  
Shape: (n * k, 4)  
The last dimension is (top, left, bottom, right)

In [None]:
anchor_corners = box_hw_to_corners(anchors)

In [None]:
anchor_corners

## Model

In [None]:
base_model = resnet34(pretrained=True)

In [None]:
print(base_model)

In [None]:
base_model(im.unsqueeze(0)).shape

In [None]:
head = SSDHead(k, len(train_ds.labels), -3.)

In [None]:
ssd = SSDModel(base_model, head)

# Test the model evaluation

In [None]:
loc_activation, class_activation = ssd(im_batch)

In [None]:
# Shape is (batch_size, num_anchors, (num_labels + 1) * k)
class_activation.shape

In [None]:
# Shape is (batch_size, num_anchors, 4 * k)
loc_activation.shape

# Functions for loss calculation

In [None]:
# `one_hot_embedding` is not imported anywhere in this notebook, so the original
# call raised NameError on a fresh kernel. Indexing an identity matrix with the
# class ids builds one one-hot row per id using only `torch`.
# NOTE(review): if `losses` still exports one_hot_embedding, importing it in the
# imports cell is the alternative fix; its output dtype/width may differ from this.
class_ids = torch.tensor([3, 5, 5], dtype=torch.int64)
torch.eye(len(train_ds.labels))[class_ids]

In [None]:
sz=224

In [None]:
# TODO: Rewrite the code to allow height and width to be different
y_boxes_filtered, y_classes_filtered = filter_ground_truth(V(y_boxes), V(y_classes), sz)
y_boxes_filtered, y_classes_filtered

In [None]:
boxes1 = V(T(np.array([[10, 20, 40, 50], [0, 0, 10, 10]])), requires_grad=False)
boxes2 = V(T(np.array([[5, 5, 20, 20], [30, 40, 45, 55]])), requires_grad=False)
print(intersect(boxes1, boxes2))

In [None]:
area(boxes1), area(boxes2)

In [None]:
# We'll call jaccard_overlap with bounding boxes on the left and anchor boxes on the right
overlaps = jaccard_overlap(boxes1, boxes2)
overlaps

In [None]:
# Find the best bounding box for each anchor box
overlaps.max(0)

In [None]:
# Find the best anchor box for each bounding box
overlaps.max(1)

In [None]:
activation_to_bbox_corners(V(loc_activation[0,:,:]), V(anchors), anchor_size)

In [None]:
map_ground_truth(V(y_boxes_filtered), anchor_corners)

In [None]:
V(loc_activation).size()

In [None]:
# Build the loss object from the anchors and their corner form, both moved to the GPU.
# NOTE(review): this cell calls .cuda() directly; the `device` string is only
# defined in the following cell.
ssd_loss = SSDLoss(anchors.cuda(), anchor_corners.cuda(), anchor_size, sz, len(train_ds.labels))

# Evaluate the loss for a single example: index 0 of each activation tensor,
# together with the ground-truth boxes and classes taken earlier from y_batch[0].
ssd_loss.calculate_example_loss(V(y_boxes).cuda(), V(y_classes).cuda(), V(loc_activation).cuda()[0,:,:], V(class_activation).cuda()[0,:,:])

In [None]:
device = 'cuda:0'

In [None]:
loc_v = V(loc_activation, requires_grad=True).to(device)
class_v = V(class_activation, requires_grad=True).to(device)

local_y_batch = [(l.to(device), c.to(device)) for l, c in y_batch]

l = ssd_loss.loss([loc_v, class_v], local_y_batch)
l

# Train the model

In [None]:
ssd.to(device)

In [None]:
# Optimise every parameter of the SSD model with Adam.
# `torch.optim` is reachable through the top-level `torch` import, so no
# mid-notebook import is needed here.
optimizer = torch.optim.Adam(ssd.parameters(), lr=0.001)

# Number of batches between progress reports.
output_interval = 100

ssd.train()

for epoch in range(10):  # loop over the dataset multiple times
    # Running sums since the last report; they also restart at every epoch.
    running_loss = 0.0
    running_losses = np.zeros((2,))
    for i, (image_batch, y_batch) in enumerate(train_dl):
        # Move the images and every (boxes, classes) target pair to the device.
        local_image_batch = image_batch.to(device)
        local_y_batch = [(boxes.to(device), classes.to(device)) for boxes, classes in y_batch]

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward pass
        predicted = ssd(local_image_batch)

        # The two component losses are only reported, never back-propagated,
        # so compute them without recording an autograd graph.
        with torch.no_grad():
            losses = ssd_loss.batch_losses(predicted, local_y_batch)
        running_losses += np.array([losses[0].item(), losses[1].item()])

        # backward + optimize on the combined loss
        loss = ssd_loss.loss(predicted, local_y_batch)
        loss.backward()
        optimizer.step()

        # print statistics once every `output_interval` batches
        running_loss += loss.item()
        if i % output_interval == output_interval - 1:
            print('[%d, %5d] loss: %.3f, class_loss: %.3f, loc_loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / output_interval,
                   running_losses[0] / output_interval, running_losses[1] / output_interval))
            running_loss = 0.0
            running_losses = np.zeros((2,))

print('Finished Training')

# Display the results

In [None]:
# Take the first validation batch and move the images to the training device.
x, y = next(iter(val_dl))
x = x.to(device)

# Switch the model to evaluation mode before predicting.
ssd.eval()

# Inference only: no gradients are needed, so skip autograd bookkeeping.
# The outputs are unpacked directly instead of rebinding `batch`, which
# earlier cells use for a data-loader batch.
with torch.no_grad():
    loc_activation, class_activation = ssd(x)

In [None]:
x.size(), len(y), y[0][0].size(), y[0][1].size(), loc_activation.size(), class_activation.size()

In [None]:
index = 1
class_activation_one = class_activation[index]
loc_activation_one = loc_activation[index]
image = detransform(x[index], device)
y_boxes, y_classes = y[index][0].to(device), y[index][1].to(device)
y_boxes, y_classes

In [None]:
x.shape, x[index].shape

In [None]:
x.dtype

In [None]:
detransform(x[index], device).shape

In [None]:
type(detransform(x[index], device))

In [None]:
detransform(x[index], device).dtype

In [None]:
image.dtype, image.shape

In [None]:
vis.show_image_with_boxes(image, y_boxes.data.cpu().numpy(), y_classes.data.cpu().numpy())

In [None]:
loc_activation.size(), class_activation.size()

In [None]:
loc_activation_one.size(), class_activation_one.size()

In [None]:
class_activation_one.max(dim=1)[0]

In [None]:
class_activation_one.max(dim=1)[1]

In [None]:
list(enumerate(train_ds.labels))

In [None]:
vis.show_image_with_boxes(image, anchor_corners.data.cpu().numpy() * sz, class_activation_one.max(dim=1)[1].data.cpu().numpy())

In [None]:
loc_activation_one

In [None]:
loc_corners = activation_to_bbox_corners(loc_activation_one, V(anchors).to(device), anchor_size)
loc_corners

In [None]:
vis.show_image_with_boxes(image, loc_corners.data.cpu().numpy() * sz, class_activation_one.max(dim=1)[1].data.cpu().numpy())

In [None]:
# Development helper: re-import project modules after editing them on disk.
import importlib

# A module must be bound to a name before it can be reloaded; the original
# reloaded `transforms` without importing it first, which raised NameError.
import transforms
importlib.reload(transforms)
from transforms import to_numpy_image

import losses
importlib.reload(losses)
from losses import filter_ground_truth

# Alias the module so it is not confused with the `vis` instance rebuilt below.
import vis as vis_module
importlib.reload(vis_module)
from vis import Vis
vis = Vis(train_ds.labels)