90% of the model is thanks to [Centernet-better](https://github.com/FateScript/CenterNet-better). I just made small modifications to it to make it easier to use for my dataset.

In [1]:
import torch
from torch import nn, optim
from config import config
from net import load_model
from Dataset.utils import *
from train import train

# Load model

If you provide a path to model weights, it loads them in, otherwise you must download the pretrained Resnet50 model that the author's of Centernet-better provide and put it in the weights folder. If freeze is True, it freezes the backbone and the upsample layers

In [2]:
model, cfg = load_model(config, num_classes=15, path=None, freeze=True)

# Load data
The output of the data loading are data loaders that have an image (normalized) and bounding boxes

In [3]:
train_data, val_data = load_train_data('../../data/train/files.txt')

In [4]:
train_dl = get_data_loader(train_data, '../../data/train', augment=True)
val_dl = get_data_loader(val_data, '../../data/train', shuffle=False)

The bounding box output is a max_boxesX5 tensor. The first 4 columns are the bounding box (pascal format) and the 5th column is the class. If there are ```k < max_boxes``` boxes, rows ```k:max_boxes-1``` will be -1

In [5]:
next(iter(train_dl))[1][0]

tensor([[440.0000, 262.5000, 626.5000, 454.5000,  14.0000],
        [282.0000, 282.0000, 438.5000, 502.0000,   5.0000],
        [ -1.0000,  -1.0000,  -1.0000,  -1.0000,  -1.0000],
        [ -1.0000,  -1.0000,  -1.0000,  -1.0000,  -1.0000],
        [ -1.0000,  -1.0000,  -1.0000,  -1.0000,  -1.0000],
        [ -1.0000,  -1.0000,  -1.0000,  -1.0000,  -1.0000],
        [ -1.0000,  -1.0000,  -1.0000,  -1.0000,  -1.0000],
        [ -1.0000,  -1.0000,  -1.0000,  -1.0000,  -1.0000],
        [ -1.0000,  -1.0000,  -1.0000,  -1.0000,  -1.0000],
        [ -1.0000,  -1.0000,  -1.0000,  -1.0000,  -1.0000]])

# Train
The train function takes in train and val data loaders, model, optimizer, and an optional lr scheduler. Early stopping is automatically applied if there are 10 epochs with no improvement.
Be sure to also adjust the number of epochs needed and checkpoint folder if you desire

In [6]:
optimizer = optim.Adam(model.head.parameters(), lr=1e-4, weight_decay=1e-4)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=.5)
train(train_dl, val_dl, model, optimizer, num_epochs=1, scheduler=scheduler)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Epoch 0: train loss: 2968.8584, val loss: 1705.6133


In [7]:
model

CenterNet(
  (backbone): ResnetBackbone(
    (stage0): Sequential(
      (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    )
    (stage1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        

## Evaluate
I didn't get to implementing IOU and the like because it requires a lot of processing. It may be added later. What I do have for eval is: 
- Recall calculator for a given threshold. This is done simply by taking the difference between ground truth labels and predicted labels. I do not check if they have matching IOUs. If I did that, I could probably implement IOUs
- Deduping predicted boxes with high IOUs (selecting the box that has the higher score)

In [3]:
model, cfg = load_model(config, num_classes=15, path='model/weights/model_40_epochs.pt', freeze=True)

In [4]:
test_dl = load_test_data('../../data/test')

In [7]:
from collections import Counter

def remove_extra_boxes(boxes, classes):
    good_boxes, good_classes = [], []
    for box, c in zip(boxes, classes):
        if not_repeat(box, good_boxes):
            good_boxes.append(box), good_classes.append(c)
    return torch.stack(good_boxes), torch.stack(good_classes)
        
def not_repeat(box, boxes, threshold=.7):
    for b in boxes:
        if iou_pytorch(box, b) > threshold:
            return False
    return True
        
def iou_pytorch(outputs: torch.Tensor, labels: torch.Tensor):
    outputs_area = (outputs[2] - outputs[0]) * (outputs[3] - outputs[1])
    labels_area = (labels[2] - labels[0]) * (labels[3] - labels[1])
    
    # get area of intersection
    mins = torch.maximum(outputs[:2], labels[:2])
    maxs = torch.minimum(outputs[2:], labels[2:])
    intersection = (maxs[0] - mins[0]) * (maxs[1] - mins[1])
    union = outputs_area + labels_area - intersection
    return intersection / union

rec = 0
total = 0
for inp, gt, img in test_dl:
    model.eval()
    out = model(inp)
    gt = gt[0]
    gt = gt[gt[:,-1] > -1]
    good_boxes = out[0]['instances']._fields['scores'] > .4
    pred_boxes = out[0]['instances']._fields['pred_boxes'].tensor[good_boxes].cpu()#.numpy()
    pred_classes = out[0]['instances']._fields['pred_classes'][good_boxes].cpu()#.numpy()
    total += len(gt)
    try:
        boxes, classes = remove_extra_boxes(pred_boxes, pred_classes)
        classes = classes.numpy()
        true_classes = Counter(gt[:,-1].numpy())
        for c in classes:
            if c in true_classes and true_classes[c] > 0:
                true_classes[c] -= 1
        rec += sum(true_classes.values())
    except:
        continue # if error, it means that no boxes predicted
print('recall:', 1 - rec / total)   

recall: 0.8468085106382979
