# SSD for VOC Object Detection

In [None]:
import os
import copy
from pathlib import Path
from functools import partial
from typing import Dict

import torch
from torch.utils.data import Dataset
from fastai.vision.all import (RandomSplitter, DataLoader,
                               DataLoaders, Learner, SaveModelCallback)

from models import SSD, generate_map_data, ssd_body_resnet50
from data_utils import BoxMatcher, VOCDataset, tensor2boxes, predict_image
from training import (loss_func, localization_loss_metric,
                      confidence_loss_metric, recall, precision)

# NOTE(review): the device object built here is never assigned, so this line
# only echoes the device as the cell output; it does not move anything to the
# GPU. Models and loaders are moved explicitly further down.
torch.device('cuda')

## Configure Hyperparameters and Default Boxes

In [None]:
IMG_SIZE = 300            # square input resolution, in pixels
MATCH_THRESHOLD = 0.50    # threshold handed to BoxMatcher
BATCH_SIZE = 32
NUM_ITERATIONS = 120_000

# The following values are derived from https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/Detection/SSD/src/utils.py
# NOTE(review): the integer tables below are presumably the pixel-space steps
# and box sizes used for a 300px input in that reference -- confirm against it.
# Dividing by IMG_SIZE reproduces the previously hard-coded float values exactly.
_STEPS_PX = (8, 16, 32, 64, 100, 300)
_SCALES_PX = (21, 45, 99, 153, 207, 261, 315)

MAP_SIZES = [38, 19, 10, 5, 3, 1]
STEPS = [IMG_SIZE / step_px for step_px in _STEPS_PX]
SCALES = [scale_px / IMG_SIZE for scale_px in _SCALES_PX]
ASPECT_RATIOS = [
    [2],
    [2, 3],
    [2, 3],
    [2, 3],
    [2],
    [2],
]

In [None]:
# Shared BoxMatcher built from the default-box configuration above; it is
# handed to every VOCDataset and its `default_boxes` are reused when decoding
# targets and predictions later in the notebook.
matcher = BoxMatcher(MAP_SIZES, STEPS, SCALES, ASPECT_RATIOS, MATCH_THRESHOLD)

## Load Data and Create DataLoaders

In [None]:
def get_voc_dataset(data_dir: Path, categories: Dict[int, str] = None, ignore_difficult: bool = False) -> Dataset:
    """Create a dataset object using data in the VOC challenge format.

    Args:
        data_dir: Dataset root containing the ``JPEGImages`` and
            ``Annotations`` sub-directories.
        categories: Category mapping forwarded to ``VOCDataset``. ``None``
            (the default) is passed through unchanged.
        ignore_difficult: Forwarded to ``VOCDataset`` unchanged.

    Returns:
        A ``VOCDataset`` over every ``.xml`` file in ``Annotations``, built
        with the module-level ``IMG_SIZE`` and ``matcher``.
    """
    img_dir = data_dir / "JPEGImages"
    ann_dir = data_dir / "Annotations"

    # Directory listings come back in arbitrary, filesystem-dependent order and
    # may contain stray entries (hidden files, sub-directories). Sorting keeps
    # sample indices stable between runs; filtering keeps non-annotation files
    # away from the parser.
    ann_fns = sorted(
        path for path in ann_dir.iterdir()
        if path.is_file() and path.suffix.lower() == ".xml"
    )

    # NOTE(review): the positional `True` is presumably the training-mode flag
    # (callers overwrite `is_train` afterwards) -- confirm against VOCDataset.
    return VOCDataset(ann_fns, img_dir, IMG_SIZE, matcher, True, categories, ignore_difficult)

# Build the training pool from VOC 2012 and VOC 2007; the VOC 2007 dataset
# reuses the category mapping of the VOC 2012 one so labels stay consistent.
train_ds = get_voc_dataset(Path("./data/voc2012"))
voc2007 = get_voc_dataset(Path("./data/voc2007"), categories=train_ds.categories)
train_ds.data.extend(voc2007.data)

# Hold out 5% of the pool for validation. The copy is taken before either
# dataset is trimmed, so both index lists refer to the same full pool.
valid_ds = copy.deepcopy(train_ds)
train_idxs, valid_idxs = RandomSplitter(0.05)(train_ds.data)
valid_ds.data = [valid_ds.data[idx] for idx in valid_idxs]
# Remove the held-out samples from the training set. Leaving them in would mean
# the validation loss and metrics are computed on data the model was trained on.
train_ds.data = [train_ds.data[idx] for idx in train_idxs]

train_ds.is_train = True
valid_ds.is_train = False

# Initialize the test set using VOC 2007
test_ds = get_voc_dataset(Path("./data/voc2007_test"), categories=train_ds.categories, ignore_difficult=True)
test_ds.is_train = False

In [None]:
# Create DataLoaders
# Only the training loader shuffles. The validation loader keeps a fixed order
# so every epoch is evaluated on identical batches and the per-epoch validation
# numbers are directly comparable.
train_dl = DataLoader(train_ds, bs=BATCH_SIZE, num_workers=32, drop_last=False, shuffle=True, pin_memory=True)
valid_dl = DataLoader(valid_ds, bs=BATCH_SIZE, num_workers=32, drop_last=False, shuffle=False, pin_memory=True)
dls = DataLoaders(train_dl, valid_dl)
dls.to(torch.device('cuda'))

In [None]:
# Show the first training sample together with the default boxes matched to it
img, targ = train_ds[0]
targ_boxes, targ_classes, _ = tensor2boxes(matcher.default_boxes, targ)

num_matched = len(targ_boxes)
print(f"Matched {num_matched} default boxes in this image.")

train_ds.show_img(img, (targ_boxes, targ_classes))

## Train Model

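**Note:** the following SSD bodies are available; to use a different one, import it from `models` and call it in place of `ssd_body_resnet50()` in the cell below: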
- Resnet18 (`from models import ssd_body_resnet18`)
- Resnet34 (`from models import ssd_body_resnet34`)
- Resnet50 (`from models import ssd_body_resnet50`)
- Resnet101 (`from models import ssd_body_resnet101`)
- Resnet152 (`from models import ssd_body_resnet152`)
- MobileNet v2 (`from models import ssd_body_mobilenet_v2`)

In [None]:
# Build the network body on the GPU, then describe its feature maps
body = ssd_body_resnet50().cuda()
map_data = generate_map_data(IMG_SIZE, MAP_SIZES, body)

# Boxes per feature-map cell: two, plus two more for every listed aspect ratio
boxes_per_cell = [2 * (len(ratios) + 1) for ratios in ASPECT_RATIOS]

# Assemble the SSD model and move it to the GPU
model = SSD(body, map_data, boxes_per_cell, train_ds.num_classes).cuda()

In [None]:
# Wrap the data, model and loss in a fastai Learner. SaveModelCallback is used
# with its default settings and writes checkpoints under `trained_models`.
learn = Learner(
    dls,
    model,
    loss_func=loss_func,
    wd=5e-4,
    cbs=[SaveModelCallback()],
    model_dir='trained_models',
    metrics=[
        localization_loss_metric,
        confidence_loss_metric,
        recall,
        precision,
    ],
)

In [None]:
# Run fastai's learning-rate finder; use its output to sanity-check the
# `lr_max` chosen for training below.
learn.lr_find()

In [None]:
# One-cycle training for a fixed number of epochs, then store the resulting
# weights as `final_model` in the learner's model directory.
# NOTE(review): NUM_ITERATIONS is defined above but not used in this notebook's
# visible code; the run length is set by the hard-coded epoch count -- confirm
# this is intended.
learn.fit_one_cycle(
    n_epoch=300,
    lr_max=1e-3,
    div=100,
    wd=5e-4,
)
learn.save('final_model')

## Visualise Model Results

In [None]:
# Load the checkpoint named 'model' from the learner's model directory.
# NOTE(review): this is not the 'final_model' saved after training; presumably
# it is the checkpoint written by SaveModelCallback -- confirm which weights
# are meant to be evaluated.
learn.load('model')
model = learn.model
# Switch to evaluation mode; the trailing semicolon suppresses the cell output.
model.eval();

In [None]:
# Run the model on the first training sample and draw its predictions
img, targ = train_ds[0]
targ_boxes, targ_classes, _ = tensor2boxes(matcher.default_boxes, targ)

pred_boxes, pred_classes, pred_confs = predict_image(
    model,
    img.cuda(),
    matcher.default_boxes.cuda(),
    conf_threshold=0.50,
    iou_threshold=0.45,
)

# The ground-truth boxes decoded above are not drawn: an empty pair is passed
# in their place so only the predictions appear.
valid_ds.show_img(img, ([], []), (pred_boxes, pred_classes))

## Calculate mAP

In [None]:
# Score the model on the test set and report the result as a percentage
mean_avg_precision = test_ds.calculate_map(model)
map_percent = round(mean_avg_precision * 100, 2)
print(f"{map_percent} mAP")