In [None]:
%matplotlib inline
import mxnet as mx
from mxnet import nd
from mxnet.contrib.ndarray import MultiBoxPrior
from mxnet.gluon.contrib import nn as nn_contrib
from mxnet.gluon import nn
# Prefer the first GPU, but fall back to the CPU so the notebook still runs
# (slowly) on machines where MXNet reports no usable GPU.
ctx = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu()

### Predict classes
- channel `i*(num_classes+1)` stores the score that the *i*-th anchor box contains only background
- channel `i*(num_classes+1)+1+j` stores the score that the *i*-th anchor box contains an object of the *j*-th class

In [None]:
def class_predictor(num_anchors, num_classes):
    """Create the convolution that emits class scores for every anchor box.

    Args:
        num_anchors: Number of anchor boxes generated per feature-map position.
        num_classes: Number of object classes (background is added on top).

    Returns:
        A 3x3, padding-1 ``nn.Conv2D`` with ``num_anchors * (num_classes + 1)``
        output channels.
    """
    # Each anchor needs one score per object class plus one for background.
    scores_per_anchor = num_classes + 1
    out_channels = num_anchors * scores_per_anchor
    return nn.Conv2D(out_channels, kernel_size=3, padding=1)

### Predict anchor boxes
- $t_x = (Y_x - b_x) / b_{width}$
- $t_y = (Y_y - b_y) / b_{height}$
- $t_{width} = (Y_{width} - b_{width}) / b_{width}$
- $t_{height} = (Y_{height} - b_{height}) / b_{height}$

In [None]:
def box_predictor(num_anchors):
    """Create the convolution that emits box offsets for every anchor box.

    Args:
        num_anchors: Number of anchor boxes generated per feature-map position.

    Returns:
        A 3x3, padding-1 ``nn.Conv2D`` with ``num_anchors * 4`` output channels
        (four offset values per anchor).
    """
    offsets_per_anchor = 4
    out_channels = num_anchors * offsets_per_anchor
    return nn.Conv2D(out_channels, kernel_size=3, padding=1)

### Manage predictions from multiple layers

In [None]:
def flatten_prediction(pred):
    """Move axis 1 to the end of a 4-D prediction and flatten it per sample.

    Args:
        pred: 4-D NDArray; presumably laid out as (batch, channel, height, width).

    Returns:
        A 2-D NDArray of shape (batch, -1) whose values are ordered with the
        former axis 1 varying fastest.
    """
    channels_last = nd.transpose(pred, axes=(0, 2, 3, 1))
    return nd.flatten(channels_last)

def concat_predictions(preds):
    """Join the per-layer predictions into a single array along axis 1.

    Args:
        preds: Iterable of NDArrays that agree on every axis except axis 1.

    Returns:
        One NDArray holding all inputs concatenated along ``dim=1``.
    """
    merged = nd.concat(*preds, dim=1)
    return merged

### Down-sample features

In [None]:
def dp_layer(nfilters, stride, expension_constant):
    """Build a depthwise + pointwise convolution pair, each with BatchNorm and ReLU.

    Args:
        nfilters: Channel count of the depthwise 3x3 convolution (also its
            ``groups`` value, so every channel is filtered independently).
        stride: Stride of the depthwise convolution.
        expension_constant: Multiplier applied to ``nfilters`` to obtain the
            channel count of the 1x1 pointwise convolution.

    Returns:
        An ``nn.HybridSequential`` with six children:
        conv3x3(depthwise) -> BN -> ReLU -> conv1x1 -> BN -> ReLU.
    """
    out = nn.HybridSequential()
    # Layers are instantiated in the same order in which they are stacked.
    layers = [
        nn.Conv2D(nfilters, 3, strides=stride, padding=1, groups=nfilters, use_bias=False),
        nn.BatchNorm(use_global_stats=False, epsilon=1e-05, momentum=0.9, axis=1),
        nn.Activation('relu'),
        nn.Conv2D(nfilters*expension_constant, 1, strides=1, padding=0, use_bias=False),
        nn.BatchNorm(use_global_stats=False, epsilon=1e-05, momentum=0.9, axis=1),
        nn.Activation('relu'),
    ]
    for layer in layers:
        out.add(layer)
    return out

### Scale units

In [None]:
# Width multiplier applied to the base channel count of 32.
# (A `global` statement is a no-op at module scope, so none is needed here.)
alpha = 0.25
# Channel count of the first convolution; later stages use multiples of it.
num_filters = int(32*alpha)

### Body network

In [None]:
from mxnet import gluon
def s16():
    """Build the first backbone stage and load its weights from disk.

    The stage is a stride-2 3x3 convolution with BatchNorm and ReLU, three
    ``dp_layer`` blocks (conv_1 .. conv_3) and a final stride-2 depthwise
    convolution that has no normalisation or activation of its own.

    Returns:
        A hybridized ``nn.HybridSequential`` whose parameters were loaded from
        ``weights/mobilenet_0_25_s16_org.params`` onto ``ctx``.
    """
    out = nn.HybridSequential()
    # conv_0 layer
    stem = [
        nn.Conv2D(num_filters, 3, strides=2, padding=1, use_bias=False),
        nn.BatchNorm(use_global_stats=False, epsilon=1e-05, momentum=0.9, axis=1),
        nn.Activation('relu'),
    ]
    # conv_1, conv_2 and conv_3 layers
    separable = [
        dp_layer(num_filters, 1, 2),
        dp_layer(num_filters*2, 2, 2),
        dp_layer(num_filters*4, 1, 1),
    ]
    # Trailing depthwise down-sampling convolution.
    downsample = nn.Conv2D(num_filters*4, 3, strides=2, padding=1, groups=num_filters*4, use_bias=False)
    for layer in stem + separable + [downsample]:
        out.add(layer)
    out.load_parameters("weights/mobilenet_0_25_s16_org.params", ctx=ctx)
    out.hybridize()
    return out

def s32():
    """Build the second backbone stage and load its weights from disk.

    The stage opens with BatchNorm + ReLU (the "from last layer" part),
    followed by a 1x1 convolution with BatchNorm and ReLU, one ``dp_layer``
    (conv_4) and a final stride-2 depthwise convolution.

    Returns:
        A hybridized ``nn.HybridSequential`` whose parameters were loaded from
        ``weights/mobilenet_0_25_s32_org.params`` onto ``ctx``.
    """
    out = nn.HybridSequential()
    width = num_filters*8
    # from last layer
    head = [
        nn.BatchNorm(use_global_stats=False, epsilon=1e-05, momentum=0.9, axis=1),
        nn.Activation('relu'),
        nn.Conv2D(width, 1, strides=1, padding=0, use_bias=False),
        nn.BatchNorm(use_global_stats=False, epsilon=1e-05, momentum=0.9, axis=1),
        nn.Activation('relu'),
    ]
    # conv_4_layer
    separable = dp_layer(width, 1, 1)
    downsample = nn.Conv2D(width, 3, strides=2, padding=1, groups=width, use_bias=False)
    for layer in head + [separable, downsample]:
        out.add(layer)
    out.load_parameters("weights/mobilenet_0_25_s32_org.params", ctx=ctx)
    out.hybridize()
    return out

def _extra_stage():
    """Build one randomly initialised down-sampling stage shared by b1..b4.

    Layout: BN -> ReLU -> conv1x1 -> BN -> ReLU -> dp_layer -> stride-2
    depthwise conv3x3, all with ``num_filters*16`` channels.

    Returns:
        A hybridized ``nn.HybridSequential`` initialised with
        ``Xavier(magnitude=2)`` on ``ctx``.
    """
    out = nn.HybridSequential()
    # from last layer
    out.add(nn.BatchNorm(use_global_stats=False, epsilon=1e-05, momentum=0.9, axis=1))
    out.add(nn.Activation('relu'))
    out.add(nn.Conv2D(num_filters*16, 1, strides=1, padding=0, use_bias=False))
    out.add(nn.BatchNorm(use_global_stats=False, epsilon=1e-05, momentum=0.9, axis=1))
    out.add(nn.Activation('relu'))
    # separable block followed by the stride-2 depthwise down-sampling conv
    out.add(dp_layer(num_filters*16, 1, 1))
    out.add(nn.Conv2D(num_filters*16, 3, strides=2, padding=1, groups=num_filters*16, use_bias=False))
    out.initialize(mx.init.Xavier(magnitude=2), ctx=ctx)
    out.hybridize()
    return out


def b1():
    """Return a new extra stage (the conv_6 layer group)."""
    return _extra_stage()


def b2():
    """Return a new extra stage (the conv_7 layer group)."""
    return _extra_stage()


def b3():
    """Return a new extra stage (the conv_8 layer group)."""
    return _extra_stage()


def b4():
    """Return a new extra stage (the conv_9 layer group)."""
    return _extra_stage()

### Create an SSD model

In [None]:
def ssd_model(num_anchors, num_classes):
    """Instantiate every sub-network of the detector.

    Args:
        num_anchors: Anchor boxes per feature-map position, forwarded to the
            class and box predictors.
        num_classes: Number of object classes, forwarded to the class predictors.

    Returns:
        An 8-tuple ``(s16, s32, b1, b2, b3, b4, class_preds, box_preds)`` where
        the first six entries are the feature stages and the last two are
        ``nn.Sequential`` containers holding one predictor per stage.
    """
    class_preds = nn.Sequential()
    box_preds = nn.Sequential()

    # One class head and one box head for each of the six feature stages.
    for _ in range(6):
        class_preds.add(class_predictor(num_anchors, num_classes))
        box_preds.add(box_predictor(num_anchors))

    for heads in (class_preds, box_preds):
        heads.initialize(mx.init.Xavier(magnitude=2), ctx=ctx)

    stages = (s16(), s32(), b1(), b2(), b3(), b4())
    return stages + (class_preds, box_preds)

### Forward

In [None]:
def ssd_forward(x, s16, s32, b1, b2, b3, b4, class_preds, box_preds, sizes, ratios):
    """Run the six feature stages and collect anchors and predictions per stage.

    Args:
        x: Input batch fed to the first stage.
        s16, s32, b1, b2, b3, b4: Feature stages, applied in this order, each
            consuming the previous stage's output.
        class_preds: Indexable container with one class predictor per stage.
        box_preds: Indexable container with one box predictor per stage.
        sizes: Per-stage anchor sizes; entries 0-5 are used.
        ratios: Per-stage anchor aspect ratios; entries 0-5 are used.

    Returns:
        ``(default_anchors, predicted_classes, predicted_boxes)``: three lists
        with one entry per stage. Predictions are already flattened with
        ``flatten_prediction``.
    """
    default_anchors = []
    predicted_boxes = []
    predicted_classes = []

    stages = (s16, s32, b1, b2, b3, b4)
    for level, stage in enumerate(stages):
        x = stage(x)
        if level == 1:
            # The s32 output is detached from the autograd graph, so gradients
            # from this level and all deeper levels do not flow back into
            # s32 or s16.
            x = x.detach()
        default_anchors.append(MultiBoxPrior(x, sizes=sizes[level], ratios=ratios[level]))
        predicted_boxes.append(flatten_prediction(box_preds[level](x)))
        predicted_classes.append(flatten_prediction(class_preds[level](x)))

    return default_anchors, predicted_classes, predicted_boxes

### Put all things together

In [None]:
from mxnet import gluon
class SSD(gluon.Block):
    """Single-shot detector built from the stages and predictors in this notebook.

    Args:
        num_classes: Number of object classes (background excluded).
        **kwargs: Forwarded to ``gluon.Block``.
    """

    def __init__(self, num_classes, **kwargs):
        super(SSD, self).__init__(**kwargs)
        # Seven size pairs are listed, but ssd_forward only reads entries 0-5.
        self.anchor_sizes = [[0.04, 0.1],[0.1,0.26],[0.26,0.42],[0.42,0.58],[0.58,0.74],[0.74,0.9],[0.9,1.06]]
        # Build six independent lists; ``[[1, 2, .5]] * 6`` would repeat one
        # shared list object, so mutating a single entry would change them all.
        self.anchor_ratios = [[1, 2, .5] for _ in range(6)]
        self.num_classes = num_classes

        # MultiBoxPrior yields len(sizes) + len(ratios) - 1 anchors per position
        # (2 + 3 - 1 = 4 here); derive it so the heads stay in sync with the
        # anchor configuration above.
        num_anchors = len(self.anchor_sizes[0]) + len(self.anchor_ratios[0]) - 1

        with self.name_scope():
            (self.s16, self.s32, self.b1, self.b2, self.b3, self.b4,
             self.class_preds, self.box_preds) = ssd_model(num_anchors, num_classes)

    def forward(self, x):
        """Return ``(anchors, class_preds, box_preds)`` for the input batch.

        ``class_preds`` is reshaped to ``(batch, -1, num_classes + 1)``;
        ``anchors`` and ``box_preds`` are the per-stage outputs concatenated
        along axis 1.
        """
        default_anchors, predicted_classes, predicted_boxes = ssd_forward(
            x, self.s16, self.s32, self.b1, self.b2, self.b3, self.b4,
            self.class_preds, self.box_preds, self.anchor_sizes, self.anchor_ratios)
        anchors = concat_predictions(default_anchors)
        box_preds = concat_predictions(predicted_boxes)
        class_preds = concat_predictions(predicted_classes)
        # Group the flat scores so the last axis holds one score per class
        # (background included) for each anchor.
        class_preds = nd.reshape(class_preds, shape=(0, -1, self.num_classes + 1))

        return anchors, class_preds, box_preds

### Outputs of SSD

In [None]:
# Build the detector for two object classes (each head also scores background).
net = SSD(2)
#net.initialize(mx.init.Xavier(magnitude=2), ctx=ctx)
# Restore all parameters from a saved checkpoint instead of initialising them.
net.load_parameters("process/ssd_324.params",ctx=ctx)
# All-zero 3-channel 512x512 batch, used only to report the output shapes.
x = nd.zeros((1, 3, 512, 512),ctx=ctx)
default_anchors, class_predictions, box_predictions = net(x)
print('Outputs:', 'anchors', default_anchors.shape, 'class prediction', class_predictions.shape, 'box prediction', box_predictions.shape)

### Load dataset

In [None]:
from source.NACDDetection import NACDDetection

# Train and test splits of the 'NACDwNegswAugCropped' dataset.
train_dataset = NACDDetection(splits=[('NACDwNegswAugCropped', 'train')])
test_dataset = NACDDetection(splits=[('NACDwNegswAugCropped', 'test')])

# Report how many samples each split contains.
print('Training images:', len(train_dataset))
print('Test images:', len(test_dataset))

In [None]:
from source import NACDTransform
# Network input resolution; convertlbl reads `width` when normalising labels.
width, height = 512, 512
# NOTE(review): the third argument differs between the two transforms (False for
# training, True for testing); its meaning is defined in source.NACDTransform — confirm.
train_transform = NACDTransform.NACDDefaultTransform(width, height, False)
test_transform = NACDTransform.NACDDefaultTransform(width, height, True)

In [None]:
from gluoncv.data.transforms import presets
from gluoncv import utils
from mxnet import nd
from matplotlib import pyplot as plt
from gluoncv.utils import viz

In [None]:
# NOTE(review): the sample is taken from test_dataset even though the variables
# are named train_* — confirm which split is intended here.
train_image, train_label = test_dataset[0]
# The first four label columns are used as box coordinates, the fifth as class id.
bboxes = train_label[:, :4]
cids = train_label[:, 4:5]
print('image:', train_image.shape)
print('bboxes:', bboxes.shape, 'class ids:', cids.shape)

In [None]:
# Apply the training transform to the inspected sample and report the result's shape.
train_image2, train_label2 = train_transform(train_image, train_label)
print('tensor shape:', train_image2.shape)

In [None]:
from gluoncv.data.batchify import Tuple, Stack, Pad
from mxnet.gluon.data import DataLoader

# Mini-batch size (also passed to trainer.step) and number of loader worker processes.
batch_size = 16
num_workers = 4

# Images are stacked; labels are padded with -1 so every sample in a batch has
# the same number of label rows (convertlbl later treats -1 as padding).
batchify_fn = Tuple(Stack(), Pad(pad_val=-1))
# Training: shuffled; an incomplete final batch is rolled over to the next epoch.
train_loader = DataLoader(train_dataset.transform(train_transform), batch_size, shuffle=True,
                          batchify_fn=batchify_fn, last_batch='rollover', num_workers=num_workers)
# Testing: fixed order; the incomplete final batch is kept.
test_loader = DataLoader(test_dataset.transform(test_transform), batch_size, shuffle=False,
                        batchify_fn=batchify_fn, last_batch='keep', num_workers=num_workers)

# Sanity check: print the shapes of the first four test batches.
for ib, batch in enumerate(test_loader):
    if ib > 3:
        break
    print('data:', batch[0].shape, 'label:', batch[1].shape)

In [None]:
# Move channels last, then multiply by per-channel constants and add offsets.
# Presumably this undoes the mean/std normalisation applied by the transform — confirm.
# NOTE: train_image2 is overwritten from itself, so re-running this cell without
# re-running the transform cell applies the conversion a second time.
train_image2 = train_image2.transpose((1, 2, 0)) * nd.array((0.229, 0.224, 0.225)) + nd.array((0.485, 0.456, 0.406))
# Scale to the 0-255 range and clamp for display.
train_image2 = (train_image2 * 255).clip(0, 255)
# Draw the transformed boxes and class labels on top of the image.
ax = viz.plot_bbox(train_image2.asnumpy(), train_label2[:, :4],
                   labels=train_label2[:, 4:5],
                   class_names=train_dataset.classes)
plt.show()

## Train

In [None]:
from mxnet.contrib.ndarray import MultiBoxTarget
def training_targets(default_anchors, class_predicts, labels):
    """Compute per-anchor training targets with ``MultiBoxTarget`` on the CPU.

    Args:
        default_anchors: Anchor boxes produced by the network.
        class_predicts: Class predictions; axes 1 and 2 are swapped before use.
        labels: Ground-truth labels for the batch.

    Returns:
        ``(box_target, box_mask, cls_target)`` copied back to ``ctx``:
        box offset targets for (x, y, width, height), a mask that zeroes the
        offsets which should not be penalised (e.g. negative samples), and the
        class label assigned to every anchor.
    """
    cpu = mx.cpu()
    swapped = nd.transpose(class_predicts, axes=(0, 2, 1))
    # The target computation runs on the CPU; its three outputs are then
    # copied back to the training context.
    targets = MultiBoxTarget(
        anchor=default_anchors.as_in_context(cpu),
        label=labels.as_in_context(cpu),
        cls_pred=swapped.as_in_context(cpu),
    )
    box_target, box_mask, cls_target = (targets[i].as_in_context(ctx) for i in range(3))
    return box_target, box_mask, cls_target

In [None]:
def convertlbl(y):
    """Convert a padded label batch into the layout passed to ``training_targets``.

    The class id (column 4) is moved to the front and the four box coordinates
    (columns 0-3) are divided by the input size ``width``. Coordinates equal to
    -1 (the value used to pad labels in the batchify function) stay -1.

    Args:
        y: NDArray of shape (batch, num_objects, >=5) holding four box
            coordinates followed by the class id.

    Returns:
        NDArray of shape (batch, num_objects, 5) laid out as
        ``[class_id, c0, c1, c2, c3]`` with scaled coordinates.
    """
    boxes = y[:, :, 0:4].asnumpy()
    padding = boxes == -1
    # The original divided by a literal 512 while marking padding with -width,
    # so padding only mapped back to -1 when width == 512. Using `width` for
    # the scale and restoring the sentinel explicitly keeps the two in sync.
    # NOTE(review): x and y coordinates share one scale, which assumes
    # width == height — confirm if non-square inputs are ever used.
    boxes = boxes / width
    boxes[padding] = -1
    return mx.nd.concat(nd.expand_dims(y[:, :, 4], 2), mx.nd.array(boxes), dim=2)

In [None]:
class FocalLoss(gluon.loss.Loss):
    """Focal loss: ``-alpha * (1 - p_t) ** gamma * log(p_t)``.

    ``p_t`` is the softmax probability assigned to the target class.

    Args:
        axis: Axis of ``output`` that enumerates the classes.
        alpha: Constant scale applied to the loss.
        gamma: Exponent of the ``(1 - p_t)`` modulating factor.
        batch_axis: Axis that enumerates samples; the loss is averaged over
            every other axis.
        **kwargs: Forwarded to ``gluon.loss.Loss``.
    """

    def __init__(self, axis=-1, alpha=0.25, gamma=2, batch_axis=0, **kwargs):
        super(FocalLoss, self).__init__(None, batch_axis, **kwargs)
        self._axis = axis
        self._alpha = alpha
        self._gamma = gamma

    def hybrid_forward(self, F, output, label):
        """Return the focal loss averaged over all non-batch axes.

        Args:
            F: ``mxnet.ndarray`` or ``mxnet.symbol`` namespace.
            output: Raw class scores (logits).
            label: Target class index for every prediction.
        """
        # Normalise over the configured class axis (the original always used
        # the default last axis, ignoring ``self._axis``) and stay in log
        # space so log(p_t) remains finite when p_t underflows to zero.
        log_probs = F.log_softmax(output, axis=self._axis)
        log_pt = F.pick(log_probs, label, axis=self._axis, keepdims=True)
        pt = F.exp(log_pt)
        loss = -self._alpha * ((1 - pt) ** self._gamma) * log_pt
        return F.mean(loss, axis=self._batch_axis, exclude=True)

# Classification loss used by the training loop. Plain softmax cross-entropy is
# kept below as a commented-out alternative.
# cls_loss = gluon.loss.SoftmaxCrossEntropyLoss()
cls_loss = FocalLoss()
print(cls_loss)

In [None]:
class SmoothL1Loss(gluon.loss.Loss):
    """Smooth-L1 loss on the masked difference between prediction and target.

    Args:
        batch_axis: Axis that enumerates samples; the loss is averaged over
            every other axis.
        **kwargs: Forwarded to ``gluon.loss.Loss``.
    """

    def __init__(self, batch_axis=0, **kwargs):
        super(SmoothL1Loss, self).__init__(None, batch_axis, **kwargs)

    def hybrid_forward(self, F, output, label, mask):
        """Return the per-sample mean smooth-L1 loss.

        Args:
            F: ``mxnet.ndarray`` or ``mxnet.symbol`` namespace.
            output: Predicted values.
            label: Target values.
            mask: Multiplier applied to the difference; zero entries remove
                the corresponding element from the loss.
        """
        masked_diff = (output - label) * mask
        elementwise = F.smooth_l1(masked_diff, scalar=1.0)
        return F.mean(elementwise, axis=self._batch_axis, exclude=True)

# Box regression loss used by the training loop.
box_loss = SmoothL1Loss()
print(box_loss)

### Initialize parameters

In [None]:
import time
from mxnet import autograd as ag
from gluoncv.loss import SSDMultiBoxLoss

In [None]:
# loop params: epochs [start_epoch, epochs) are run; one checkpoint is written per epoch
epochs = 351
start_epoch = 325

# initialize trainer
net.collect_params().reset_ctx(ctx)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 1e-1, 'wd': 4e-5})

# evaluation metrics
cls_metric = mx.metric.Accuracy()
box_metric = mx.metric.MAE()
cls_metric_test = mx.metric.Accuracy()
box_metric_test = mx.metric.MAE()


def _forward_and_loss(batch):
    """Run the network on one batch and compute its loss.

    Returns:
        ``(loss, class_predictions, box_predictions, box_target, box_mask,
        cls_target)`` where ``loss`` is the sum of the classification and box
        losses, one value per sample.
    """
    x = batch[0].as_in_context(ctx)
    # Labels are converted from the loader's (CPU) batch; no GPU copy is needed.
    lbl = convertlbl(batch[1])
    default_anchors, class_predictions, box_predictions = net(x)
    box_target, box_mask, cls_target = training_targets(default_anchors, class_predictions, lbl)
    loss1 = cls_loss(class_predictions, cls_target)
    loss2 = box_loss(box_predictions, box_target, box_mask)
    return loss1 + loss2, class_predictions, box_predictions, box_target, box_mask, cls_target


def _update_metrics(cls_m, box_m, class_predictions, box_predictions, box_target, box_mask, cls_target):
    """Feed one batch of predictions and targets into the two metrics."""
    # Accuracy takes the arg-max over axis 1, so the class axis is moved there.
    cls_m.update([cls_target], [nd.transpose(class_predictions, (0, 2, 1))])
    box_m.update([box_target], [box_predictions * box_mask])


# training loop
for epoch in range(start_epoch, epochs):
    # ---- training pass ----
    cls_metric.reset()
    box_metric.reset()
    train_loss = 0
    for batch in train_loader:
        # record gradients
        with ag.record():
            loss, class_predictions, box_predictions, box_target, box_mask, cls_target = _forward_and_loss(batch)
        # backpropagate
        loss.backward()
        train_loss += nd.sum(loss).asscalar()
        # apply; ignore_stale_grad tolerates parameters that received no
        # gradient in this step (the s32 output is detached in ssd_forward)
        trainer.step(batch_size, ignore_stale_grad=True)
        # update metrics
        _update_metrics(cls_metric, box_metric, class_predictions, box_predictions,
                        box_target, box_mask, cls_target)
    name1_train, val1_train = cls_metric.get()
    name2_train, val2_train = box_metric.get()

    # ---- evaluation pass (no gradient recording) ----
    cls_metric_test.reset()
    box_metric_test.reset()
    test_loss = 0
    for batch in test_loader:
        loss, class_predictions, box_predictions, box_target, box_mask, cls_target = _forward_and_loss(batch)
        test_loss += nd.sum(loss).asscalar()
        # update metrics
        _update_metrics(cls_metric_test, box_metric_test, class_predictions, box_predictions,
                        box_target, box_mask, cls_target)
    name1_test, val1_test = cls_metric_test.get()
    name2_test, val2_test = box_metric_test.get()
    print('epoch:%3d;\t train:%.6e;%f;%.6e;\t test:%.6e;%f;%.6e'
          %(epoch, train_loss/len(train_dataset), val1_train, val2_train, test_loss/len(test_dataset), val1_test, val2_test))

    net.save_parameters('process/ssd_%d.params' % epoch)

## Test

### Prepare the test data

In [None]:
# Take the first test sample and turn it into a batch of one.
test_image, test_label = test_dataset[0]
# NOTE(review): train_transform (not test_transform) is applied to a test
# image here — confirm this is intended.
test_image2, test_label2 = train_transform(test_image, test_label)
# Add a leading batch axis so the network can consume the single image.
test_image2 = nd.expand_dims(test_image2,0)
print('tensor shape:', test_image2.shape)

### Network inference

In [None]:
# Forward pass on the training context; yields anchors, class scores and box offsets.
anchors, cls_preds, box_preds = net(test_image2.as_in_context(ctx))

### Convert predictions to real object detection results

In [None]:
from mxnet.contrib.ndarray import MultiBoxDetection
# Move the class axis to position 1 and normalise the scores over it.
# nd.softmax(axis=1) replaces the deprecated nd.SoftmaxActivation(mode='channel'),
# which also normalises over axis 1.
cls_probs = nd.softmax(nd.transpose(cls_preds, (0, 2, 1)), axis=1)
# Decode offsets against the anchors and run non-maximum suppression across
# all classes (force_suppress), keeping at most 250 candidates for NMS.
output = MultiBoxDetection(cls_prob=cls_probs, loc_pred=box_preds, anchor=anchors, force_suppress=True, clip=True, nms_topk=250)

### Display results

In [None]:
# Label text indexed by detected class id. The original `('cluster')` was a
# plain string (parentheses alone do not make a tuple), so `class_names[cid]`
# returned a single character. Class id 0 is never drawn by `display`, so its
# slot is only a placeholder.
# NOTE(review): assumes class id 1 is the 'cluster' class — confirm against
# train_dataset.classes.
class_names = ('class_0', 'cluster')
def display(img, out, thresh=0.5):
    """Show an image and draw the detections whose score reaches ``thresh``.

    Args:
        img: NDArray holding a single image as (1, channels, height, width).
        out: Iterable of detection rows ``[class_id, score, xmin, ymin, xmax,
            ymax]`` with box coordinates relative to the image size.
        thresh: Minimum score for a detection to be drawn.

    Rows with a class id of 0 are skipped as in the original code; rows with a
    negative class id (used by MultiBoxDetection for suppressed or invalid
    entries) are skipped as well.
    """
    import matplotlib as mpl
    import numpy as np
    mpl.rcParams['figure.figsize'] = (10,10)
    # (1, C, H, W) -> (H, W, C, 1) -> (H, W, C)
    img = np.squeeze(np.transpose(img.asnumpy(), (2, 3, 1, 0)))
    plt.clf()
    plt.imshow(img)
    # Relative coordinates are scaled by (width, height, width, height).
    scales = [img.shape[1], img.shape[0]] * 2
    for det in out:
        cid = int(det[0])
        if cid <= 0:
            continue
        score = det[1]
        if score < thresh:
            continue
        xmin, ymin, xmax, ymax = [int(p * s) for p, s in zip(det[2:6].tolist(), scales)]
        rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False,
                             edgecolor='red', linewidth=3)
        plt.gca().add_patch(rect)
        # Fall back to the numeric id when no name is registered for it.
        text = class_names[cid] if cid < len(class_names) else str(cid)
        plt.gca().text(xmin, ymin-2, '{:s} {:.3f}'.format(text, score),
                       bbox=dict(facecolor='red', alpha=0.5),
                       fontsize=12, color='white')

display(test_image2, output[0].asnumpy(), thresh=0.52)