<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Building-&amp;-training-the-model" data-toc-modified-id="Building-&amp;-training-the-model-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Building &amp; training the model</a></span></li></ul></div>

# Building & training the model

In [None]:
import os

import torch
from fastai.vision.all import *


### Params ###
im_size      = 224   # target size handed to Resize when building the DataLoaders
batch_size   = 64    # intended DataLoaders batch size
# Dataset root. Set the COCO2017_DIR environment variable to run the notebook on
# another machine; when it is unset the original location is used unchanged.
path         = Path(os.environ.get('COCO2017_DIR', '/home/rory/data/coco2017'))
# Annotation files and image folders, all relative to `path`.
train_json   = 'annotations/instances_train2017.json'
valid_json   = 'annotations/instances_val2017.json'
train_im_dir = 'train2017'
valid_im_dir = 'val2017'


### Get files and annos ###
def get_annos(path, anno_file, im_folder):
    """Load the annotation file `path/anno_file` and resolve its image paths.

    Returns a pair `(files, annos)`:
    - `files`: an `L` holding `path/im_folder/<name>` for every name returned
      by `get_annotations`;
    - `annos`: the second value returned by `get_annotations`, untouched.
    """
    fnames, annos = get_annotations(path/anno_file)
    im_dir = path/im_folder
    return L(im_dir/fname for fname in fnames), annos
# Read the train and validation splits separately...
train_files, train_annos = get_annos(path, train_json, train_im_dir)
valid_files, valid_annos = get_annos(path, valid_json, valid_im_dir)
# ...then pool them; `files[i]` and `annos[i]` stay aligned.
files = train_files + valid_files
annos = train_annos + valid_annos
# Every anno is a (bboxes, labels) pair; split them into two parallel lists.
bboxes = [im_bboxes for im_bboxes, _ in annos]
lbls   = [im_lbls for _, im_lbls in annos]


### Get largest anno ###
def transpose(anno):
    "Regroup the sequences in `anno` so their i-th items end up together in one tuple (cf. `tensor.t()`)."
    return [*zip(*anno)]
def bbox_area(transposed_anno):
    """Area of the box held in the first slot of `transposed_anno`.

    The box is indexed at positions 0-3 and the result is
    `(box[2]-box[0]) * (box[3]-box[1])`.
    """
    box = transposed_anno[0]
    # assumes the box is ordered [x1, y1, x2, y2] -- TODO confirm against get_annotations
    extent_x = box[2] - box[0]
    extent_y = box[3] - box[1]
    return extent_x * extent_y
def sort_annos(o):
    "Transpose annotation `o` into a list of pairs and order it by `bbox_area`, largest first."
    pairs = transpose(o)
    pairs.sort(key=bbox_area, reverse=True)
    return pairs
# Order each image's annotations by box area (largest first), keep the top one.
sorted_annos = L(annos).map(sort_annos)
largest_anno = sorted_annos.itemgot(0)
# The kept annotation is indexed as (bbox, label).
largest_bbox = largest_anno.itemgot(0)
largest_lbl  = largest_anno.itemgot(1)
# get_xyz helpers (used in following sections)
# Lookup tables: image path -> label / bbox of that image's largest annotation.
files2lbl  = dict(zip(files, largest_lbl))
files2bbox = dict(zip(files, largest_bbox))
def get_lbl(f):
    "Look up the largest-annotation label recorded for image path `f`."
    return files2lbl[f]
def get_bbox(f):
    "Look up the largest-annotation bbox recorded for image path `f`."
    return files2bbox[f]


### Get singles ###
# identify singles: images that carry exactly one label
lbls_per_im = L(len(l) for l in lbls)
tuples = L(zip(files, largest_lbl, largest_bbox))
singles = tuples[lbls_per_im.map(lambda n:n==1)]
singles_tp = transpose(singles)
# Group the single-object image paths by label in ONE pass over `singles`
# (previously every label triggered a full rescan of all singles).
# Uses the label stored in each tuple, which equals get_lbl(path) as long as
# every path occurs only once in `files`.
lbl2paths = {}
for single in singles:
    lbl2paths.setdefault(single[1], []).append(single[0])
# identify lbls with MORE than `min_singles` singles (strict `>`, as before)
min_singles = 500
lbl_subset = [lbl for lbl, paths in lbl2paths.items() if len(paths) > min_singles]
# create subset of ims in lbl_subset; a set keeps each membership test O(1)
keep_lbls = set(lbl_subset)
subset = L(s for s in singles if s[1] in keep_lbls)
files_subset = L(i[0] for i in subset)

In [None]:
### Datasets & DataLoaders ###
# dss for im,bb,lbl: one transform pipeline per element of an item,
# each pipeline starting from the image path
dss_tfms = [[PILImage.create],
            [get_bbox, TensorBBox.create],
            [get_lbl, Categorize()]]
# Seeded so the 15% validation split is identical on every Restart & Run All;
# without a seed the split (and every reported metric) changes between runs.
splits = RandomSplitter(.15, seed=42)(files_subset)
dss = Datasets(files_subset, tfms=dss_tfms, splits=splits)

In [None]:
# Experimental (currently disabled): dss yielding (im, LabeledBBox) instead of (im, bb, lbl)
# def get_labeledbbox(p):
#     return LabeledBBox(TensorBBox([get_bbox(p)]), [get_lbl(p)])
# dss_tfms = [[PILImage.create], [get_labeledbbox]]
# splits = RandomSplitter(.15)(files_subset)
# dss = Datasets(files_subset, tfms=dss_tfms, splits=splits)

In [None]:
# Peek at the first training item; elements follow the order of dss_tfms
# (image, bbox, category).
dss.train[0]

(PILImage mode=RGB size=640x427,
 TensorBBox([[ 92.1200, 113.2300, 640.0000, 361.7500]]),
 TensorCategory(7))

In [None]:
# dls
# Transforms passed as `after_item` (run on each item).
cpu_tfms = [BBoxLabeler(), PointScaler(), Resize(im_size, method=ResizeMethod.Squish), ToTensor()]
# Transforms passed as `after_batch` (run on each batch).
# NOTE(review): Normalize() is given no explicit stats; with a resnet34 encoder,
# Normalize.from_stats(*imagenet_stats) may be what is wanted -- confirm.
gpu_tfms = [IntToFloatTensor(), Normalize()]
# Use the `batch_size` param from the Params section instead of repeating the
# literal 64, so changing the param actually changes the DataLoaders.
dls = dss.dataloaders(bs=batch_size, after_item=cpu_tfms, after_batch=gpu_tfms)

In [None]:
# Grab one batch (three elements, unpacked as x, y, z) and display its first item.
x, y, z = dls.one_batch()
i = 0
x[i], y[i], z[i]

(tensor([[[ 2.0032,  2.0184,  2.0335,  ..., -0.8235, -0.7782, -0.6421],
          [ 1.9881,  2.0184,  2.0335,  ..., -0.8991, -0.8235, -0.7328],
          [ 1.9730,  2.0184,  2.0335,  ..., -0.9596, -0.8689, -0.7933],
          ...,
          [-1.4282, -1.3828, -1.3375,  ..., -1.0200, -1.1258, -1.2014],
          [-1.4735, -1.3828, -1.3375,  ..., -0.9747, -1.0654, -1.1410],
          [-1.4433, -1.3526, -1.3072,  ..., -0.9596, -1.0049, -1.0654]],
 
         [[ 1.9981,  1.9981,  1.9981,  ..., -0.5758, -0.5167, -0.3983],
          [ 1.9833,  1.9833,  1.9981,  ..., -0.6942, -0.6054, -0.5167],
          [ 1.9537,  1.9833,  1.9981,  ..., -0.7681, -0.6794, -0.5906],
          ...,
          [-1.3450, -1.3303, -1.3007,  ..., -1.0936, -1.2119, -1.2267],
          [-1.3598, -1.3303, -1.2859,  ..., -1.0492, -1.1527, -1.1823],
          [-1.3894, -1.3155, -1.2859,  ..., -1.0048, -1.0936, -1.1232]],
 
         [[ 1.8727,  1.8859,  1.8859,  ..., -0.4608, -0.4873, -0.3415],
          [ 1.8727,  1.8727,

In [None]:
### Model & Loss ### (structure based on the Siamese example)
class custom_module(Module):
    "Chain two modules: `head` is applied to whatever `encoder` returns."
    def __init__(self, encoder, head):
        self.encoder = encoder
        self.head = head

    def forward(self, x):
        "Run `x` through the encoder, then feed the result to the head."
        return self.head(self.encoder(x))

In [None]:
# Encoder built from resnet34.
enc = create_body(resnet34)
# NOTE(review): nf=512*2*2 and n_out=2 look carried over from the Siamese
# example; a single-encoder bbox regressor would presumably need 4 outputs and a
# different nf -- confirm against the installed fastai version before training.
head = create_head(nf=512*2*2, n_out=2, ps=0.5)
mod = custom_module(encoder=enc, head=head)

In [None]:
### Train ###
# NOTE(review): `learn` is not created in any cell visible here; on a fresh
# kernel this cell raises NameError until a Learner is built from `dls`.
# NOTE(review): the two-value unpacking assumes lr_find returns two suggestions;
# confirm against the installed fastai version.
lr_min, _ = learn.lr_find()

# fit_one_cycle has no `lr` keyword -- the peak learning rate is `lr_max`.
learn.fit_one_cycle(10, lr_max=lr_min) # valid_loss of .0786 after 9 epochs

### Showing results ###
def _descale(x,sz): return (x+1)*sz//2
def show_learner_results(learner, n=4, nrows=1, ncols=4, sz=224):
    """Plot up to `n` items of one batch with their target and predicted boxes.

    learner: object exposing `dls`, `model` and `loss_func`.
    n, nrows, ncols: number of items to draw and the layout handed to `get_grid`.
    sz: size passed to `_descale` to map values back to pixel units.

    Each image is titled with its individual loss; the target box is drawn in
    magenta and the prediction in the default colour.
    """
    xb, yb = learner.dls.one_batch()
    # Only the first `n` items are drawn, so only those go through the model.
    xb, yb = xb[:n], yb[:n]

    model = learner.model
    was_training = model.training
    model.eval()
    try:
        # No autograd graph is needed for plotting; skipping it avoids the GPU
        # memory blow-up and yields predictions that need no detach.
        with torch.no_grad():
            yp = model(xb)
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)
    xb, yb, yp = xb.cpu(), yb.cpu(), yp.cpu()

    ctxs = get_grid(n, nrows, ncols)
    for i,ctx in enumerate(ctxs):
        # A batch can hold fewer than `n` items; stop instead of raising IndexError.
        if i >= len(xb): break
        im, actual, forecast  = xb[i], yb[i], yp[i]
        # Loss functions take (prediction, target), in that order.
        loss = learner.loss_func(forecast, actual).item()

        # NOTE(review): the image is de-scaled with the coordinate helper after an
        # int cast, which only gives a rough rendering of the normalized image --
        # confirm whether decoding through the DataLoaders is wanted instead.
        im = F.relu(_descale(im.int(),sz))
        actual = TensorBBox(_descale(actual,sz))
        forecast = TensorBBox(_descale(forecast,sz))

        show_image(im, ctx=ctx, title=f'Loss: {round(loss,4)}')
        actual.show(ctx=ctx, color='magenta')
        forecast.show(ctx=ctx)
# Draw targets vs. predictions for `learn` using the default 1x4 grid.
show_learner_results(learn)