<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Building-&amp;-training-the-model" data-toc-modified-id="Building-&amp;-training-the-model-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Building &amp; training the model</a></span><ul class="toc-item"><li><span><a href="#Singles" data-toc-modified-id="Singles-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Singles</a></span></li><li><span><a href="#DataLoaders" data-toc-modified-id="DataLoaders-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>DataLoaders</a></span></li><li><span><a href="#Here-on-down-I'm-exploring-how-my-custom-loss_func-must-be-formatted." data-toc-modified-id="Here-on-down-I'm-exploring-how-my-custom-loss_func-must-be-formatted.-1.3"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>Here on down I'm exploring how my custom loss_func must be formatted.</a></span></li></ul></li></ul></div>

# Building & training the model

## Singles

In [None]:
from fastai.vision.all import *


### Params ###
# Side length (in pixels) that images are resized to by the Resize transform.
im_size      = 224
# Intended batch size for the DataLoaders.
batch_size   = 64
# Root of the COCO 2017 download; the annotation files and image folders below are joined onto it.
# NOTE(review): absolute, machine-specific path — adjust for your environment.
path         = Path('/home/rory/data/coco2017')
# Annotation JSON files, relative to `path`.
train_json   = 'annotations/instances_train2017.json'
valid_json   = 'annotations/instances_val2017.json'
# Image folders, relative to `path`.
train_im_dir = 'train2017'
valid_im_dir = 'val2017'


### Get files and annos ###
def get_annos(path, anno_file, im_folder):
    """Read an annotation file and return (image paths, annotations).

    path:      dataset root directory.
    anno_file: annotation file, relative to `path`.
    im_folder: image folder, relative to `path`.

    The image names returned by `get_annotations` are turned into full paths
    under `path/im_folder`; the annotations are passed through untouched.
    """
    im_names, im_annos = get_annotations(path/anno_file)
    im_dir = path/im_folder
    return L(im_names).map(lambda name: im_dir/name), im_annos
# Read both splits, then pool them: the notebook makes its own train/valid split later.
train_files, train_annos = get_annos(path, train_json, train_im_dir)
valid_files, valid_annos = get_annos(path, valid_json, valid_im_dir)
files = train_files + valid_files
annos = train_annos + valid_annos
# Each anno is indexed as (bboxes, labels); pull the two components apart.
bboxes = [anno[0] for anno in annos]
lbls   = [anno[1] for anno in annos]


### Get largest anno ###
def transpose(anno):
    """Swap rows and columns of a nested sequence (like tensor.t()).

    E.g. ([b1, b2], [l1, l2]) -> [(b1, l1), (b2, l2)].
    """
    return [*zip(*anno)]
def bbox_area(transposed_anno):
    """Area of the bounding box held in the first slot of a (bbox, label) pair.

    The bbox is read as four numbers and the area is
    (box[2] - box[0]) * (box[3] - box[1]).
    Presumably the layout is [x1, y1, x2, y2], i.e. width * height — verify
    against the output of get_annotations.
    """
    box = transposed_anno[0]
    side_a = box[2] - box[0]
    side_b = box[3] - box[1]
    return side_a * side_b
def sort_annos(o):
    """Return one image's (bbox, label) pairs ordered from largest to smallest bbox area."""
    pairs = transpose(o)
    pairs.sort(key=bbox_area, reverse=True)
    return pairs
# Per image: (bbox, label) pairs sorted by area, then keep only the first (largest) pair.
sorted_annos = L(annos).map(sort_annos)
largest_anno = sorted_annos.map(lambda pairs: pairs[0])
largest_bbox = largest_anno.map(lambda pair: pair[0])
largest_lbl  = largest_anno.map(lambda pair: pair[1])
# get_xyz helpers (used in following sections)
files2lbl  = dict(zip(files, largest_lbl))
files2bbox = dict(zip(files, largest_bbox))
def get_lbl(f):
    """Look up the label of the largest annotated object for image path `f`."""
    return files2lbl[f]
def get_bbox(f):
    """Look up the largest annotated bounding box for image path `f`."""
    return files2bbox[f]


### Get singles ###
# identify singles
# Singles are images whose annotation holds exactly one object.
lbls_per_im = L(len(l) for l in lbls)
tuples = L(zip(files, largest_lbl, largest_bbox))
singles = tuples[lbls_per_im.map(lambda n:n==1)]
singles_tp = transpose(singles)
# Group the single-object image paths by label in one pass. Each tuple already
# carries its own label, so there is no need to rescan every path once per label.
lbl2paths = {}
for single in singles:
    lbl2paths.setdefault(single[1], []).append(single[0])
# identify lbls with more than 500 singles
lbl_subset = [lbl for lbl, paths in lbl2paths.items() if len(paths) > 500]
# create subset of ims in lbl_subset
subset = L(s for s in singles if s[1] in lbl_subset)
files_subset = L(i[0] for i in subset)

## DataLoaders

In [None]:
### Datasets & DataLoaders ###
# dss for im,bb,lbl
# One transform pipeline per output: image, bbox, label (all start from the file path).
dss_tfms = [[PILImage.create],
            [get_bbox, TensorBBox.create],
            [get_lbl, Categorize()]]
# Fixed seed so the train/valid split is identical on every Restart & Run All.
splits = RandomSplitter(.15, seed=42)(files_subset)
dss = Datasets(files_subset, tfms=dss_tfms, splits=splits)
dss.train[0]

(PILImage mode=RGB size=640x482,
 TensorBBox([[359.8800, 262.5600, 529.5500, 433.3100]]),
 TensorCategory(1))

In [None]:
# dls
# Item-level transforms (passed as after_item) and batch-level transforms (passed as after_batch).
cpu_tfms = [BBoxLabeler(), PointScaler(),
            Resize(im_size, method=ResizeMethod.Squish), ToTensor()]
gpu_tfms = [IntToFloatTensor(), Normalize.from_stats(*imagenet_stats)]
# Use the notebook-level `batch_size` param rather than repeating the literal 64.
dls = dss.dataloaders(bs=batch_size, after_item=cpu_tfms, after_batch=gpu_tfms, n_inp=1)

x,y,z = dls.one_batch()
x.shape, y.shape, z.shape

(torch.Size([64, 3, 224, 224]), torch.Size([64, 1, 4]), torch.Size([64]))

In [None]:
### Datasets & DataLoaders ###
# dss for im,labeledbbox
# Label encoder whose vocab is built from the labels kept in lbl_subset.
categorize = Categorize(lbl_subset)
def get_labeledbbox(p):
    """Build the LabeledBBox target (largest bbox plus its encoded label) for image path `p`."""
    bbox = TensorBBox.create(get_bbox(p))
    lbl = categorize(get_lbl(p))
    return LabeledBBox(bbox, lbl)
# Two outputs per item: the image and a single LabeledBBox target.
dss_tfms = [[PILImage.create], [get_labeledbbox]]
# Fixed seed so the train/valid split is identical on every Restart & Run All.
splits = RandomSplitter(.15, seed=42)(files_subset)
dss = Datasets(files_subset, tfms=dss_tfms, splits=splits)

fastai.vision.core.LabeledBBox

In [None]:
# dls for im,labeledbbox
# Item-level transforms (passed as after_item) and batch-level transforms (passed as after_batch).
cpu_tfms = [PointScaler(), Resize(im_size, method=ResizeMethod.Squish), ToTensor()]
gpu_tfms = [IntToFloatTensor(), Normalize.from_stats(*imagenet_stats)]
# Use the notebook-level `batch_size` param rather than repeating the literal 64.
dls = dss.dataloaders(bs=batch_size, after_item=cpu_tfms, after_batch=gpu_tfms)

In [None]:
# The batch unpacks into two elements; y is then indexed as a pair
# (presumably bboxes at y[0] and labels at y[1] — see the printed shapes).
x,y = dls.one_batch()
print(x.shape, y[0].shape, y[1].shape)

torch.Size([64, 3, 224, 224]) torch.Size([64, 1, 4]) torch.Size([64])


In [None]:
### Model ###
class custom_module(Module):
    """Chain a `body` module and a `head` module: the output of body feeds head."""
    def __init__(self, body, head):
        self.body = body
        self.head = head

    def forward(self, x):
        """Run `x` through the body, then through the head."""
        features = self.body(x)
        return self.head(features)
# NOTE(review): create_body's first argument and create_head's `nf` convention have
# differed between fastai releases — confirm `resnet34` and 1024 against the installed version.
body = create_body(resnet34, pretrained=True)
# Head output size: 4 values (presumably the bbox coordinates) plus one per category in the vocab.
head = create_head(1024, 4+len(categorize.vocab), ps=0.5)
mod = custom_module(body, head)

## Here on down I'm exploring how my custom loss_func must be formatted.

In [None]:
# use this to check shape of y batch
# NOTE(review): MSELossFlat is presumably a placeholder until the custom loss exists — confirm.
learn = Learner(dls, mod, loss_func=MSELossFlat())
b = dls.one_batch()
# _split is a private Learner method; it stores the batch on the learner as learn.xb / learn.yb.
learn._split(b)
learn.yb[0]

(#2) [TensorBBox([[[  1.2500,   0.0000, 640.0000, 480.0000]],

        [[129.4300, 165.3200, 164.7900, 203.8700]],

        [[ 49.9000,  62.3800, 640.0000, 229.3500]],

        [[  3.1600,   2.6400, 480.8300, 420.2000]],

        [[ 99.3000,  40.2600, 605.6600, 362.3200]],

        [[  8.6300, 122.2500, 392.6300, 336.5400]],

        [[410.4400, 125.1100, 516.0900, 238.0100]],

        [[ 30.6300, 111.5600, 404.1600, 562.6800]],

        [[166.1100, 103.5500, 463.8200, 429.3000]],

        [[229.5700, 108.3600, 577.6000, 258.9500]],

        [[216.8600,  85.4000, 492.2500, 421.2400]],

        [[138.0700, 363.8700, 430.0200, 631.3700]],

        [[ 99.9400, 238.2900, 159.7400, 305.7800]],

        [[ 26.9700,   0.0000, 640.0000, 474.6100]],

        [[159.3700,  87.9300, 329.6800, 319.7700]],

        [[180.6900, 144.1400, 366.8900, 195.5000]],

        [[ 90.0400,  38.0100, 409.8900, 591.1000]],

        [[236.2200, 156.2200, 408.8100, 335.2800]],

        [[288.3100, 127.5200, 320.57

In [None]:
# Container type of the targets that _split stored on the learner.
type(learn.yb)

tuple

In [None]:
# Number of target elements; each one becomes a separate positional argument of loss_func.
len(learn.yb)

1

In [None]:
# Type of the first target element.
type(learn.yb[0])

fastai.vision.core.LabeledBBox

In [None]:
# Display the first target element itself.
learn.yb[0]

(#2) [TensorBBox([[[  1.2500,   0.0000, 640.0000, 480.0000]],

        [[129.4300, 165.3200, 164.7900, 203.8700]],

        [[ 49.9000,  62.3800, 640.0000, 229.3500]],

        [[  3.1600,   2.6400, 480.8300, 420.2000]],

        [[ 99.3000,  40.2600, 605.6600, 362.3200]],

        [[  8.6300, 122.2500, 392.6300, 336.5400]],

        [[410.4400, 125.1100, 516.0900, 238.0100]],

        [[ 30.6300, 111.5600, 404.1600, 562.6800]],

        [[166.1100, 103.5500, 463.8200, 429.3000]],

        [[229.5700, 108.3600, 577.6000, 258.9500]],

        [[216.8600,  85.4000, 492.2500, 421.2400]],

        [[138.0700, 363.8700, 430.0200, 631.3700]],

        [[ 99.9400, 238.2900, 159.7400, 305.7800]],

        [[ 26.9700,   0.0000, 640.0000, 474.6100]],

        [[159.3700,  87.9300, 329.6800, 319.7700]],

        [[180.6900, 144.1400, 366.8900, 195.5000]],

        [[ 90.0400,  38.0100, 409.8900, 591.1000]],

        [[236.2200, 156.2200, 408.8100, 335.2800]],

        [[288.3100, 127.5200, 320.57

In [None]:
# from doc(Learner._split) – if n_inp=1, xb = b[:1] and yb = b[1:] (slices of the batch, not single items).
    def _split(self, b):
        # NOTE(review): quoted from fastai at method indentation for reference; this cell
        # is likely not runnable on its own.
        # i = number of leading batch elements treated as inputs: `self.dls.n_inp` when the
        # dls define it; otherwise 1 for a single-element batch, else all but the last element.
        i = getattr(self.dls, 'n_inp', 1 if len(b)==1 else len(b)-1)
        # Both halves are slices of b, so each stays a sequence even with one element.
        self.xb,self.yb = b[:i],b[i:]

# from doc(Learner._do_one_batch). Notice that self.loss_func is passed (pred, *yb). My custom loss will conform.
    def _do_one_batch(self):
        # NOTE(review): quoted from fastai at method indentation for reference; this cell
        # is likely not runnable on its own.
        # Forward pass: the inputs are unpacked into the model as positional arguments.
        self.pred = self.model(*self.xb)
        self('after_pred')
        # The loss is only computed when targets exist; they are unpacked after the prediction.
        if len(self.yb): self.loss = self.loss_func(self.pred, *self.yb) # here it is
        self('after_loss')
        # Stop here when not training or when there are no targets: no backward pass or step.
        if not self.training or not len(self.yb): return
        self('before_backward')
        self._backward()
        self('after_backward')
        self._step()
        self('after_step')
        self.opt.zero_grad()

I'm going to pause for now. I think the way forward is to make my loss function work with the `(im, bb, lbl)` datasets (image, bbox, label as three separate outputs) rather than the `(im, labeledbbox)` datasets.

In [None]:
def labeledbbox_loss(input, target):
    """Placeholder for a combined bbox + label loss; not implemented yet.

    The body holds only `...` placeholders, so calling this currently returns None.
    NOTE(review): presumably `input` is the model prediction and `target` the
    LabeledBBox batch, matching how Learner calls `loss_func(pred, *yb)` —
    confirm when implementing.
    """
    # RMSE
    ...
    # CEL
    ...

In [None]:
### Train ###
# NOTE(review): the two-value unpacking assumes lr_find returns exactly two suggestions —
# confirm for the installed fastai version.
lr_min, _ = learn.lr_find()
# learn.fit_one_cycle(10, lr_max=lr_min)  # fit_one_cycle names this argument `lr_max`, not `lr`
# learn.save(...)

def show_preds(dss=None, inf=None, pipe=None, n=8, offset=0, nrows=2, ncols=4, sz=224):
    """Plot a grid of images with their target bbox and the bbox predicted by `inf`.

    dss:    indexable dataset; each item is passed through `pipe` and must yield (image, bbox).
            Defaults to the notebook-level `dss`.
    inf:    object exposing `.predict(im)`, e.g. a loaded learner.
            Defaults to the notebook-level `inf`.
    pipe:   callable applied to each dataset item (tfms for resizing ims and bbs).
            Defaults to the notebook-level `pipe`.
    n, nrows, ncols: number of axes requested from `get_grid` and the grid layout.
    offset: index of the first dataset item to show.
    sz:     scale applied to the target bbox as `(bb+1)*sz//2`
            (presumably maps [-1, 1] coordinates back to pixels — confirm).

    Raises ValueError if any of dss / inf / pipe is neither passed nor defined globally.
    """
    # Resolve the defaults at call time. Writing them in the signature (`inf=inf`) is
    # evaluated when the `def` statement runs and raises NameError on a fresh kernel
    # where `inf` / `pipe` have not been created yet.
    given = {'dss': dss, 'inf': inf, 'pipe': pipe}
    resolved = {k: (globals().get(k) if v is None else v) for k, v in given.items()}
    missing = [k for k, v in resolved.items() if v is None]
    if missing:
        raise ValueError(f"show_preds needs {', '.join(missing)}: "
                         "pass it explicitly or define it before calling")
    dss, inf, pipe = resolved['dss'], resolved['inf'], resolved['pipe']

    ctxs = get_grid(n, nrows, ncols)
    for i,ctx in enumerate(ctxs):
        im,bb = pipe(dss[i+offset]) # tfms for resizing ims and bbs
        pred = inf.predict(im)
        show_image(im, ctx=ctx)
        # target bbox in the default colour, prediction in magenta
        ((bb+1)*sz//2).show(ctx=ctx)
        pred[0].show(ctx=ctx, color='magenta')
# p = Pipeline([PointScaler(), Resize(im_size, method=ResizeMethod.Squish)])
# inf = load_learner('_20201002_coco_tensorbox_learner_20201005.pkl')
# show_preds(dss, inf, p)