In [None]:
# Load IPython's autoreload extension and set it to mode 2 (reload modules before running code).
%load_ext autoreload
%autoreload 2

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
#export
from exp.nb_12 import *

## Imagenet(te) training

In [None]:
# Location of the Imagenette (320px variant) dataset.
# NOTE(review): `untar_data` comes from an earlier notebook; presumably it downloads/extracts
# the archive and returns the local folder — confirm.
path = datasets.untar_data(datasets.URLs.IMAGENETTE_320)

In [None]:
# Side length handed to both the training crop and the validation CenterCrop below.
size = 128
# Previous training pipeline, kept for reference (disabled):
#tfms = [make_rgb, RandomResizedCrop(128,scale=(0.35,1)), to_byte_tensor, to_float_tensor, PilRandomFlip()]
# Training transforms, listed in the order they are given to the item list.
tfms = [make_rgb, PilTiltRandomCrop(size, 160, magnitude=0.2), to_byte_tensor, to_float_tensor, PilRandomFlip()]
il = ImageItemList.from_files(path, tfms=tfms)
# Presumably splits on the grandparent folder name, with 'val' marking validation files — confirm.
sd = SplitData.split_by_func(il, partial(grandparent_splitter, valid_name='val'))
ll = label_by_func(sd, parent_labeler)

# Override the validation transforms: this list has no random crop or flip entries.
ll.valid.x.tfms = [make_rgb, CenterCrop(size), to_byte_tensor, to_float_tensor]

In [None]:
# Batch size; also feeds the learning-rate scaling in the "Train" section.
# NOTE(review): the recorded training log further down (~15s per epoch) looks unlikely at
# this batch size — confirm which `bs` actually produced those outputs.
bs=2

train_dl,valid_dl = get_dls(ll.train,ll.valid,bs, num_workers=4)
# The positional 3 and 10 are presumably input channels and number of classes — TODO confirm
# against DataBunch's signature.
data = DataBunch(train_dl, valid_dl, 3, 10)

## XResNet

In [None]:
#export
def noop(x):
    "Identity function: hand `x` back untouched."
    return x

class Flatten(nn.Module):
    """Collapse every dimension after the first into a single one.

    An input of shape `(d0, d1, ..., dk)` comes out as `(d0, d1*...*dk)`.
    """
    def forward(self, x):
        # `reshape` returns a view whenever `view` would have worked, and falls back to a
        # copy for non-contiguous inputs (e.g. transposed / permuted tensors) where `view`
        # raises a RuntimeError.
        return x.reshape(x.size(0), -1)

def conv(ni, nf, ks=3, stride=1, bias=False):
    """Create a 2d convolution from `ni` to `nf` channels.

    The kernel is `ks` wide, padding is `ks//2` on each side, and the bias term
    is disabled unless `bias` is set.
    """
    half_kernel = ks // 2
    return nn.Conv2d(
        in_channels=ni,
        out_channels=nf,
        kernel_size=ks,
        stride=stride,
        padding=half_kernel,
        bias=bias,
    )

In [None]:
#export
# One module-level in-place ReLU instance: `conv_layer` appends this same object to every
# activated layer, and `ResBlock.forward` calls it directly on the residual sum.
act_fn = nn.ReLU(inplace=True)

def init_cnn(m, a=0):
    """Initialise `m` and all of its descendants in place.

    Any module with a non-None `bias` gets it zeroed; `nn.Conv2d` / `nn.Linear`
    weights are drawn with `kaiming_normal_` using negative slope `a`.
    """
    bias = getattr(m, 'bias', None)
    if bias is not None:
        nn.init.constant_(bias, 0)
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        nn.init.kaiming_normal_(m.weight, a=a)
    # Depth-first walk over the sub-modules.
    for child in m.children():
        init_cnn(child, a)

def conv_layer(ni, nf, ks=3, stride=1, zero_bn=False, act=True):
    """Build a `conv -> BatchNorm2d [-> act_fn]` stack as an `nn.Sequential`.

    zero_bn: start the batchnorm weight at 0 instead of 1.
    act:     append the shared `act_fn` after the batchnorm.
    """
    norm = nn.BatchNorm2d(nf)
    bn_weight = 0. if zero_bn else 1.
    nn.init.constant_(norm.weight, bn_weight)
    modules = [conv(ni, nf, ks, stride=stride), norm]
    if act:
        modules.append(act_fn)
    return nn.Sequential(*modules)

In [None]:
#export
class ResBlock(nn.Module):
    """Residual block computing `act_fn(convs(x) + shortcut(x))`.

    expansion: channel multiplier. With 1 the conv path is a 1x1 layer followed by a
               3x3 layer; otherwise it is a 1x1 / 3x3 / 1x1 bottleneck.
    ni, nh:    input / hidden widths *before* expansion; the block maps
               `ni*expansion` channels to `nh*expansion` channels.
    stride:    stride of the 3x3 conv; the shortcut is average-pooled by the same factor.

    The last batchnorm of the conv path starts with weight 0 (`zero_bn=True`).
    Parameter names/shapes are unchanged from earlier versions of this block, so
    existing state dicts still load.
    """
    def __init__(self, expansion, ni, nh, stride=1):
        super().__init__()
        nf,ni = nh*expansion,ni*expansion
        layers  = [conv_layer(ni, nh, 1)]
        layers += [
            conv_layer(nh, nf, 3, stride=stride, zero_bn=True, act=False)
        ] if expansion==1 else [
            conv_layer(nh, nh, 3, stride=stride),
            conv_layer(nh, nf, 1, zero_bn=True, act=False)
        ]
        self.convs = nn.Sequential(*layers)
        # Projection shortcut is conv+BN only: a ReLU here would clamp the identity path
        # before the residual sum.
        self.idconv = noop if ni==nf else conv_layer(ni, nf, 1, act=False)
        # The strided 3x3 conv (padding 1) outputs ceil(H/stride); `ceil_mode=True` makes the
        # pooled shortcut match on odd spatial sizes, and pooling by `stride` (not a fixed 2)
        # keeps the two paths aligned for any stride.
        self.pool = noop if stride==1 else nn.AvgPool2d(stride, ceil_mode=True)

    def forward(self, x):
        # Pool first, then project: the 1x1 conv runs on the already-downsampled map.
        return act_fn(self.convs(x) + self.idconv(self.pool(x)))

In [None]:
#export
class XResNet(nn.Sequential):
    """Sequential ResNet variant assembled from `conv_layer` and `ResBlock`.

    Layout: three 3x3 stem conv layers (`c_in`->32->32->64, only the first has stride 2),
    a max-pool, one stage of `ResBlock`s per entry of `layers`, then adaptive average
    pooling, `Flatten` and a linear layer with `c_out` outputs. `init_cnn` is applied
    to the whole network at the end of construction.
    """
    def __init__(self, expansion, layers, c_in=3, c_out=1000):
        stem_widths = [c_in, 32, 32, 64]
        stem = [conv_layer(n_in, n_out, stride=2 if idx==0 else 1)
                for idx,(n_in,n_out) in enumerate(zip(stem_widths[:-1], stem_widths[1:]))]

        widths = [64//expansion, 64, 128, 256, 512]
        stages = []
        for idx,n_blocks in enumerate(layers):
            # Only the first stage keeps the resolution; later stages downsample by 2.
            stage_stride = 1 if idx==0 else 2
            stages.append(self._make_layer(expansion, widths[idx], widths[idx+1], n_blocks, stage_stride))

        head = [nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(widths[-1]*expansion, c_out)]
        super().__init__(*stem, nn.MaxPool2d(kernel_size=3, stride=2, padding=1), *stages, *head)
        init_cnn(self)

    def _make_layer(self, expansion, ni, nf, blocks, stride):
        "Stack `blocks` ResBlocks; only the first one sees `ni` input width and `stride`."
        res_blocks = []
        for idx in range(blocks):
            is_first = idx == 0
            res_blocks.append(
                ResBlock(expansion, ni if is_first else nf, nf, stride if is_first else 1))
        return nn.Sequential(*res_blocks)

def xresnet18(**kwargs):
    "XResNet with expansion 1 and stage depths [2, 2, 2, 2]; `kwargs` go to `XResNet`."
    return XResNet(1, [2, 2, 2, 2], **kwargs)

def xresnet34(**kwargs):
    "XResNet with expansion 1 and stage depths [3, 4, 6, 3]; `kwargs` go to `XResNet`."
    return XResNet(1, [3, 4, 6, 3], **kwargs)

def xresnet50(**kwargs):
    "XResNet with expansion 4 and stage depths [3, 4, 6, 3]; `kwargs` go to `XResNet`."
    return XResNet(4, [3, 4, 6, 3], **kwargs)

def xresnet101(**kwargs):
    "XResNet with expansion 4 and stage depths [3, 4, 23, 3]; `kwargs` go to `XResNet`."
    return XResNet(4, [3, 4, 23, 3], **kwargs)

def xresnet152(**kwargs):
    "XResNet with expansion 4 and stage depths [3, 8, 36, 3]; `kwargs` go to `XResNet`."
    return XResNet(4, [3, 8, 36, 3], **kwargs)

## Train

In [None]:
# Base learning rate of 4e-3, scaled linearly with the batch size relative to 256.
lr = 4e-3 * bs/256
# Each schedule is built from two phases weighted [0.5,0.5].
# NOTE(review): `cos_1cycle_anneal` arguments look like (start, peak, end) — confirm
# against its definition in the earlier notebooks.
sched_lr  = combine_scheds([0.5,0.5], cos_1cycle_anneal(lr/10.,lr, 0))
sched_mom = combine_scheds([0.5,0.5], cos_1cycle_anneal(0.95,0.85, 0.95))

In [None]:
# Callback factories handed to the Learner via `cb_funcs`: stats/progress reporting, GPU
# transfer, input normalisation, MixUp (alpha=0.2), and the 'lr' / 'mom' schedulers.
# NOTE(review): the callback semantics live in earlier notebooks — the descriptions above are
# read off the names; confirm if the order matters.
cbfs = [partial(AvgStatsCallback,accuracy), ProgressCallback, CudaCallback,
        partial(BatchTransformXCallback, norm_imagenette),
        partial(MixUp, alpha=0.2), 
        partial(ParamScheduler, 'lr', sched_lr),
        partial(ParamScheduler, 'mom', sched_mom)]

In [None]:
# Running statistics passed to the optimizer through `stats=`.
stats = [AverageGrad(dampening=True), AverageSqrGrad(), StepCount()]
loss_func = LabelSmoothingCrossEntropy()
# Model factory: xresnet34 with 10 outputs (matches the 10 passed to DataBunch above).
arch = partial(xresnet34, c_out=10)
# Optimizer factory: StatefulOptimizer driven by AdamStep with the hyper-parameters below.
opt_func = partial(StatefulOptimizer, steppers=AdamStep(), stats=stats,
               mom=0.9, mom_sqr=0.99, eps=1e-7)

In [None]:
# Build a fresh model with `arch()` and wrap it, the data, loss, callbacks and optimizer
# factory in a Learner.
learn = Learner(arch(), data, loss_func, lr=lr, cb_funcs=cbfs, opt_func=opt_func)

In [None]:
#export
def get_batch(dl, learn):
    """Pull the first batch from `dl` and push it through `learn`'s batch setup.

    The batch is stored on `learn.xb` / `learn.yb`, then `do_begin_fit(0)` and the
    'begin_batch' and 'after_fit' events are fired in that order. Returns whatever
    `learn.xb, learn.yb` hold afterwards (callbacks may have replaced them).
    """
    first_batch = next(iter(dl))
    learn.xb, learn.yb = first_batch
    learn.do_begin_fit(0)
    for event in ('begin_batch', 'after_fit'):
        learn(event)
    return learn.xb, learn.yb

def model_summary(model, find_all=False, learn=None, data=None):
    """Run one validation batch through `model` and print each hooked module's output shape.

    model:    module to summarise; this is the module whose forward pass is executed.
    find_all: if True, hook every module returned by `find_modules(model, is_lin_layer)`;
              otherwise hook only `model.children()`.
    learn:    learner that `get_batch` uses to prepare the batch. When omitted, a
              module-level `learn` is used (the behaviour earlier callers relied on).
    data:     object exposing `valid_dl`. When omitted, a module-level `data` is used.

    Raises NameError if `learn` / `data` are omitted and no such global is defined,
    e.g. when this function is imported from the exported module.
    """
    if learn is None or data is None:
        scope = globals()
        missing = [name for name,val in (('learn', learn), ('data', data))
                   if val is None and name not in scope]
        if missing:
            raise NameError("model_summary: no global " + ", ".join(missing)
                            + " defined; pass it explicitly")
        if learn is None: learn = scope['learn']
        if data is None: data = scope['data']
    xb,_ = get_batch(data.valid_dl, learn)
    mods = find_modules(model, is_lin_layer) if find_all else model.children()
    f = lambda hook,mod,inp,out: print(out.shape)
    # Run `model` itself rather than `learn.model`, so the hooks registered on `model`'s
    # modules fire even when the two are different objects.
    with Hooks(mods, f): model(xb)

In [None]:
# Move the model to the GPU before the summary's forward pass (needs a CUDA device).
learn.model = learn.model.cuda()
# With the default `find_all=False` this prints one output shape per top-level child.
model_summary(learn.model)

epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time


torch.Size([4, 32, 64, 64])
torch.Size([4, 32, 64, 64])
torch.Size([4, 64, 64, 64])
torch.Size([4, 64, 32, 32])
torch.Size([4, 64, 32, 32])
torch.Size([4, 128, 16, 16])
torch.Size([4, 256, 8, 8])
torch.Size([4, 512, 4, 4])
torch.Size([4, 512, 1, 1])
torch.Size([4, 512])
torch.Size([4, 10])


In [None]:
# Train for 5 epochs (the recorded log below lists epochs 0-4).
learn.fit(5)

epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,1.939471,0.395067,1.567447,0.578,00:15
1,1.581493,0.592834,1.441457,0.606,00:15
2,1.457034,0.655266,1.150521,0.736,00:14
3,1.337099,0.715216,1.046517,0.782,00:15
4,1.244254,0.768032,0.939585,0.84,00:15


## Export

In [None]:
# Convert this notebook into exp/nb_13.py (see the recorded output below); presumably only
# the cells tagged `#export` are written out — confirm in notebook2script.py.
!./notebook2script.py 13_train_imagenette.ipynb

Converted 13_train_imagenette.ipynb to exp/nb_13.py
