In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from exp.nb_06 import *

## ConvNet

In [4]:
x_train, y_train, x_valid, y_valid = get_data()
x_train, x_valid = normalize_to(x_train, x_valid)

train_ds, valid_ds = Dataset(x_train, y_train), Dataset(x_valid, y_valid)

nh, bs = 50, 512
c = y_train.max().item() + 1
loss_func = F.cross_entropy

data = DataBunch(*get_dls(train_ds, valid_ds, bs), c)

In [5]:
mnist_view = view_tfm(1, 28, 28)
cbfs = [Recorder,
        partial(AvgStatsCallback, accuracy),
        CudaCallback,
        partial(BatchTransformXCallback, mnist_view)]

In [6]:
nfs = [8, 16, 32, 64, 64]

In [8]:
learn, run = get_learn_run(nfs, data, .4, conv_layer, cbs=cbfs)

In [9]:
%time run.fit(2, learn)

train: [0.982319765625, tensor(0.6755, device='cuda:0')]
valid: [0.257804541015625, tensor(0.9209, device='cuda:0')]
train: [0.162429169921875, tensor(0.9506, device='cuda:0')]
valid: [0.10530166015625, tensor(0.9671, device='cuda:0')]
CPU times: user 4.24 s, sys: 2.88 s, total: 7.11 s
Wall time: 30.3 s


## BatchNorm

### Custom

In [41]:
class BatchNorm(nn.Module):
    def __init__(self, nf, mom=.1, eps=1e-5):
        super().__init__()
        self.mom, self.eps = mom, eps
        self.mults = nn.Parameter(torch.ones(nf, 1, 1))
        self.adds = nn.Parameter(torch.zeros(nf, 1, 1))
        self.register_buffer('means', torch.zeros(1, nf, 1, 1))
        self.register_buffer('vars', torch.ones(1, nf, 1, 1))
        
    def update_stats(self, x):
        m = x.mean((0, 2, 3), keepdim=True)
        v = x.var((0, 2, 3), keepdim=True)
        self.means.lerp_(m, self.mom)
        self.vars.lerp_(v, self.mom)
        return m, v
        
    def forward(self, x):
        if self.training:
            with torch.no_grad(): m, v = self.update_stats(x)
        else: m, v = self.means, self.vars
        x = (x - m) / (v + self.eps).sqrt()
        return self.mults * x + self.adds

In [42]:
def conv_layer(ni, nf, ks=3, stride=2, bn=True, **kwargs):
    layers = [nn.Conv2d(ni, nf, ks, padding=ks//2, stride=stride, bias=not bn),
              GeneralRelu(**kwargs)]
    if bn: layers.append(BatchNorm(nf))
    return nn.Sequential(*layers)

In [43]:
def init_cnn_(m, f):
    if isinstance(m, nn.Conv2d):
        f(m.weight, a=.1)
        if getattr(m, 'bias', None) is not None: m.bias.data.zero_()
    for l in m.children(): init_cnn_(l, f)
        
def init_cnn(m, uniform=False):
    f = init.kaiming_uniform_ if uniform else init.kaiming_normal_
    init_cnn_(m, f)
    
def get_learn_run(nfs, data, lr, layer, cbs=None, opt_func=None, uniform=False, **kwargs):
    model = get_cnn_model(data, nfs, layer, **kwargs)
    init_cnn(model, uniform=uniform)
    return get_runner(model, data, lr=lr, cbs=cbs, opt_func=opt_func)

In [44]:
learn, run = get_learn_run(nfs, data, .9, conv_layer, cbs=cbfs)

In [45]:
with Hooks(learn.model, append_stats) as hooks:
    run.fit(1, learn)
    fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
    for h in hooks[:-1]:
        ms, ss = h.stats
        axs[0].plot(ms[:10])
        axs[1].plot(ss[:10])
    plt.legend(range(6))
    
    fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
    for h in hooks[:-1]:
        ms, ss = h.stats
        axs[0].plot(ms)
        axs[1].plot(ss)
    plt.legend(range(6))

TypeError: var(): argument 'dim' (position 1) must be int, not tuple