In [1]:
#default_exp lesson3

In [2]:
import os
os.chdir("..")

In [3]:
# export
import torch
from torch import nn
from torch.optim import Adam
from solutions.lesson1 import *
from solutions.lesson2 import *
from fastai.datasets import download_data
from torch.functional import F

In [4]:
# export
def get_mnist_data():
    """Returns X_train, y_train, X_test, y_test for MNIST dataset."""
    MNIST_URL='http://deeplearning.net/data/mnist/mnist.pkl'
    path = download_data(MNIST_URL, ext=".gz")
    return get_data(path) 

In [5]:
# export
def stats(x):
    return x.mean(), x.std()

In [6]:
X_train, y_train, X_test, y_test = get_mnist_data()

# Basic Model

In [7]:
x_train_resized = X_train.reshape(-1, 1, 28, 28)

In [8]:
mean, std = stats(x_train_resized)

In [9]:
x_train_resized.size()

torch.Size([50000, 1, 28, 28])

In [10]:
x_train_norm = (x_train_resized - mean)/std

In [11]:
stats(x_train_norm)

(tensor(-3.0466e-06), tensor(1.))

In [12]:
nh = 100
mdl = nn.Sequential(
    nn.Conv2d(1, 5, 5),
    nn.ReLU()
)

In [13]:
for m in mdl.modules():
    if hasattr(m, 'weight'):
        nn.init.kaiming_normal_(m.weight, mode='fan_in')

In [14]:
stats(mdl(x_train_norm))

(tensor(0.5034, grad_fn=<MeanBackward0>),
 tensor(0.8765, grad_fn=<StdBackward0>))

In [15]:
# export
class Lambda(nn.Module):
    def __init__(self, f):
        super().__init__()
        self.f = f
        
    def forward(self, x):
        return self.f(x)

In [16]:
# def squeeze(x):
#     return torch.squeeze(x)

In [47]:
def conv_layer(ni, nf, size, stride=2):
    return nn.Sequential(
        nn.Conv2d(ni, nf, size, stride, padding=size//2),
        nn.ReLU()
    )

In [110]:
mdl = nn.Sequential(
    conv_layer(1, 8, 5),
    conv_layer(8, 16, 3),
    conv_layer(16, 32, 3),
    conv_layer(32, 64, 3),
    conv_layer(64, 64, 3),
    conv_layer(64, 10, 3),
    nn.AdaptiveAvgPool2d(1),
    nn.LogSoftmax(dim=1),
    Lambda(torch.squeeze)
)

In [49]:
mdl(x_train_norm).size()

torch.Size([50000, 10])

In [50]:
mdl(x_train_norm).squeeze().size()

torch.Size([50000, 10])

In [51]:
x_valid_norm = X_test.reshape(-1, 1, 28, 28)
x_valid_norm = (x_valid_norm - mean)/std

In [52]:
stats(x_valid_norm)

(tensor(-0.0059), tensor(0.9924))

In [53]:
train_data = Dataset(x_train_norm, y_train)
valid_data = Dataset(x_valid_norm, y_test)

In [54]:
c = (torch.max(y_test) + 1).item()

In [71]:
train_dl = DataLoader(train_data, 512)
valid_dl = DataLoader(valid_data, 1024)
data = DataBunch(train_dl, valid_dl, c=c)

In [72]:
data.c

10

In [206]:
g = nn.Conv2d(8, 16, 3)

In [281]:
# export
class GeneralReLU(nn.Module):
    def __init__(self, a=0.01, subtract=0.4):
        super().__init__()
        self.a = a
        self.subtract = subtract
        
    def forward(self, x):
        return F.leaky_relu(x, self.a) - self.subtract

In [282]:
# export
def init_cnn_(mdl):
    for layer in mdl.children():
        if isinstance(layer, nn.Conv2d):
            print("initializing conv2d...")
            nn.init.kaiming_normal_(layer.weight)
            if hasattr(layer, 'bias'):
                nn.init.zeros_(layer.bias)
        if isinstance(layer, nn.Sequential): init_cnn_(layer)
            
def conv_layer(ni, nf, size, stride=2, **kwargs):
    return nn.Sequential(
        nn.Conv2d(ni, nf, size, stride, padding=size//2),
        GeneralReLU(**kwargs)
    )
    
def get_model():
    conv_layers = [ 
        conv_layer(1, 8, 5),
        conv_layer(8, 16, 3),
        conv_layer(16, 32, 3),
        conv_layer(32, 64, 3),
        conv_layer(64, 10, 3),
    ]
             
    mdl = nn.Sequential(
        *conv_layers,
        nn.AdaptiveAvgPool2d(1),
        nn.LogSoftmax(dim=1),
        Lambda(torch.squeeze)
    ) 
    
    init_cnn_(mdl)
    
    return mdl 

In [283]:
mdl = get_model()

initializing conv2d...
initializing conv2d...
initializing conv2d...
initializing conv2d...
initializing conv2d...


In [284]:
mdl

Sequential(
  (0): Sequential(
    (0): Conv2d(1, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): GeneralReLU()
  )
  (1): Sequential(
    (0): Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): GeneralReLU()
  )
  (2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): GeneralReLU()
  )
  (3): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): GeneralReLU()
  )
  (4): Sequential(
    (0): Conv2d(64, 10, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): GeneralReLU()
  )
  (5): AdaptiveAvgPool2d(output_size=1)
  (6): LogSoftmax()
  (7): Lambda()
)

In [285]:
opt = Adam(mdl.parameters(), lr=1e-3)

In [286]:
loss = F.cross_entropy

In [287]:
learn = Learn(mdl, opt, data, loss)

In [288]:
runner = Runner(learn)

In [289]:
nn.NLLLoss()(F.log_softmax(torch.tensor([[1,2,3]]).float()), torch.tensor([2]))

  """Entry point for launching an IPython kernel.


tensor(0.4076)

In [290]:
# %time runner.fit(epochs=1)

# Some useful callbacks

In [291]:
torch.cuda.current_device()

0

In [292]:
torch.cuda.is_available()

True

In [293]:
# export
def current_gpu():
    print(torch.cuda.get_device_name(torch.cuda.current_device()))

In [294]:
# export
class AccuracyCallback(Callback):
    def __init__(self, runner):
        self.runner = runner
        
    def on_epoch_start(self):
        if runner.mode == ModelMode.VALID:
            self.correct = 0
            self.total = 0
        
    def on_batch_end(self):
        if runner.mode == ModelMode.VALID:
            self.total += self.runner.xb.shape[0]
            preds = torch.argmax(self.runner.pred, axis=1) 
            self.correct += (preds == self.runner.yb).int().sum().item()
        
    def on_epoch_end(self):
        if runner.mode == ModelMode.VALID:
            print("Validation accuracy: {}".format(self.correct/self.total)) 

In [295]:
stats(data.train_dl.ds.x)

(tensor(-3.0466e-06), tensor(1.))

In [296]:
stats(data.valid_dl.ds.x)

(tensor(-0.0059), tensor(0.9924))

In [297]:
runner = Runner(learn, cb_funcs=[AccuracyCallback])

In [298]:
runner.fit(epochs=5)

Validation accuracy: 0.932
Validation loss: 0.2306
Validation accuracy: 0.9577
Validation loss: 0.1863
Validation accuracy: 0.9664
Validation loss: 0.1615
Validation accuracy: 0.9714
Validation loss: 0.1456
Validation accuracy: 0.9748
Validation loss: 0.1342


In [40]:
runner.cbs[0].total, runner.cbs[0].correct

(50000, tensor(19467))

# Implement Batch Norm

In [69]:
# export 
class BatchNorm1d(nn.Module):
    def __init__(self, size, eps=1e-5, mom=0.1):
        super().__init__()
        self.eps = eps
        self.mom = mom
        self.register_buffer('gamma', torch.ones(size))
        self.register_buffer('beta', torch.zeros(size))
        self.mean = 0
        self.std = 1
        
    def forward(self, x):
        if self.mean is not None:
            self.mean = x.mean(axis=0) * self.mom + self.mean * (1 - self.mom)
        else:
            self.mean = x.mean(axis=0)
            
        if self.std is not None:
            self.std = x.std(axis=0) * self.mom + self.std * (1 - self.mom)
        else:
            self.std = x.std(axis=0)
            
        x_hat = (x - self.mean) / (self.std + self.eps)
        return self.gamma * x_hat + self.beta

In [70]:
bn.gamma, bn.beta

NameError: name 'bn' is not defined

In [None]:
X_train, y_train, X_test, y_test = get_mnist_data()

In [None]:
bn_test = nn.Sequential(
    nn.Linear(784, 100),
    BatchNorm1d(100)
)

In [None]:
def stats(x):
    return x.mean(), x.std()

In [None]:
stats(bn_test(X_train))

In [None]:
stats(X_train)

In [63]:
mean, std = stats(X_train)
stats((X_train - mean)/std)

(tensor(-3.0466e-06), tensor(1.))

# Utilities

# 