In [34]:
import dlc_practical_prologue as prologue
import torch
import math

from torch import optim
from torch import Tensor
from torch import nn
from torch.nn import functional as F
from torchvision import datasets

from helpers import *



In [35]:
train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets(1000)

In [42]:
class ResNetBlock(nn.Module):
    def __init__(self, nb_channels, kernel_size,
                 skip_connections = True, batch_normalization = True):
        super(ResNetBlock, self).__init__()

        self.conv1 = nn.Conv2d(nb_channels, nb_channels,
                               kernel_size = kernel_size,
                               padding = (kernel_size - 1) // 2)

        self.bn1 = nn.BatchNorm2d(nb_channels)

        self.conv2 = nn.Conv2d(nb_channels, nb_channels,
                               kernel_size = kernel_size,
                               padding = (kernel_size - 1) // 2)

        self.bn2 = nn.BatchNorm2d(nb_channels)

        self.skip_connections = skip_connections
        self.batch_normalization = batch_normalization

    def forward(self, x):
        y = self.conv1(x)
        if self.batch_normalization: y = self.bn1(y)
        y = F.relu(y)
        y = self.conv2(y)
        if self.batch_normalization: y = self.bn2(y)
        if self.skip_connections: y = y + x
        y = F.relu(y)

        return y


In [43]:
class ResNet(nn.Module):

    def __init__(self, nb_residual_blocks, nb_channels,
                 kernel_size = 14, nb_classes = 10,
                 skip_connections = True, batch_normalization = True):
        super(ResNet, self).__init__()

        self.conv = nn.Conv2d(2, nb_channels,
                              kernel_size = kernel_size,
                              padding = (kernel_size - 1) // 2)
        self.bn = nn.BatchNorm2d(nb_channels)

        self.resnet_blocks = nn.Sequential(
            *(ResNetBlock(nb_channels, kernel_size, skip_connections, batch_normalization)
              for _ in range(nb_residual_blocks))
        )

        self.fc = nn.Linear(nb_channels, nb_classes)

    def forward(self, x):
        x = F.relu(self.bn(self.conv(x)))
        x = self.resnet_blocks(x)
        x = F.avg_pool2d(x, 32).view(x.size(0), -1)
        x = self.fc(x)
        return x

In [44]:
skip_connections = False
batch_normalization = True
nb_samples = 1000

model = ResNet(nb_residual_blocks = 30, nb_channels = 2,
                   kernel_size = 14, nb_classes = 10,
                   skip_connections = skip_connections, batch_normalization = batch_normalization)

criterion = nn.CrossEntropyLoss()

monitored_parameters = [ b.conv1.weight for b in model.resnet_blocks ]

result = torch.empty(len(monitored_parameters), nb_samples)

for n in range(nb_samples):
    output = model(train_input[n:n+1])
    loss = criterion(output, train_target[n:n+1])
    model.zero_grad()
    loss.backward()
    for d, p in enumerate(monitored_parameters):
        result[d, n] = p.grad.norm()



ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 2, 1, 1])

In [None]:
shallow_model = nn.Sequential(
    nn.Linear()
)

In [None]:
resnet =

In [None]:


######################################################################

def create_shallow_model():
    return nn.Sequential(
        nn.Linear(2, 128),
        nn.ReLU(),
        nn.Linear(128, 2)
    )

def create_deep_model():
    return nn.Sequential(
        nn.Linear(2, 4),
        nn.ReLU(),
        nn.Linear(4, 8),
        nn.ReLU(),
        nn.Linear(8, 16),
        nn.ReLU(),
        nn.Linear(16, 32),
        nn.ReLU(),
        nn.Linear(32, 64),
        nn.ReLU(),
        nn.Linear(64, 128),
        nn.ReLU(),
        nn.Linear(128, 2)
    )

######################################################################

for std in [ -1, 1e-3, 1e-2, 1e-1, 1e-0, 1e1 ]:

    for m in [ create_shallow_model, create_deep_model ]:

        model = m()

        if std > 0:
            with torch.no_grad():
                for p in model.parameters(): p.normal_(0, std)

        train_model(model, train_input, train_target)

        print('std {:s} {:f} train_error {:.02f}% test_error {:.02f}%'.format(
            m.__name__,
            std,
            compute_nb_errors(model, train_input, train_target) / train_input.size(0) * 100,
            compute_nb_errors(model, test_input, test_target) / test_input.size(0) * 100
        )
        )