In [1]:
# PyTorch imports
import torch
from torch import nn
from torch import optim
from torch.nn import functional as F
# Prologue
import dlc_practical_prologue as prologue

In [2]:
# Load data
N = 1000
train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets(N)

In [38]:
print('Example train_input sample as Ints:\n', train_input[0].int())

Example train_input sample as Ints:
 tensor([[[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
         [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
         [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
         [  0,   0,   0,   0,   9,  57, 106,  54,   0,   0,   0,   0,   0,   0],
         [  0,   0,   0,  35, 226, 186, 123, 186, 156,   2,   0,   0,   0,   0],
         [  0,   0,   0, 156, 132,   0,   0,   4, 197,  21,   0,   0,   0,   0],
         [  0,   0,   0,  86, 233, 132,  74, 132, 244,  37,   0,   0,   0,   0],
         [  0,   0,   0,   0,  41, 130, 155, 140, 211,  91,   0,   0,   0,   0],
         [  0,   0,   0,   0,   0,   0,   0,   4, 195,  40,   0,   0,   0,   0],
         [  0,   0,   0,   0,   0,   0,   0,  26, 251,  11,   0,   0,   0,   0],
         [  0,   0,   0,   0,   0,   0,   0, 101, 184,   0,   0,   0,   0,   0],
         [  0,   0,   0,   0,   0,   0,   0, 211, 151,   0,   0,   0,   

In [4]:
# Reference
print('Train input:', train_input.size())
print('Train target:', train_target.size())
print('Train classes:', train_classes.size())
print('... Same for test too\n')
# Target = 1 if digit1<= digit2
print('Possible values of train_target:', train_target.unique())
print('Pairs of classes for all samples:\n', train_classes)

Train input: torch.Size([1000, 2, 14, 14])
Train target: torch.Size([1000])
Train classes: torch.Size([1000, 2])
... Same for test too

Possible values of train_target: tensor([0, 1])
Pairs of classes for all samples:
 tensor([[9, 3],
        [5, 4],
        [7, 4],
        ...,
        [1, 4],
        [3, 5],
        [1, 1]])


In [5]:
# Reference
# def train_model(model, train_input, train_target, train_classes, mini_batch_size, nb_epochs = 100, criterion = nn.MSELoss(), optimizer = None):
#     eta = 1e-1
#     #
#     for e in range(nb_epochs):
#         acc_loss = 0
#         # We do this with mini-batches
#         for b in range(0, train_input.size(0), mini_batch_size):
#             #### Modify this based on model outputs ###########
#             output = model(train_input.narrow(0, b, mini_batch_size))
#             loss = criterion(output, train_target.narrow(0, b, mini_batch_size))
#             ###################################################
#             #acc_loss = acc_loss + loss.item()
#             #
#             model.zero_grad()
#             loss.backward()
#             #
#             if (optimizer is None):
#                 with torch.no_grad():
#                     for p in model.parameters():
#                         p -= eta * p.grad
#             else:
#                 optimizer.step()
#         #
#         print(e, acc_loss)
    
# def compute_nb_errors(model, test_input, test_target, test_classes, mini_batch_size):
#     nb_errors = 0

#     for b in range(0, test_input.size(0), mini_batch_size):
#         #### Modify this based on model outputs ###########
#         output = model(test_input.narrow(0, b, mini_batch_size))
#         _, predicted_classes = output.max(1)
#         ###################################################
# #         for k in range(mini_batch_size):
# #             if test_target[b + k, predicted_classes[k]] <= 0:
# #                 nb_errors = nb_errors + 1

#     return nb_errors

In [25]:
# Model with no weight sharing
class Net_noWS(nn.Module):
    def __init__(self):
        super().__init__()
        conv_out = 64
        nb_hidden = 32
        # X1
        self.x1_conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.x1_conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.x1_fc1 = nn.Linear(conv_out, nb_hidden)
        self.x1_fc2 = nn.Linear(nb_hidden, 10)
        # X2
        self.x2_conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.x2_conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.x2_fc1 = nn.Linear(conv_out, nb_hidden)
        self.x2_fc2 = nn.Linear(nb_hidden, 10)
        # Combine
        self.comp_fc = nn.Linear(20, 1)

    def forward(self, x):
        conv_out = 64 
        nb_hidden = 32
        # X1
        x1 = F.relu(F.max_pool2d(self.x1_conv1(x[:, 0].view(-1, 1, 14, 14)), kernel_size=3, stride=3))
        x1 = F.relu(F.max_pool2d(self.x1_conv2(x1), kernel_size=2, stride=2))
        x1 = F.relu(self.x1_fc1(x1.view(-1, conv_out)))
        x1 = F.relu(self.x1_fc2(x1))
        # X2
        x2 = F.relu(F.max_pool2d(self.x2_conv1(x[:, 1].view(-1, 1, 14, 14)), kernel_size=3, stride=3))
        x2 = F.relu(F.max_pool2d(self.x2_conv2(x2), kernel_size=2, stride=2))
        x2 = F.relu(self.x2_fc1(x2.view(-1, conv_out)))
        x2 = F.relu(self.x2_fc2(x2))
        # Combine
        x = self.comp_fc(torch.cat((x1, x2), 1))
        #
        return x

In [22]:
# Model with weight sharing
class Net_WS(nn.Module):
    def __init__(self):
        super().__init__()
        conv_out = 64
        nb_hidden = 32
        # Same for X1 and X2
        self.x_conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.x_conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.x_fc1 = nn.Linear(conv_out, nb_hidden)
        self.x_fc2 = nn.Linear(nb_hidden, 10)
        # Combine
        self.comp_fc = nn.Linear(20, 1)

    def forward(self, x):
        conv_out = 64
        nb_hidden = 32
        # X1
        x1 = F.relu(F.max_pool2d(self.x_conv1(x[:, 0].view(-1, 1, 14, 14)), kernel_size=3, stride=3))
        x1 = F.relu(F.max_pool2d(self.x_conv2(x1), kernel_size=2, stride=2))
        x1 = F.relu(self.x_fc1(x1.view(-1, conv_out)))
        x1 = F.relu(self.x_fc2(x1))
        # X2
        x2 = F.relu(F.max_pool2d(self.x_conv1(x[:, 1].view(-1, 1, 14, 14)), kernel_size=3, stride=3))
        x2 = F.relu(F.max_pool2d(self.x_conv2(x2), kernel_size=2, stride=2))
        x2 = F.relu(self.x_fc1(x2.view(-1, conv_out)))
        x2 = F.relu(self.x_fc2(x2))
        # Combine
        x = self.comp_fc(torch.cat((x1, x2), 1))
        #
        return x

In [31]:
def train_model(model, train_input, train_target, train_classes, mini_batch_size, nb_epochs = 100, criterion = nn.MSELoss(), optimizer = None):
    eta = 1e-1
    #
    for e in range(nb_epochs):
        acc_loss = 0
        #
        for b in range(0, train_input.size(0), mini_batch_size):
            output = model(train_input.narrow(0, b, mini_batch_size))
            print(output)
            loss = criterion(output, train_target.narrow(0, b, mini_batch_size))
            # train_classes unused
            acc_loss = acc_loss + loss.item()
            #
            model.zero_grad()
            loss.backward()
            #
            if (optimizer is None):
                with torch.no_grad():
                    for p in model.parameters():
                        p -= eta * p.grad
            else:
                optimizer.step()
        #
#         print(e, acc_loss)

In [9]:
def compute_nb_errors(model, test_input, test_target, test_classes, mini_batch_size):
    nb_errors = 0

    for b in range(0, test_input.size(0), mini_batch_size):
        output = model(test_input.narrow(0, b, mini_batch_size))
        # test_classes unused
        #_, predicted_classes = output.max(1)
        for k in range(mini_batch_size):
            if output[b + k] != test_target[b + k]:
                print('output:', output[b + k])
                print('target:', test_target[b + k])
                nb_errors = nb_errors + 1

    return nb_errors

In [42]:
ex_train_input = train_input[0:10]
ex_train_target = train_target[0:10]
ex_train_classes = train_classes[0:10]
#
ex_test_input = test_input[0:10]
ex_test_target = test_target[0:10]
ex_test_classes = test_classes[0:10]
#
mini_batch_size = 2
#
for k in range(1):
    model = Net_noWS()
    train_model(model, ex_train_input, ex_train_target, ex_train_classes, mini_batch_size, nb_epochs = 10, criterion = nn.CrossEntropyLoss())
#     nb_test_errors = compute_nb_errors(model, ex_test_input, ex_test_target, ex_test_classes, mini_batch_size)
#     print('test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
#                                                       nb_test_errors, test_input.size(0)))

tensor([[-6.7775],
        [-8.1099]], grad_fn=<AddmmBackward>)
tensor([[-6.3199],
        [-6.9861]], grad_fn=<AddmmBackward>)
tensor([[-8.7670],
        [-5.7075]], grad_fn=<AddmmBackward>)


IndexError: Target 1 is out of bounds.

In [None]:
# #
# mini_batch_size = 100
# #
# for k in range(10):
#     model = Net_noWS()
#     train_model(model, train_input, train_target, train_classes, mini_batch_size)
#     nb_test_errors = compute_nb_errors(model, test_input, test_target, test_classes, mini_batch_size)
#     print('test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
#                                                       nb_test_errors, test_input.size(0)))