In [1]:
# PyTorch imports
import torch
from torch import nn
from torch import optim
from torch.nn import functional as F
# DLC Practical prologue
import dlc_practical_prologue as prologue

In [2]:
# Load data
N = 1000
train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets(N)

In [3]:
# Data sizes for reference
print('Train input:', train_input.size())
print('Train target:', train_target.size())
print('Train classes:', train_classes.size())
print('... Same for test too')

Train input: torch.Size([1000, 2, 14, 14])
Train target: torch.Size([1000])
Train classes: torch.Size([1000, 2])
... Same for test too


In [4]:
# Baseline model - no WS, no AuxL
class Baseline_Net(nn.Module):
    def __init__(self):
        super().__init__()
        conv_out = 64
        # X1
        self.x1_conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.x1_conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.x1_fc1 = nn.Linear(conv_out, 32)
        self.x1_fc2 = nn.Linear(32, 10)
        # X2
        self.x2_conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.x2_conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.x2_fc1 = nn.Linear(conv_out, 32)
        self.x2_fc2 = nn.Linear(32, 10)
        # Combine
        self.comp_fc1 = nn.Linear(20, 64)
        self.comp_fc2 = nn.Linear(64, 1)
    
    def forward(self, x):
        conv_out = 64
        # X1
        x1 = F.relu(F.max_pool2d(self.x1_conv1(x[:, 0].view(-1, 1, 14, 14)), kernel_size=3, stride=3))
        x1 = F.relu(F.max_pool2d(self.x1_conv2(x1), kernel_size=2, stride=2))
        x1 = F.relu(self.x1_fc1(x1.view(-1, conv_out)))
        x1 = F.relu(self.x1_fc2(x1))
        # X2
        x2 = F.relu(F.max_pool2d(self.x2_conv1(x[:, 1].view(-1, 1, 14, 14)), kernel_size=3, stride=3))
        x2 = F.relu(F.max_pool2d(self.x2_conv2(x2), kernel_size=2, stride=2))
        x2 = F.relu(self.x2_fc1(x2.view(-1, conv_out)))
        x2 = F.relu(self.x2_fc2(x2))
        # Combine
        x = F.relu(self.comp_fc1(torch.cat((x1, x2), 1)))
        x = torch.sigmoid(self.comp_fc2(x))
        #
        return x

In [5]:
# Weight Sharing model - WS, no AuxL
class WtSharing_Net(nn.Module):
    def __init__(self):
        super().__init__()
        conv_out = 64
        # Same for X1 and X2
        self.x_conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.x_conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.x_fc1 = nn.Linear(conv_out, 32)
        self.x_fc2 = nn.Linear(32, 10)
        # Combine
        self.comp_fc1 = nn.Linear(20, 64)
        self.comp_fc2 = nn.Linear(64, 1)
    
    def forward(self, x):
        conv_out = 64
        # X1
        x1 = F.relu(F.max_pool2d(self.x_conv1(x[:, 0].view(-1, 1, 14, 14)), kernel_size=3, stride=3))
        x1 = F.relu(F.max_pool2d(self.x_conv2(x1), kernel_size=2, stride=2))
        x1 = F.relu(self.x_fc1(x1.view(-1, conv_out)))
        x1 = F.relu(self.x_fc2(x1))
        # X2
        x2 = F.relu(F.max_pool2d(self.x_conv1(x[:, 1].view(-1, 1, 14, 14)), kernel_size=3, stride=3))
        x2 = F.relu(F.max_pool2d(self.x_conv2(x2), kernel_size=2, stride=2))
        x2 = F.relu(self.x_fc1(x2.view(-1, conv_out)))
        x2 = F.relu(self.x_fc2(x2))
        # Combine
        x = F.relu(self.comp_fc1(torch.cat((x1, x2), 1)))
        x = torch.sigmoid(self.comp_fc2(x))
        #
        return x

In [6]:
def train_model(model, train_input, train_target, train_classes, mini_batch_size, optimizer, criterion, nb_epochs):
    for e in range(nb_epochs):
        for b in range(0, train_input.size(0), mini_batch_size):
            output = model(train_input.narrow(0, b, mini_batch_size))
            loss = criterion(output, train_target.narrow(0, b, mini_batch_size).float())
            # Note: train_classes unused
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

def compute_nb_errors(model, test_input, test_target, test_classes, mini_batch_size):
    nb_errors = 0
    output = model(test_input)
    expected = test_target.view(-1, 1)
    abs_diff = (expected - output).abs()
    # Iterate for every entry in abs_diff
    for val in abs_diff:
        if (val.item() > 0.5):  # GT or GTE?
            nb_errors += 1
    #        
    return nb_errors

In [7]:
def execute(model, nb_iterations):
    nb_epochs = 256
    mini_batch_size = 50
    eta = 1e-1
    #
    for k in range(nb_iterations):
        optimizer = torch.optim.SGD(model.parameters(), lr = eta)
        criterion = nn.MSELoss()
        train_model(model, train_input, train_target, train_classes, mini_batch_size, optimizer, criterion, nb_epochs)
        nb_test_errors = compute_nb_errors(model, test_input, test_target, test_classes, mini_batch_size)
        print('test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                      nb_test_errors, test_input.size(0)))

In [8]:
model_base = Baseline_Net()
model_ws = WtSharing_Net()
#
execute(model_ws, 1)

  return F.mse_loss(input, target, reduction=self.reduction)


test error Net 47.80% 478/1000
