In [1]:
# PyTorch imports
import torch
from torch import nn
from torch import optim
from torch.nn import functional as F
# DLC Practical prologue
import dlc_practical_prologue as prologue

# Load Data

In [2]:
N = 1000
train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets(N)

In [3]:
# Data sizes for reference
print('Train input:', train_input.size())
print('Train target:', train_target.size())
print('Train classes:', train_classes.size())
print('... Same for test too')

Train input: torch.Size([1000, 2, 14, 14])
Train target: torch.Size([1000])
Train classes: torch.Size([1000, 2])
... Same for test too


# Model Architectures

### Baseline Convolutional Model

For Conv2D, MaxPool2D => See sizing rules on PyTorch page.

| Datasize | Operation      |
| ---------| ---------------|
| 1x14x14  | Conv, k=3, p=1 |
| 32x14x14 | MaxP, k=3, s=1 |
| 32x12x12 | Conv, k=3, p=1 |
| 64x12x12 | MaxP, k=2, s=2 |
| 64x6x6   | Flatten        |
| 2304     | Lin, 2304->64  |
| 64       | Lin, 64->10    |

In [4]:
class BaselineNet(nn.Module):
    # Init method
    def __init__(self, use_weight_sharing_, use_auxiliary_loss_):
        super().__init__()
        self.use_weight_sharing = use_weight_sharing_
        self.use_auxiliary_loss = use_auxiliary_loss_
        self.conv_out = 64*6*6
        
        # X1
        self.x1_conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.x1_conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.x1_fc1 = nn.Linear(self.conv_out, 64)
        self.x1_fc2 = nn.Linear(64, 10)
        
        # X2
        if not self.use_weight_sharing:
            self.x2_conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
            self.x2_conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
            self.x2_fc1 = nn.Linear(self.conv_out, 64)
            self.x2_fc2 = nn.Linear(64, 10)
        
        # Combine
        self.comp_fc1 = nn.Linear(20, 100)
        self.comp_fc2 = nn.Linear(100, 50)
        self.comp_fc3 = nn.Linear(50, 2)
        # Dropout
        self.dropout=nn.Dropout()
        
    # Forward method    
    def forward(self, x):
        # X1
        x1 = F.relu(F.max_pool2d(self.x1_conv1(x[:,0:1]), kernel_size=3, stride=1))
        x1 = F.relu(F.max_pool2d(self.x1_conv2(x1), kernel_size=2, stride=2))
        x1 = x1.reshape(x1.size(0), -1)
        x1 = self.dropout(F.relu(self.x1_fc1(x1)))
        x1 = self.x1_fc2(x1)
        
        # X2
        if not self.use_weight_sharing:
            x2 = F.relu(F.max_pool2d(self.x2_conv1(x[:,1:2]), kernel_size=3, stride=1))
            x2 = F.relu(F.max_pool2d(self.x2_conv2(x2), kernel_size=2, stride=2))
            x2 = x2.reshape(x2.size(0), -1)
            x2 = self.dropout(F.relu(self.x2_fc1(x2)))
            x2 = self.x2_fc2(x2)
        else: 
            x2 = F.relu(F.max_pool2d(self.x1_conv1(x[:,1:2]), kernel_size=3, stride=1))
            x2 = F.relu(F.max_pool2d(self.x1_conv2(x2), kernel_size=2, stride=2))
            x2 = x2.reshape(x2.size(0), -1)
            x2 = self.dropout(F.relu(self.x1_fc1(x2)))
            x2 = self.x1_fc2(x2)          
        
        # Combine
        x = F.relu(self.comp_fc1(torch.cat((x1, x2), 1)))
        x = F.relu(self.comp_fc2(x))
        x = torch.sigmoid(self.comp_fc3(x))
        if self.use_auxiliary_loss:
            return x, x1.softmax(1), x2.softmax(1)
        else:
            return x

### AlexNet-like Convolutional Model

For Conv2D, MaxPool2D => See sizing rules on PyTorch page.

| Datasize | Operation      |
| ---------| ---------------|
| 1x14x14  | Conv, k=6, p=1 |
| 32x11x11 | MaxP, k=2, s=2 |
| 32x5x5   | Conv, k=2, p=1 |
| 85x5x5   | MaxP, k=2, s=2 |
| 85x2x2   | Conv, k=3, p=1 | 
| 128x2x2  | Conv, k=3, p=1 | 
| 128x2x2  | Conv, k=3, p=1 | 
| 85x2x2   | MaxP, k=2, s=2 |
| 85x1x1   | Flatten        |
| 85       | Lin, 85->85    |
| 85       | Lin, 85->10    |

In [5]:
class AlexNet(nn.Module):
    # Init method
    def __init__(self, use_weight_sharing_, use_auxiliary_loss_):
        super().__init__()
        self.use_weight_sharing = use_weight_sharing_
        self.use_auxiliary_loss = use_auxiliary_loss_
        self.conv_out = 85
        
        # X1
        self.x1_conv1 = nn.Conv2d(1, 32, kernel_size=6, padding=1)
        self.x1_conv2 = nn.Conv2d(32, 85, kernel_size=2, padding=1)
        self.x1_nest1 = nn.Conv2d(85, 128, kernel_size=3, padding=1)
        self.x1_nest2 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.x1_nest3 = nn.Conv2d(128, 85, kernel_size=3, padding=1)
        self.x1_fc1 = nn.Linear(self.conv_out, self.conv_out)
        self.x1_fc2 = nn.Linear(self.conv_out, 10)
        
        # X2
        if not self.use_weight_sharing:
            self.x2_conv1 = nn.Conv2d(1, 32, kernel_size=6, padding=1)
            self.x2_conv2 = nn.Conv2d(32, 85, kernel_size=2, padding=1)
            self.x2_nest1 = nn.Conv2d(85, 128, kernel_size=3, padding=1)
            self.x2_nest2 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
            self.x2_nest3 = nn.Conv2d(128, 85, kernel_size=3, padding=1)
            self.x2_fc1 = nn.Linear(self.conv_out, self.conv_out)
            self.x2_fc2 = nn.Linear(self.conv_out, 10)
        
        # Combine
        self.comp_fc1 = nn.Linear(20, 100)
        self.comp_fc2 = nn.Linear(100, 50)
        self.comp_fc3 = nn.Linear(50, 2)
        # Dropout
        self.dropout=nn.Dropout()
        
    # Forward method    
    def forward(self, x):  # Remove relus?
        # X1
        x1 = F.relu(F.max_pool2d(self.x1_conv1(x[:,0:1]), kernel_size=2, stride=2))
        x1 = F.relu(F.max_pool2d(self.x1_conv2(x1), kernel_size=2, stride=2))
        x1 = F.relu(F.max_pool2d(self.x1_nest3(self.x1_nest2(self.x1_nest1(x1))), kernel_size=2, stride=2))
        x1 = x1.reshape(x1.size(0), -1)
        x1 = self.dropout(F.relu(self.x1_fc1(x1)))   # use dropout?
        x1 = self.x1_fc2(x1)
        
        # X2
        if not self.use_weight_sharing:
            x2 = F.relu(F.max_pool2d(self.x2_conv1(x[:,1:2]), kernel_size=2, stride=2))
            x2 = F.relu(F.max_pool2d(self.x2_conv2(x2), kernel_size=2, stride=2))
            x2 = F.relu(F.max_pool2d(self.x2_nest3(self.x2_nest2(self.x2_nest1(x2))), kernel_size=2, stride=2))
            x2 = x2.reshape(x2.size(0), -1)
            x2 = self.dropout(F.relu(self.x2_fc1(x2)))   # use dropout?
            x2 = self.x2_fc2(x2)
        else: 
            x2 = F.relu(F.max_pool2d(self.x1_conv1(x[:,1:2]), kernel_size=2, stride=2))
            x2 = F.relu(F.max_pool2d(self.x1_conv2(x2), kernel_size=2, stride=2))
            x2 = F.relu(F.max_pool2d(self.x1_nest3(self.x1_nest2(self.x1_nest1(x2))), kernel_size=2, stride=2))
            x2 = x2.reshape(x2.size(0), -1)
            x2 = self.dropout(F.relu(self.x1_fc1(x2)))   # use dropout?
            x2 = self.x1_fc2(x2)
        
        # Combine
        x = F.relu(self.comp_fc1(torch.cat((x1, x2), 1)))
        x = F.relu(self.comp_fc2(x))
        x = torch.sigmoid(self.comp_fc3(x))
        if self.use_auxiliary_loss:
            return x, x1.softmax(1), x2.softmax(1)
        else:
            return x

# Methods for Training and Testing

## Train Model

In [6]:
def train_model(model_type, train_input, train_target, train_classes, test_input, test_target, test_classes, mini_batch_size, nb_epochs, nb_iterations):
    # Initialize model parameters
    model = None
    eta = 1e-1
    alpha = 0.1
    net_types = [(False, False), (True, False), (False, True), (True, True)]
    
    # Iterate for four possible cases
    for (use_weight_sharing, use_auxiliary_loss) in net_types:
        print("Training", "-"*100)
        print("Using Weight Sharing:", use_weight_sharing)
        print("Using Auxiliary Loss:", use_auxiliary_loss)        
        #
        #total_errors = 0.0
        all_errors = torch.empty(nb_iterations)
        # Iterate for performance estimation
        for k in range(nb_iterations):
            # Define the training model
            if model_type == "baseline":
                model = BaselineNet(use_weight_sharing, use_auxiliary_loss)
            elif model_type == "alex":
                model = AlexNet(use_weight_sharing, use_auxiliary_loss)
            elif model_type == "deep":
                model = DeepNet(use_weight_sharing, use_auxiliary_loss) # Undefined
            elif model_type == "very_deep":
                model = VeryDeepNet(use_weight_sharing, use_auxiliary_loss) # Undefined
            model.to(device)
            #
            optimizer= torch.optim.SGD(model.parameters(), lr = eta)
            criterion = nn.CrossEntropyLoss()
            
            # Iterate over several epochs
            for e in range(nb_epochs):
                # Iterate over mini-batches
                for b in range(0, train_input.size(0), mini_batch_size):
                    if use_auxiliary_loss:
                        output, output2, output3 = model(train_input.narrow(0, b, mini_batch_size).to(device))
                        loss1 = criterion(output, train_target.narrow(0, b, mini_batch_size).to(device))
                        loss2 = criterion(output2, train_classes[:, 0].narrow(0, b, mini_batch_size).to(device))
                        loss3 = criterion(output3, train_classes[:, 1].narrow(0, b, mini_batch_size).to(device))
                        # Loss = weighted average of the three losses
                        loss = (1-alpha)*loss1 + alpha*(loss2 + loss3)
                    else:
                        output = model(train_input.narrow(0, b, mini_batch_size).to(device))
                        loss = criterion(output, train_target.narrow(0, b, mini_batch_size).to(device))
                    # Backprop
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
            
            # Compute number of errors
            if use_auxiliary_loss:
                (nb_errors, nb_errors2, nb_errors3) = compute_nb_errors(model, test_input, test_target, test_classes)
                error = (100 * nb_errors) / test_input.size(0)
                error2 = (100 * nb_errors2) / test_classes.size(0)
                error3 = (100 * nb_errors3) / test_classes.size(0)
                print('test error Net {:0.2f}% {:d}/{:d}'.format(error, nb_errors, test_input.size(0)))
                print('test error X1 {:0.2f}% {:d}/{:d}'.format(error2, nb_errors2, test_classes.size(0))) # Originally test_targets
                print('test error X2 {:0.2f}% {:d}/{:d}'.format(error3, nb_errors3, test_classes.size(0))) 
            else:
                nb_errors = compute_nb_errors(model, test_input, test_target, test_classes)
                error = (100 * nb_errors) / test_input.size(0)
                print('test error Net {:0.2f}% {:d}/{:d}'.format(error, nb_errors, test_input.size(0)))
            #
            #total_errors += nb_errors
            all_errors[k] = error
        #
        print('Standard Deviation: {:0.2f}%'.format(all_errors.std().item()))
        print('Mean Error: {:0.2f}%'.format(all_errors.mean().item()))
        #print("Avg. Error: ", (total_errors/(10*nb_iterations)), "%")

## Count Errors

In [7]:
# Compute number of errors
def compute_nb_errors(model, test_input, test_target, test_classes):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        
    nb_errors = 0
    if model.use_auxiliary_loss:
        o1, o2, o3 = model(test_input.to(device))
    else:
        o1 = model(test_input.to(device))
    #
    output1 = torch.argmax(o1, dim = 1)
    expected1 = test_target.to(device)
    nb_errors = torch.count_nonzero((expected1 != output1))
    #
    if model.use_auxiliary_loss:
        output2 = torch.argmax(o2, dim = 1)
        expected2 = test_classes[:,0].to(device)
        nb_errors2 = torch.count_nonzero((expected2 != output2))
        output3 = torch.argmax(o3, dim = 1)
        expected3 = test_classes[:,1].to(device)
        nb_errors3 = torch.count_nonzero((expected3 != output3))
        return (nb_errors, nb_errors2, nb_errors3)
    else:
         return nb_errors

# Deployment

### BaselineNet

In [8]:
# Initialize parameters
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_type = 'baseline'
nb_iterations = 10
nb_epochs = 50
mini_batch_size = 100
# Use normalised inputs
m1 = train_input.mean()
s1 = train_input.std()
norm_train_input = train_input.sub_(m1).div_(s1)
norm_test_input = test_input.sub_(m1).div_(s1)
#
print('Using device:', device)
train_model(model_type, norm_train_input, train_target, train_classes, norm_test_input, test_target, test_classes, 
            mini_batch_size, nb_epochs, nb_iterations)

Using device: cpu
Training ----------------------------------------------------------------------------------------------------
Using Weight Sharing: False
Using Auxiliary Loss: False
test error Net 23.20% 232/1000
test error Net 30.20% 302/1000
test error Net 26.60% 266/1000
test error Net 22.70% 227/1000
test error Net 22.80% 228/1000
test error Net 21.80% 218/1000
test error Net 30.10% 301/1000
test error Net 22.20% 222/1000
test error Net 21.20% 212/1000
test error Net 25.20% 252/1000
Standard Deviation: 3.33%
Mean Error: 24.60%
Training ----------------------------------------------------------------------------------------------------
Using Weight Sharing: True
Using Auxiliary Loss: False
test error Net 20.40% 204/1000
test error Net 19.40% 194/1000
test error Net 21.90% 219/1000
test error Net 17.60% 176/1000
test error Net 18.20% 182/1000
test error Net 18.30% 183/1000
test error Net 24.40% 244/1000
test error Net 18.80% 188/1000
test error Net 16.50% 165/1000
test error Net 17

### AlexNet

In [9]:
# Initialize parameters
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_type = 'alex'
nb_iterations = 10
nb_epochs = 50
mini_batch_size = 100
# Use normalised inputs
m1 = train_input.mean()
s1 = train_input.std()
norm_train_input = train_input.sub_(m1).div_(s1)
norm_test_input = test_input.sub_(m1).div_(s1)
#
print('Using device:', device)
train_model(model_type, norm_train_input, train_target, train_classes, norm_test_input, test_target, test_classes, 
            mini_batch_size, nb_epochs, nb_iterations)

Using device: cpu
Training ----------------------------------------------------------------------------------------------------
Using Weight Sharing: False
Using Auxiliary Loss: False
test error Net 47.40% 474/1000
test error Net 47.40% 474/1000
test error Net 47.40% 474/1000
test error Net 47.40% 474/1000
test error Net 47.40% 474/1000
test error Net 47.40% 474/1000
test error Net 47.40% 474/1000
test error Net 47.40% 474/1000
test error Net 47.40% 474/1000
test error Net 47.40% 474/1000
Standard Deviation: 0.00%
Mean Error: 47.40%
Training ----------------------------------------------------------------------------------------------------
Using Weight Sharing: True
Using Auxiliary Loss: False


RuntimeError: mat1 and mat2 shapes cannot be multiplied (100x10 and 85x85)

# Part below not to be submitted

In [None]:
# # For development purposes
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model_type = 'alex'
# nb_iterations = 3
# nb_epochs = 10
# mini_batch_size = 2
# #
# ex_train_input = train_input[0:10]
# ex_train_target = train_target[0:10]
# ex_train_classes = train_classes[0:10]
# #
# ex_test_input = test_input[0:10]
# ex_test_target = test_target[0:10]
# ex_test_classes = test_classes[0:10]
# #
# print('Using device:', device)
# train_model(model_type, ex_train_input, ex_train_target, ex_train_classes, ex_test_input, ex_test_target, ex_test_classes, 
#             mini_batch_size, nb_epochs, nb_iterations)

In [None]:
# # Without normalisation
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model_type = 'shallow'
# nb_iterations = 10
# nb_epochs = 50
# mini_batch_size = 100
# #
# print('Using device:', device)
# train_model(model_type, train_input, train_target, train_classes, test_input, test_target, test_classes, 
#             mini_batch_size, nb_epochs, nb_iterations)

In [None]:
# m1 = train_input.mean()
# s1 = train_input.std()
# x_train=train_input.sub_(m1).div_(s1)
# x_test=test_input.sub_(m1).div_(s1)

# def execute_norm(model_nm, nb_iterations, nb_epochs=20):
#     model=None
#     nb_epochs = nb_epochs
#     mini_batch_size = 50
#     eta = 1e-1
#     use_gpu=True
#     tot_err=0
#     for k in range(nb_iterations):
#         if(model_nm=="base"):
#             model = Baseline_Net()
#         else:
#             model = WtSharing_Net()
#         optimizer = torch.optim.SGD(model.parameters(), lr = eta)
#         criterion = nn.CrossEntropyLoss()#MSELoss()
#         train_model(model, x_train, train_target, train_classes, mini_batch_size, optimizer, criterion, nb_epochs, use_gpu)
#         nb_test_errors = compute_nb_errors(model, x_test, test_target, test_classes, mini_batch_size,use_gpu)
#         tot_err+=nb_test_errors
#         print('test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / x_test.size(0),
#                                                       nb_test_errors, x_test.size(0)))
#     print("Avg. Error: ",(tot_err/(10*nb_iterations)), "%")    


In [None]:
# from torchsummary import summary
# print(summary(model_base.to("cuda"),input_size=(2, 14, 14)))
# print(summary(model_ws.to("cuda"),input_size=(2, 14, 14)))

In [None]:
# !pip install torchsummary 