# Miniproject 1 - Classification
## Classification, weight sharing, auxiliary losses

The objective of this project is to test different architectures to compare two digits visible in a two-channel image. It aims at showing in particular the impact of weight sharing, and of the use of an auxiliary loss to help the training of the main objective.

### 0. Import library and define python3 as default

In [3]:
#!/usr/bin/env python3
""" File to solve the first miniproject which is classification """

import random
import matplotlib.pyplot as plt
from IPython.display import display, clear_output
import numpy as np

import torch
from torch.autograd import Variable
from torch import nn, optim
from torch.nn import functional as F
import dlc_practical_prologue as prologue


__author__ = 'Eugène Lemaitre, Natalie Bolón Brun, Louis Munier'
__version__ = '0.1'

### 1. Import and Process data

In [4]:
def import_data(N, normalize):
    """Function to import dataset from prologue"""
    train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets(N)

    # Normalize data
    if normalize:
        mu, std = train_input.mean(), train_input.std()
        train_input.sub_(mu).div_(std)
        test_input.sub_(mu).div_(std)

    return train_input, train_classes, train_target, test_input, test_classes, test_target

In [5]:
def to_one_hot(tensor):
    one_hot = torch.zeros((tensor.size(0), 10)).type(torch.FloatTensor)
    one_hot[list(range(0,tensor.size(0))), tensor] = 1
    return one_hot

In [6]:
def split_data(train_input, train_classes, test_input, test_classes):
    """Split data into two set, pictures up/down."""
    train_input_up = Variable(train_input[:, 0, :, :].reshape(train_input.size(0), 1, train_input.size(2), train_input.size(3)))
    train_input_down = Variable(train_input[:, 1, :, :].reshape(train_input.size(0), 1, train_input.size(2), train_input.size(3)))

    train_classes_up = Variable(to_one_hot(train_classes[:,0]))
    train_classes_down = Variable(to_one_hot(train_classes[:,1]))
    
    test_input_up = Variable(test_input[:, 0, :, :].reshape(test_input.size(0), 1, test_input.size(2), test_input.size(3)))
    test_input_down = Variable(test_input[:, 1, :, :].reshape(test_input.size(0), 1, test_input.size(2), test_input.size(3)))
    
    test_classes_up = Variable(to_one_hot(test_classes[:, 0]))
    test_classes_down = Variable(to_one_hot(test_classes[:, 1]))
    
    dict_up = {'train_input':train_input_up, 'train_classes':train_classes_up, 'test_input':test_input_up, 'test_classes':test_classes_up}
    dict_down = {'train_input':train_input_down, 'train_classes':train_classes_down, 'test_input':test_input_down, 'test_classes':test_classes_down}
    
    return dict_up, dict_down

In [7]:
def validation_set(dict_in_up, dict_in_down, train_target, size):
    rnd = []
    other = np.arange(dict_in_up['train_input'].size(0))
    
    for i in range(size):
        rnd.append(random.randint(0, dict_in_up['train_input'].size(0)-1))
        np.delete(other, rnd[-1])
        
    dict_out_up = {'train_input':dict_in_up['train_input'][other,:,:,:], 'train_input_valid':dict_in_up['train_input'][rnd,:,:,:], \
                   'train_classes':dict_in_up['train_classes'][other,:], 'train_classes_valid':dict_in_up['train_classes'][rnd,:], \
                   'test_input':dict_in_up['test_input'], 'test_classes':dict_in_up['test_classes']}
    
    dict_out_down = {'train_input':dict_in_down['train_input'][other,:,:,:], 'train_input_valid':dict_in_down['train_input'][rnd,:,:,:], \
                     'train_classes':dict_in_down['train_classes'][other,:], 'train_classes_valid':dict_in_down['train_classes'][rnd,:], \
                     'test_input':dict_in_down['test_input'], 'test_classes':dict_in_down['test_classes']}
    
    return dict_out_up, dict_out_down, train_target[other], train_target[rnd]

### 2. Define the device to work on CUDA if it is available

In [8]:
def define_device(model, criterion, dict_in, train_target, test_target):
    """Check if cuda is available to run model on it."""
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
        
    print('\nDevice : {}'.format(device))

    model.to(device)
    criterion.to(device)
    dict_in['train_input'], dict_in['train_classes'] = dict_in['train_input'].to(device), dict_in['train_classes'].to(device)
    dict_in['test_input'], dict_in['test_classes'] = dict_in['test_input'].to(device), dict_in['test_classes'].to(device)
    train_target, test_target = train_target.to(device), test_target.to(device)
    
    return model, criterion, dict_in, train_target, test_target

### 3. Define models

In [9]:
class Net_recognition(nn.Module):
    """Recognition model definition."""
    def __init__(self, nb_hidden):
        super(Net_recognition, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.fc1 = nn.Linear(256, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2, stride=1))
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2, stride=2))
        x = F.relu(self.fc1(x.view(-1, 256)))
        x = self.fc2(x)
        return x

In [10]:
class Net_compare(nn.Module):
    """Comparison model definition."""
    def __init__(self, nb_hidden):
        super(Net_compare, self).__init__()
        self.conv1 = nn.Conv1d(1, 16, kernel_size=5)
        self.fc1 = nn.Linear(4, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 1)

    def forward(self, x):
        x = F.relu(F.max_pool1d(self.conv1(x), kernel_size=3, stride=1))
        x = F.relu(self.fc1(x.view(-1, 4)))
        x = self.fc2(x)
        return x

### 4. Define training models

In [11]:
def train_model_recognition(model, criterion, dict_in, epochs, mini_batch_size = 100, lr = 1e-1, verbose = 2):
    """Training recognition model."""
    eta = 1e-1
    nb_errors = []
    output_to_train = torch.zeros(dict_in['train_classes'].size(), dtype = torch.double)
    
    for e in range(epochs):
        sum_loss = 0
        
        for b in range(0, dict_in['train_input'].size(0), mini_batch_size):
            output = model(dict_in['train_input'].narrow(0, b, mini_batch_size))
            loss = criterion(output, dict_in['train_classes'].narrow(0, b, mini_batch_size))
            
            if e == epochs-1:
                output_to_train[b:b+mini_batch_size, :] = output

            model.zero_grad()
            loss.backward()
            sum_loss += loss.item()

            for p in model.parameters():
                p.data.sub_(eta * p.grad.data)

        if e < epochs-1:
            end_print = '\t\t\r'
        else:
            end_print = '\n'
                
        if verbose == 0: print('Epoch: {}, loss: {:0.2f}'.format(e+1, sum_loss), end = end_print)
        elif verbose == 1 and e%5 == 0: print(e+1, sum_loss)
        
        nb_errors.append(compute_nb_errors_recognition(model, dict_in, mini_batch_size))
    
    return nb_errors, output_to_train

In [12]:
def train_model_compare(model, criterion, input_data, target, epochs, mini_batch_size = 100, lr = 1e-1, verbose = 2):
    """Training comparison model."""
    eta = 1e-1
    nb_errors = []
    
    for e in range(epochs):
        sum_loss = 0
        
        for b in range(0, input_data.size(0), mini_batch_size):
            output = model(input_data.narrow(0, b, mini_batch_size))
            loss = criterion(output, input_data.narrow(0, b, mini_batch_size))

            model.zero_grad()
            loss.backward()
            sum_loss += loss.item()

            for p in model.parameters():
                p.data.sub_(eta * p.grad.data)

        if e < epochs-1:
            end_print = '\t\t\r'
        else:
            end_print = '\n'
                
        if verbose == 0: print('Epoch: {}, loss: {:0.2f}'.format(e+1, sum_loss), end = end_print)
        elif verbose == 1 and e%5 == 0: print(e+1, sum_loss)
        
        nb_errors.append(compute_nb_errors_compare(model, input_data, target, mini_batch_size))
    
    return nb_errors

### 5. Compute number of errors

In [13]:
def compute_nb_errors_recognition(model, dict_in, mini_batch_size = 100):
    errors = 0

    for b in range(0, dict_in['train_input_valid'].size(0), mini_batch_size):
        output = model(dict_in['train_input_valid'].narrow(0, b, mini_batch_size))
        _, predicted = output.data.max(1)

        for k in range(mini_batch_size):
            if dict_in['train_classes_valid'].data[b + k, predicted[k]] <= 0:
                errors = errors + 1
    
    return errors*100/dict_in['train_classes_valid'].size(0)

In [14]:
def compute_nb_errors_compare(model, input_data, target, mini_batch_size = 100):
    errors = 0

    for b in range(0, input_data.size(0), mini_batch_size):
        output = model(input_data.narrow(0, b, mini_batch_size))
        _, predicted = output.data.max(1)

        for k in range(mini_batch_size):
            if target.data[b + k, predicted[k]] <= 0:
                errors = errors + 1
    
    return errors*100/target.size(0)

### 6. Adapt the learning rate

In [15]:
def adapt_learning_rate(learning_rate, loss, e):
    """Adapt the leaning rate: divide by two if oscillations are seen, multiply by two if a plateau is reached

    e: epochs
    """
    lst = loss[e - 50:e]
    
    if e > 50:
        # Decreases learning rate if high variation in the loss
        if loss[e] - loss[e - 1] - 0.5 > 0 and e > 5:
            learning_rate = learning_rate/2
            #optimizer = optim.SGD(model.parameters(), lr = learning_rate)
            #print('Decrease learning rate ',learning_rate)

        # Increases learning rate if a plateau is reached
        elif abs(sum(lst)/len(lst) - loss[e]) < 0.05:
            learning_rate = 2*learning_rate
            #optimizer = optim.SGD(model.parameters(), lr = learning_rate)
            #print('Increase learning rate ', learning_rate)
            
    return learning_rate

### Main part

In [18]:
## Define variables
N, normalize = 1000, True
hidden_layer = 200
repeat, validation_size = 5, 200
dict_up_valid = {}
epochs, mini_batch_size, dyn_lr, verbose = 25, 100, 0.5, 0
criterion = nn.MSELoss()

# Import data
train_input, train_classes, train_target, test_input, test_classes, test_target = import_data(N, normalize)

# Main process
loss = []
xdata = np.arange(2*epochs)
nb_error_test = np.empty([repeat, 2*epochs])
nb_final_tests = np.empty([repeat, epochs])

for r in range(repeat):
    # Define models
    model_recognition = Net_recognition(hidden_layer)
    model_compare = Net_compare(hidden_layer)
    
    # Process data
    dict_up, dict_down = split_data(train_input, train_classes, test_input, test_classes)
    model_recognition, criterion, dict_up, train_target, test_target = \
        define_device(model_recognition, criterion, dict_up, train_target, test_target)
    
    # Define validation set
    dict_up_valid, dict_down_valid, new_train_target, train_target_valid = \
        validation_set(dict_up, dict_down, train_target, validation_size)
    
    to_train = torch.zeros([dict_up_valid['train_classes'].size(0), 2*dict_up_valid['train_classes'].size(1)]).type(torch.DoubleTensor)

    # Training models
    nb_error_test[r, :epochs], to_train[:,:dict_up_valid['train_classes'].size(1)] = train_model_recognition(model_recognition, criterion, dict_up_valid, epochs, mini_batch_size, lr = dyn_lr, verbose = verbose)
    nb_error_test[r, epochs:], to_train[:,dict_up_valid['train_classes'].size(1):] = train_model_recognition(model_recognition, criterion, dict_down_valid, epochs, mini_batch_size, lr = dyn_lr, verbose = verbose)
    
    to_train = to_train.reshape(to_train.size(0), 1, to_train.size(1))
    nb_final_tests[r, :] = train_model_compare(model_compare, criterion, to_train.type(torch.DoubleTensor), train_target_valid, epochs, mini_batch_size, lr = dyn_lr, verbose = verbose)
    
    # Dynamically adapt learning rate
    #dyn_lr = adapt_learning_rate(dyn_lr, loss, e)    

plt.figure('Final results')
plt.title('Final results')
plt.xlabel('Epochs [-]') ; plt.ylabel('Errors [%]')
plt.plot(xdata, nb_error_test.mean(0), 'r')
plt.fill_between(xdata, nb_error_test.mean(0) - nb_error_test.std(0), nb_error_test.mean(0) + nb_error_test.std(0),color='gray', alpha=0.2)
print("\n\nLast mean error : {}%".format(nb_error_test[:, -1].mean(0)))


Device : cpu
Epoch: 25, loss: 0.32		
Epoch: 25, loss: 0.23		


RuntimeError: Expected object of scalar type Double but got scalar type Float for argument #2 'weight'