# MINIPROJECT 1

## Classification, weight sharing, auxiliary losses


The objective of this project is to test different architectures to compare two digits visible in a two-channel image. It aims at showing in particular the impact of weight sharing, and of the use of an auxiliary loss to help the training of the main objective.

### 0. Import libraries

In [32]:
import torch
from torch.autograd import Variable
from torch import nn, optim
from torch.nn import functional as F

from dlc_practical_prologue import generate_pair_sets

### 1. Generate data

In [56]:
train_input, train_target, train_classes, test_input, test_target, test_classes = generate_pair_sets(1000)

def normalize_data(train_input, test_input):
    mu, std = train_input.mean(), train_input.std()
    train_input.sub_(mu).div_(std)
    test_input.sub_(mu).div_(std)
    
def to_one_hot(tensor):
    one_hot = torch.zeros((tensor.size(0), 10)).type(torch.FloatTensor)
    one_hot[list(range(0,tensor.size(0))), tensor[:,0]] = 1
    return one_hot

train_input = train_input.reshape((train_input.size(0)*2, 1, train_input.size(2), train_input.size(3)))
train_classes = train_classes.reshape((train_classes.size(0)*2, 1))
test_input = test_input.reshape((test_input.size(0)*2, 1, test_input.size(2), test_input.size(3)))
test_classes = test_classes.reshape((test_classes.size(0)*2, 1))

train_classes_one_hot = to_one_hot(train_classes)
test_classes_one_hot = to_one_hot(test_classes)

#train_target = train_target.type(torch.FloatTensor)
#test_target = test_target.type(torch.FloatTensor)

train_target_bin = torch.zeros((train_target.size(0), 2))
train_target_bin[list(range(train_target.size(0))), train_target[:]] = 1

test_target_bin = torch.zeros((test_target.size(0), 2))
test_target_bin[list(range(test_target.size(0))), test_target[:]] = 1

normalize_data(train_input, test_input)

### 2. Define train function and other auxiliary functions that can be necessary


In [104]:
def train_model_conv(model, train_input, train_target, epochs=25, \
                mini_batch_size=100, lr=1e-3, criterion=None, optimizer=None, verbose=2):
    
    print('Training Conv Net')
    
    # use MSE loss by default
    if not criterion:
        criterion = nn.MSELoss()
        
    # use SGD by default
    if not optimizer:
        optimizer = optim.SGD(model.parameters(), lr = lr)

    
    for e in range(epochs):
        sum_loss = 0
        for b in range(0, train_input.size(0), mini_batch_size):
            output = model(train_input.narrow(0, b, mini_batch_size))
            loss = criterion(output, train_target.narrow(0, b, mini_batch_size))
            model.zero_grad()
            loss.backward()
            sum_loss = sum_loss + loss.item()
            optimizer.step()
            #for p in model.parameters():
            #    p.data.sub_(lr * p.grad.data)
        
        if verbose == 0: print('Epoch: {}, loss: {:0.2f}'.format(e, sum_loss))
        elif verbose == 1 and e%5 == 0: print(e, sum_loss)
            

def train_model_fc(model, train_input, train_target, epochs=25, \
                mini_batch_size=100, lr=1e-3, criterion=None, optimizer=None, verbose=2):
    print('Training Fully connected net')
    
    # use MSE loss by default
    if not criterion:
        criterion = nn.MSELoss()
        
    # use SGD by default
    if not optimizer:
        optimizer = optim.SGD(model.parameters(), lr = lr)

    
    for e in range(epochs):
        sum_loss = 0
        for b in range(0, train_input.size(0), mini_batch_size):
            output = model(train_input.narrow(0, b, mini_batch_size))
            loss = criterion(output, train_target.narrow(0, b, mini_batch_size))
            model.zero_grad()
            loss.backward(retain_graph=True)
            sum_loss = sum_loss + loss.item()
            optimizer.step()
            #for p in model.parameters():
            #    p.data.sub_(lr * p.grad.data)
        
        if verbose == 0: print('Epoch: {}, loss: {:0.2f}'.format(e, sum_loss))
        elif verbose == 1 and e%5 == 0: print(e, sum_loss)
            


def compute_nb_errors(model, input, target, mini_batch_size=100):
    errors = 0

    for b in range(0, input.size(0), mini_batch_size):
        output = model(input.narrow(0, b, mini_batch_size))
        _, predicted_classes = output.data.max(1)
        
        for k in range(mini_batch_size):
            if target.data[b + k, predicted_classes[k]] <= 0:
                errors = errors + 1
    return errors


    


In [118]:

class Net_Conv(nn.Module):
    def __init__(self, nb_hidden):
        super(Net_Conv, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.fc1 = nn.Linear(256, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2, stride=1))
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2, stride=2))
        x = F.relu(self.fc1(x.view(-1, 256)))
        x = self.fc2(x)
        return x    
    

class Net_Full(nn.Module):
    def __init__(self):
        super(Net_Full, self).__init__()
        self.fc1 = nn.Linear(20, 100)
        self.fc2 = nn.Linear(100,200)
        self.fc3 = nn.Linear(200, 2)
    
    def forward(self,x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

### 3. Generate MLP 1 - with aux loss


In [119]:
train_input, train_target, train_classes_one_hot = Variable(train_input), Variable(train_target), Variable(train_classes_one_hot)
test_input, test_target, test_classes_one_hot = Variable(test_input), Variable(test_target), Variable(test_classes_one_hot)


nb_error_test = []
for k in range(10):
    model1 = Net_Conv(200)
    model2 = Net_Full()
    train_model_conv(model1, train_input, train_classes_one_hot, lr=1, verbose=2)
    train_model_fc(model2, train_classes_one_hot.view(1000,20), train_target_bin, lr=5e-1, verbose=2)
    
    out = model2(model1(test_input).view(1000, 20))
    _, argm = out.max(1)
    nb_test_errors = 1000 - (argm == test_target).sum(0)
    nb_error_test.append(100.0*nb_test_errors/test_target.size(0))
    
    #nb_test_errors_n1 = compute_nb_errors(model1, test_input, test_classes_one_hot)
    #nb_error_test += nb_test_errors_n1
    print('test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_target.size(0),
                                                      nb_test_errors, test_target.size(0)))

print('Test error mu: {:0.2f}% '.format(sum(nb_error_test)/len(nb_error_test)))


Training Conv Net
Training Fully connected net
test error Net 9.00% 45/1000
Training Conv Net
Training Fully connected net
test error Net 8.00% 40/1000
Training Conv Net
Training Fully connected net
test error Net 11.00% 56/1000
Training Conv Net
Training Fully connected net
test error Net 8.00% 42/1000
Training Conv Net
Training Fully connected net
test error Net 12.00% 64/1000
Test error mu: 4.00% 


### 4. Generate ConvNet 1 - with aux loss 

### 5. Generate MLP 2 - without aux loss

### 6. Generate ConvNet 2 - without aux loss 

In [None]:
### 7. Generate 