# Project 1 - Classification, weight sharing, auxiliary losses

This notebook contains my ideas concerning the first question of the project. 

>The goal of the project is to compare different architectures, and assess the performance improvement
that can be achieved through weight sharing, or using auxiliary losses. For the latter, the training can
in particular take advantage of the availability of the classes of the two digits in each pair, beside the
Boolean value truly of interest. 

>All the experiments should be done with 1000 pairs for training and test. A convnet with around 70000
parameters can be trained with 25 epochs in the VM in less than 2s and should achieve around 15% error
rate. 

>Performance estimates provided in your report should be estimated through 10+ rounds for each
architecture, where both data and weight initialization are randomized, and you should provide estimates
of standard deviations.

## TODO:
- get rid of aux network

In [1]:
# Imports
import torch

from torch import nn
from torch.nn import functional as F
from torch import optim
from torchvision import datasets
from torch.utils.tensorboard import SummaryWriter

# Models
from models_felix import *

# Import data generating function
from dlc_practical_prologue import generate_pair_sets

# Matplot
import matplotlib.pyplot as plt

## Import data

In [2]:
# Draw the pair sets (nb=1000) from the course helper
(train_input, train_target, train_classes,
 test_input, test_target, test_classes) = generate_pair_sets(nb=1000)

# Show the shape of each training tensor, one per line
print(train_input.size(), train_target.size(), train_classes.size(), sep="\n")

torch.Size([1000, 2, 14, 14])
torch.Size([1000])
torch.Size([1000, 2])


In [3]:
# Standardise both splits in place with statistics taken from the training
# inputs only; the statistics are computed before either tensor is modified.
train_mean, train_std = train_input.mean(), train_input.std()

train_input.sub_(train_mean).div_(train_std)
test_input.sub_(train_mean).div_(train_std)

## Training and testing functions

In [4]:
def testing(X_test, y_test, model, y_aux = None):
    # Put model in test model
    model.eval()

    if y_aux == None:
        out = model(X_test).argmax(dim=1)
        err = (out != y_test).sum()
        return err/y_test.size(0)
    else:
        out, out_aux = model(X_test)
        out, out_aux = out.argmax(dim=1), out_aux.argmax(dim=1)
        err = (out != y_test).sum()
        err_aux = (out_aux != y_aux).sum()
        return err/y_test.size(0), err_aux/y_aux.size(0)

In [5]:
def _flatten_pair_classes(classes):
    """Lay out (N, 2) digit labels as one vector: all first digits, then all second digits."""
    return classes.T.reshape(-1)


def _error_rates(model, inputs, target, classes):
    """Return (target_error, aux_error) for the model's loss mode; the entry that does not apply is None."""
    if model.loss_mode == 'train_aux_only':
        return None, testing(inputs, _flatten_pair_classes(classes), model)
    if model.loss_mode == 'train_with_aux':
        return testing(inputs, target, model, _flatten_pair_classes(classes))
    return testing(inputs, target, model), None


def training(model, 
            train_input, train_target, train_classes,
            test_input, test_target, test_classes,
            lr = 0.1,
            schedule_lr = True, # Learning rate scheduling
            tb_logging = True, # Tensorboard logging
            mini_batch_size = 100,
            nb_epochs = 1,
            verbose = True):
    """Train `model` with mini-batch SGD and a cross-entropy loss.

    The loss that is optimised is selected by `model.loss_mode`:

    - 'train_with_aux': the model returns `(out, out_aux)`; the target loss
      and the auxiliary (digit class) loss are summed.
    - 'train_aux_only': the model returns the auxiliary output only.
    - 'train_without_aux': the model returns the target output only.

    Auxiliary labels are built from the (batch, 2) class tensor by transposing
    and flattening, i.e. all first digits followed by all second digits.
    NOTE(review): this assumes the auxiliary head emits its rows in that same
    order - confirm against the model definition.

    Args:
        model: Module exposing a `loss_mode` attribute (see above).
        train_input, train_target, train_classes: Training inputs, target
            labels and per-digit class labels.
        test_input, test_target, test_classes: Same tensors for the test set;
            only used for logging and printing.
        lr: Initial SGD learning rate.
        schedule_lr: If True, decay the learning rate by 0.65 with a StepLR
            scheduler in about 10 steps over the run.
        tb_logging: If True, write accuracies and the last mini-batch loss of
            every epoch to TensorBoard. No writer is created otherwise.
        mini_batch_size: Number of samples per SGD step; the last batch may be
            smaller when the training set size is not a multiple of it.
        nb_epochs: Number of passes over the training set.
        verbose: If True, print test error rates roughly every 10% of the
            epochs and after the last epoch.

    Returns:
        The trained model (the same object that was passed in).

    Raises:
        ValueError: If `model.loss_mode` is not one of the three known modes.
    """
    known_modes = ('train_with_aux', 'train_aux_only', 'train_without_aux')
    if model.loss_mode not in known_modes:
        raise ValueError("Unknown loss_mode {!r}; expected one of {}".format(model.loss_mode, known_modes))

    # Params: print about ten times per run, but at least every epoch
    print_every = max(1, int(0.1 * nb_epochs))

    # Optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    # Loss criterion
    criterion = nn.CrossEntropyLoss()

    # Learning rate scheduler
    lr_scheduler = None
    if schedule_lr:
        nb_steps = 10
        step_size = max(1, nb_epochs // nb_steps)
        gamma = 0.65
        lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

    # Logging: only open a writer (and its run directory) when it is used
    writer = SummaryWriter() if tb_logging else None

    try:
        # Iterate over epochs
        for e in range(nb_epochs):
            # Learning rate scheduler step (skipped before the first epoch)
            if lr_scheduler is not None and e != 0:
                lr_scheduler.step()

            # Set model in training mode
            model.train()

            # Stays None if the training set is empty
            loss = None

            # Iterate over minibatches; slicing tolerates a short last batch
            for b in range(0, train_input.size(0), mini_batch_size):
                batch_input = train_input[b:b + mini_batch_size]
                batch_target = train_target[b:b + mini_batch_size]
                batch_classes = _flatten_pair_classes(train_classes[b:b + mini_batch_size])

                optimizer.zero_grad()

                if model.loss_mode == 'train_with_aux':
                    out, out_aux = model(batch_input)
                    loss = criterion(out, batch_target) + criterion(out_aux, batch_classes)
                elif model.loss_mode == 'train_aux_only':
                    loss = criterion(model(batch_input), batch_classes)
                else:  # 'train_without_aux'
                    loss = criterion(model(batch_input), batch_target)

                loss.backward()
                optimizer.step()

            should_print = verbose and ((e+1) % print_every == 0 or e+1 == nb_epochs)

            # Test error rates are shared by logging and printing
            if writer is not None or should_print:
                error_rate_target_test, error_rate_aux_test = _error_rates(
                    model, test_input, test_target, test_classes)

            # Logging
            if writer is not None:
                error_rate_target_train, error_rate_aux_train = _error_rates(
                    model, train_input, train_target, train_classes)

                if error_rate_target_test is not None:
                    writer.add_scalar('Accuracy/testing', float(1 - error_rate_target_test), e)
                    writer.add_scalar('Accuracy/training', float(1 - error_rate_target_train), e)
                if error_rate_aux_test is not None:
                    writer.add_scalar('Accuracy_aux/testing', float(1 - error_rate_aux_test), e)
                    writer.add_scalar('Accuracy_aux/training', float(1 - error_rate_aux_train), e)
                if loss is not None:
                    # Loss of the last mini-batch of this epoch, indexed by epoch
                    writer.add_scalar('Loss/training', loss.item(), e)

            # Printing
            if should_print:
                if model.loss_mode == 'train_aux_only':
                    print("### Epoch {:3d}: Auxillary error ={:.2f}% ###".format(e+1, error_rate_aux_test*100))
                elif model.loss_mode == 'train_without_aux':
                    print("### Epoch {:3d}: Target error ={:.2f}% ###".format(e+1, error_rate_target_test*100))
                else:  # 'train_with_aux'
                    print("### Epoch {:3d}: Auxillary error ={:.2f}%, Target error ={:.2f}% ###".format(e+1, error_rate_aux_test*100, error_rate_target_test*100))
    finally:
        # Flush and release the TensorBoard writer even if training fails
        if writer is not None:
            writer.close()

    return model

## Let's do it!
### Auxiliary loss only (MNIST classification)

In [6]:
# Model 1: conv net plus auxiliary net, no ClassNet, built in 'train_aux_only' mode
# NOTE(review): in_features=576 presumably matches the flattened SimpleConvNet output - confirm
mode = 'train_aux_only'
classNet = None
convNet = SimpleConvNet()
auxNet = AuxNet(in_features=576)

model1 = CombinedBaseModel(
    ConvNet=convNet,
    AuxNet=auxNet,
    ClassNet=classNet,
    mode=mode,
)

In [7]:
# Fit model1: 25 epochs, constant learning rate 0.1, no TensorBoard logging
model1 = training(
    model1,
    train_input=train_input, train_target=train_target, train_classes=train_classes,
    test_input=test_input, test_target=test_target, test_classes=test_classes,
    lr=0.1,
    schedule_lr=False,
    tb_logging=False,
    nb_epochs=25,
)

### Epoch   2: Auxillary error =78.10% ###
### Epoch   4: Auxillary error =57.00% ###
### Epoch   6: Auxillary error =14.30% ###
### Epoch   8: Auxillary error =43.60% ###
### Epoch  10: Auxillary error =6.35% ###
### Epoch  12: Auxillary error =6.35% ###
### Epoch  14: Auxillary error =6.15% ###
### Epoch  16: Auxillary error =5.85% ###
### Epoch  18: Auxillary error =5.45% ###
### Epoch  20: Auxillary error =5.30% ###
### Epoch  22: Auxillary error =5.50% ###
### Epoch  24: Auxillary error =5.45% ###
### Epoch  25: Auxillary error =5.35% ###


### Target loss only

In [8]:
# Model 2: conv net plus ClassNet, no auxiliary net, built in 'train_without_aux' mode
# NOTE(review): in_features=1152 is twice the 576 used for AuxNet - presumably both digits' features concatenated; confirm
mode = 'train_without_aux'
auxNet = None
convNet = SimpleConvNet()
classNet = ClassNet(in_features=1152)

model2 = CombinedBaseModel(
    ConvNet=convNet,
    AuxNet=auxNet,
    ClassNet=classNet,
    mode=mode,
)

In [9]:
# Fit model2: 25 epochs, constant learning rate 0.1, no TensorBoard logging
model2 = training(
    model2,
    train_input=train_input, train_target=train_target, train_classes=train_classes,
    test_input=test_input, test_target=test_target, test_classes=test_classes,
    lr=0.1,
    schedule_lr=False,
    tb_logging=False,
    nb_epochs=25,
)

### Epoch   2: Target error =32.00% ###
### Epoch   4: Target error =54.00% ###
### Epoch   6: Target error =25.20% ###
### Epoch   8: Target error =25.00% ###
### Epoch  10: Target error =17.90% ###
### Epoch  12: Target error =30.50% ###
### Epoch  14: Target error =19.50% ###
### Epoch  16: Target error =18.70% ###
### Epoch  18: Target error =18.40% ###
### Epoch  20: Target error =18.40% ###
### Epoch  22: Target error =18.90% ###
### Epoch  24: Target error =15.40% ###
### Epoch  25: Target error =16.90% ###


### Using auxiliary and target loss

In [10]:
# Model 3: conv net with both the auxiliary net and ClassNet, built in 'train_with_aux' mode
mode = 'train_with_aux'
convNet = SimpleConvNet()
auxNet = AuxNet(in_features=576)
classNet = ClassNet(in_features=1152)

model3 = CombinedBaseModel(
    ConvNet=convNet,
    AuxNet=auxNet,
    ClassNet=classNet,
    mode=mode,
)

In [11]:
# Fit model3: 25 epochs, constant learning rate 0.1, no TensorBoard logging
model3 = training(
    model3,
    train_input=train_input, train_target=train_target, train_classes=train_classes,
    test_input=test_input, test_target=test_target, test_classes=test_classes,
    lr=0.1,
    schedule_lr=False,
    tb_logging=False,
    nb_epochs=25,
)

### Epoch   2: Auxillary error =75.25%, Target error =37.50% ###
### Epoch   4: Auxillary error =30.00%, Target error =27.30% ###
### Epoch   6: Auxillary error =39.25%, Target error =26.70% ###
### Epoch   8: Auxillary error =29.65%, Target error =18.40% ###
### Epoch  10: Auxillary error =9.25%, Target error =17.40% ###
### Epoch  12: Auxillary error =9.35%, Target error =15.30% ###
### Epoch  14: Auxillary error =9.65%, Target error =16.00% ###
### Epoch  16: Auxillary error =7.15%, Target error =13.80% ###
### Epoch  18: Auxillary error =8.80%, Target error =13.20% ###
### Epoch  20: Auxillary error =10.60%, Target error =18.90% ###
### Epoch  22: Auxillary error =6.25%, Target error =13.20% ###
### Epoch  24: Auxillary error =6.90%, Target error =13.70% ###
### Epoch  25: Auxillary error =6.30%, Target error =13.00% ###


### First training the auxiliary network, then the target network

In [12]:
# Two-stage experiment: all three sub-networks are built, the model starts in
# 'train_aux_only' mode and is switched to 'train_without_aux' afterwards.
# NOTE(review): this rebinds `model3`, replacing the jointly trained model above.
mode = 'train_aux_only'
convNet = SimpleConvNet()
auxNet = AuxNet(in_features=576)
classNet = ClassNet(in_features=1152)

model3 = CombinedBaseModel(
    ConvNet=convNet,
    AuxNet=auxNet,
    ClassNet=classNet,
    mode=mode,
)

# Stage 1: 10 epochs on the auxiliary loss
print("Training auxillary network:")
model3 = training(
    model3,
    train_input=train_input, train_target=train_target, train_classes=train_classes,
    test_input=test_input, test_target=test_target, test_classes=test_classes,
    lr=0.1,
    schedule_lr=False,
    tb_logging=False,
    nb_epochs=10,
)

# Switch the same model over to the target loss
mode = 'train_without_aux'
model3.loss_mode = mode

# Stage 2: 15 epochs on the target loss
print("Training without auxillary network:")
model3 = training(
    model3,
    train_input=train_input, train_target=train_target, train_classes=train_classes,
    test_input=test_input, test_target=test_target, test_classes=test_classes,
    lr=0.1,
    schedule_lr=False,
    tb_logging=False,
    nb_epochs=15,
)

Training auxillary network:
### Epoch   1: Auxillary error =50.40% ###
### Epoch   2: Auxillary error =87.95% ###
### Epoch   3: Auxillary error =51.75% ###
### Epoch   4: Auxillary error =44.70% ###
### Epoch   5: Auxillary error =53.50% ###
### Epoch   6: Auxillary error =23.05% ###
### Epoch   7: Auxillary error =15.95% ###
### Epoch   8: Auxillary error =14.65% ###
### Epoch   9: Auxillary error =9.50% ###
### Epoch  10: Auxillary error =9.75% ###
Training without auxillary network:
### Epoch   1: Target error =27.40% ###
### Epoch   2: Target error =22.40% ###
### Epoch   3: Target error =21.40% ###
### Epoch   4: Target error =19.00% ###
### Epoch   5: Target error =19.40% ###
### Epoch   6: Target error =17.80% ###
### Epoch   7: Target error =16.20% ###
### Epoch   8: Target error =21.10% ###
### Epoch   9: Target error =15.90% ###
### Epoch  10: Target error =15.60% ###
### Epoch  11: Target error =15.90% ###
### Epoch  12: Target error =15.40% ###
### Epoch  13: Target error 