In [1]:
import torch
from dlc_practical_prologue import generate_pair_sets
import torch.nn as nn

## 1. Generate raw data, process it

In [2]:
train_input, train_target, train_classes, test_input, test_target, test_classes = generate_pair_sets(1000)

In [3]:
def normalize_data(tensor):
    mu, std = tensor.mean(), tensor.std()
    tmp = tensor.sub(mu).div(std)
    return tmp

In [4]:
def convert_to_one_hot_labels(target):
    tmp = target.new_zeros(target.size(0), target.max() + 1)
    tmp.scatter_(1, target.view(-1, 1), 1.0)
    return tmp

_We normalized the data so it has mean 0 and std 1._

In [5]:
train_input = normalize_data(train_input)


test_input = normalize_data(test_input)

In [6]:
print("Training set mean = {a}".format(a = train_input.mean().item()))
print("Training set std = {s}\n".format(s = train_input.std().item()))
print("Test set mean = {a}".format(a = test_input.mean().item()))
print("Test set std = {s}".format(s = test_input.std().item()))

Training set mean = -1.1463554550061872e-08
Training set std = 1.0

Test set mean = 2.992591134898248e-07
Test set std = 0.9999999403953552


_We will create a validation set to tune hyperparameters. This validation set is created from the training set in order to have fully independent testing data._

_80% of the training data goes to training and the remaining 20% for our validation set._

In [7]:
index_permutation = torch.randperm(train_input.size(0))

validation_proportion = 0.2
split = int(0.2 * train_input.size(0))

validation_index = index_permutation[:split]
training_index = index_permutation[split:]

validation_input = train_input[validation_index]
validation_target = train_target[validation_index]
validation_classes = train_classes[validation_index]

train_input = train_input[training_index]
train_target = train_target[training_index]
train_classes = train_classes[training_index]

## 2. Creating & Training models

In [8]:
import models
import torch.nn.functional as F

In [18]:
BATCH_SIZE = 64

In [19]:
basic_model = models.BaselineNetwork()

In [20]:
def train_model(model, train_input, train_target, train_classes, nb_epoch, batch_size, optimizer_params):
    nb_epoch, batch_size = nb_epoch, batch_size
    lr, momentum = optimizer_params['lr'], optimizer_params['momentum']
    
    optimizer = torch.optim.SGD(model.parameters(), lr = lr, momentum = momentum)
    criterion = nn.CrossEntropyLoss()
    
    for e in range(nb_epoch):
        for inputs, targets in zip(train_input.split(batch_size),
                                  train_target.split(batch_size)):
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

In [21]:
train_model(basic_model, train_input, train_target, train_classes, 50, BATCH_SIZE, {'lr': 0.01, 'momentum':0.9})

In [26]:
def test(test_input, test_target, test_classes, model, criterion):
    
    with torch.no_grad():
        nb_data_errors = 0
        loss_sum = 0
        
        for inputs, targets in zip(test_input.split(BATCH_SIZE),
                                  test_target.split(BATCH_SIZE)):
            
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss_sum += loss
            _, predicted_classes = torch.max(outputs, 1)
            
            for k in range(len(inputs)):
                if targets[k] != predicted_classes[k]:
                    nb_data_errors = nb_data_errors + 1

        accuracy = (1 - (nb_data_errors / test_input.size(0))) * 100

        
        return accuracy, loss_sum.item()

In [27]:
test(test_input, test_target, test_classes, basic_model, nn.CrossEntropyLoss())

(78.9, 8.051664352416992)

In [30]:
learning_rates = [0.001, 0.01] #, 0.1]
momentums = [0.9] #[0.5, 0.7, 0.9]
nb_epochs = [20, 50] #, 100]

best_accuracy = 0
best_params = {}

for lr in learning_rates:
    for momentum in momentums:
        for nb_epoch in nb_epochs:
            
                # creating params for optimizer
                optimizer_params = {'lr':lr, 'momentum':momentum}
                
                # initialize raw model
                model = models.BaselineNetwork()
                
                # train model on training data
                train_model(model, train_input, train_target, train_classes, nb_epoch, BATCH_SIZE, optimizer_params)
                
                # compute accuracy on validation data
                accuracy, loss = test(validation_input, validation_target, validation_classes, model, nn.CrossEntropyLoss())
                            
                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    best_params['lr'] = lr
                    best_params['momentum'] = momentum
                    best_params['nb_epoch'] = nb_epoch
                    
print("Best accuracy obtained = {a}\n".format(a = best_accuracy))
print("with the following hyperparameters:\n")
print(best_params)

Best accuracy obtained = 76.5

with the following hyperparameters:

{'lr': 0.01, 'momentum': 0.9, 'nb_epoch': 50}


## 3. Testing model

_In order to test the model we will generate new data (training and test set), retrained the model on the new data en evaluate it on the new test set. We will do this process more than 10 times and estimates the mean accuracy as well as its standard deviation._

In [35]:
best_optimizer_params = {'lr': 0.01, 'momentum': 0.9}
best_nb_epoch = 50

In [36]:
nb_rounds = 10
test_model = models.BaselineNetwork

In [41]:
def evaluate_model(model, nb_rounds, criterion):
    
    accuracies = []
    
    for round in range(nb_rounds):
        
        # initialize new model
        model_evaluated = model()
        # generate new data
        train_input, train_target, train_classes, test_input, test_target, test_classes = generate_pair_sets(1000)
        train_input = normalize_data(train_input)
        test_input = normalize_data(test_input)
        
        train_model(model_evaluated, train_input, train_target, train_classes, best_nb_epoch, BATCH_SIZE, best_optimizer_params)
        
        accuracy, loss = test(test_input, test_target, test_classes, model_evaluated, criterion)
        
        print("Round {i}: accuracy = {a}% | loss = {l}".format(i = (round + 1), a = accuracy, l = loss))
        
        accuracies.append(accuracy)
        
    return torch.FloatTensor(accuracies)

In [42]:
accuracies = evaluate_model(test_model, nb_rounds, nn.CrossEntropyLoss())

Round 1: accuracy = 80.3% | loss = 9.005359649658203
Round 2: accuracy = 80.89999999999999% | loss = 7.677920341491699
Round 3: accuracy = 79.80000000000001% | loss = 9.256414413452148
Round 4: accuracy = 77.8% | loss = 9.148301124572754
Round 5: accuracy = 80.1% | loss = 8.6519136428833
Round 6: accuracy = 78.10000000000001% | loss = 8.843724250793457
Round 7: accuracy = 78.0% | loss = 9.749249458312988
Round 8: accuracy = 81.1% | loss = 7.922359466552734
Round 9: accuracy = 80.3% | loss = 7.184577941894531
Round 10: accuracy = 82.2% | loss = 8.34280776977539


In [43]:
print("The mean accuracy is: {a}".format(a = accuracies.mean()))
print("The accuracy std is: {s}".format(s = accuracies.std()))

The mean accuracy is: 79.86000061035156
The accuracy std is: 1.4660598039627075
