# Practical Session 4

In [1]:
# Imports
import torch
from torch import nn
from torch.nn import functional as F

import dlc_practical_prologue as prologue

In [2]:
# Load data
# One-hot targets are requested because the networks below are trained with an MSE loss
# against the 10-way output; inputs stay unflattened so they can feed the conv layers.
train_input, train_target, test_input, test_target = \
    prologue.load_data(one_hot_labels = True, normalize = True, flatten = False, full=False)

# Select the compute device.
# `torch.cuda.is_available` has to be *called*: the bare function object is always truthy,
# which previously forced CUDA even on machines without a GPU. Defining `device`
# unconditionally also guarantees later cells (`Net().to(device)`) work on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move all tensors to the selected device (a no-op when they already live there)
train_input = train_input.to(device)
train_target = train_target.to(device)
test_input = test_input.to(device)
test_target = test_target.to(device)

* Using MNIST
  * Reduce the dataset (use `--full` for the full thing)
  * Use 1000 train and 1000 test samples


## Training function

Re-organize the code to define and use a function

`train_model(model, train_input, train_target, mini_batch_size)`

In [3]:
class Net(nn.Module):
    """Small convolutional classifier: two conv/pool stages followed by two linear layers.

    `forward` flattens the output of the second pooling stage with `view(-1, 256)`, so the
    input must yield 256 features per sample at that point (e.g. 1x28x28 images:
    28 -> 24 -> 8 -> 4 -> 2 spatially, times 64 channels).

    Args:
        hidden_units: width of the hidden fully connected layer.

    Training hyperparameters are stored as attributes and may be overridden after
    construction:
        eta: step size of the plain gradient-descent update.
        nb_epochs: number of passes over the training set.
        criterion: loss module (MSE), so targets must have the same shape as the output.
    """

    def __init__(self, hidden_units=200):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.fc1 = nn.Linear(256, hidden_units)
        self.fc2 = nn.Linear(hidden_units, 10)
        # Hyperparameters
        self.eta = 1e-1
        self.nb_epochs = 25
        self.criterion = nn.MSELoss()

    def forward(self, x):
        """Return the 10 raw output scores for each sample in `x`."""
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=3, stride=3))
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2, stride=2))
        x = F.relu(self.fc1(x.view(-1, 256)))
        x = self.fc2(x)
        return x

    def train_model(self, train_input, train_target, mini_batch_size, verbose=False):
        """Train this network in place with mini-batch gradient descent.

        Args:
            train_input: training samples, indexed along dimension 0.
            train_target: targets aligned with `train_input` along dimension 0, with the
                same per-sample shape as the network output (required by the MSE loss).
            mini_batch_size: number of samples per update; the last batch of an epoch may
                be smaller when the sample count is not a multiple of it.
            verbose: when True, print the epoch index and the accumulated loss per epoch.
        """
        nb_samples = train_input.size(0)
        # Iterate over epochs
        for e in range(self.nb_epochs):
            acc_loss = 0
            # Iterate over mini-batches
            for b in range(0, nb_samples, mini_batch_size):
                # Slicing clamps at the end of the tensor, so a trailing partial batch is
                # handled instead of raising as `narrow` would.
                batch_input = train_input[b:b + mini_batch_size]
                batch_target = train_target[b:b + mini_batch_size]

                output = self(batch_input)
                loss = self.criterion(output, batch_target)
                acc_loss = acc_loss + loss.item()

                # Operate on *this* instance, not on whatever the global `model` happens
                # to be when the method runs.
                self.zero_grad()
                loss.backward()
                with torch.no_grad():
                    for p in self.parameters():
                        # Parameters that did not take part in the loss have no gradient
                        if p.grad is not None:
                            p -= self.eta * p.grad
            if verbose:
                print(e, acc_loss)

In [4]:
# Build the default network, place it on the selected device, then train it
# with mini-batches of 100 samples.
model = Net()
model = model.to(device)

model.train_model(train_input, train_target, mini_batch_size=100)

## Test error

Write and test a function  
`compute_nb_errors(model, input, target, mini_batch_size)`  
To compute the number of prediction mistakes using a "winner-take-all" rule, that is the class with
the largest output is the predicted one.  

Run the training and test ten times, record the test error rates.  
With 25 epochs for training, the test error should be around 10% with the small sets, and around
0.7% with the full ones.

In [5]:
def compute_nb_errors(self, input, target, mini_batch_size):
    """Return the misclassification rate of `self` on `input` (winner-take-all rule).

    The predicted class of a sample is the index of its largest output; the true class is
    the index of the largest entry of its target row.

    Note: despite the name, the result is the *fraction* of misclassified samples
    (errors / number of samples), not a count -- callers multiply it by 100 to print a
    percentage, so that contract is kept.

    Args:
        self: the model to evaluate (callable on a batch of inputs). The function is
            written so it can be bound to a model instance as a method.
        input: samples to classify, indexed along dimension 0.
        target: per-sample class scores (e.g. one-hot rows) aligned with `input`.
        mini_batch_size: number of samples forwarded through the model at a time.

    Returns:
        0-dim tensor holding the error rate.
    """
    nb_errors = torch.zeros((), dtype=torch.long, device=target.device)
    # Evaluation only: no autograd graph is needed
    with torch.no_grad():
        for b in range(0, input.size(0), mini_batch_size):
            # Slicing clamps at the end, so a trailing partial batch is handled
            y_predicted = torch.argmax(self(input[b:b + mini_batch_size]), dim=1)
            y = torch.argmax(target[b:b + mini_batch_size], dim=1)
            # Compare predicted labels with true targets
            nb_errors += (y_predicted != y).sum()
    return nb_errors / target.size(0)

# Bind the helper to this particular model instance so it can be called as a method
bound_compute_nb_errors = compute_nb_errors.__get__(model)
model.compute_nb_errors = bound_compute_nb_errors

# Evaluate the trained model on the test set and report the error as a percentage
error = model.compute_nb_errors(test_input, test_target, mini_batch_size=100)
print("Test error: {:.2f}%".format(error * 100))

Test error: 11.60%


## Influence of the number of hidden units

In the default network, the number of hidden units is 200.  
Modify the class constructor to take a parameter for that value, and run the training and compute the
test error for 10, 50, 200, 500, and 1,000 hidden units.

In [6]:
# Sweep over the width of the hidden fully connected layer
hidden_units = [10, 50, 200, 500, 1000, 10000]
for n_hidden in hidden_units:
    # Fresh network for every width, trained for 100 epochs
    model = Net(hidden_units=n_hidden)
    model = model.to(device)
    model.nb_epochs = 100
    model.train_model(train_input, train_target, mini_batch_size=100)

    # Bind the evaluation helper to this instance, then measure the test error
    model.compute_nb_errors = compute_nb_errors.__get__(model)
    error = model.compute_nb_errors(test_input, test_target, mini_batch_size=100)
    print("Test error using {:d} hidden units: {:.2f}%".format(n_hidden, error * 100))

Test error using 10 hidden units: 12.40%
Test error using 50 hidden units: 6.50%
Test error using 200 hidden units: 7.40%
Test error using 500 hidden units: 5.20%
Test error using 1000 hidden units: 5.30%
Test error using 10000 hidden units: 39.50%


## Three convolutional layers

Write a new class Net2 with three convolutional layers. Pick the structure you want.

In [9]:
class Net2(Net):
    """Variant of `Net` with three conv/pool stages ahead of the two linear layers."""

    def __init__(self):
        # The parent constructor provides the training hyperparameters (and its own
        # layers); the layers are then (re)assigned for the three-convolution layout.
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=2)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=256, kernel_size=2)
        self.fc1 = nn.Linear(in_features=256, out_features=200)
        self.fc2 = nn.Linear(in_features=200, out_features=10)

    def forward(self, x):
        """Return the 10 raw output scores for each sample in `x`."""
        # Each stage is conv -> max-pool (kernel == stride) -> ReLU
        stages = ((self.conv1, 3), (self.conv2, 2), (self.conv3, 2))
        for conv, pool in stages:
            x = F.relu(F.max_pool2d(conv(x), kernel_size=pool, stride=pool))
        # Flatten to 256 features per sample before the fully connected part
        hidden = F.relu(self.fc1(x.view(-1, 256)))
        return self.fc2(hidden)

    def compute_nb_errors(self, input, target, mini_batch_size):
        """Delegate to the module-level `compute_nb_errors` helper, passing this network."""
        error = compute_nb_errors(self, input, target, mini_batch_size)
        return error

In [12]:
# Build the three-convolution network on the selected device; train for 100 epochs
model = Net2()
model = model.to(device)
model.nb_epochs = 100

# Fit on the training set with mini-batches of 100 samples
model.train_model(train_input, train_target, mini_batch_size=100)

# Report the test error as a percentage
error = model.compute_nb_errors(test_input, test_target, mini_batch_size=100)
print("Test error: {:.2f}%".format(error * 100))

Test error: 11.50%
