Exercise 2

In [13]:
%matplotlib inline
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms, datasets, models
from torch.utils.data import DataLoader
import copy

# Making the code device-agnostic
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# PyTorch TensorBoard support
from torch.utils.tensorboard import SummaryWriter

## 1.1.2 Transfer Learning from ImageNet

In [15]:
# Train augmentations
train_test_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

cifar10_train = datasets.CIFAR10(root='D:/Dev/auto_download_data/D7047E/CIFAR10',
                                             train=True,
                                             download=True,
                                             transform=train_test_transform)
cifar10_test = datasets.CIFAR10(root='D:/Dev/auto_download_data/D7047E/CIFAR10',
                                            train=False,
                                            download=True,
                                            transform=train_test_transform)

Files already downloaded and verified
Files already downloaded and verified


In [16]:
# Hyperparameters
BATCH_SIZE = 100
SHUFFLE = True
LEARNING_RATE = 0.0001
epochs = 1

# Train 80 %, Validation 20 % of test set
train_set, val_set = torch.utils.data.random_split(cifar10_train, [0.8, 0.2], generator=torch.Generator())

train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=SHUFFLE)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=SHUFFLE)
test_loader = DataLoader(cifar10_test, batch_size=BATCH_SIZE, shuffle=SHUFFLE)

In [17]:
def train_model(model, criterion, optimizer, train_loader, val_loader, num_epochs):
       
    train_losses = []
    train_accuracies = []
    val_losses = []
    val_accuracies = []
    
    #tb = SummaryWriter()
    
    best_val_loss = 999999
    
    for epoch in range(num_epochs):
        
        train_loss_sum = 0
        train_correct = 0
        train_total = 0
        
        for batch_nr, (inputs, labels) in enumerate(train_loader):
            
            # Convert to one hot tensors
            labels = F.one_hot(labels, 10).float()

            # In case of GPU used:
            if device == 'cuda':
                inputs = inputs.to(device)
                labels = labels.to(device)

            # Forward pass
            predictions = model(inputs)

            # Calculate loss
            loss = criterion(predictions, labels)
            train_loss = loss.item() * inputs.size(0)
            train_loss_sum += loss.item() * inputs.size(0)

            # Calculate accuracy
            for i, _ in enumerate(predictions):
                if torch.argmax(labels[i]) == torch.argmax(predictions[i]):
                    train_correct += 1
                train_total += 1

            # Backpropagation
            loss.backward()

            # Update parameters
            optimizer.step()
            
            # Clear
            optimizer.zero_grad()
                       
            # Print the epoch and loss
            print('\r', f'Epoch {epoch+1} - Train loss: {train_loss_sum / train_total} - Accuracy: {train_correct / train_total:.2f}', end='')
            
            # Add to tensorboard
            #tb.add_scalar("Loss", train_loss, batch_nr)
            #tb.add_scalar("Accuracy", train_correct / train_total, batch_nr)

            #tb.add_histogram("conv1.bias", model.conv1.bias, batch_nr)
            #tb.add_histogram("conv1.weight", model.conv1.weight, batch_nr)
            #tb.add_histogram("conv2.bias", model.conv2.bias, batch_nr)
            #tb.add_histogram("conv2.weight", model.conv2.weight, batch_nr)
                
        # Add the loss to the total epoch loss (item() turns a PyTorch scalar into a normal Python datatype)
        train_losses.append(train_loss_sum / train_total)
        train_accuracies.append(train_correct / train_total)
                
        # Print the epoch and loss
        #print(f'Epoch {epoch+1} - Train loss: {train_loss_sum / train_total} - Accuracy: {train_correct / train_total:.2f}')
        print('')
        
        val_loss_sum = 0
        val_correct = 0
        val_total = 0
        
        for batch_nr, (inputs, labels) in enumerate(val_loader):
            
            # Convert to one hot tensors
            labels = F.one_hot(labels, 10).float()

            # In case of GPU used:
            if device == 'cuda':
                inputs = inputs.to(device)
                labels = labels.to(device)

            # Forward pass
            predictions = model(inputs)

            # Calculate loss
            loss = criterion(predictions, labels)
            val_loss_sum += loss.item() * inputs.size(0)

            # Calculate accuracy
            for i, _ in enumerate(predictions):
                if torch.argmax(labels[i]) == torch.argmax(predictions[i]):
                    val_correct += 1
                val_total += 1
                        
        # Add the loss to the total epoch loss (item() turns a PyTorch scalar into a normal Python datatype)
        val_losses.append(val_loss_sum / val_total)
        val_accuracies.append(val_correct / val_total)
        
        # Save best model
        if val_loss_sum < best_val_loss:
            best_model = copy.deepcopy(model)
            best_val_loss = val_loss_sum
            print(f'New best model in epoch {epoch}!')
                
        # Print the epoch and loss
        print(f'Epoch {epoch+1} - Validation loss: {val_loss_sum / val_total} - Accuracy: {val_correct / val_total:.2f}')
        print('')
        
    #tb.close()
    
    return best_model


def test_model(model, criterion, test_loader):
       
    test_losses = []
    y_pred = []
    y_true = []
        
    test_loss_sum = 0
    test_correct = 0
    test_total = 0

    for batch_nr, (inputs, labels) in enumerate(test_loader):

        # Convert to one hot tensors
        labels = F.one_hot(labels, 10).float()
        
        for label in labels:
            y_true.append(torch.argmax(label)) # Save Truth

        # In case of GPU used:
        if device == 'cuda':
            inputs = inputs.to(device)
            labels = labels.to(device)

        # Forward pass
        predictions = model(inputs)
        
        for prediction in predictions:
            y_pred.append(torch.argmax(prediction)) # Save Prediction

        # Calculate loss
        loss = criterion(predictions, labels)
        test_loss_sum += loss.item() * inputs.size(0)

        # Calculate accuracy
        for i, _ in enumerate(predictions):
            if torch.argmax(labels[i]) == torch.argmax(predictions[i]):
                test_correct += 1
            test_total += 1

    # Add the loss to the total epoch loss (item() turns a PyTorch scalar into a normal Python datatype)
    test_losses.append(test_loss_sum / test_total)

    # Print the epoch and loss
    print(f'Test loss: {test_loss_sum / test_total} - Accuracy: {test_correct / test_total:.2f}')
       
    return

In [18]:
# Fine tuning
#alexnet = torch.hub.load('pytorch/vision:v0.10.0', 'alexnet', pretrained=True)
alexnet = models.alexnet(weights='DEFAULT')

# Add extra output layer
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(1000, 10)

    def forward(self, x):
        # Implement the forward function in the network
        x = F.relu(self.fc1(x))
        return x


extra_layer = Net()
    
model_ft = nn.Sequential(alexnet, extra_layer)
# In case of GPU used:
if device == 'cuda':
    model_ft = model_ft.to(device)

# Define our loss function
criterion = nn.CrossEntropyLoss()

# Define our optimizer
optimizer = torch.optim.SGD(model_ft.parameters(), LEARNING_RATE)

# Train the model
trained_model = train_model(model_ft, criterion, optimizer, train_loader, val_loader, epochs)

# Test the model
tested_model = test_model(trained_model, criterion, test_loader)

 Epoch 1 - Train loss: 2.247556603550911 - Accuracy: 0.199
New best model in epoch 0!
Epoch 1 - Validation loss: 2.1707798171043398 - Accuracy: 0.23

Test loss: 2.1762973260879517 - Accuracy: 0.22


In [19]:
# Feature Extraction
#alexnet = torch.hub.load('pytorch/vision:v0.10.0', 'alexnet', pretrained=True)
alexnet = models.alexnet(weights='DEFAULT')

# Freeze all layers of the alexnet model
for param in alexnet.parameters():
    param.requires_grad = False

# Add extra output layer
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(1000, 10)

    def forward(self, x):
        # Implement the forward function in the network
        x = F.relu(self.fc1(x))
        return x


extra_layer = Net()

model_fe = nn.Sequential(alexnet, extra_layer)
# In case of GPU used:
if device == 'cuda':
    model_fe = model_fe.to(device)

# Define our loss function
criterion = nn.CrossEntropyLoss()

# Define our optimizer
optimizer = torch.optim.SGD(model_fe.parameters(), LEARNING_RATE)

# Train the model
trained_model = train_model(model_fe, criterion, optimizer, train_loader, val_loader, epochs)

# Test the model
tested_model = test_model(trained_model, criterion, test_loader)


 Epoch 1 - Train loss: 1.9882444095611573 - Accuracy: 0.33
New best model in epoch 0!
Epoch 1 - Validation loss: 1.6334615492820739 - Accuracy: 0.43

Test loss: 1.6302244079113006 - Accuracy: 0.44


Fine tuning is retraining the model from some default weights.
Feature extraction is using default weights but not updating them at all. Omly weight updates are for the output layer we added ourselves.
This means Feature extraction should be faster, but fine tuning more accurate.

In [20]:
# Train augmentations
train_test_transform = transforms.Compose([
    transforms.ToTensor(),
])

mnist_train = datasets.MNIST(root='D:/Dev/auto_download_data/D7047E/MNIST',
                                             train=True,
                                             download=True,
                                             transform=train_test_transform)
mnist_test = datasets.MNIST(root='D:/Dev/auto_download_data/D7047E/MNIST',
                                            train=False,
                                            download=True,
                                            transform=train_test_transform)

## 1.1.2 Transfer Learning with MNIST

In [21]:
# Hyperparameters
BATCH_SIZE = 100
SHUFFLE = True
LEARNING_RATE = 0.01
epochs = 3

# Train 80 %, Validation 20 % of test set
train_set, val_set = torch.utils.data.random_split(mnist_train, [0.8, 0.2], generator=torch.Generator())

train_loader_mnist = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=SHUFFLE)
val_loader_mnist = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=SHUFFLE)
test_loader_mnist = DataLoader(mnist_test, batch_size=BATCH_SIZE, shuffle=SHUFFLE)

In [38]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.fc1 = nn.Linear(64 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


model_mnist = Net()
# In case of GPU used:
if device == 'cuda':
    model_mnist = model_mnist.to(device)

# Define our loss function
criterion = nn.CrossEntropyLoss()

# Define our optimizer
#optimizer = torch.optim.Adam(model_mnist.parameters(), LEARNING_RATE)
optimizer_mnist = torch.optim.SGD(model_mnist.parameters(), LEARNING_RATE)

# Train the model
trained_model_mnist = train_model(model_mnist, criterion, optimizer_mnist, train_loader_mnist, val_loader_mnist, epochs)

# Test the model
test_model(trained_model_mnist, criterion, test_loader_mnist)

 Epoch 1 - Train loss: 2.2783682882785796 - Accuracy: 0.26
New best model in epoch 0!
Epoch 1 - Validation loss: 2.223879504203796 - Accuracy: 0.49

 Epoch 2 - Train loss: 1.31750753428787 - Accuracy: 0.7333
New best model in epoch 1!
Epoch 2 - Validation loss: 0.511066680898269 - Accuracy: 0.85

 Epoch 3 - Train loss: 0.3959876643648992 - Accuracy: 0.888
New best model in epoch 2!
Epoch 3 - Validation loss: 0.33624666146934035 - Accuracy: 0.90

Test loss: 0.3083732095360756 - Accuracy: 0.91


In [24]:
# Train augmentations
train_test_transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize(28),
    transforms.ToTensor(),
])

svhn_train = datasets.SVHN(root='D:/Dev/auto_download_data/D7047E/SVHN',
                                             split='train',
                                             download=True,
                                             transform=train_test_transform)
svhn_test = datasets.SVHN(root='D:/Dev/auto_download_data/D7047E/SVHN',
                                            split='test',
                                            download=True,
                                            transform=train_test_transform)

# Train 80 %, Validation 20 % of test set
train_set, val_set = torch.utils.data.random_split(svhn_train, [0.8, 0.2], generator=torch.Generator())

train_loader_svhn = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=SHUFFLE)
val_loader_svhn = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=SHUFFLE)
test_loader_svhn = DataLoader(svhn_test, batch_size=BATCH_SIZE, shuffle=SHUFFLE)

Using downloaded and verified file: D:/Dev/auto_download_data/D7047E/SVHN\train_32x32.mat
Using downloaded and verified file: D:/Dev/auto_download_data/D7047E/SVHN\test_32x32.mat


In [37]:
# Train model from scratch with SVHN
model_svhn = Net()
# In case of GPU used:
if device == 'cuda':
    model_svhn = model_svhn.to(device)

# Define our loss function
criterion = nn.CrossEntropyLoss()

# Define our optimizer
#optimizer = torch.optim.Adam(model_svhn.parameters(), LEARNING_RATE)
optimizer_svhn = torch.optim.SGD(model_svhn.parameters(), LEARNING_RATE)

# Train the model
trained_model_svhn = train_model(model_svhn, criterion, optimizer_svhn, train_loader_svhn, val_loader_svhn, epochs)

# Test the model
test_model(trained_model_svhn, criterion, test_loader_svhn)

 Epoch 1 - Train loss: 2.306025738985112 - Accuracy: 0.155
New best model in epoch 0!
Epoch 1 - Validation loss: 2.3058769437170104 - Accuracy: 0.14

 Epoch 2 - Train loss: 2.306025737325303 - Accuracy: 0.155
Epoch 2 - Validation loss: 2.3058769501449223 - Accuracy: 0.14

 Epoch 3 - Train loss: 2.306025733842958 - Accuracy: 0.155
New best model in epoch 2!
Epoch 3 - Validation loss: 2.3058769370612735 - Accuracy: 0.14

Test loss: 2.3053753221775026 - Accuracy: 0.16


In [33]:
# Test the MNIST model for SVHN data

# Test the model
test_model(trained_model_mnist, criterion, test_loader_svhn)

Test loss: 5.06914457215366 - Accuracy: 0.12


In [36]:
# Transfer learning from MNIST to SVHN

# Define our optimizer
optimizer_svhn = torch.optim.Adam(model_svhn.parameters(), LEARNING_RATE)
#optimizer = torch.optim.SGD(model_mnist.parameters(), LEARNING_RATE)

# Train the model
trained_model_svhn = train_model(trained_model_mnist, criterion, optimizer_mnist, train_loader_svhn, val_loader_svhn, epochs)

# Test the model
test_model(trained_model_svhn, criterion, test_loader_svhn)

 Epoch 1 - Train loss: 5.308376311604953 - Accuracy: 0.111
New best model in epoch 0!
Epoch 1 - Validation loss: 5.33636589924489 - Accuracy: 0.10

 Epoch 2 - Train loss: 5.3083763145014835 - Accuracy: 0.11
New best model in epoch 1!
Epoch 2 - Validation loss: 5.336365867544706 - Accuracy: 0.10

 Epoch 3 - Train loss: 5.308376311621227 - Accuracy: 0.111
New best model in epoch 2!
Epoch 3 - Validation loss: 5.336365851336604 - Accuracy: 0.10

Test loss: 5.444234856521709 - Accuracy: 0.13


In [28]:
print(f'Is available: {torch.cuda.is_available()}')
print(f'Device count: {torch.cuda.device_count()}')
print(f'Current device: {torch.cuda.current_device()}')
print(f'Device 0: {torch.cuda.device(0)}')
print(f'Get device name 0: {torch.cuda.get_device_name(0)}')

Is available: True
Device count: 1
Current device: 0
Device 0: <torch.cuda.device object at 0x000002441C9E1120>
Get device name 0: NVIDIA GeForce GTX 1070
