In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision

# **Parameter initialization and data preparation**

In [None]:
# Parameters
batch_size = 4
num_epochs = 3
# Use the first GPU when PyTorch can see one; otherwise fall back to the CPU
# so the notebook still runs on machines without CUDA.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
num_classes = 10
# Seed PyTorch's RNG so shuffling and weight initialization are repeatable
# after "Restart Kernel -> Run All".
seed = 0
torch.manual_seed(seed)

# Load dataset
# Per-channel mean/std are given as one-element tuples. Note that `(0.5)` is
# just the float 0.5; the trailing comma is what makes it a tuple.
transform = torchvision.transforms.Compose(
    [torchvision.transforms.ToTensor(),
     torchvision.transforms.Normalize((0.5,), (0.5,))])

trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                        download=True, transform=transform)
testset = torchvision.datasets.MNIST(root='./data', train=False,
                                       download=True, transform=transform)
classes = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')

print(trainset)

# Create dataloaders
# Only the training loader shuffles; the test loader keeps a fixed order.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False)

In [None]:
def imshow(img):
    """Display a normalized image tensor with matplotlib.

    Args:
        img: Image tensor laid out as (channels, height, width), e.g. the
            output of ``torchvision.utils.make_grid``. The values are
            rescaled with ``img / 2 + 0.5`` before plotting, which undoes a
            normalization with mean 0.5 and std 0.5.
    """
    img = img / 2 + 0.5     # unnormalize to show images correctly
    # detach() and cpu() let the conversion succeed for tensors that track
    # gradients or live on a GPU; for plain CPU tensors they change nothing.
    npimg = img.detach().cpu().numpy()
    # Move the channel axis last: (C, H, W) -> (H, W, C)
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

In [None]:
# Print some samples of dataset as a sanity check

# Get some random training images (one batch from the shuffled train loader)
dataiter = iter(trainloader)
example_images, example_labels = next(dataiter)

print(example_images.shape)

# Show images
imshow(torchvision.utils.make_grid(example_images))
# Print labels
# Iterate over the labels actually returned rather than range(batch_size), so
# a batch holding fewer than batch_size samples cannot index out of range.
print(' '.join('%5s' % classes[label.item()] for label in example_labels))

# **Define Model**

In [None]:
class DenseNet(nn.Module):
    """Fully connected classifier: flatten -> two hidden ReLU layers -> logits.

    Args:
        input_features: Number of values per sample after flattening
            (e.g. 784 for a 1x28x28 image).
        num_classes: Number of output classes, i.e. the size of the logit
            vector produced for each sample.
        hidden_features: Widths of the first and second hidden layers.
    """

    def __init__(self, input_features, num_classes, hidden_features=(128, 64)):
        # Instantiation of layers and creation of trainable parameters
        super().__init__()
        self.flatten = nn.Flatten()
        first_hidden, second_hidden = hidden_features
        self.fc1 = nn.Linear(input_features, first_hidden)
        self.fc2 = nn.Linear(first_hidden, second_hidden)
        self.fc3 = nn.Linear(second_hidden, num_classes)

    def forward(self, x):
        """Map a batch of inputs to unnormalized class scores (logits).

        Args:
            x: Batch tensor; every dimension after the first is flattened,
                and the flattened size must equal ``input_features``.

        Returns:
            Tensor of shape (batch, num_classes). No softmax is applied here.
        """
        # Forward pass: the computations that are done on the input -> output
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

# **Make a training loop**

In [None]:
def compute_run_acc(logits, labels):
    """Count the samples in a batch whose top-scoring class matches the label.

    Args:
        logits: Tensor of class scores; the predicted class is the index of
            the maximum along dimension 1.
        labels: Tensor of target class indices, compared element-wise with
            the predictions.

    Returns:
        The number of correct predictions as a Python number (a count, not a
        ratio).
    """
    predicted = torch.max(logits.detach(), dim=1).indices
    n_correct = (predicted == labels).sum()
    return n_correct.item()

In [None]:
# Instantiate model and optimizer

model = DenseNet(784, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Training loop
for epoch_nr in range(num_epochs):

    # Training mode enables training-only layer behaviour (e.g. dropout),
    # if the model has any.
    model.train()
    running_loss = 0.0
    running_acc = 0.0
    for batch_data, batch_labels in trainloader:

        # Put data on device
        batch_data = batch_data.to(device)
        batch_labels = batch_labels.to(device)

        # Predict and get loss
        logits = model(batch_data)
        loss = criterion(logits, batch_labels)

        # Update model
        optimizer.zero_grad()   # clear gradients left over from the last step
        loss.backward()
        optimizer.step()

        # Keep running statistics
        # The criterion returns the mean loss over the batch; weighting it by
        # the batch size makes the division by the dataset size below yield a
        # true per-sample average.
        running_loss += loss.item() * batch_labels.size(0)
        running_acc += compute_run_acc(logits, batch_labels)

    # Print results
    print('TRAIN: Epoch {} completed | tr_loss: {:.4f} | tr_acc: {:.2f}%'.format(
        epoch_nr, running_loss/len(trainloader.dataset),
        100 * running_acc/len(trainloader.dataset)))

    # Save model (overwritten each epoch, so the file holds the latest weights)
    # NOTE(review): the file name says "cifar" although the data is MNIST;
    # kept unchanged in case something else loads this path.
    torch.save(model.state_dict(), './cifar_net.pth')
print('Finished Training')

# **Get testing accuracy on last model**

In [None]:
# Test loop
running_acc = 0
# Evaluation mode disables training-only layer behaviour (e.g. dropout),
# if the model has any.
model.eval()
with torch.no_grad():
    # Iterate over testloader and get running acc in similar way as in train loop
    for batch_data, batch_labels in testloader:
        # Inputs and labels must be on the same device as the model
        batch_data = batch_data.to(device)
        batch_labels = batch_labels.to(device)

        logits = model(batch_data)
        running_acc += compute_run_acc(logits, batch_labels)

# Print results
print('TEST: test_acc: {:.2f}%'.format( 100 * running_acc/len(testloader.dataset)))

## **Training loop with validation to find 'best' epoch**
As this predefined dataset contains only two partitions (training and test), we'll use the test set as the validation set. Nevertheless, it is good practice to validate on a separate set, distinct from the test set, whenever one is available.

In [None]:
# Instantiate model and optimizer

model = DenseNet(784, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
best_test_acc = 0

# Per-epoch accuracies (in percent), plotted after training
tr_accuracies = np.zeros(num_epochs)
test_accuracies = np.zeros(num_epochs)

for epoch_nr in range(num_epochs):

    # Train model
    model.train()
    running_loss = 0.0
    running_acc = 0.0
    for batch_data, batch_labels in trainloader:
        batch_data = batch_data.to(device)
        batch_labels = batch_labels.to(device)

        logits = model(batch_data)
        loss = criterion(logits, batch_labels)

        optimizer.zero_grad()   # clear gradients left over from the last step
        loss.backward()
        optimizer.step()

        # The criterion returns the batch mean; weight it by the batch size
        # so dividing by the dataset size below gives a per-sample average.
        running_loss += loss.item() * batch_labels.size(0)
        running_acc += compute_run_acc(logits, batch_labels)

    # Print train results
    tr_acc = 100 * running_acc/len(trainloader.dataset)
    print('TRAIN: Epoch {} completed | tr_loss: {:.4f} | tr_acc: {:.2f}%'.format(
        epoch_nr, running_loss/len(trainloader.dataset), tr_acc))
    tr_accuracies[epoch_nr] = tr_acc

    # Get testing results
    model.eval()
    # Reset the counter: it still holds the training count at this point
    running_acc = 0.0
    with torch.no_grad():
        for batch_data, batch_labels in testloader:
            batch_data = batch_data.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_data)
            running_acc += compute_run_acc(logits, batch_labels)

    # Print testing results
    test_acc = 100 * running_acc/len(testloader.dataset)
    print('TEST: Epoch {} | test_acc: {:.2f}%'.format(epoch_nr, test_acc))
    test_accuracies[epoch_nr] = test_acc

    # Save model if best accuracy on test dataset until now
    # A separate file is used so the checkpoint written by the plain training
    # loop is not overwritten.
    if test_acc > best_test_acc:
        best_test_acc = test_acc
        torch.save(model.state_dict(), './best_net.pth')

print('Finished Training')

In [None]:
# Training vs. test accuracy per epoch, drawn on an explicit Axes object
fig, ax = plt.subplots()
ax.plot(tr_accuracies, label='Training')
ax.plot(test_accuracies, label='Test')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()