In [13]:
import torch
import torchvision
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torchvision.utils import make_grid
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import random_split
%matplotlib inline

# Use a white background for matplotlib figures
matplotlib.rcParams['figure.facecolor'] = '#ffffff'

In [14]:
# Load MNIST from ./data (downloaded on first use) with images converted to tensors.
dataset = MNIST(root='./data',
                transform=ToTensor(),
                download=True)

# Random-label setup: every sample gets one uniformly random class in [0, 10).
# The labels are drawn ONCE and stored on the dataset instead of being produced
# by a `target_transform`, because a transform runs on every __getitem__ call and
# would hand the same image a different label each time it is read (each epoch,
# each DataLoader worker), which makes the targets unlearnable and irreproducible.
# A private generator keeps this independent of the global RNG seed set later.
# NOTE(review): assumes a fixed random-label experiment is intended; to train on
# the true digits, delete the two lines below.
label_rng = torch.Generator().manual_seed(0)
dataset.targets = torch.randint(0, 10, dataset.targets.shape, generator=label_rng)

In [15]:
# Hold out 10,000 samples for validation; everything else is used for training
val_size = 10000
train_size = len(dataset) - val_size

# Set a random seed to always use the same training subset.
# This seeds torch's global RNG, so it also affects later code that draws from it.
random_seed = 22
torch.manual_seed(random_seed);

# Must run after the seed above: the split is drawn from the global RNG
train_ds, val_ds = random_split(dataset, [train_size, val_size])
len(train_ds), len(val_ds)

In [16]:
# Training mini-batch size; validation batches are twice as large
batch_size = 128

# Only the training loader shuffles
train_loader = DataLoader(
    train_ds,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)
val_loader = DataLoader(
    val_ds,
    batch_size=batch_size * 2,
    num_workers=2,
    pin_memory=True,
)

In [17]:
# Visualize a batch of data: show the first training batch as an image grid
for images, labels in train_loader:
    print('images.shape:', images.shape)
    # Move the channel axis last, which is the layout imshow expects
    grid = make_grid(images, nrow=16).permute(1, 2, 0)
    plt.figure(figsize=(16, 8))
    plt.axis('off')
    plt.imshow(grid);
    print(labels);
    break  # a single batch is enough

In [18]:
class MnistModel(nn.Module):
    """Feed-forward classifier: three ReLU hidden layers and a linear output layer.

    Besides `forward`, the class carries the per-batch and per-epoch hooks
    that the training loop calls.
    """

    def __init__(self, in_size, hidden_size_1, hidden_size_2, hidden_size_3, out_size):
        super().__init__()
        # Layers are created in network order, input side first
        self.linear1 = nn.Linear(in_size, hidden_size_1)          # hidden layer 1
        self.linear2 = nn.Linear(hidden_size_1, hidden_size_2)    # hidden layer 2
        self.linear3 = nn.Linear(hidden_size_2, hidden_size_3)    # hidden layer 3
        self.linear4 = nn.Linear(hidden_size_3, out_size)         # output layer

    def forward(self, xb):
        """Flatten each sample to a vector and return the output-layer activations."""
        hidden = xb.view(xb.shape[0], -1)
        for layer in (self.linear1, self.linear2, self.linear3):
            hidden = F.relu(layer(hidden))
        # The output layer has no activation
        return self.linear4(hidden)

    def training_step(self, batch):
        """Return the cross-entropy loss for one `(images, labels)` batch."""
        images, labels = batch
        return F.cross_entropy(self(images), labels)

    def validation_step(self, batch):
        """Return a dict with the loss and accuracy for one `(images, labels)` batch."""
        images, labels = batch
        logits = self(images)
        return {
            'val_loss': F.cross_entropy(logits, labels),
            'val_acc': accuracy(logits, labels),
        }

    def validation_epoch_end(self, outputs):
        """Average the per-batch dicts from `validation_step` into plain floats."""
        mean_loss = torch.stack([step['val_loss'] for step in outputs]).mean()
        mean_acc = torch.stack([step['val_acc'] for step in outputs]).mean()
        return {'val_loss': mean_loss.item(), 'val_acc': mean_acc.item()}

    def epoch_end(self, epoch, result):
        """Print a one-line summary of the epoch's metrics."""
        template = "Epoch [{}], train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}"
        print(template.format(
            epoch, result['train_loss'], result['val_loss'], result['val_acc']))
        
def accuracy(outputs, labels):
    """Fraction of rows in `outputs` whose highest-scoring column equals the label.

    The ratio is computed as a Python float and returned wrapped in a 0-dim tensor.
    """
    preds = torch.max(outputs, dim=1).indices
    num_correct = (preds == labels).sum().item()
    return torch.tensor(num_correct / len(preds))

In [58]:
# Layer widths passed to MnistModel
input_size = 28 * 28  # 784 values per flattened image
hidden_size_1, hidden_size_2, hidden_size_3 = 64, 128, 64
num_classes = 10

In [20]:
def get_default_device():
    """Return the CUDA device when one is available, otherwise the CPU device."""
    backend = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(backend)
    
def to_device(data, device):
    """Move a tensor, or a (possibly nested) list/tuple of tensors, to `device`.

    Lists and tuples are rebuilt as plain lists; any other object is moved
    with `.to(device, non_blocking=True)`.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)
    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

class DeviceDataLoader():
    """Wrap a dataloader so each batch is moved to a device as it is yielded."""

    def __init__(self, dl, device):
        self.dl, self.device = dl, device

    def __len__(self):
        """Number of batches"""
        return len(self.dl)

    def __iter__(self):
        """Yield a batch of data after moving it to device"""
        yield from (to_device(batch, self.device) for batch in self.dl)

In [22]:
# Choose the device once; the bare name below displays it as the cell output
device = get_default_device()
device

In [46]:
def evaluate(model, val_loader):
    """Evaluate the model's performance on the validation set.

    Runs `model.validation_step` on every batch with autograd disabled and
    the model in eval mode, then combines the per-batch results with
    `model.validation_epoch_end`. The model's previous train/eval mode is
    restored before returning, including when a step raises.

    Args:
        model: Module providing `validation_step` and `validation_epoch_end`.
        val_loader: Iterable of validation batches.

    Returns:
        Whatever `model.validation_epoch_end` returns for the collected outputs.
    """
    was_training = model.training
    model.eval()
    try:
        # No gradients are needed for validation; skipping graph construction
        # saves memory and time.
        with torch.no_grad():
            outputs = [model.validation_step(batch) for batch in val_loader]
            return model.validation_epoch_end(outputs)
    finally:
        model.train(was_training)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train the model using gradient descent, validating after every epoch.

    Args:
        epochs: Number of passes over `train_loader`.
        lr: Learning rate passed to `opt_func`.
        model: Module providing `training_step` and `epoch_end`, plus the
            validation hooks used by `evaluate`.
        train_loader: Iterable of training batches.
        val_loader: Iterable of validation batches, forwarded to `evaluate`.
        opt_func: Optimizer constructor, called as `opt_func(params, lr)`.

    Returns:
        A list with one dict per epoch: the result of `evaluate` plus a
        `'train_loss'` entry holding the mean training loss of that epoch.
    """
    history = []
    optimizer = opt_func(model.parameters(), lr)
    for epoch in range(epochs):
        # Training phase
        model.train()  # make the mode explicit in case a caller left it in eval
        train_losses = []
        for batch in train_loader:
            loss = model.training_step(batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            # Keep only the value; the autograd history is no longer needed
            train_losses.append(loss.detach())
        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        model.epoch_end(epoch, result)
        history.append(result)
    return history

In [59]:
# Define model: build the network from the size constants
model = MnistModel(
    in_size=input_size,
    hidden_size_1=hidden_size_1,
    hidden_size_2=hidden_size_2,
    hidden_size_3=hidden_size_3,
    out_size=num_classes,
)

In [60]:
def count_parameters(model):
    """Return the total element count of the model's trainable parameters.

    Parameters with `requires_grad` set to False are not counted.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

In [61]:
# Number of trainable parameters in `model` (displayed as the cell output)
count_parameters(model)

In [62]:
# Place the dataloaders and the model on the selected device (GPU or CPU).
# The isinstance guards keep this cell idempotent: re-running it does not wrap
# an already wrapped loader in a second DeviceDataLoader.
if not isinstance(train_loader, DeviceDataLoader):
    train_loader = DeviceDataLoader(train_loader, device)
if not isinstance(val_loader, DeviceDataLoader):
    val_loader = DeviceDataLoader(val_loader, device)
to_device(model, device);

In [63]:
# Sanity check: evaluate the model on the validation set before the training cells run
evaluate(model, val_loader)

In [64]:
# Settings for the first training run
num_epochs, lr = 20, 0.1
opt_func = torch.optim.SGD

In [65]:
# Start training
# `history` is cleared first, then replaced by the per-epoch results returned by fit
history=[]
history = fit(num_epochs, lr, model, train_loader, val_loader, opt_func)

In [66]:
# Continue training: 100 more epochs at lr 0.01, using fit's default optimizer
num_epochs, lr = 100, 0.01
history.extend(fit(num_epochs, lr, model, train_loader, val_loader))

In [67]:
# Continue training: 200 more epochs at lr 0.001, using fit's default optimizer
num_epochs, lr = 200, 0.001
history.extend(fit(num_epochs, lr, model, train_loader, val_loader))

In [68]:
# Continue training: another 200 epochs at the same lr of 0.001
num_epochs, lr = 200, 0.001
history.extend(fit(num_epochs, lr, model, train_loader, val_loader))

In [69]:
# Continue training: 500 more epochs, still at lr 0.001
num_epochs, lr = 500, 0.001
history.extend(fit(num_epochs, lr, model, train_loader, val_loader))

In [70]:
def plot_losses(train_losses,val_losses):
    """Plot training and validation loss against epoch on the current pyplot figure.

    Args:
        train_losses: Sequence of training-loss values, one per epoch.
        val_losses: Sequence of validation-loss values, one per epoch.
    """
    plt.plot(train_losses, '-', label="Train")
    # This curve is the validation loss, so the legend says so rather than "Test"
    plt.plot(val_losses, '-', label="Validation")
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend(loc="upper right");

In [71]:
# Pull the per-epoch loss values out of the accumulated training history
train_losses = [epoch_result['train_loss'] for epoch_result in history]
val_losses = [epoch_result['val_loss'] for epoch_result in history]

In [72]:
# Draw the training and validation loss curves for all epochs recorded in `history`
plot_losses(train_losses,val_losses)