# Handwritten Digits Classifier in PyTorch
---

Project #1 of the Udacity Deep Learning Nanodegree

Author: **Roberto Fierimonte**

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from datetime import datetime, timezone
from pathlib import Path

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from loguru import logger
from torch.utils.data import DataLoader, random_split
from torch.utils.tensorboard import SummaryWriter
from torchinfo import summary
from torchvision import datasets, transforms, utils

from src.models import DenseNet, Lenet5
from src.utils import plot_classes_preds, show_grid

In [None]:
# Notebook setup: the dataset and the TensorBoard runs live in folders that are
# siblings of the current working directory.
data_path = Path.cwd().parent.joinpath("data")
runs_path = Path.cwd().parent.joinpath("runs")

# Create both folders on first use; folders that already exist are left as they are.
data_path.mkdir(exist_ok=True)
runs_path.mkdir(exist_ok=True)

## Step 1: Data loading and exploration

As a first step we load, display, and analyse the raw training data. Here we do not apply any transformation to the data apart from converting the images to tensors.

In [None]:
# Pick the compute device, in order of preference: CUDA, Apple's MPS backend, CPU.
# NOTE: PyTorch's device type for NVIDIA GPUs is "cuda"; "gpu" is not a valid
# device string and makes torch.device() raise.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
device = torch.device(device)

logger.info(f"Using {device} device.")

In [None]:
# Load the MNIST training split (downloading it into data_path when requested
# by download=True); the only transform is the conversion of images to tensors.
raw_data = datasets.MNIST(
    root=data_path,
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
# Small loader, created without shuffling, used only to peek at a batch of raw images.
raw_loader = DataLoader(dataset=raw_data, batch_size=16)

In [None]:
# Summarise the raw dataset: number of samples, label set, and the shape and
# pixel range of one batch of images.
logger.info(f"Number of samples: {len(raw_data)}.")
logger.info(f"Number of classes: {len(raw_data.classes)}.")
logger.info(f"Classes: {raw_data.classes}.")

# Each batch is an (images, labels) pair; only the images are inspected here.
raw_batch = next(iter(raw_loader))[0]
logger.info(f"Shape of batch: {tuple(raw_batch.shape)}.")
logger.info(
    f"Min pixel value: {raw_batch.min().item()}, max pixel value: {raw_batch.max().item()}."
)

# Reuse the batch fetched above instead of creating a second iterator:
# raw_loader is built without shuffling, so it would yield the same images again.
raw_grid = utils.make_grid(raw_batch)
show_grid(raw_grid)

As we can observe, the images are 28 x 28 pixels in size, and they have a single channel. The pixel values are also already normalised between 0 and 1.

Since we want to classify handwritten digits, we can now think of some data augmentation transformations to apply to the training set. The transformations that we have identified are:
- Random invert: Inverts the colours of the whole image with probability 0.2.
- Random rotation: Rotates the image between -10 and 10 degrees.
- Random perspective: Introduces a distortion in the image perspective with probability 0.2.

**N.B.:** We should not apply these transformations to the testing and validation sets, as the model needs to be assessed on its performance in the original image space.

## Step 2: Model design and training

In [None]:
# Experiment configuration
batch_size = 32  # Batch size
n_epochs = 10  # Number of training epochs
model = "densenet"  # Model type

if model not in ["densenet", "lenet5"]:
    raise RuntimeError("The model type must be one of ['densenet', 'lenet5'].")

In [None]:
# Name the run after the current UTC time (format YYYYmmddTHHMMSS) and log it
# into a folder of that name under runs_path.
run_name = f"{datetime.now(timezone.utc):%Y%m%dT%H%M%S}"
run_path = runs_path.joinpath(run_name)

# TensorBoard writer that records everything for this run.
writer = SummaryWriter(log_dir=run_path)

In [None]:
# Training-time augmentation pipeline, applied to every image when it is loaded.
train_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        # Invert the image colours with probability 0.2.
        transforms.RandomInvert(p=0.2),
        # Rotate by a random angle in [-10, 10] degrees, keeping the image size.
        transforms.RandomRotation(degrees=(-10, 10), expand=False),
        # Apply a perspective distortion with probability 0.2.
        transforms.RandomPerspective(distortion_scale=0.3, p=0.2),
    ]
)
train_data = datasets.MNIST(
    data_path, download=True, train=True, transform=train_transform
)
# Reshuffle the training samples at every epoch: without shuffle=True the
# optimiser would see exactly the same batches in the same order each epoch.
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

In [None]:
# The MNIST test split (train=False) is divided evenly into a test half and a
# validation half; only the tensor conversion is applied, no augmentation.
test_val_data = datasets.MNIST(
    root=data_path, train=False, transform=transforms.ToTensor(), download=True
)

# Seeded generator so that the test / validation split is the same in every run.
test_val_generator = torch.Generator().manual_seed(42)
test_data, val_data = random_split(
    dataset=test_val_data, lengths=[0.5, 0.5], generator=test_val_generator
)

test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_data, batch_size=batch_size, shuffle=True)

In [None]:
# Report how many samples each of the three splits contains.
logger.info(f"Number of training samples: {len(train_data)}.")
logger.info(f"Number of validation samples: {len(val_data)}.")
logger.info(f"Number of test samples: {len(test_data)}.")

# Preview one batch of training images as produced by the training loader
# (i.e. with the augmentation pipeline applied).
train_preview_images = next(iter(train_loader))[0]
train_grid = utils.make_grid(train_preview_images)
show_grid(train_grid)

In [None]:
# Preview one batch of test images (element 0 of the batch holds the images).
test_preview_images = next(iter(test_loader))[0]
test_grid = utils.make_grid(test_preview_images)
show_grid(test_grid)

In [None]:
# Instantiate the architecture selected in the experiment configuration and
# move it to the selected device.
net = DenseNet() if model == "densenet" else Lenet5()
net.to(device)

In [None]:
# Summarise the network for an input of one batch of single-channel 28 x 28 images.
summary(net, input_size=(batch_size, 1, 28, 28))

In [None]:
# Loss function: cross-entropy between the network outputs and the labels.
criterion = nn.CrossEntropyLoss()

# Optimiser: SGD with momentum and weight decay for the dense network,
# Adam for any other model type.
if model == "densenet":
    optimizer = optim.SGD(
        params=net.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-5
    )
else:
    optimizer = optim.Adam(params=net.parameters(), lr=0.001)

In [None]:
# Train the network for n_epochs, evaluating on the validation split after
# every epoch. The per-epoch average losses are collected in the two history
# lists so they can be plotted afterwards; all statistics are also written to
# TensorBoard.
train_loss_history = list()
val_loss_history = list()

writer.add_scalar("Batch size", batch_size)
writer.add_text("Model type", model)
writer.add_image("Train images", train_grid)
writer.add_image("Test images", test_grid)

for epoch in range(n_epochs):
    # ---- Training pass ----
    net.train()
    train_loss = 0.0
    train_correct = 0
    train_seen = 0
    for i, data in enumerate(train_loader):
        # Data is a list of [inputs, labels]
        inputs, labels = data

        # Move the batch to the device the network lives on.
        inputs, labels = inputs.to(device), labels.to(device)

        # Log the model graph once, on the very first batch only. This is done
        # after the transfer above so the example input is on the same device
        # as the network.
        if epoch == 0 and i == 0:
            writer.add_graph(net, inputs)

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate per-sample totals: criterion returns the batch mean, so it
        # is scaled back by the batch size. This keeps a smaller final batch
        # from being over-weighted in the epoch averages.
        n_samples = labels.size(0)
        preds = outputs.argmax(dim=1)
        train_correct += (preds == labels).sum().item()
        train_loss += loss.item() * n_samples
        train_seen += n_samples

    epoch_train_loss = train_loss / train_seen
    epoch_train_accuracy = train_correct / train_seen

    # Log the training stats
    writer.add_scalar("Loss/train", epoch_train_loss, epoch + 1)
    writer.add_scalar("Accuracy/train", epoch_train_accuracy, epoch + 1)
    logger.info(
        f"Epoch {epoch + 1} training accuracy: {epoch_train_accuracy:.2%} "
        f"training loss: {epoch_train_loss:.5f}."
    )
    train_loss_history.append(epoch_train_loss)

    # ---- Validation pass ----
    val_loss = 0.0
    val_correct = 0
    val_seen = 0
    net.eval()
    # No gradients are needed for evaluation; disabling them avoids building
    # the autograd graph for every validation batch.
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = net(inputs)
            loss = criterion(outputs, labels)

            n_samples = labels.size(0)
            preds = outputs.argmax(dim=1)
            val_correct += (preds == labels).sum().item()
            val_loss += loss.item() * n_samples
            val_seen += n_samples

    epoch_val_loss = val_loss / val_seen
    epoch_val_accuracy = val_correct / val_seen

    # Log the validation stats
    writer.add_scalar("Loss/valid", epoch_val_loss, epoch + 1)
    writer.add_scalar("Accuracy/valid", epoch_val_accuracy, epoch + 1)
    logger.info(
        f"Epoch {epoch + 1} validation accuracy: {epoch_val_accuracy:.2%} "
        f"validation loss: {epoch_val_loss:.5f}."
    )
    val_loss_history.append(epoch_val_loss)

In [None]:
# Draw the per-epoch training and validation losses on a single chart.
for loss_curve, curve_label in (
    (train_loss_history, "Training Loss"),
    (val_loss_history, "Validation Loss"),
):
    plt.plot(loss_curve, label=curve_label)
plt.legend()
plt.show()

In [None]:
# Persist the network's state dict as "model.pt" inside this run's folder.
checkpoint_file = run_path / "model.pt"
torch.save(net.state_dict(), checkpoint_file)

## Step 3: Model testing and evaluation

In [None]:
# Evaluate the trained network on the test split and log the average loss and
# accuracy (tagged with n_epochs as the TensorBoard step).
test_loss = 0.0
test_correct = 0
test_seen = 0
net.eval()
# No gradients are needed for evaluation; disabling them avoids building the
# autograd graph for every test batch.
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # Accumulate per-sample totals (criterion returns the batch mean) so a
        # smaller final batch is not over-weighted in the averages.
        n_samples = labels.size(0)
        preds = outputs.argmax(dim=1)
        test_correct += (preds == labels).sum().item()
        test_loss += loss.item() * n_samples
        test_seen += n_samples

avg_test_loss = test_loss / test_seen
test_accuracy = test_correct / test_seen

# Log the test stats
writer.add_scalar("Loss/test", avg_test_loss, n_epochs)
writer.add_scalar("Accuracy/test", test_accuracy, n_epochs)
logger.info(
    f"Test accuracy: {test_accuracy:.2%} "
    f"test loss: {avg_test_loss:.5f}."
)

In [None]:
# Build the predictions-vs-actuals figure (presumably what plot_classes_preds
# returns — see src.utils) for 100 images and log it to TensorBoard.
# NOTE(review): test_data comes from random_split, so test_data.dataset is the
# dataset that was split; slicing its .data / .targets takes the first 100
# entries of that underlying dataset rather than 100 entries of the test half —
# confirm this is intended.
# NOTE(review): .data is read directly instead of through a loader, so the
# ToTensor transform is presumably not applied to these images — verify that
# plot_classes_preds expects them in this form (and on the network's device).
underlying_dataset = test_data.dataset
test_actuals_preds = plot_classes_preds(
    net=net,
    images=underlying_dataset.data[:100].float(),
    labels=underlying_dataset.targets[:100],
    classes=underlying_dataset.classes,
)
writer.add_figure("Test/predictions vs actuals", test_actuals_preds, n_epochs)

In [None]:
# Close the tensorboard writer
# Flush first so pending events are written out, then close the writer; no
# further logging to this run should happen after this cell.
writer.flush()
writer.close()