# Multi-layer non-linear network for classifying handwritten digits

In this notebook I will be using one of MNIST datasets to train a multi-layer non-linear network to classify handwritten digits.

In [1]:
import torch, torchvision
from torch import nn
from torchvision import datasets

### Fetch the data

In [2]:
train_data = datasets.MNIST(
    root=".datasets",
    train=True,
    download=True,
    transform=torchvision.transforms.ToTensor(),
    target_transform=None
)

test_data = datasets.MNIST(
    root=".datasets",
    train=False,
    download=True,
    transform=torchvision.transforms.ToTensor(),
    target_transform=None
)

In [3]:
train_dataloader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=32,
    shuffle=True
)

test_dataloader = torch.utils.data.DataLoader(
    dataset=test_data,
    batch_size=32,
    shuffle=False
)

train_dataloader, test_dataloader

(<torch.utils.data.dataloader.DataLoader at 0x7ecc64a7a8a0>,
 <torch.utils.data.dataloader.DataLoader at 0x7ecc64a2a270>)

### Building the model

In [4]:
class MNISTModelV0(nn.Module):
    def __init__(self, in_shape, hidden_units, out_shape):
        super().__init__()
        self.layer_stack = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=in_shape, out_features=hidden_units),
            nn.ReLU(),
            nn.Linear(in_features=hidden_units, out_features=hidden_units),
            nn.ReLU(),
            nn.Linear(in_features=hidden_units, out_features=hidden_units),
            nn.ReLU(),
            nn.Linear(in_features=hidden_units, out_features=out_shape)
        )
    
    def forward(self, X):
        return self.layer_stack.forward(X)

In [5]:
torch.manual_seed(64)

model = MNISTModelV0(
    in_shape=28*28,
    hidden_units=10,
    out_shape=len(train_data.classes)
)

### Training and testing

In [6]:
from sklearn.metrics import accuracy_score

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)
epochs = 16

for epoch in range(epochs):
    for batch, (X, y) in enumerate(train_dataloader):
        model.train()
        y_logits = model.forward(X)
        loss = loss_fn(y_logits, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    for batch, (X, y) in enumerate(test_dataloader):
        model.eval()
        with torch.inference_mode():
            y_logits = model.forward(X)
            y_pred = y_logits.argmax(dim=1)
            loss = loss_fn(y_logits, y)
        accuracy = accuracy_score(y, y_pred)
    
    print(f"Accuracy: {accuracy*100:.2f}% | Loss: {loss}")

Accuracy: 56.25% | Loss: 0.7739577293395996
Accuracy: 81.25% | Loss: 0.4482867121696472
Accuracy: 81.25% | Loss: 0.3499242663383484
Accuracy: 87.50% | Loss: 0.20318855345249176
Accuracy: 81.25% | Loss: 0.26251253485679626
Accuracy: 87.50% | Loss: 0.23953774571418762
Accuracy: 87.50% | Loss: 0.2038857638835907
Accuracy: 93.75% | Loss: 0.24988947808742523
Accuracy: 100.00% | Loss: 0.09009826183319092
Accuracy: 93.75% | Loss: 0.1330987960100174
Accuracy: 93.75% | Loss: 0.15335838496685028
Accuracy: 93.75% | Loss: 0.21128623187541962
Accuracy: 93.75% | Loss: 0.11609114706516266
Accuracy: 100.00% | Loss: 0.04910185560584068
Accuracy: 93.75% | Loss: 0.06536896526813507
Accuracy: 93.75% | Loss: 0.08226760476827621


Even though it managed to hit `100%` on some iterations, all by chance (I think), it is still not any better than the linear model. Both got `93.75%` accuracy. Oh well...