# LeNet on MNIST

This notebook implements the LeNet architecture in PyTorch and trains it on the MNIST dataset.

## Imports

In [5]:
import torch
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from torch.utils.data.dataset import random_split
from torchvision import transforms
from torch import nn
import torch.nn.functional as F
import time

## Settings

In [6]:
# Run on the first CUDA GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Hyperparameters used by the data loaders and the training loop.
batch_size = 128      # examples per mini-batch
learning_rate = 0.01  # step size handed to the optimizer
num_epochs = 10       # full passes over the training split

## Datasets and Transforms

In [7]:
# Preprocessing pipelines keyed by split name.
# Both pipelines upscale to 32 pixels and crop back to 28x28; the training
# pipeline crops at a random position (augmentation), the test pipeline
# always crops the centre.
data_transforms = dict(
    train=transforms.Compose([
        transforms.Resize(size=32),
        transforms.RandomCrop(size=(28, 28)),
        transforms.ToTensor(),
        # (x - 0.5) / 0.5 maps ToTensor's [0, 1] output to [-1, 1]
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]),
    test=transforms.Compose([
        transforms.Resize(size=32),
        transforms.CenterCrop(size=(28, 28)),
        transforms.ToTensor(),
        # (x - 0.5) / 0.5 maps ToTensor's [0, 1] output to [-1, 1]
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]),
)

In [None]:
# MNIST training and test splits, downloaded into ./data when not already
# present. Each split is paired with its own preprocessing pipeline.
mnist_trainset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=data_transforms["train"],
)
mnist_testset = datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=data_transforms["test"],
)

In [9]:
# Hold out 5,000 of the 60,000 training images for validation. The split is
# drawn from a seeded generator so that "Restart & Run All" reproduces the
# same train/validation partition every time.
train_dset, valid_dset = random_split(
    mnist_trainset,
    lengths=[55000, 5000],
    generator=torch.Generator().manual_seed(42),
)
# NOTE(review): valid_dset is a view onto mnist_trainset, so it is served
# through the "train" transform (including RandomCrop), not the "test" one.

# Reshuffle the training examples every epoch; without shuffle=True the
# optimizer would see the batches in exactly the same order on each pass.
train_loader = DataLoader(train_dset, batch_size=batch_size, shuffle=True)
# Evaluation loaders keep a fixed order.
valid_loader = DataLoader(valid_dset, batch_size=batch_size)
test_loader = DataLoader(mnist_testset, batch_size=batch_size)

## Model

In [10]:
# LeNet-style network: two conv/sigmoid/average-pool stages followed by a
# three-layer fully connected classifier with 10 output scores.
# The size comments assume the 28x28 single-channel crops produced by the
# transforms above.
net = nn.Sequential(
    # Stage 1: padding=2 keeps the 5x5 convolution at 28x28; pooling -> 14x14.
    nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2),
    nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    # Stage 2: unpadded 5x5 convolution -> 10x10; pooling -> 5x5.
    nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
    nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    # Classifier head on the flattened 16 * 5 * 5 feature vector.
    nn.Flatten(),
    nn.Linear(in_features=16 * 5 * 5, out_features=120),
    nn.Sigmoid(),
    nn.Linear(in_features=120, out_features=84),
    nn.Sigmoid(),
    nn.Linear(in_features=84, out_features=10),
).to(device)

In [None]:
# Initialize Weights
def init_weights(m):
    """Apply Xavier-uniform initialisation to the weights of one module.

    Intended to be passed to ``nn.Module.apply``, which calls it once per
    submodule. Only ``nn.Linear`` and ``nn.Conv2d`` modules (including
    subclasses) are touched; every other module is left unchanged. Biases
    are not modified.

    Args:
        m: The module to (possibly) re-initialise in place.
    """
    # isinstance with a tuple also matches subclasses, unlike comparing
    # type(m) for exact equality.
    if isinstance(m, (nn.Linear, nn.Conv2d)):
        nn.init.xavier_uniform_(m.weight)
# Run init_weights on net and each of its submodules. As the last expression
# of the cell, the value returned by apply() is displayed as the cell output.
net.apply(init_weights)

## Training

In [12]:
def compute_accuracy(model, data_loader, device):
    """Return the fraction of examples in ``data_loader`` classified correctly.

    The model is put into evaluation mode for the duration of the call and
    its previous train/eval mode is restored afterwards, so the result does
    not depend on the mode the caller left the model in. Gradients are not
    tracked.

    Args:
        model: Module mapping a batch of features to per-class scores; the
            predicted label is the index of the largest score along dim 1.
        data_loader: Iterable yielding ``(features, targets)`` batches.
        device: Device each batch is moved to before the forward pass.

    Returns:
        A 0-dim float tensor equal to ``correct / total``.

    Raises:
        ValueError: If ``data_loader`` yields no examples.
    """
    was_training = model.training
    model.eval()
    # Keep the running count as a tensor so that .float() below is valid
    # even when the loop body never executes.
    correct_pred = torch.zeros((), dtype=torch.long, device=device)
    num_examples = 0
    try:
        with torch.no_grad():
            for features, targets in data_loader:
                features, targets = features.to(device), targets.to(device)
                logits = model(features)
                predicted_labels = logits.argmax(dim=1)
                num_examples += targets.size(0)
                correct_pred += (predicted_labels == targets).sum()
    finally:
        # Hand the model back in whatever mode the caller had it in.
        model.train(was_training)

    if num_examples == 0:
        raise ValueError("compute_accuracy: data_loader yielded no examples")
    return correct_pred.float() / num_examples

In [None]:
# Adam over all network parameters; the wall clock starts just before the
# first epoch so the timings below cover training plus per-epoch evaluation.
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
start_time = time.time()

for epoch in range(num_epochs):
    # --- Optimisation pass over the training split ---
    net.train()
    for batch_idx, (features, targets) in enumerate(train_loader):
        features = features.to(device)
        targets = targets.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass and cross-entropy loss on the raw class scores.
        logits = net(features)
        loss = F.cross_entropy(logits, targets)

        # Backpropagate, then take one optimizer step.
        loss.backward()
        optimizer.step()

        # Report the current batch loss on every 100th batch.
        if (batch_idx + 1) % 100 == 0:
            print(
                f"Epoch: {epoch + 1:03d}/{num_epochs:03d} | "
                f"Batch: {batch_idx + 1:03d}/{len(train_loader):03d} | "
                f"Loss: {loss:.4f}"
            )

    # --- End-of-epoch evaluation on the training and validation loaders ---
    net.eval()
    train_acc = compute_accuracy(net, train_loader, device)
    valid_acc = compute_accuracy(net, valid_loader, device)
    print( 
        f"Training Accuracy: {train_acc * 100:.2f}% | "
        f"Validation Accuracy: {valid_acc * 100:.2f}%"
    )
    print(f"Time Elapsed: {(time.time() - start_time) / 60:.2f} min")

print(f"Total Training Time: {(time.time() - start_time) / 60:.2f} min")

## Evaluation

In [None]:
# Score the network on the MNIST test loader and report it as a percentage.
test_acc = compute_accuracy(model=net, data_loader=test_loader, device=device)
print(f"Test Accuracy: {test_acc*100:.2f}%")