# LeNet-5 Baseline Model

In [1]:
import torch
from torchvision import datasets
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
import torch.nn as nn
import random
import numpy as np
import torchvision.transforms as transforms
from torch.utils.data import default_collate
from torchvision.transforms import v2

### Seed the random number generators, then import the Fashion-MNIST dataset and scale pixel values to the range [0, 1]

In [2]:
# One seed for every random number generator used in this notebook.
seed = 0

# Python's built-in generator and NumPy's global generator.
random.seed(seed)
np.random.seed(seed)

# PyTorch: default generator, the current CUDA device, then every CUDA device.
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

# Request deterministic cuDNN algorithms.
torch.backends.cudnn.deterministic = True

In [3]:
# Download (if not already under ./data) and load the Fashion-MNIST train and
# test splits; ToTensor() is applied to every image as it is read.
train_dataset = datasets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=ToTensor(),
)
test_dataset = datasets.FashionMNIST(
    root='./data',
    train=False,
    download=True,
    transform=ToTensor(),
)

### DataLoaders, including one whose collate function applies CutMix or MixUp data augmentation

In [4]:
# Mini-batch size used by every DataLoader created in this cell.
batch_size = 50
# Batch-level augmentations, both configured for 10 target classes.
cutmix = v2.CutMix(num_classes=10)
mixup = v2.MixUp(num_classes=10)
# Wrapper that applies one of the two augmentations, chosen at random per call.
cutmix_or_mixup = v2.RandomChoice([cutmix, mixup])
def collate_fn(batch):
    """Collate a list of samples into a batch, then augment the whole batch.

    The samples are first stacked with ``default_collate``; the resulting
    batch components are then passed to ``cutmix_or_mixup``.
    """
    collated = default_collate(batch)
    return cutmix_or_mixup(*collated)

# Training data with MixUp / CutMix applied batch-wise through collate_fn.
train_dataloader_augment = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn,
)
# Plain (non-augmented) training data.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
# Evaluation data, iterated in a fixed order.
test_dataloader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

### Define Baseline LeNet5 CNN architecture

In [5]:

class LeNet5(nn.Module):
    """LeNet-5 style CNN: two conv/ReLU/max-pool stages followed by three linear layers.

    Args:
        input_shape: ``(channels, height, width)`` of a single input image. It
            sets the input channels of the first convolution and the input
            size of the first linear layer. The default ``(1, 28, 28)`` yields
            16 * 4 * 4 = 256 flattened features.
        num_classes: Number of logits produced by the last linear layer.
    """

    def __init__(self, input_shape=(1, 28, 28), num_classes=10):
        super().__init__()
        in_channels = input_shape[0]
        self.features = nn.Sequential(
            nn.Conv2d(in_channels, 6, kernel_size=5, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(6, 16, kernel_size=5, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        # Push one all-zero image through the feature extractor to learn how
        # many values it emits, instead of hard-coding the size for 28x28
        # inputs. This draws no random numbers, so seeded initialisation of
        # the layers is unaffected.
        with torch.no_grad():
            probe = self.features(torch.zeros(1, *input_shape))
        flat_features = probe.flatten(1).size(1)

        self.classifier = nn.Sequential(
            nn.Linear(flat_features, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, num_classes)
        )

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for a batch of images."""
        x = self.features(x)
        # Collapse everything except the batch dimension.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x


In [6]:
# Loss used by both the training and the evaluation loop.
loss_fn = nn.CrossEntropyLoss()
# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### Train Function

In [7]:
def train(train_loader, model, epochs, optimizer):
    """Train ``model`` and print the mean batch loss after every epoch.

    Uses the notebook-level ``loss_fn`` and ``device``.

    Args:
        train_loader: Iterable yielding ``(images, labels)`` batches.
        model: Module to optimise; switched to training mode each epoch.
        epochs: Number of full passes over ``train_loader``.
        optimizer: Optimizer that updates ``model``'s parameters.
    """
    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        num_batches = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            output = model(images)
            loss = loss_fn(output, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            num_batches += 1
        # Average over the batches actually seen from the loader passed in,
        # rather than over the length of an unrelated global DataLoader.
        avg_train_loss = train_loss / num_batches if num_batches else float('nan')
        print(f'Epoch {epoch + 1}/{epochs}, Train Loss: {avg_train_loss:.4f}')

### Test Function

In [8]:
def test(test_loader, model):
    model.eval()
    correct = 0
    total = 0
    test_loss = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            output = model(images)
            loss = loss_fn(output, labels)
            test_loss += loss.item()

            _, predicted = torch.max(output, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Calculate average validation loss and accuracy for the epoch
    avg_test_loss = test_loss / len(test_dataloader)
    accuracy = 100 * (correct / total)

    return avg_test_loss, accuracy

## Commence Training without data augmentation

In [9]:
# Reset every random number generator to the same seed before this run.
seed = 0

# Python's built-in generator and NumPy's global generator.
random.seed(seed)
np.random.seed(seed)

# PyTorch: default generator, the current CUDA device, then every CUDA device.
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

# Request deterministic cuDNN algorithms.
torch.backends.cudnn.deterministic = True

In [10]:
# Fresh, untrained LeNet-5 placed on the selected device, optimised with Adam.
model = LeNet5()
model = model.to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [11]:
# 50 epochs on the plain (non-augmented) training loader.
train(train_loader=train_dataloader, model=model, epochs=50, optimizer=optimizer)

Epoch 1/50, Train Loss: 0.6635
Epoch 2/50, Train Loss: 0.4351
Epoch 3/50, Train Loss: 0.3672
Epoch 4/50, Train Loss: 0.3316
Epoch 5/50, Train Loss: 0.3068
Epoch 6/50, Train Loss: 0.2890
Epoch 7/50, Train Loss: 0.2727
Epoch 8/50, Train Loss: 0.2570
Epoch 9/50, Train Loss: 0.2447
Epoch 10/50, Train Loss: 0.2347
Epoch 11/50, Train Loss: 0.2237
Epoch 12/50, Train Loss: 0.2160
Epoch 13/50, Train Loss: 0.2066
Epoch 14/50, Train Loss: 0.2006
Epoch 15/50, Train Loss: 0.1925
Epoch 16/50, Train Loss: 0.1856
Epoch 17/50, Train Loss: 0.1771
Epoch 18/50, Train Loss: 0.1728
Epoch 19/50, Train Loss: 0.1668
Epoch 20/50, Train Loss: 0.1613
Epoch 21/50, Train Loss: 0.1565
Epoch 22/50, Train Loss: 0.1492
Epoch 23/50, Train Loss: 0.1450
Epoch 24/50, Train Loss: 0.1400
Epoch 25/50, Train Loss: 0.1342
Epoch 26/50, Train Loss: 0.1314
Epoch 27/50, Train Loss: 0.1255
Epoch 28/50, Train Loss: 0.1209
Epoch 29/50, Train Loss: 0.1168
Epoch 30/50, Train Loss: 0.1140
Epoch 31/50, Train Loss: 0.1089
Epoch 32/50, Trai

### Test trained model without data augmentation on unseen data

In [12]:
# Evaluate the model trained without augmentation on the held-out test split.
avg_test_loss, acc = test(test_loader=test_dataloader, model=model)

In [13]:
# Accuracy is a percentage; the loss is a plain number, so it gets no "%" suffix.
print(f"Accuracy on base model: {acc}%")
print(f"Average test loss on base model: {avg_test_loss}")

Accuracy on base model: 88.68%
Average test loss on base model: 0.6245700081065297%


## Commence Training with MixUp and CutMix augmentation

In [14]:
# Reset every random number generator to the same seed before this run.
seed = 0

# Python's built-in generator and NumPy's global generator.
random.seed(seed)
np.random.seed(seed)

# PyTorch: default generator, the current CUDA device, then every CUDA device.
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

# Request deterministic cuDNN algorithms.
torch.backends.cudnn.deterministic = True

In [15]:
# Fresh, untrained LeNet-5 placed on the selected device, optimised with Adam.
model = LeNet5()
model = model.to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [16]:
# 50 epochs on the loader whose batches are augmented with MixUp / CutMix.
train(train_loader=train_dataloader_augment, model=model, epochs=50, optimizer=optimizer)

Epoch 1/50, Train Loss: 1.4063
Epoch 2/50, Train Loss: 1.2114
Epoch 3/50, Train Loss: 1.1364
Epoch 4/50, Train Loss: 1.1207
Epoch 5/50, Train Loss: 1.0727
Epoch 6/50, Train Loss: 1.0628
Epoch 7/50, Train Loss: 1.0418
Epoch 8/50, Train Loss: 1.0244
Epoch 9/50, Train Loss: 1.0034
Epoch 10/50, Train Loss: 1.0023
Epoch 11/50, Train Loss: 0.9971
Epoch 12/50, Train Loss: 0.9827
Epoch 13/50, Train Loss: 0.9772
Epoch 14/50, Train Loss: 0.9771
Epoch 15/50, Train Loss: 0.9710
Epoch 16/50, Train Loss: 0.9836
Epoch 17/50, Train Loss: 0.9596
Epoch 18/50, Train Loss: 0.9677
Epoch 19/50, Train Loss: 0.9721
Epoch 20/50, Train Loss: 0.9505
Epoch 21/50, Train Loss: 0.9619
Epoch 22/50, Train Loss: 0.9517
Epoch 23/50, Train Loss: 0.9548
Epoch 24/50, Train Loss: 0.9530
Epoch 25/50, Train Loss: 0.9490
Epoch 26/50, Train Loss: 0.9384
Epoch 27/50, Train Loss: 0.9459
Epoch 28/50, Train Loss: 0.9596
Epoch 29/50, Train Loss: 0.9339
Epoch 30/50, Train Loss: 0.9280
Epoch 31/50, Train Loss: 0.9418
Epoch 32/50, Trai

### Test trained model with MixUp and CutMix augmentation on unseen data

In [17]:
# Evaluate the model trained with augmentation on the held-out test split.
avg_test_loss, acc = test(test_loader=test_dataloader, model=model)

In [18]:
# NOTE(review): the labels say "base model" although this cell follows the
# MixUp/CutMix training run -- confirm whether the wording is intended.
print(f"Accuracy on base model: {acc}%")
print(f"Average test loss on base model: {avg_test_loss}")

Accuracy on base model: 89.81%
Average test loss on base model: 0.3338014339655638
