# Lenet-5 Model with Residual Block

In [1]:
import torch
from torchvision import datasets
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
import torch.nn as nn
import random
import numpy as np
device = torch.device('cuda')
from torchvision.ops import deform_conv2d
from torchvision.ops import DeformConv2d
from torch.utils.data import default_collate
from torchvision.transforms import v2

### Import Fashion MNIST Dataset

In [2]:
# Download and load the MNIST dataset
train_dataset = datasets.FashionMNIST(root='./data', train=True, download=True, transform= ToTensor())
test_dataset = datasets.FashionMNIST(root='./data', train=False, download=True, transform=ToTensor())

### Set Seed

In [3]:
seed = 0

random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
np.random.seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True

### Dataloaders with collate function for data augmentation using cutmix and mixup

In [4]:
batch_size = 50
cutmix = v2.CutMix(num_classes=10)
mixup = v2.MixUp(num_classes=10)
cutmix_or_mixup = v2.RandomChoice([cutmix, mixup])
def collate_fn(batch):
    return cutmix_or_mixup(*default_collate(batch))

train_dataloader_augment = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn) # train data augment with Mixup and CutMix
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) # normal training data
test_dataloader = DataLoader(test_dataset, batch_size= batch_size, shuffle=False)

### Define LeNet5 CNN with Residual Block architecture

In [5]:
class ResidualBlock(nn.Module): # Residual block class to be added to existing architecture
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock, self).__init__()
        self.offset1 = nn.Parameter(torch.randn(batch_size, 2*3*3, 12,12, requires_grad=True)*0.01)
        self.offset2 = nn.Parameter(torch.randn(batch_size, 2*3*3, 12,12, requires_grad=True)*0.01)
        self.conv1 = DeformConv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.conv2 = DeformConv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x, self.offset1)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out, self.offset2)
        out = self.bn2(out)

        if self.stride != 1 or x.shape[1] != out.shape[1]:
            residual = nn.Conv2d(x.shape[1], out.shape[1], kernel_size=1, stride=self.stride).to(device)(x)

        out += residual # adding the residual to the output
        out = self.relu(out)

        return out

class ModifiedLeNet5(nn.Module):
    def __init__(self, input_shape=(1, 28, 28), num_classes=10):
        super(ModifiedLeNet5, self).__init__()
        self.deformconv1 = nn.Conv2d(1, 32, kernel_size=5, stride=1)
        self.batchnorm1 = nn.BatchNorm2d(32)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.residualblock = ResidualBlock(32, 64) # residual block added here
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.classifier = nn.Sequential(
            nn.Linear(2304, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, num_classes)
        )

    def forward(self, x):
        x = self.deformconv1(x)
        x = self.batchnorm1(x)
        x = self.relu1(x)
        x = self.maxpool1(x)
        x = self.residualblock(x)
        x = self.maxpool2(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

In [6]:
loss_fn = nn.CrossEntropyLoss()
device = torch.device('cuda')


### Train Function

In [7]:
def train(train_loader, model, epochs, optimizer):
    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            output = model(images)
            loss = loss_fn(output, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        # Calculate average training loss for the epoch
        avg_train_loss = train_loss / len(train_dataloader)
        print(f'Epoch {epoch + 1}/{epochs}, Train Loss: {avg_train_loss:.4f}')

### Test Function

In [8]:
def test(test_loader, model):
    model.eval()
    correct = 0
    total = 0
    test_loss = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            output = model(images)
            loss = loss_fn(output, labels)
            test_loss += loss.item()

            _, predicted = torch.max(output, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Calculate average validation loss and accuracy for the epoch
    avg_test_loss = test_loss / len(test_dataloader)
    accuracy = 100 * (correct / total)

    return avg_test_loss, accuracy

## Commence Training without data augmentation

In [9]:
seed = 0
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
np.random.seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True

In [10]:
model = ModifiedLeNet5().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [11]:
train(train_dataloader, model, 50, optimizer)

Epoch 1/50, Train Loss: 0.3981
Epoch 2/50, Train Loss: 0.2754
Epoch 3/50, Train Loss: 0.2344
Epoch 4/50, Train Loss: 0.2162
Epoch 5/50, Train Loss: 0.1973
Epoch 6/50, Train Loss: 0.1773
Epoch 7/50, Train Loss: 0.1625
Epoch 8/50, Train Loss: 0.1486
Epoch 9/50, Train Loss: 0.1366
Epoch 10/50, Train Loss: 0.1247
Epoch 11/50, Train Loss: 0.1131
Epoch 12/50, Train Loss: 0.1028
Epoch 13/50, Train Loss: 0.0969
Epoch 14/50, Train Loss: 0.0886
Epoch 15/50, Train Loss: 0.0827
Epoch 16/50, Train Loss: 0.0745
Epoch 17/50, Train Loss: 0.0711
Epoch 18/50, Train Loss: 0.0671
Epoch 19/50, Train Loss: 0.0616
Epoch 20/50, Train Loss: 0.0588
Epoch 21/50, Train Loss: 0.0569
Epoch 22/50, Train Loss: 0.0525
Epoch 23/50, Train Loss: 0.0501
Epoch 24/50, Train Loss: 0.0479
Epoch 25/50, Train Loss: 0.0463
Epoch 26/50, Train Loss: 0.0432
Epoch 27/50, Train Loss: 0.0416
Epoch 28/50, Train Loss: 0.0416
Epoch 29/50, Train Loss: 0.0368
Epoch 30/50, Train Loss: 0.0407
Epoch 31/50, Train Loss: 0.0370
Epoch 32/50, Trai

### Test trained model without data augmentation on unseen data

In [12]:
avg_test_loss, acc = test(test_dataloader, model)

In [13]:
print("Accuracy on modified model: " + str(acc) + "%")
print("Average test loss on modified model: " + str(avg_test_loss))

Accuracy on modified model: 92.10000000000001%
Average test loss on modified model: 0.4843415314843878


## Commence Training with MixUp and CutMix augmentation

In [14]:
seed = 0
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
np.random.seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True

In [15]:
model = ModifiedLeNet5().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [16]:
train(train_dataloader_augment, model, 50, optimizer)

Epoch 1/50, Train Loss: 1.1406
Epoch 2/50, Train Loss: 0.9874
Epoch 3/50, Train Loss: 0.9355
Epoch 4/50, Train Loss: 0.9116
Epoch 5/50, Train Loss: 0.8910
Epoch 6/50, Train Loss: 0.8670
Epoch 7/50, Train Loss: 0.8494
Epoch 8/50, Train Loss: 0.8353
Epoch 9/50, Train Loss: 0.8303
Epoch 10/50, Train Loss: 0.8245
Epoch 11/50, Train Loss: 0.8274
Epoch 12/50, Train Loss: 0.8082
Epoch 13/50, Train Loss: 0.8053
Epoch 14/50, Train Loss: 0.8064
Epoch 15/50, Train Loss: 0.8009
Epoch 16/50, Train Loss: 0.7927
Epoch 17/50, Train Loss: 0.7974
Epoch 18/50, Train Loss: 0.7949
Epoch 19/50, Train Loss: 0.7876
Epoch 20/50, Train Loss: 0.7999
Epoch 21/50, Train Loss: 0.7805
Epoch 22/50, Train Loss: 0.7729
Epoch 23/50, Train Loss: 0.7859
Epoch 24/50, Train Loss: 0.7782
Epoch 25/50, Train Loss: 0.7849
Epoch 26/50, Train Loss: 0.7715
Epoch 27/50, Train Loss: 0.7760
Epoch 28/50, Train Loss: 0.7707
Epoch 29/50, Train Loss: 0.7706
Epoch 30/50, Train Loss: 0.7820
Epoch 31/50, Train Loss: 0.7639
Epoch 32/50, Trai

### Test trained model with MixUp and CutMix augmentation on unseen data

In [17]:
avg_test_loss, acc = test(test_dataloader, model)

In [18]:
print("Accuracy on base model: " + str(acc) + "%")
print("Average test loss on base model: " + str(avg_test_loss))

Accuracy on base model: 92.71000000000001%
Average test loss on base model: 0.26852341182529926
