In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torchsummary import summary


In [2]:

# Data Preparation
# Loading the datasets
transform = transforms.Compose([
    transforms.RandomRotation(3),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])


train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)


test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)



In [3]:
# Architecture of the model

class CNN(nn.Module):

    def __init__(self):

        super(CNN, self).__init__()

        # First convolutional block

        self.conv1 = nn.Conv2d(1, 16, kernel_size=5, padding=2)

        self.bn1 = nn.BatchNorm2d(16)

        self.conv2 = nn.Conv2d(16, 16, kernel_size=5)

        self.pool = nn.MaxPool2d(2, 2)

        self.dropout1 = nn.Dropout(0.15)

        # Second convolutional block

        self.conv3 = nn.Conv2d(16, 16, kernel_size=5, padding=2)

        self.bn2 = nn.BatchNorm2d(16)

        self.conv4 = nn.Conv2d(16, 16, kernel_size=5)

        self.dropout2 = nn.Dropout(0.15)

        # Global Average Pooling (GAP) to reduce the number of parameters

        self.gap = nn.AdaptiveAvgPool2d(1)  # GAP layer to reduce feature map size

        # Fully connected layers

        self.fc1 = nn.Linear(16, 10)  # Only 16 features after GAP, output: 10 classes



    def forward(self, x):

        # First conv block with residual connection

        identity = x

        x = self.conv1(x)

        x = self.bn1(x)

        x = F.relu(x)

        x = F.relu(self.conv2(x))

        x = self.pool(x)

        x = self.dropout1(x)

        # Second conv block

        x = F.relu(self.bn2(self.conv3(x)))

        x = F.relu(self.conv4(x))

        x = self.pool(x)

        x = self.dropout2(x)

        # Apply GAP (Global Average Pooling)

        x = self.gap(x)  # Output will be of shape (batch_size, 16, 1, 1)

        # Flatten and apply final fully connected layer

        x = x.view(x.size(0), -1)  # Flatten to (batch_size, 16)
        x = self.fc1(x)  # Output 10 classes

        return F.log_softmax(x, dim=1)


In [4]:
# Training and testing

# Training Function

def train(model, device, train_loader, optimizer, epoch, log_interval=100):

    model.train()

    for batch_idx, (data, target) in enumerate(train_loader):

        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()

        output = model(data)

        loss = F.nll_loss(output, target)  # Ensure both have the same batch size

        loss.backward()

        optimizer.step()

        

        if batch_idx % log_interval == 0:

            print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} '

                  f'({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}')



# Testing Function

def test(model, device, test_loader):

    model.eval()

    test_loss = 0

    correct = 0

    with torch.no_grad(): 

        for data, target in test_loader:

            data, target = data.to(device), target.to(device)

            output = model(data)

            test_loss += F.nll_loss(output, target, reduction='sum').item()

            pred = output.argmax(dim=1, keepdim=True) 

            correct += pred.eq(target.view_as(pred)).sum().item()

    

    test_loss /= len(test_loader.dataset)

    accuracy = 100. * correct / len(test_loader.dataset)

    

    print(f'\nTest set: Average loss: {test_loss:.4f}, '

          f'Accuracy: {correct}/{len(test_loader.dataset)} ({accuracy:.2f}%)\n')

    return accuracy




In [5]:
# Device Configuration

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(f'Using device: {device}')

Using device: cpu


In [6]:
# Initialize the model and optimizer

model = CNN().to(device)

optimizer = optim.Adam(model.parameters(), lr=0.003, weight_decay=1e-5)

scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3, verbose=True)



In [7]:
# summary
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 28, 28]             416
       BatchNorm2d-2           [-1, 16, 28, 28]              32
            Conv2d-3           [-1, 16, 24, 24]           6,416
         MaxPool2d-4           [-1, 16, 12, 12]               0
           Dropout-5           [-1, 16, 12, 12]               0
            Conv2d-6           [-1, 16, 12, 12]           6,416
       BatchNorm2d-7           [-1, 16, 12, 12]              32
            Conv2d-8             [-1, 16, 8, 8]           6,416
         MaxPool2d-9             [-1, 16, 4, 4]               0
          Dropout-10             [-1, 16, 4, 4]               0
AdaptiveAvgPool2d-11             [-1, 16, 1, 1]               0
           Linear-12                   [-1, 10]             170
Total params: 19,898
Trainable params: 19,898
Non-trainable params: 0
---------------------------------

In [8]:

# Training Loop

num_epochs = 19  # Back to 19 epochs

best_accuracy = 0


for epoch in range(1, num_epochs + 1):

    train(model, device, train_loader, optimizer, epoch)

    accuracy = test(model, device, test_loader)

    # Step the scheduler based on validation accuracy

    scheduler.step(accuracy) 

    if accuracy > best_accuracy:

        best_accuracy = accuracy

        torch.save(model.state_dict(), 'best_cnn_model.pth')

        print(f'New best model saved with accuracy: {best_accuracy:.2f}%')




Test set: Average loss: 0.0630, Accuracy: 9784/10000 (97.84%)

New best model saved with accuracy: 97.84%

Test set: Average loss: 0.0458, Accuracy: 9854/10000 (98.54%)

New best model saved with accuracy: 98.54%

Test set: Average loss: 0.0745, Accuracy: 9749/10000 (97.49%)


Test set: Average loss: 0.0293, Accuracy: 9899/10000 (98.99%)

New best model saved with accuracy: 98.99%

Test set: Average loss: 0.0281, Accuracy: 9920/10000 (99.20%)

New best model saved with accuracy: 99.20%

Test set: Average loss: 0.0283, Accuracy: 9917/10000 (99.17%)


Test set: Average loss: 0.0273, Accuracy: 9915/10000 (99.15%)


Test set: Average loss: 0.0390, Accuracy: 9872/10000 (98.72%)


Test set: Average loss: 0.0299, Accuracy: 9908/10000 (99.08%)


Test set: Average loss: 0.0179, Accuracy: 9939/10000 (99.39%)

New best model saved with accuracy: 99.39%

Test set: Average loss: 0.0210, Accuracy: 9930/10000 (99.30%)


Test set: Average loss: 0.0217, Accuracy: 9925/10000 (99.25%)


Test set: Averag

In [9]:

# Load and evaluate the best model

model.load_state_dict(torch.load('best_cnn_model.pth'))

final_accuracy = test(model, device, test_loader)

print(f'Final Test Accuracy: {final_accuracy:.2f}%')



  model.load_state_dict(torch.load('best_cnn_model.pth'))



Test set: Average loss: 0.0148, Accuracy: 9954/10000 (99.54%)

Final Test Accuracy: 99.54%


In [10]:
# Loading on device
# Load the model architecture and weights

model = CNN()

# Load the trained parameters (model weights)

model.load_state_dict(torch.load('best_cnn_model.pth'))

# If you're using a GPU, move the model to the correct device

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model.to(device)



# Set the model to evaluation mode (important for dropout/batchnorm layers)

model.eval()

  model.load_state_dict(torch.load('best_cnn_model.pth'))


CNN(
  (conv1): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(16, 16, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout1): Dropout(p=0.15, inplace=False)
  (conv3): Conv2d(16, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(16, 16, kernel_size=(5, 5), stride=(1, 1))
  (dropout2): Dropout(p=0.15, inplace=False)
  (gap): AdaptiveAvgPool2d(output_size=1)
  (fc1): Linear(in_features=16, out_features=10, bias=True)
)

In [11]:
# Initialize variables to keep track of total correct predictions and total samples

correct = 0

total = 0

# Disable gradient computation for inference

with torch.no_grad():  

    for data, target in train_loader:

        # Move data and target to the correct device

        data, target = data.to(device), target.to(device)

        # Get model predictions

        output = model(data)

        # Get predicted class by taking the index of the maximum value in the output

        pred = output.argmax(dim=1, keepdim=True)


        # Update correct count

        correct += pred.eq(target.view_as(pred)).sum().item()

        total += target.size(0)



# Calculate accuracy

accuracy = 100. * correct / total

print(f'Training Accuracy: {accuracy:.2f}%')

Training Accuracy: 99.84%


In [12]:
# Assuming test_loader is your DataLoader for the test data

correct = 0

total = 0

# Disable gradient computation for inference (for better memory efficiency)

with torch.no_grad():

    for data, target in test_loader:  # Use test_loader here for the test data

        data, target = data.to(device), target.to(device)

        # Get model predictions

        output = model(data)

        # Get predicted class by taking the index of the maximum value

        pred = output.argmax(dim=1, keepdim=True)

        # Update the correct count

        correct += pred.eq(target.view_as(pred)).sum().item()

        total += target.size(0)



# Calculate and print accuracy for the test set

accuracy = 100. * correct / total

print(f'Test Accuracy: {accuracy:.2f}%')

Test Accuracy: 99.54%
