In [45]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Prefer the GPU when PyTorch can see one; otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Using device: {device}')

# Seed PyTorch's global RNG so that weight initialisation, batch shuffling and
# the random augmentations start from the same state on every Restart & Run All.
seed = 42
torch.manual_seed(seed)

# Training hyperparameters.
batch_size = 64   # samples per mini-batch (used by both data loaders)
lr = 0.001        # initial learning rate handed to the optimiser
epochs = 20       # number of full passes over the training set

# Evaluation pipeline: deterministic tensor conversion followed by normalisation
# with a single-channel mean / std (the values commonly quoted for MNIST).
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# Training pipeline: light geometric augmentation first, then exactly the same
# conversion + normalisation steps as the evaluation pipeline.
train_transform = transforms.Compose([
    transforms.RandomRotation(degrees=5),                        # angle drawn from [-5, +5] degrees
    transforms.RandomAffine(degrees=0, translate=(0.05, 0.05)),  # shift by up to 5% of width / height
    *test_transform.transforms,
])

# MNIST splits, stored under ./data (downloaded if not already present).
# The training split is augmented; the test split is only normalised.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=train_transform,
)
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=test_transform,
)

# Mini-batch iterators: training batches are reshuffled every epoch,
# test batches keep the dataset order.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

Using device: cpu


In [46]:
class CNN(nn.Module):
    """Small all-convolutional classifier that ends in global average pooling.

    Five unpadded 3x3 convolutions, each followed by batch normalisation and
    ReLU, take a 1-channel image to 10 feature maps (1 -> 8 -> 16 -> 20 -> 28
    -> 10 channels). A 2x2 max-pool follows the second convolution and dropout
    (p=0.1) follows the third, fourth and fifth. The 10 final maps are averaged
    spatially, so there is no fully connected layer.

    Because the last stage also goes through ReLU (and dropout) before pooling,
    the returned scores are always non-negative.
    """

    def __init__(self):
        super().__init__()
        # Stage 1-2: feature extraction at full resolution.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3)
        self.bn1 = nn.BatchNorm2d(num_features=8)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3)
        self.bn2 = nn.BatchNorm2d(num_features=16)
        # Stage 3-5: applied after the max-pool; the last one emits 10 maps.
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=20, kernel_size=3)
        self.bn3 = nn.BatchNorm2d(num_features=20)
        self.conv4 = nn.Conv2d(in_channels=20, out_channels=28, kernel_size=3)
        self.bn4 = nn.BatchNorm2d(num_features=28)
        self.conv5 = nn.Conv2d(in_channels=28, out_channels=10, kernel_size=3)
        self.bn5 = nn.BatchNorm2d(num_features=10)

        # Parameter-free layers shared across the forward pass.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.gap = nn.AdaptiveAvgPool2d(output_size=(1, 1))
        self.dropout = nn.Dropout(p=0.1)

    @staticmethod
    def _conv_bn_relu(x, conv, bn):
        """Apply ``conv``, then ``bn``, then ReLU to ``x``."""
        return torch.relu(bn(conv(x)))

    def forward(self, x):
        """Compute one score per output feature map.

        Args:
            x: image batch; expected to be laid out as (N, 1, H, W).

        Returns:
            Tensor of shape (N, 10): the spatial mean of each final feature map.
        """
        # Two conv stages, then halve the spatial resolution.
        x = self._conv_bn_relu(x, self.conv1, self.bn1)
        x = self._conv_bn_relu(x, self.conv2, self.bn2)
        x = self.pool(x)

        # Three conv stages, each followed by dropout.
        stages = (
            (self.conv3, self.bn3),
            (self.conv4, self.bn4),
            (self.conv5, self.bn5),
        )
        for conv, bn in stages:
            x = self.dropout(self._conv_bn_relu(x, conv, bn))

        # Average every map down to 1x1 and drop the two singleton dimensions.
        return self.gap(x).flatten(1)

# Build the network, then move its parameters and buffers to the chosen device.
model = CNN()
model = model.to(device)

# Show the registered sub-modules.
print(model)

def count_params(layer):
    """Return the total number of scalar parameters registered on ``layer``.

    Args:
        layer: any object exposing ``parameters()`` that yields tensors
            (e.g. an ``nn.Module``).

    Returns:
        Sum of ``numel()`` over all yielded parameters; 0 if there are none.
    """
    n_params = 0
    for tensor in layer.parameters():
        n_params += tensor.numel()
    return n_params

# Per-layer parameter counts (weights + biases) for the five convolutions.
conv_layers = (
    ("Conv1:", model.conv1),
    ("Conv2:", model.conv2),
    ("Conv3:", model.conv3),
    ("Conv4:", model.conv4),
    ("Conv5:", model.conv5),
)
for label, layer in conv_layers:
    print(label, count_params(layer))

# Whole-model count: every registered parameter, so the batch-norm
# scale / shift parameters are included on top of the convolutions.
total = count_params(model)
print("Total Parameters:", total)


CNN(
  (conv1): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1))
  (bn1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1))
  (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(16, 20, kernel_size=(3, 3), stride=(1, 1))
  (bn3): BatchNorm2d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(20, 28, kernel_size=(3, 3), stride=(1, 1))
  (bn4): BatchNorm2d(28, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv5): Conv2d(28, 10, kernel_size=(3, 3), stride=(1, 1))
  (bn5): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (gap): AdaptiveAvgPool2d(output_size=(1, 1))
  (dropout): Dropout(p=0.1, inplace=False)
)
Conv1: 80
Conv2: 1168
Conv3: 2900
Conv4: 5068
Conv5: 2530
Total Parameters: 11910

What is cosine annealing?
What is step decay?
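- The final convolution must produce exactly 10 feature maps, one per digit class. Global average pooling (GAP) collapses each feature map to a single number (its spatial mean), and those 10 numbers are used directly as the class scores. If the last layer had any other number of feature maps, the output would not line up with the 10 classes and accuracy would suffer.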
- Data augmentation is applied to increase accuracy: random rotation within ±5° (a 10° range) plus random shifts of up to 5% of the image width/height.
-

In [47]:
# Loss, optimiser and learning-rate schedule.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)
# Cosine annealing: the learning rate follows a cosine curve from `lr` down
# towards `eta_min` over `T_max` scheduler steps (one step per epoch below).
schedular = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs, eta_min=1e-5)

for epoch in range(epochs):
    model.train()  # enable dropout and batch-norm statistic updates
    total_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Report progress once per epoch. Read the learning rate before stepping
    # the scheduler so the printed value is the one used during this epoch.
    mean_loss = total_loss / max(len(train_loader), 1)
    current_lr = optimizer.param_groups[0]["lr"]
    print(f"Epoch {epoch + 1:2d}/{epochs} | mean batch loss: {mean_loss:.4f} | lr: {current_lr:.6f}")

    schedular.step()

In [48]:
# Inference behaviour: dropout is disabled and batch norm uses its running statistics.
model.eval()
correct, total = 0, 0

# No gradients are needed for scoring, so skip autograd bookkeeping.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        # Predicted class = index of the largest score in each row.
        predicted = torch.max(outputs, dim=1).indices

        total += labels.shape[0]
        correct += int((predicted == labels).sum())

print(f"Test Accuracy: {100 * correct / total:.2f}%")

Test Accuracy: 99.41%
