## Improve the accuracy of a vanila CNN

In [1]:
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau
import torchvision
import torchvision.transforms as v2
import matplotlib.pyplot as plt
from tqdm import tqdm

In [2]:
# Load CIFAR-10 dataset
transform = v2.Compose([
    v2.ToTensor(),
    v2.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [45]:
# Define the vanilla CNN Model
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)  # Output: 32x32x32
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)  # Output: 64x32x32
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)  # Output: 64x16x16
        self.fc1 = nn.Linear(64 * 16 * 16, 128)
        self.fc2 = nn.Linear(128, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)  # Flatten
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [None]:
# Define the vanilla CNN Model
class CNNModelWeightInit(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)  # Output: 32x32x32
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)  # Output: 64x32x32
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)  # Output: 64x16x16
        self.fc1 = nn.Linear(64 * 16 * 16, 128)
        self.fc2 = nn.Linear(128, 10)
        self.relu = nn.ReLU()

    def weight_init(self):
        for m in self.modules:
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal
    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)  # Flatten
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [3]:
def training_loop(model, criterion, optimizer, epochs=5, scheduler=None):
    # Training loop
    model.train()
    for epoch in tqdm(range(epochs), desc="Epochs"):
        for images, labels in train_loader:
            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        if scheduler:
            scheduler.step()
            print(f"scheduler update")
        print(f"Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}")
        
def testing_loop(model):
    # Evaluate on the test set
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return 100 * correct / total

In [47]:
# Vanilla model
model = CNNModel()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
training_loop(model, criterion, optimizer)
accuracy = testing_loop(model)
print(f"Test Accuracy: {accuracy}%")

Epochs:  20%|██        | 1/5 [00:49<03:18, 49.58s/it]

Epoch [1/5], Loss: 1.4888


Epochs:  40%|████      | 2/5 [01:37<02:25, 48.36s/it]

Epoch [2/5], Loss: 0.5245


Epochs:  60%|██████    | 3/5 [02:28<01:39, 49.98s/it]

Epoch [3/5], Loss: 0.3499


Epochs:  80%|████████  | 4/5 [03:21<00:50, 50.99s/it]

Epoch [4/5], Loss: 1.1404


Epochs: 100%|██████████| 5/5 [04:08<00:00, 49.74s/it]

Epoch [5/5], Loss: 0.7273





Test Accuracy: 69.92%


In [51]:
# Incorporating BatchNorm, Dropout
class CNNModel_BN_DO(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)  # Output: 64x32x32
        self.bn2 = nn.BatchNorm2d(64)
        # self.relu2 = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)  # Output: 64x16x16
        self.fc1 = nn.Linear(64 * 16 * 16, 128)
        # self.relu3 = nn.ReLU()
        self.do1 = nn.Dropout(0.3)
        self.fc2 = nn.Linear(128, 10)
    
    def forward(self, x):
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.relu(self.bn2(self.conv2(x)))
        x = self.pool(x)
        x = x.view(x.size(0), -1)  # Flatten
        x = self.relu(self.fc1(x))
        x = self.do1(x)
        x = self.fc2(x)
        return x
        

In [56]:
# CNN with BatchNorm and Dropout
model_BN_DO = CNNModel_BN_DO()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_BN_DO.parameters(), lr=0.001)
training_loop(model_BN_DO, criterion, optimizer, epochs=10)
accuracy = testing_loop(model_BN_DO)
print(f"Test Accuracy: {accuracy}%")

Epochs:  10%|█         | 1/10 [00:52<07:55, 52.85s/it]

Epoch [1/10], Loss: 1.5638


Epochs:  20%|██        | 2/10 [01:46<07:05, 53.15s/it]

Epoch [2/10], Loss: 1.3343


Epochs:  30%|███       | 3/10 [02:39<06:13, 53.33s/it]

Epoch [3/10], Loss: 1.3777


Epochs:  40%|████      | 4/10 [03:33<05:20, 53.36s/it]

Epoch [4/10], Loss: 1.3795


Epochs:  50%|█████     | 5/10 [04:26<04:26, 53.21s/it]

Epoch [5/10], Loss: 1.1376


Epochs:  60%|██████    | 6/10 [05:19<03:33, 53.26s/it]

Epoch [6/10], Loss: 1.2930


Epochs:  70%|███████   | 7/10 [06:12<02:39, 53.33s/it]

Epoch [7/10], Loss: 1.2194


Epochs:  80%|████████  | 8/10 [07:05<01:46, 53.22s/it]

Epoch [8/10], Loss: 1.3360


Epochs:  90%|█████████ | 9/10 [07:58<00:52, 52.92s/it]

Epoch [9/10], Loss: 1.0125


Epochs: 100%|██████████| 10/10 [08:48<00:00, 52.84s/it]

Epoch [10/10], Loss: 0.8911





Test Accuracy: 54.86%


In [8]:
# Incorporating BatchNorm, Dropout with weight initialization and a lr scheduler
class CNNModel_BN_DO_weightInitScheduler(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)  # Output: 64x32x32
        self.bn2 = nn.BatchNorm2d(64)
        # self.relu2 = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)  # Output: 64x16x16
        self.fc1 = nn.Linear(64 * 16 * 16, 128)
        # self.relu3 = nn.ReLU()
        self.do1 = nn.Dropout(0.3)
        self.fc2 = nn.Linear(128, 10)
        
        # self._initialize_weights()
        
    def _initialize_weights(self):
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                if module.bias is not None:
                    init.constant_(module.bias, 0)
            elif isinstance(module, nn.Linear):
                init.xavier_normal_(module.weight)
                if module.bias is not None:
                    init.constant_(module.bias, 0)
                    
    def forward(self, x):
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.relu(self.bn2(self.conv2(x)))
        x = self.pool(x)
        x = x.view(x.size(0), -1)  # Flatten
        x = self.relu(self.fc1(x))
        x = self.do1(x)
        x = self.fc2(x)
        return x
        

In [9]:
# Model with BatchNorm and Dropout but with weight initialization and a scheduler
# CNN with BatchNorm and Dropout
model_BN_DO_weightInitScheduler = CNNModel_BN_DO_weightInitScheduler()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_BN_DO_weightInitScheduler.parameters(), lr=0.001)  
scheduler = StepLR(optimizer, step_size=2, gamma=0.1)
training_loop(model_BN_DO_weightInitScheduler, criterion, optimizer, epochs=10, scheduler=scheduler)
accuracy = testing_loop(model_BN_DO_weightInitScheduler)
print(f"Test Accuracy: {accuracy}%")

Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epochs:  10%|█         | 1/10 [00:58<08:47, 58.59s/it]

scheduler update
Epoch [1/10], Loss: 1.2726


Epochs:  20%|██        | 2/10 [01:59<07:57, 59.67s/it]

scheduler update
Epoch [2/10], Loss: 1.5909


Epochs:  30%|███       | 3/10 [02:59<07:01, 60.24s/it]

scheduler update
Epoch [3/10], Loss: 1.3904


Epochs:  40%|████      | 4/10 [03:57<05:56, 59.37s/it]

scheduler update
Epoch [4/10], Loss: 1.1893


Epochs:  50%|█████     | 5/10 [04:54<04:51, 58.37s/it]

scheduler update
Epoch [5/10], Loss: 0.9175


Epochs:  60%|██████    | 6/10 [05:51<03:51, 57.86s/it]

scheduler update
Epoch [6/10], Loss: 0.9979


Epochs:  70%|███████   | 7/10 [06:47<02:52, 57.40s/it]

scheduler update
Epoch [7/10], Loss: 1.5793


Epochs:  80%|████████  | 8/10 [07:44<01:54, 57.07s/it]

scheduler update
Epoch [8/10], Loss: 1.4115


Epochs:  90%|█████████ | 9/10 [08:40<00:56, 56.78s/it]

scheduler update
Epoch [9/10], Loss: 1.8260


Epochs: 100%|██████████| 10/10 [09:40<00:00, 58.02s/it]

scheduler update
Epoch [10/10], Loss: 0.8650





Test Accuracy: 65.1%


In [76]:
# Inbuilt deeper model
model_resnet = torchvision.models.resnet18()
optimizer = optim.Adam(model_resnet.parameters(), lr=0.01)
training_loop(model_resnet, criterion, optimizer)
print(f"Accuracy: {testing_loop(model_resnet)}")

Epochs:  20%|██        | 1/5 [03:46<15:04, 226.21s/it]

Epoch [1/5], Loss: 1.3498


Epochs:  40%|████      | 2/5 [07:32<11:19, 226.45s/it]

Epoch [2/5], Loss: 1.6756


Epochs:  60%|██████    | 3/5 [11:21<07:35, 227.56s/it]

Epoch [3/5], Loss: 0.5348


Epochs:  80%|████████  | 4/5 [15:12<03:48, 228.71s/it]

Epoch [4/5], Loss: 0.6208


Epochs: 100%|██████████| 5/5 [19:01<00:00, 228.28s/it]

Epoch [5/5], Loss: 0.7127





Accuracy: 71.98


In [10]:
# Incorporating BatchNorm, Dropout with weight initialization and a lr scheduler
class CNNModel_BN_DO_weightInitScheduler_2(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(64, 256, kernel_size=3, stride=1, padding=1)  # Output: 64x32x32
        self.bn2 = nn.BatchNorm2d(256)
        # self.relu2 = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)  # Output: 256, 16, 16
        self.fc1 = nn.Linear(256*16*16, 1024)
        self.fc2 = nn.Linear(1024, 256)
        # self.relu3 = nn.ReLU()
        self.do1 = nn.Dropout(0.3)
        self.fc3 = nn.Linear(256, 10)
        
        # self._initialize_weights()
        
    def _initialize_weights(self):
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                init.kaiming_normal_(module.weight, mode='fan_in', nonlinearity='relu')
                if module.bias is not None:
                    init.constant_(module.bias, 0)
            elif isinstance(module, nn.Linear):
                init.xavier_normal_(module.weight)
                if module.bias is not None:
                    init.constant_(module.bias, 0)
                    
    def forward(self, x):
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.relu(self.bn2(self.conv2(x)))
        x = self.pool(x)
        x = x.view(x.size(0), -1)  # Flatten
        x = self.relu(self.fc1(x))
        x = self.do1(x)
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x
        

In [12]:
# Model with BatchNorm, Dropout, weight initialization,a scheduler and additional FC layer
model_BN_DO_weightInitScheduler_2 = CNNModel_BN_DO_weightInitScheduler_2()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_BN_DO_weightInitScheduler_2.parameters(), lr=0.001)  
scheduler = StepLR(optimizer, step_size=3, gamma=0.1)
training_loop(model_BN_DO_weightInitScheduler_2, criterion, optimizer, epochs=10, scheduler=scheduler)
accuracy = testing_loop(model_BN_DO_weightInitScheduler_2)
print(f"Test Accuracy: {accuracy}%")

Epochs:  10%|█         | 1/10 [06:53<1:01:59, 413.29s/it]

scheduler update
Epoch [1/10], Loss: 1.5152


Epochs:  20%|██        | 2/10 [13:45<54:59, 412.44s/it]  

scheduler update
Epoch [2/10], Loss: 1.1868


Epochs:  30%|███       | 3/10 [19:48<45:30, 390.12s/it]

scheduler update
Epoch [3/10], Loss: 0.6391


Epochs:  40%|████      | 4/10 [25:17<36:36, 366.03s/it]

scheduler update
Epoch [4/10], Loss: 0.5635


Epochs:  50%|█████     | 5/10 [30:44<29:19, 351.85s/it]

scheduler update
Epoch [5/10], Loss: 1.1428


Epochs:  60%|██████    | 6/10 [36:15<22:59, 344.88s/it]

scheduler update
Epoch [6/10], Loss: 0.6592


Epochs:  70%|███████   | 7/10 [42:27<17:41, 353.69s/it]

scheduler update
Epoch [7/10], Loss: 1.0814


Epochs:  80%|████████  | 8/10 [49:05<12:15, 367.85s/it]

scheduler update
Epoch [8/10], Loss: 0.8850


Epochs:  90%|█████████ | 9/10 [55:51<06:19, 379.64s/it]

scheduler update
Epoch [9/10], Loss: 0.4498


Epochs: 100%|██████████| 10/10 [1:02:33<00:00, 375.35s/it]

scheduler update
Epoch [10/10], Loss: 0.2432





Test Accuracy: 73.71%


In [None]:
accuracy = testing_loop(model_BN_DO_weightInitScheduler_2)
print(f"Test Accuracy: {accuracy}%")

Test Accuracy: 63.5%
