<a href="https://colab.research.google.com/github/nishantruwari/projects/blob/main/implementing_cnn_and_comparing_optimisers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import math

In [3]:
# Step 2: Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
# Step 3: CIFAR-10 dataset
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

In [6]:
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, transform=transform, download=True)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, transform=transform, download=True)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:13<00:00, 12.9MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [7]:
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=100, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=100, shuffle=False)


In [8]:
# Step 4: Define CNN Model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(64 * 16 * 16, 512)
        self.fc2 = nn.Linear(512, 10)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x


In [9]:
class AdamOptimizer:
    def __init__(self, params, lr=0.001, betas=(0.9, 0.999), eps=1e-8):
        self.params = list(params)
        self.lr = lr
        self.betas = betas
        self.eps = eps
        self.t = 0
        self.m = [torch.zeros_like(p) for p in self.params]
        self.v = [torch.zeros_like(p) for p in self.params]

    def zero_grad(self):
        """Clear the gradients of all optimized parameters."""
        for param in self.params:
            if param.grad is not None:
                param.grad.detach_()
                param.grad.zero_()

    def step(self):
        """Perform a single optimization step."""
        self.t += 1
        beta1, beta2 = self.betas
        for i, param in enumerate(self.params):
            if param.grad is None:
                continue
            grad = param.grad.data

            # Update biased first moment estimate
            self.m[i] = beta1 * self.m[i] + (1 - beta1) * grad

            # Update biased second raw moment estimate
            self.v[i] = beta2 * self.v[i] + (1 - beta2) * (grad ** 2)

            # Bias-corrected estimates
            m_hat = self.m[i] / (1 - beta1 ** self.t)
            v_hat = self.v[i] / (1 - beta2 ** self.t)

            # Update parameters
            param.data -= self.lr * m_hat / (torch.sqrt(v_hat) + self.eps)


In [10]:
class AdamWOptimizer(AdamOptimizer):
    def __init__(self, params, lr=0.001, betas=(0.9, 0.999), eps=1e-8, weight_decay=1e-2):
        super().__init__(params, lr, betas, eps)
        self.weight_decay = weight_decay

    def step(self):
        """Perform a single optimization step with weight decay."""
        self.t += 1
        beta1, beta2 = self.betas
        for i, param in enumerate(self.params):
            if param.grad is None:
                continue
            grad = param.grad.data

            # Apply weight decay
            grad += self.weight_decay * param.data

            # Update biased first moment estimate
            self.m[i] = beta1 * self.m[i] + (1 - beta1) * grad

            # Update biased second raw moment estimate
            self.v[i] = beta2 * self.v[i] + (1 - beta2) * (grad ** 2)

            # Bias-corrected estimates
            m_hat = self.m[i] / (1 - beta1 ** self.t)
            v_hat = self.v[i] / (1 - beta2 ** self.t)

            # Update parameters
            param.data -= self.lr * m_hat / (torch.sqrt(v_hat) + self.eps)


class AdaBeliefOptimizer(AdamOptimizer):
    def step(self):
        """Perform a single optimization step with AdaBelief modification."""
        self.t += 1
        beta1, beta2 = self.betas
        for i, param in enumerate(self.params):
            if param.grad is None:
                continue
            grad = param.grad.data

            # Update biased first moment estimate
            self.m[i] = beta1 * self.m[i] + (1 - beta1) * grad

            # Update biased second raw moment estimate with AdaBelief modification
            self.v[i] = beta2 * self.v[i] + (1 - beta2) * ((grad - self.m[i]) ** 2)

            # Bias-corrected estimates
            m_hat = self.m[i] / (1 - beta1 ** self.t)
            v_hat = self.v[i] / (1 - beta2 ** self.t)

            # Update parameters
            param.data -= self.lr * m_hat / (torch.sqrt(v_hat) + self.eps)


In [11]:
# Step 6: Train and test functions
def train_model(model, optimizer, criterion, train_loader, num_epochs):
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}")


In [12]:
def test_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return 100 * correct / total

In [13]:
# Step 7: Main program
num_epochs = 10
criterion = nn.CrossEntropyLoss()

In [None]:
# Training with Adam
print("Training with Adam...")
model_adam = CNN().to(device)
optimizer_adam = AdamOptimizer(model_adam.parameters(), lr=0.001)
train_model(model_adam, optimizer_adam, criterion, train_loader, num_epochs)
accuracy_adam = test_model(model_adam, test_loader)
print(f"Test Accuracy with Adam: {accuracy_adam:.2f}%")

Training with Adam...
Epoch [1/10], Loss: 1.6226
Epoch [2/10], Loss: 1.2859
Epoch [3/10], Loss: 1.1612
Epoch [4/10], Loss: 1.0961
Epoch [5/10], Loss: 1.0562
Epoch [6/10], Loss: 1.0159
Epoch [7/10], Loss: 0.9912
Epoch [8/10], Loss: 0.9676
Epoch [9/10], Loss: 0.9524
Epoch [10/10], Loss: 0.9329
Test Accuracy with Adam: 70.12%


In [None]:
# Training with AdaBelief
print("\nTraining with AdaBelief...")
model_adabelief = CNN().to(device)
optimizer_adabelief = AdaBeliefOptimizer(model_adabelief.parameters(), lr=0.001)
train_model(model_adabelief, optimizer_adabelief, criterion, train_loader, num_epochs)
accuracy_adabelief = test_model(model_adabelief, test_loader)
print(f"Test Accuracy with AdaBelief: {accuracy_adabelief:.2f}%")



Training with AdaBelief...
Epoch [1/10], Loss: 1.6160
Epoch [2/10], Loss: 1.3174
Epoch [3/10], Loss: 1.1722
Epoch [4/10], Loss: 1.1104
Epoch [5/10], Loss: 1.0488
Epoch [6/10], Loss: 1.0202
Epoch [7/10], Loss: 0.9810
Epoch [8/10], Loss: 0.9552
Epoch [9/10], Loss: 0.9369
Epoch [10/10], Loss: 0.9198
Test Accuracy with AdaBelief: 69.75%


In [14]:
# Step 8: Compare Results
print("\nOptimizer Comparison:")
print(f"Adam Test Accuracy: {accuracy_adam:.2f}%")
#print(f"AdamW Test Accuracy: {accuracy_adamw:.2f}%")
print(f"AdaBelief Test Accuracy: {accuracy_adabelief:.2f}%")


Optimizer Comparison:


NameError: name 'accuracy_adam' is not defined