# **Importing Libraries**

In [42]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.cuda.amp import GradScaler, autocast
from torch.utils.data import random_split, DataLoader
import os

## Defining The Model

In [47]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Training settings shared by the cells below
num_epochs = 10             # upper bound on passes over the training set
data_dir = 'imagenet-mini'  # dataset root; the 'train' and 'val' subfolders are read from it
batch_size = 8              # samples per mini-batch for both data loaders

class CustomVGG(nn.Module):
    """VGG-16-style image classifier: five convolutional blocks plus a dense head.

    Every block stacks 3x3 convolutions (padding 1, so spatial size is kept)
    with in-place ReLU activations and ends in a 2x2 max-pool that halves the
    height and width. The feature map is then pooled to 7x7, flattened and
    passed through three fully connected layers.

    Args:
        num_classes: Number of logits produced by the final linear layer.
    """

    def __init__(self, num_classes=1000):
        super().__init__()
        # Define the VGG blocks: (in_channels, out_channels, number of conv layers)
        self.block1 = self.vgg_block(3, 64, 2)
        self.block2 = self.vgg_block(64, 128, 2)
        self.block3 = self.vgg_block(128, 256, 3)
        self.block4 = self.vgg_block(256, 512, 3)
        self.block5 = self.vgg_block(512, 512, 3)
        # Pool to the fixed 7x7 grid the first linear layer expects. For 224x224
        # inputs the blocks already emit 7x7, so this is the identity there; for
        # other input sizes it prevents a shape mismatch in the dense head.
        # The layer has no parameters, so state_dict keys are unaffected.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        # Define the fully connected layers
        self.fc_layers = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes)
        )

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Tensor of shape (batch, 3, height, width). Height and width must
                be at least 32 so that five successive 2x2 poolings leave a
                non-empty feature map.

        Returns:
            Tensor of shape (batch, num_classes) holding unnormalized logits.
        """
        # Forward pass through the VGG blocks
        for block in (self.block1, self.block2, self.block3, self.block4, self.block5):
            x = block(x)
        x = self.avgpool(x)
        # Flatten everything except the batch dimension for the dense head
        x = torch.flatten(x, 1)
        # Forward pass through the fully connected layers
        return self.fc_layers(x)

    def vgg_block(self, in_channels, out_channels, num_conv_layers):
        """Build one VGG block.

        Args:
            in_channels: Channels of the tensor entering the block.
            out_channels: Channels produced by every convolution in the block.
            num_conv_layers: How many conv + ReLU pairs to stack before pooling.

        Returns:
            An ``nn.Sequential`` of ``num_conv_layers`` conv/ReLU pairs followed
            by a 2x2, stride-2 max-pool.
        """
        layers = []
        for _ in range(num_conv_layers):
            layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
            layers.append(nn.ReLU(inplace=True))
            # Only the first convolution changes the channel count
            in_channels = out_channels
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        return nn.Sequential(*layers)

# Build the network (adjust the class count to your task) and display its layer layout
model = CustomVGG(1000)
print(model)


CustomVGG(
  (block1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block3): Sequential(
    (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU(inplace=True)
    (6): Max

# *Loading the Datasets*

In [48]:
# Training pipeline: random resized crop to 224 px and random horizontal flip as
# augmentation, followed by tensor conversion and per-channel normalization
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(size=224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Validation pipeline: deterministic resize + center crop, same normalization
transform_val = transforms.Compose([
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# One class per subfolder under <data_dir>/train and <data_dir>/val
train_dataset = datasets.ImageFolder(f'{data_dir}/train', transform=transform_train)
val_dataset = datasets.ImageFolder(f'{data_dir}/val', transform=transform_val)

# Only the training batches are shuffled; validation order stays fixed
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

## Accuracy Helper


In [49]:
# Function to calculate accuracy
def calculate_accuracy(loader, model):
    """Return the top-1 accuracy of ``model`` over ``loader`` as a percentage.

    The model is switched to evaluation mode (and left in it) and gradients
    are disabled while iterating.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches of tensors.
        model: ``nn.Module`` mapping ``inputs`` to per-class scores of shape
            (batch, num_classes).

    Returns:
        ``100 * correct / total`` as a float, or ``0.0`` when ``loader``
        yields no samples (instead of raising ``ZeroDivisionError``).
    """
    model.eval()
    # Send batches to wherever the model's weights live so the two can never
    # disagree; fall back to the notebook-level `device` for parameter-free models.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(target_device), labels.to(target_device)
            outputs = model(inputs)
            # Index of the highest score along the class dimension
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    if total == 0:
        # Nothing was evaluated; report zero rather than dividing by zero
        return 0.0
    return 100 * correct / total


## Training loop


In [50]:

# Function to create and return the model and optimizer
def create_model_optimizer(optimizer_name):
    """Build a fresh ``CustomVGG`` on ``device`` together with the requested optimizer.

    Only the selected optimizer is constructed; the others are never
    instantiated. The name is validated before the model is built so that a
    typo does not cost a full model allocation.

    Args:
        optimizer_name: One of 'SGD', 'Adam', 'RMSprop', 'AdamW', 'Adamax',
            'SparseAdam', 'RAdam', 'NAdam'. SGD uses lr=0.001 with
            momentum=0.9; all others use lr=0.0001.

    Returns:
        Tuple ``(model, optimizer)`` where ``optimizer`` is bound to
        ``model.parameters()``.

    Raises:
        KeyError: If ``optimizer_name`` is not one of the supported names.
    """
    # Factories instead of instances: each lambda runs only when selected
    optimizer_factories = {
        'SGD': lambda params: optim.SGD(params, lr=0.001, momentum=0.9),
        'Adam': lambda params: optim.Adam(params, lr=0.0001),
        'RMSprop': lambda params: optim.RMSprop(params, lr=0.0001),
        'AdamW': lambda params: optim.AdamW(params, lr=0.0001),
        'Adamax': lambda params: optim.Adamax(params, lr=0.0001),
        # NOTE: SparseAdam only accepts sparse gradients; the conv/linear layers
        # of CustomVGG produce dense ones, so optimizer.step() will raise.
        # Kept so existing callers that request it still get the same object.
        'SparseAdam': lambda params: optim.SparseAdam(params, lr=0.0001),
        'RAdam': lambda params: optim.RAdam(params, lr=0.0001),
        'NAdam': lambda params: optim.NAdam(params, lr=0.0001),
    }
    if optimizer_name not in optimizer_factories:
        raise KeyError(
            f'Unknown optimizer {optimizer_name!r}; expected one of {sorted(optimizer_factories)}'
        )

    model = CustomVGG(num_classes=1000).to(device)
    optimizer = optimizer_factories[optimizer_name](model.parameters())
    return model, optimizer



In [51]:
# Function to train and evaluate the model
def train_and_evaluate(optimizer_name):
    """Train a fresh model with the named optimizer and record per-epoch metrics.

    Runs up to ``num_epochs`` epochs over ``train_loader``, evaluating on
    ``val_loader`` after each one. The learning rate is multiplied by 0.1
    every 5 epochs, gradients are clipped to a global norm of 1.0, and
    training stops early after 3 consecutive epochs without an improvement in
    validation accuracy. Mixed precision is used only when running on CUDA.

    Side effects: prints one summary line per epoch and writes
    ``best_model_<optimizer_name>.pth`` containing the best model weights, the
    optimizer state as of the last epoch run, and the best validation accuracy.

    Args:
        optimizer_name: Name understood by ``create_model_optimizer``.

    Returns:
        Tuple ``(train_losses, val_losses, train_accuracies, val_accuracies)``;
        each is a list with one float per completed epoch (accuracies in %).
    """
    model, optimizer = create_model_optimizer(optimizer_name)
    criterion = nn.CrossEntropyLoss()
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    # torch.cuda.amp is CUDA-only; disabling it explicitly on CPU avoids warnings
    # and turns the scaler calls below into plain pass-throughs.
    use_amp = device.type == 'cuda'
    scaler = GradScaler(enabled=use_amp)

    best_val_acc = 0
    best_model_wts = None
    patience = 3
    trigger_times = 0

    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []

    for epoch in range(num_epochs):
        model.train()
        running_loss, correct, total = 0.0, 0, 0

        loop = tqdm(train_loader, desc=f'Epoch [{epoch+1}/{num_epochs}]', leave=True)
        for inputs, labels in loop:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            with autocast(enabled=use_amp):
                outputs = model(inputs)
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            # Gradients are still multiplied by the loss scale at this point.
            # Unscale them first so max_norm is applied to the true gradients;
            # clipping the scaled values would shrink every update to almost nothing.
            scaler.unscale_(optimizer)
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            scaler.step(optimizer)
            scaler.update()

            batch_loss = loss.item()  # single host sync, reused below
            running_loss += batch_loss
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            loop.set_postfix(loss=batch_loss)

        # Mean of the per-batch mean losses
        train_loss = running_loss / len(train_loader)
        train_accuracy = 100 * correct / total
        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)

        val_loss, val_correct, val_total = 0.0, 0, 0
        model.eval()
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                _, predicted = torch.max(outputs, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        val_loss /= len(val_loader)
        val_accuracy = 100 * val_correct / val_total
        val_losses.append(val_loss)
        val_accuracies.append(val_accuracy)

        scheduler.step()

        print(f'Epoch [{epoch+1}/{num_epochs}] - Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}%, Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.2f}%')

        if val_accuracy > best_val_acc:
            best_val_acc = val_accuracy
            # state_dict() tensors alias the live parameters, so a shallow dict
            # copy would be silently overwritten by later epochs; clone each one.
            best_model_wts = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
            trigger_times = 0
        else:
            trigger_times += 1

        if trigger_times >= patience:
            print('Early stopping triggered')
            break

    print(f'Best Validation Accuracy: {best_val_acc:.2f}%')
    if best_model_wts is not None:
        # Restore the snapshot from the best epoch before saving
        model.load_state_dict(best_model_wts)
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'best_val_acc': best_val_acc
    }, f'best_model_{optimizer_name}.pth')

    return train_losses, val_losses, train_accuracies, val_accuracies

## **Training Each Optimizer and Plotting the Results**


In [52]:
# Function to plot loss and accuracy curves against the epoch number
def plot_loss_vs_epoch(loss_dict, acc_dict):
    """Draw a two-row figure: losses on top, accuracies below.

    Args:
        loss_dict: Maps optimizer name -> {'train': [...], 'val': [...]} with
            one loss value per epoch.
        acc_dict: Same layout as ``loss_dict`` but holding accuracies in percent.

    Each optimizer contributes a train and a val curve to both panels, with
    epochs numbered from 1. The figure is shown via ``plt.show()``; nothing is
    returned.
    """
    plt.figure(figsize=(12, 8))

    # (history to draw, y-axis label, panel title) for the top and bottom rows
    panels = (
        (loss_dict, 'Loss', 'Loss vs. Epoch'),
        (acc_dict, 'Accuracy (%)', 'Accuracy vs. Epoch'),
    )
    for row, (history, y_label, heading) in enumerate(panels, start=1):
        plt.subplot(2, 1, row)
        for optimizer_name, curves in history.items():
            train_curve = curves['train']
            val_curve = curves['val']
            plt.plot(range(1, len(train_curve) + 1), train_curve, label=f'{optimizer_name} Train')
            plt.plot(range(1, len(val_curve) + 1), val_curve, label=f'{optimizer_name} Val')

        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.title(heading)
        plt.legend()

    plt.tight_layout()
    plt.show()

# List of optimizers to be used.
# 'SparseAdam' is intentionally excluded: it only supports sparse gradients,
# while this model's conv/linear layers produce dense ones, so its step()
# raises RuntimeError and would abort the sweep before anything is plotted.
optimizers_list = ['SGD', 'Adam', 'RMSprop', 'AdamW', 'Adamax', 'RAdam', 'NAdam']

# Train and evaluate for different optimizers, collecting per-epoch curves by name
loss_dict, acc_dict = {}, {}
for optimizer_name in optimizers_list:
    print(f'Training with optimizer {optimizer_name}')
    train_losses, val_losses, train_accuracies, val_accuracies = train_and_evaluate(optimizer_name)
    loss_dict[optimizer_name] = {'train': train_losses, 'val': val_losses}
    acc_dict[optimizer_name] = {'train': train_accuracies, 'val': val_accuracies}

plot_loss_vs_epoch(loss_dict, acc_dict)

Training with optimizer SGD


Epoch [1/10]: 100%|██████████| 4341/4341 [07:20<00:00,  9.84it/s, loss=6.91]


Epoch [1/10] - Train Loss: 6.9078, Train Acc: 0.10%, Val Loss: 6.9078, Val Acc: 0.10%


Epoch [2/10]:  41%|████      | 1763/4341 [03:00<04:23,  9.79it/s, loss=6.91]


KeyboardInterrupt: 