In [1]:
# This is to use the Utils.py file. Since they are not in the same folder. 
# Mapping the current working directory to the main path.
import os, sys
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

if project_root not in sys.path:
    sys.path.insert(0, project_root)

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision 
import torchvision.transforms as transforms
from tqdm import tqdm
from Utils import *

In [3]:
DEVICE = torch.device("cpu") #getDeviceType(DLFrameworks.PyTorch)
print(f"Running on {DEVICE}")

Running on cpu


In [4]:
EPOCHS = 20
BATCH_SIZE = 16
LEARNING_RATE = 0.001

In [5]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,0.5,0.5), # Since our images are RGB, 
                          (0.5,0.5,0.5)) # we are using 3 values for mean and standard deviation
])

# Dataset Preparation

In [6]:
from pathlib import Path

dataset_exists = Path("./data/cifar-10-python.tar.gz").exists()

In [7]:
# Training Dataset
train_set = torchvision.datasets.CIFAR10(root="./data",
                                         train=True,
                                         transform=transform,
                                         download= not dataset_exists)

train_loader = torch.utils.data.DataLoader(dataset=train_set,
                                           batch_size=BATCH_SIZE, 
                                           shuffle=True,
                                           num_workers=2)

In [8]:
# Validation Dataset
val_set = torchvision.datasets.CIFAR10(root="./data",
                                       train=False,
                                       transform=transform,
                                       download=not dataset_exists)

val_loader = torch.utils.data.DataLoader(dataset=val_set,
                                         batch_size=BATCH_SIZE,
                                         shuffle=False,
                                         num_workers=2)

# Model

In [9]:
class CIFAR10Net(nn.Module):
    def __init__(self, name: str, drop_out_prob: int, num_classes: int):
        super(CIFAR10Net, self).__init__()

        self.model_name = name
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.drop = nn.Dropout(p= drop_out_prob)

        self.fc1 = nn.Linear(64 * 8 * 8, 256)
        self.fc2 = nn.Linear(256, num_classes)
    
    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.pool(x)

        x = self.conv2(x)
        x = self.relu(x)
        x = self.pool(x)

        x = torch.flatten(x, 1) 

        x = self.fc1(x)
        x = self.fc2(x)
        return x

In [10]:
model = CIFAR10Net(name="CIFAR-10", drop_out_prob=0.5, num_classes=10).to(DEVICE)

In [11]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr= LEARNING_RATE)

# Training

In [None]:

def train(model, optimizer, criterion, train_loader, EPOCHS):
    model.train()
    total_loss = []

    for epoch in range(EPOCHS):

        running_loss = 0.0
        progress_bar = tqdm(train_loader,
                            desc= f"Epoch: {epoch + 1} / {EPOCHS}",
                            unit="batch")


        for i, (images,labels) in enumerate(progress_bar, 1):
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            output = model(images)

            loss = criterion(output, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            progress_bar.set_postfix(avg_loss=running_loss / i)
        
        total_loss.append(running_loss)
    
    return total_loss

In [13]:
total_loss = train(model, optimizer, criterion, train_loader, EPOCHS)

Epoch: 1 / 20: 100%|██████████| 3125/3125 [01:19<00:00, 39.46batch/s, avg_loss=1.3] 
Epoch: 2 / 20: 100%|██████████| 3125/3125 [01:20<00:00, 38.78batch/s, avg_loss=0.987]
Epoch: 3 / 20: 100%|██████████| 3125/3125 [01:25<00:00, 36.51batch/s, avg_loss=0.884]
Epoch: 4 / 20: 100%|██████████| 3125/3125 [01:19<00:00, 39.53batch/s, avg_loss=0.823]
Epoch: 5 / 20: 100%|██████████| 3125/3125 [01:28<00:00, 35.38batch/s, avg_loss=0.781]
Epoch: 6 / 20: 100%|██████████| 3125/3125 [01:20<00:00, 38.99batch/s, avg_loss=0.745]
Epoch: 7 / 20: 100%|██████████| 3125/3125 [01:18<00:00, 39.75batch/s, avg_loss=0.719]
Epoch: 8 / 20: 100%|██████████| 3125/3125 [01:23<00:00, 37.49batch/s, avg_loss=0.694]
Epoch: 9 / 20: 100%|██████████| 3125/3125 [01:25<00:00, 36.71batch/s, avg_loss=0.674]
Epoch: 10 / 20: 100%|██████████| 3125/3125 [01:31<00:00, 34.05batch/s, avg_loss=0.653]
Epoch: 11 / 20: 100%|██████████| 3125/3125 [01:28<00:00, 35.36batch/s, avg_loss=0.637]
Epoch: 12 / 20: 100%|██████████| 3125/3125 [01:27<00:

# Evaluation

In [16]:
def evaluate(model, test_loader):
    model.eval()
    correct = total = 0
    progress_bar = tqdm(test_loader,
                        desc=f"Test Dataset",
                        unit="batch")

    with torch.no_grad():
        for i, (images, labels) in enumerate(progress_bar, 1):
            images, labels = images.to(DEVICE), labels.to(DEVICE)

            output = model(images)
            _, predicted = torch.max(output.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f"Test Accuracy: {100 * correct / total:.2f}%")


In [17]:
evaluate(model, val_loader)

Test Dataset: 100%|██████████| 625/625 [00:21<00:00, 29.70batch/s] 

Test Accuracy: 68.47%





# Hyper Parameter Tuning

In [19]:
import optuna

In [26]:
def make_model(drop_out_prob, channels) -> nn.Module:
    class CIFAR10Net(nn.Module):
        def __init__(self, name: str, drop_out_prob: int, num_classes: int):
            super(CIFAR10Net, self).__init__()

            self.model_name = name
            self.conv1 = nn.Conv2d(in_channels=3, out_channels=channels, kernel_size=3, padding=1)
            self.conv2 = nn.Conv2d(in_channels=channels, out_channels=channels * 2, kernel_size=3, padding=1)
            self.relu = nn.ReLU()
            self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
            self.drop = nn.Dropout(p= drop_out_prob)

            self.fc1 = nn.Linear(64 * 8 * 8, 256)
            self.fc2 = nn.Linear(256, num_classes)
        
        def forward(self, x):
            x = self.conv1(x)
            x = self.relu(x)
            x = self.pool(x)

            x = self.conv2(x)
            x = self.relu(x)
            x = self.pool(x)

            x = torch.flatten(x, 1) 

            x = self.fc1(x)
            x = self.fc2(x)
            return x
    
    model = CIFAR10Net(name="CIFAR-10Net", drop_out_prob=drop_out_prob, num_classes=10)
    return model

In [21]:
def train_model(model,optimizer, criterion, train_loader, DEVICE, EPOCHS):

    model.train()
    total_loss = []

    for epoch in range(EPOCHS):

        running_loss = 0.0
        progress_bar = tqdm(train_loader,
                            desc=f"{epoch + 1} / {EPOCHS} EPOCHS",
                            unit="batch")
        
        for i, images, labels in enumerate(progress_bar, 1):
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            output = model(images)

            loss = criterion(output, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            progress_bar.set_postfix(avg_loss=running_loss / i)
        
        total_loss.append(running_loss)
    
    return total_loss

In [22]:
def evaluate(model, val_loader, DEVICE):

    model.eval()
    correct = total = 0
    progress_bar = tqdm(val_loader,
                        desc= "Validating",
                        unit="batch")
    
    with torch.no_grad():
        for i, (images, labels) in enumerate(progress_bar, 1):

            images, labels = images.to(DEVICE), labels.to(DEVICE)
            output = model(images)
            _, predicted = torch.max(output.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        
    print(f"Test Accuracy: {100 * correct / total:.2f}%")
    return correct / total

In [23]:
def objective(trial, train_loader, val_loader, DEVICE):
        
    learning_rate = trial.suggest_float('learning_rate',1e-5, 1e-2, log=True)
    dropout = trial.suggest_float('dropout', 0.1, 0.7)
    channels = trial.suggest_categorical('channels', [32, 64, 128])

    model = make_model(drop_out_prob=dropout, channels=channels).to(DEVICE)
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    train(model, optimizer, criterion, train_loader,5)
    accuracy = evaluate(model, val_loader, DEVICE)
    return accuracy

In [27]:
from functools import partial
objective_with_args = partial(objective, train_loader=train_loader, val_loader=val_loader, DEVICE=DEVICE)


study = optuna.create_study(direction='maximize')
study.optimize(objective_with_args, n_trials=5)

print("Best trial:")
trial = study.best_trial
print(f"Accuracy: {trial.value:.4f}")
print("Params:")
for k, v in trial.params.items():
    print(f"{k}: {v}")

[I 2025-07-08 22:37:14,754] A new study created in memory with name: no-name-ad271db4-b7f6-437b-ae7b-18115409247d
Epoch: 1 / 5: 100%|██████████| 3125/3125 [01:12<00:00, 42.88batch/s, avg_loss=1.27]
Epoch: 2 / 5: 100%|██████████| 3125/3125 [01:09<00:00, 44.69batch/s, avg_loss=0.935]
Epoch: 3 / 5: 100%|██████████| 3125/3125 [01:13<00:00, 42.28batch/s, avg_loss=0.809]
Epoch: 4 / 5: 100%|██████████| 3125/3125 [01:16<00:00, 40.99batch/s, avg_loss=0.723]
Epoch: 5 / 5: 100%|██████████| 3125/3125 [01:15<00:00, 41.56batch/s, avg_loss=0.658]
Validating: 100%|██████████| 625/625 [00:19<00:00, 32.42batch/s] 
[I 2025-07-08 22:43:42,254] Trial 0 finished with value: 0.7148 and parameters: {'learning_rate': 0.0003552405010988866, 'dropout': 0.6443597379302418, 'channels': 32}. Best is trial 0 with value: 0.7148.


Test Accuracy: 71.48%


Epoch: 1 / 5:   0%|          | 0/3125 [00:16<?, ?batch/s]
[W 2025-07-08 22:43:58,585] Trial 1 failed with parameters: {'learning_rate': 0.000930110634862718, 'dropout': 0.6678249003029241, 'channels': 128} because of the following error: RuntimeError('mat1 and mat2 shapes cannot be multiplied (16x16384 and 4096x256)').
Traceback (most recent call last):
  File "/Users/mithesh/Documents/CodeToGreatness/NeuroForge/Projects/myenv/lib/python3.12/site-packages/optuna/study/_optimize.py", line 201, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/var/folders/17/js2qkwqd7cqdm6wdqkfsnpm00000gn/T/ipykernel_27062/2615564000.py", line 11, in objective
    train(model, optimizer, criterion, train_loader,5)
  File "/var/folders/17/js2qkwqd7cqdm6wdqkfsnpm00000gn/T/ipykernel_27062/342004722.py", line 15, in train
    output = model(images)
             ^^^^^^^^^^^^^
  File "/Users/mithesh/Documents/CodeToGreatness/NeuroForge/Projects/myenv/lib/python3.12/site

RuntimeError: mat1 and mat2 shapes cannot be multiplied (16x16384 and 4096x256)