In [1]:
import os
import sys

# Put the parent of the current working directory at the front of the import
# path, presumably so the project-local modules imported in the next cell
# resolve. The membership check keeps re-running this cell from stacking
# duplicate entries.
working_dir = os.getcwd()
project_root = os.path.abspath(os.path.join(working_dir, os.pardir))

already_importable = project_root in sys.path
if not already_importable:
    sys.path.insert(0, project_root)

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from tqdm import tqdm
from Utils import *
from PyTorchUtils import *

In [3]:
# Resolve the compute device through the project helper (getDeviceType and
# DLFrameworks come from one of the star imports above — TODO confirm which).
# The recorded output below shows it resolved to the MPS backend on this run.
DEVICE = getDeviceType(DLFrameworks.Torch)
DEVICE

device(type='mps')

In [4]:
# Hyperparameters for the baseline model.
# NOTE(review): EPOCHS is not referenced in the visible cells — the training
# call passes a literal 1 instead; confirm which value is intended.
EPOCHS = 20
BATCH_SIZE = 16        # mini-batch size shared by the baseline train and test DataLoaders
LEARNING_RATE = 0.001  # step size handed to the AdamW optimizer

In [5]:
# Preprocessing applied to every image: convert to a float tensor, then
# normalise each of the three colour channels with mean 0.5 and std 0.5.
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))

transform = transforms.Compose([to_tensor, normalize])

In [6]:
# CIFAR-10 training split. download=True fetches the archive into ./data when
# it is missing and is skipped once the files are already there, so this cell
# also works on a fresh checkout (Restart & Run All) instead of raising
# "Dataset not found".
train_set = torchvision.datasets.CIFAR10(root='./data',
                                         train=True,
                                         download=True,
                                         transform=transform)

# Shuffle the training data every epoch; two worker processes load batches.
train_loader = torch.utils.data.DataLoader(train_set,
                                           batch_size=BATCH_SIZE,
                                           shuffle=True,
                                           num_workers=2)

# CIFAR-10 test split, preprocessed with the same transform.
test_set = torchvision.datasets.CIFAR10(root='./data',
                                        train=False,
                                        download=True,
                                        transform=transform)

# Evaluation order does not matter, so the test loader is left unshuffled.
test_loader = torch.utils.data.DataLoader(test_set,
                                          batch_size=BATCH_SIZE,
                                          shuffle=False,
                                          num_workers=2)

In [7]:
class CIFAR10(nn.Module):
    """Small two-stage CNN mapping 3x32x32 images to 10 class logits.

    Pipeline: conv(3->32) -> ReLU -> 2x2 max-pool -> conv(32->64) -> ReLU ->
    2x2 max-pool -> flatten -> linear(256) -> ReLU -> dropout(0.5) -> linear(10).
    """

    def __init__(self):
        super().__init__()

        # 3x3 convolutions with padding=1 keep the spatial size; only the
        # 2x2/stride-2 pooling halves it.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.drop = nn.Dropout(p=0.5)

        # Measure the flattened feature width by running a single blank
        # 32x32 RGB image through the convolutional stages.
        with torch.no_grad():
            probe = self._extract_features(torch.zeros(1, 3, 32, 32))
        n_features = probe.numel()

        self.fc1 = nn.Linear(n_features, 256)
        self.fc2 = nn.Linear(256, 10)

    def _extract_features(self, x):
        """Apply both conv -> ReLU -> pool stages and return the feature maps."""
        x = self.pool(self.relu(self.conv1(x)))
        return self.pool(self.relu(self.conv2(x)))

    def forward(self, x):
        """Return unnormalised class scores of shape (batch, 10)."""
        features = self._extract_features(x)

        # Collapse everything except the batch dimension.
        flat = features.view(features.size(0), -1)

        hidden = self.drop(self.relu(self.fc1(flat)))
        return self.fc2(hidden)


In [8]:
# Baseline setup: build the network, move it onto the selected device, and
# pair a cross-entropy loss with an AdamW optimizer over all of its parameters.
model = CIFAR10()
model = model.to(DEVICE)

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(params=model.parameters(), lr=LEARNING_RATE)

### Training Loop

In [9]:
# Train the baseline model with the project-defined train_model helper.
# The literal 1 is presumably the epoch count (the recorded progress bar
# reads "Epoch: 1 / 1").
# NOTE(review): the EPOCHS constant defined earlier is not used here — confirm intent.
total_loss = train_model(model, train_loader, optimizer, criterion, 1, DEVICE)

Epoch: 1 / 1: 100%|██████████| 3125/3125 [00:33<00:00, 93.45batch/s, avg_loss=1.41] 


### Evaluation Loop

In [10]:
# Score the trained baseline on the CIFAR-10 test loader using the
# project-defined evaluate_model helper; per the recorded output it reports
# the test accuracy.
evaluate_model(model, test_loader, DEVICE)

Testing: 100%|██████████| 625/625 [00:14<00:00, 43.42batch/s] 

Test Accuracy: 62.98%





## Hyperparameter Tuning

In [11]:
import optuna
from torch.utils.data import random_split, DataLoader

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
# Preprocessing for the tuning section — the same two steps as the baseline
# pipeline defined earlier: tensor conversion, then per-channel normalisation
# with mean 0.5 and std 0.5.
preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
]
transform = transforms.Compose(preprocessing_steps)

In [13]:
# Complete CIFAR-10 training split, used as the pool for the train/validation
# split below. download=True fetches the archive when it is missing (and is
# skipped when the files are already present), so the tuning section can be
# run on its own on a machine that has not downloaded the dataset yet.
full_train = torchvision.datasets.CIFAR10(root="data",
                                          train=True,
                                          download=True,
                                          transform=transform)

In [14]:
# Hold out 20% of the training data for validation during tuning.
# The split is driven by an explicitly seeded generator so every kernel restart
# (and every Optuna trial) sees the same validation images; without it the
# trial scores are not comparable between runs.
SPLIT_SEED = 42
train_size = int(0.8 * len(full_train))
val_size   = len(full_train) - train_size  # remainder, so the two sizes always sum to the full length

# NOTE: this rebinds `train_set`, which previously referred to the complete
# training split; from here on it is the 80% subset.
train_set, val_set = random_split(
    full_train,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

In [15]:
class _TunableCIFARNet(nn.Module):
    """Two-stage CNN for 3x32x32 inputs whose width and dropout are tunable.

    Args:
        channels: output channels of the first convolution; the second
            convolution produces twice as many.
        dropout: drop probability applied after the hidden linear layer.
    """

    def __init__(self, channels, dropout):
        super().__init__()
        self.conv1 = nn.Conv2d(3, channels, 3, padding=1)
        self.conv2 = nn.Conv2d(channels, channels * 2, 3, padding=1)
        self.pool  = nn.MaxPool2d(2, 2)
        self.drop  = nn.Dropout(dropout)

        # Probe the flattened feature width with one blank 32x32 RGB image.
        # Activations are skipped here because they do not change shapes.
        with torch.no_grad():
            x = torch.zeros(1, 3, 32, 32)
            x = self.pool(self.conv1(x))
            x = self.pool(self.conv2(x))
        flat = x.numel()

        self.fc1 = nn.Linear(flat, 256)
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        """Return unnormalised class scores of shape (batch, 10)."""
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = self.drop(nn.functional.relu(self.fc1(x)))
        return self.fc2(x)


def _select_device():
    """Return the best available torch device: CUDA, then MPS, then CPU."""
    if torch.cuda.is_available():
        return torch.device('cuda')
    # torch.backends.mps.is_available() is the long-standing availability check;
    # torch.mps.is_available() does not exist in every torch release.
    if torch.backends.mps.is_available():
        return torch.device('mps')
    return torch.device('cpu')


def objective(trial):
    """Optuna objective: briefly train a small CNN and return validation accuracy.

    Samples three hyperparameters from ``trial`` — ``lr`` (log-uniform in
    [1e-5, 1e-2]), ``dropout`` (uniform in [0.1, 0.7]) and ``channels`` (one of
    32, 64, 128) — trains for 5 epochs with Adam on the module-level
    ``train_set``, and scores the result on the module-level ``val_set``.

    Args:
        trial: object exposing ``suggest_float`` and ``suggest_categorical``
            (an Optuna trial when called by ``study.optimize``).

    Returns:
        Fraction of validation samples classified correctly, as a float in [0, 1].
    """
    # 1. Sample hyperparameters
    lr       = trial.suggest_float('lr',      1e-5, 1e-2, log=True)
    dropout  = trial.suggest_float('dropout', 0.1, 0.7)
    channels = trial.suggest_categorical('channels', [32, 64, 128])

    # 2. Data loaders (validation needs no shuffling and no gradients, so a
    #    larger batch is used there)
    train_loader = DataLoader(train_set, batch_size=128, shuffle=True)
    val_loader   = DataLoader(val_set,   batch_size=256, shuffle=False)

    # 3. Model on the best available device
    device = _select_device()
    model  = _TunableCIFARNet(channels, dropout).to(device)

    # 4. Optimizer & loss
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    # 5. Training for a few epochs
    for epoch in range(5):
        model.train()
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            loss = criterion(model(images), labels)
            loss.backward()
            optimizer.step()

    # 6. Validation accuracy (eval mode disables dropout)
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            preds = model(images).argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    return correct / total

In [16]:
# 8. Run the study
# Search for the hyperparameters that maximise the validation accuracy
# returned by `objective`, using 20 trials.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)

# Summarise the winning trial: its score followed by one line per parameter.
trial = study.best_trial
print("Best trial:")
print(f"Accuracy: {trial.value:.4f}")
print("Params:")
for name, value in trial.params.items():
    print(f"{name}: {value}")

[I 2025-07-07 12:15:38,495] A new study created in memory with name: no-name-a796e984-f681-4227-930a-12925f94205b
[W 2025-07-07 12:15:39,929] Trial 0 failed with parameters: {'lr': 0.007449908472050174, 'dropout': 0.29292818689866285, 'channels': 32} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/Users/mithesh/Documents/CodeToGreatness/NeuroForge/Projects/myenv/lib/python3.12/site-packages/optuna/study/_optimize.py", line 201, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/var/folders/17/js2qkwqd7cqdm6wdqkfsnpm00000gn/T/ipykernel_26298/1089435353.py", line 43, in objective
    for images, labels in train_loader:
                          ^^^^^^^^^^^^
  File "/Users/mithesh/Documents/CodeToGreatness/NeuroForge/Projects/myenv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 733, in __next__
    data = self._next_data()
           ^^^^^^^^^^^^^^^^^
  File "/Users/mithesh/Documen

KeyboardInterrupt: 

## Best Model

In [None]:

# 1. Retrieve best hyperparameters from the Optuna study
best_params = study.best_trial.params
lr       = best_params["lr"]        # e.g. 0.00052
dropout  = best_params["dropout"]   # e.g. 0.37
channels = best_params["channels"]  # e.g. 64

# 2. Data loaders for the final run.
# `train_set` was rebound to the 80% tuning subset by the train/validation
# split, so the final model is fitted on `full_train` instead: once the
# hyperparameters are fixed the validation hold-out is no longer needed, and
# the retraining should see every training image.
train_loader = DataLoader(full_train, batch_size=128, shuffle=True, num_workers=2)
test_loader  = DataLoader(test_set,  batch_size=256, shuffle=False, num_workers=2)

# 3. Model class built from the tuned `channels` and `dropout` values
class CIFARBest(nn.Module):
    """Two-stage CNN for 3x32x32 images producing 10 class logits.

    The width and drop probability are not constructor arguments: they are
    read from the module-level names `channels` and `dropout` at the moment
    an instance is created.
    """

    def __init__(self):
        super().__init__()
        width = channels
        self.conv1 = nn.Conv2d(3, width, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(width, width * 2, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.drop = nn.Dropout(p=dropout)

        # Measure the flattened feature width by passing one blank 32x32 RGB
        # image through conv + pool twice (no activation: shapes are unaffected).
        with torch.no_grad():
            probe = torch.zeros(1, 3, 32, 32)
            for conv in (self.conv1, self.conv2):
                probe = self.pool(conv(probe))
        flat_size = probe.numel()

        self.fc1 = nn.Linear(flat_size, 256)
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        """Return unnormalised class scores of shape (batch, 10)."""
        for conv in (self.conv1, self.conv2):
            x = self.pool(torch.relu(conv(x)))

        # Collapse everything except the batch dimension.
        x = x.view(x.size(0), -1)

        hidden = torch.relu(self.fc1(x))
        return self.fc2(self.drop(hidden))

# 4. Instantiate, loss, optimizer
# Pick the fastest backend that is present: CUDA first, then Apple-Silicon MPS
# (the backend the earlier cells of this notebook ran on), then CPU. Checking
# CUDA alone would silently retrain on the CPU on an MPS-only machine.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

model     = CIFARBest().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

# 5. Full retraining loop
# Each epoch makes one pass over train_loader and reports the mean of the
# per-batch losses.
num_epochs = 20
batches_per_epoch = len(train_loader)

for epoch in range(1, num_epochs + 1):
    model.train()  # re-enable dropout for training
    running_loss = 0.0

    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    avg_loss = running_loss / batches_per_epoch
    print(f"Epoch {epoch:02d}/{num_epochs} — Training Loss: {avg_loss:.4f}")

# 6. Final evaluation on test set
# Eval mode switches dropout off; no_grad skips gradient bookkeeping.
model.eval()
correct, total = 0, 0

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        # The predicted class is the index of the largest logit per sample.
        logits = model(images)
        preds = logits.argmax(dim=1)

        correct += int((preds == labels).sum())
        total += len(labels)

print(f"Test Accuracy with best params: {100 * correct / total:.2f}%")
