In [1]:
import os
import random
from tqdm import tqdm
from datetime import datetime
import pandas as pd
import gc

In [2]:
import optuna
import torch
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning as pl
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from optuna.integration import PyTorchLightningPruningCallback
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint, ProgressBar, LearningRateMonitor, Timer, StochasticWeightAveraging, TQDMProgressBar, Callback
from pytorch_lightning.callbacks.callback import Callback
from pytorch_lightning.profilers import SimpleProfiler, AdvancedProfiler
from pytorch_lightning.utilities import seed
from pytorch_lightning.tuner.tuning import Tuner
from pytorch_lightning import Trainer, seed_everything
from optuna.integration import PyTorchLightningPruningCallback

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from PIL import Image
import matplotlib.pyplot as plt

In [4]:
torch.__version__

'2.5.1'

In [5]:
pl.__version__

'2.5.1.post0'

In [6]:
# Setting dataset path
path = "E:\\mini_ImageNet\\archive\\"

In [7]:
#Hardcoding the mean and std values; these were calculated in nb 1
mean = [0.4764, 0.4491, 0.4001]
std = [0.2264, 0.2224, 0.2212]

print("Mean:", mean)
print("Std:", std)

Mean: [0.4764, 0.4491, 0.4001]
Std: [0.2264, 0.2224, 0.2212]


### Image Transforms

In [8]:
transform=transforms.Compose([
        transforms.Resize(224),             # resize shortest side to 224 pixels
        transforms.CenterCrop(224),         # crop longest side to 224 pixels at center
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    
        transforms.RandomRotation(20),      # rotate +/- 20 degrees
        transforms.RandomHorizontalFlip(p=0.25),  # flip 24% of images
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),  
        transforms.RandomAffine(degrees=0, translate=(0.2, 0.2), scale=(0.8, 1.2)),
        transforms.RandomApply([transforms.ColorJitter(brightness=0.2, contrast=0.2, 
                                                       saturation=0.2)], p=0.5), # jitters by +/- given value
        transforms.RandomApply([transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 0.7))], p=0.3)
    ])
#https://docs.pytorch.org/vision/main/transforms.html

In [9]:
#Getting class names from the folder names of images
class_names=sorted(os.listdir(path))
class_names = ['_'.join(c.split('_')[:-1]) for c in class_names]

### Data Module

In [10]:
class DataModule(pl.LightningDataModule):
    def __init__(self, root_dir, batch_size, transform):
        super().__init__()
        self.data_dir = root_dir
        self.batch_size = batch_size
        self.transform = transform
        self.num_w = 4
        
    def setup(self, stage=None):
        full_dataset = datasets.ImageFolder(root=self.data_dir, transform=self.transform)
        train_size = int(0.8 * len(full_dataset))
        val_size = int(0.2 * len(full_dataset))
        self.train_dataset, self.val_dataset = torch.utils.data.random_split(full_dataset, [train_size, val_size])
    
    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True, 
                         num_workers=self.num_w, persistent_workers=True)        
    
    def val_dataloader(self):
        return DataLoader(self.val_dataset, batch_size=self.batch_size, 
                         num_workers=self.num_w, persistent_workers=True)        


### CNN Model

In [11]:
class CNNModel(pl.LightningModule):
    def __init__(self, trial, num_classes=50):
        super().__init__()
        self.save_hyperparameters()

        # Trial suggestions
        c1 = trial.suggest_categorical("conv1_out", [64])
        c2 = trial.suggest_categorical("conv2_out", [96])
        c3 = trial.suggest_categorical("conv3_out", [128])
        c4 = trial.suggest_categorical("conv4_out", [96, 128, 0])
        ksize = trial.suggest_categorical("kernel_size", [3, 5])

        fc1_size = trial.suggest_categorical("fc1_size", [512])
        fc2_size = trial.suggest_categorical("fc2_size", [256])
        fc3_size = trial.suggest_categorical("fc3_size", [0])
        
        dropout1 = trial.suggest_float("dropout1", 0.2, 0.5)
        dropout2 = trial.suggest_float("dropout2", 0.2, 0.5)
        dropout3 = trial.suggest_float("dropout3", 0.2, 0.5)
        
        self.learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-3, log=True)
        
        act_fn = nn.LeakyReLU(0.05)
        
        # Layers
        conv_layers = [
            nn.Conv2d(3, c1, kernel_size=ksize, padding=ksize // 2),
            nn.BatchNorm2d(c1),
            act_fn,
            nn.MaxPool2d(2),

            nn.Conv2d(c1, c2, kernel_size=ksize, padding=ksize // 2),
            nn.BatchNorm2d(c2),
            act_fn,
            nn.MaxPool2d(2),

            nn.Conv2d(c2, c3, kernel_size=ksize, padding=ksize // 2),
            nn.BatchNorm2d(c3),
            act_fn,
            nn.MaxPool2d(2),
        ]
        
        # Optionally add conv4
        if c4 > 0:
            conv_layers.extend([
                nn.Conv2d(c3, c4, kernel_size=ksize, padding=ksize // 2),
                nn.BatchNorm2d(c4),
                act_fn,
                nn.MaxPool2d(2),
            ])
            conv_out = c4
        else:
            conv_out = c3
        
        # Add flatten after convs
        conv_layers.append(nn.Flatten())
        
        # Wrap as Sequential
        self.model = nn.Sequential(*conv_layers)
        
        
        # Use dummy input to calculate flattened output size
        with torch.no_grad():
            dummy_input = torch.zeros(1, 3, 224, 224)
            n_features = self.model(dummy_input).shape[1]
        
        layers = [
            nn.Linear(n_features, fc1_size),
            nn.BatchNorm1d(fc1_size),
            act_fn,
            nn.Dropout(dropout1),
        ]

        final_in = fc1_size

        if fc2_size > 0:
            layers.extend([
                nn.Linear(fc1_size, fc2_size),
                nn.BatchNorm1d(fc2_size),
                act_fn,
                nn.Dropout(dropout2)
            ])
            final_in = fc2_size

        if fc3_size > 0:
            layers.extend([
                nn.Linear(final_in, fc3_size),
                nn.BatchNorm1d(fc3_size),
                act_fn,
                nn.Dropout(dropout3)
            ])
            final_in = fc3_size

        self.head = nn.Sequential(*layers)
        self.output = nn.Linear(final_in, num_classes)
        
    
    def forward(self, x):
        x = self.model(x)
        x = self.head(x)
        return self.output(x)
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        acc = (logits.argmax(dim=1) == y).float().mean()
        self.log("train_loss", loss, prog_bar=True)
        self.log("train_acc", acc, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        acc = (logits.argmax(dim=1) == y).float().mean()
        self.log("val_loss", loss, prog_bar=True, on_epoch=True)
        self.log("val_acc", acc, prog_bar=True, on_epoch=True)
        return {"val_loss": loss, "val_acc": acc}

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)

In [12]:
#Removing the Validation DataLoader progress bar between epochs
class MinimalProgressBar(TQDMProgressBar):
    def init_validation_tqdm(self):
        # Return a dummy tqdm with no visible output to disable validation progress bar
        # Set total=0 to avoid progress display
        return tqdm(disable=True)

    def init_test_tqdm(self):
        # Same for test dataloader
        return tqdm(disable=True)

In [13]:
class ClearCacheCallback(Callback):
    def on_train_epoch_end(self, trainer, pl_module):
        torch.cuda.empty_cache()
        gc.collect()

In [14]:
class FixedPruningCallback(PyTorchLightningPruningCallback, Callback):
    def on_validation_end(self, trainer, pl_module):
        # Only start pruning after 3 epochs
        if trainer.current_epoch < 2:
            return  # Skip pruning

        # Call the original pruning logic
        super().on_validation_end(trainer, pl_module)


### Objective Function for Hyperparameter Tuning

In [15]:
def objective(trial):
    try:
        model = CNNModel(trial)
        datamodule = DataModule(root_dir=path, batch_size=32, transform=transform)
        datamodule.setup()
        
        trainer = pl.Trainer(
            logger=False,
            max_epochs=15,
            enable_checkpointing=False,
            callbacks=[MinimalProgressBar(), 
                       FixedPruningCallback(trial, monitor="val_loss"),
                       ClearCacheCallback()],
        )
        
        trainer.fit(model, datamodule)
        return trainer.callback_metrics["val_loss"].item()
    
    except RuntimeError as e:
        if "DefaultCPUAllocator" in str(e) or "out of memory" in str(e):
            print(f"Trial {trial.number} failed due to memory error.")
            return float('inf')
        raise  e
    
    finally:
        if 'model' in locals():
            del model
        if 'trainer' in locals():
            del trainer
        
        gc.collect()
        torch.cuda.empty_cache()


In [16]:
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=10)
print("Best trial:", study.best_trial.params)

[I 2025-06-10 14:26:58,392] A new study created in memory with name: no-name-280c5c2e-b987-442a-b195-0ca1f662dfaf
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 316 K  | train
1 | head   | Sequential | 13.0 M | train
2 | output | Linear     | 12.8 K | train
----------------------------------------------
13.3 M    Trainable params
0         Non-trainable params
13.3 M    Total params
53.230    Total estimated model params size (MB)
23        Modules in train mode
0         Modules in eval mode


Epoch 14: 100%|████| 750/750 [02:21<00:00,  5.32it/s, train_loss=2.000, train_acc=0.438, val_loss=2.170, val_acc=0.398]

`Trainer.fit` stopped: `max_epochs=15` reached.


Epoch 14: 100%|████| 750/750 [02:21<00:00,  5.31it/s, train_loss=2.000, train_acc=0.438, val_loss=2.170, val_acc=0.398]


[I 2025-06-10 15:02:57,525] Trial 0 finished with value: 2.1694281101226807 and parameters: {'conv1_out': 64, 'conv2_out': 96, 'conv3_out': 128, 'conv4_out': 128, 'kernel_size': 3, 'fc1_size': 512, 'fc2_size': 256, 'fc3_size': 0, 'dropout1': 0.3526513480130451, 'dropout2': 0.4515211765967886, 'dropout3': 0.2351830722496837, 'learning_rate': 0.0003674188100420839}. Best is trial 0 with value: 2.1694281101226807.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 316 K  | train
1 | head   | Sequential | 13.0 M | train
2 | output | Linear     | 12.8 K | train
----------------------------------------------
13.3 M    Trainable params
0         Non-trainable params
13.3 M    Total params
53.230    Total estimated model params size (MB)
23        Modules in train mode
0     

Epoch 14: 100%|████| 750/750 [02:20<00:00,  5.35it/s, train_loss=2.610, train_acc=0.344, val_loss=2.100, val_acc=0.425]

`Trainer.fit` stopped: `max_epochs=15` reached.


Epoch 14: 100%|████| 750/750 [02:20<00:00,  5.34it/s, train_loss=2.610, train_acc=0.344, val_loss=2.100, val_acc=0.425]


[I 2025-06-10 15:38:45,192] Trial 1 finished with value: 2.101747751235962 and parameters: {'conv1_out': 64, 'conv2_out': 96, 'conv3_out': 128, 'conv4_out': 128, 'kernel_size': 3, 'fc1_size': 512, 'fc2_size': 256, 'fc3_size': 0, 'dropout1': 0.39574746792575044, 'dropout2': 0.42445898800357507, 'dropout3': 0.31143781629928546, 'learning_rate': 0.0004936522175419208}. Best is trial 1 with value: 2.101747751235962.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 316 K  | train
1 | head   | Sequential | 13.0 M | train
2 | output | Linear     | 12.8 K | train
----------------------------------------------
13.3 M    Trainable params
0         Non-trainable params
13.3 M    Total params
53.230    Total estimated model params size (MB)
23        Modules in train mode
0    

Epoch 14: 100%|████| 750/750 [02:19<00:00,  5.37it/s, train_loss=2.310, train_acc=0.344, val_loss=2.100, val_acc=0.423]

`Trainer.fit` stopped: `max_epochs=15` reached.


Epoch 14: 100%|████| 750/750 [02:19<00:00,  5.37it/s, train_loss=2.310, train_acc=0.344, val_loss=2.100, val_acc=0.423]


[I 2025-06-10 16:14:29,462] Trial 2 finished with value: 2.0992777347564697 and parameters: {'conv1_out': 64, 'conv2_out': 96, 'conv3_out': 128, 'conv4_out': 128, 'kernel_size': 3, 'fc1_size': 512, 'fc2_size': 256, 'fc3_size': 0, 'dropout1': 0.29052582909653063, 'dropout2': 0.3342236306486829, 'dropout3': 0.34840841032862135, 'learning_rate': 0.0004632153364643866}. Best is trial 2 with value: 2.0992777347564697.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 466 K  | train
1 | head   | Sequential | 51.5 M | train
2 | output | Linear     | 12.8 K | train
----------------------------------------------
52.0 M    Trainable params
0         Non-trainable params
52.0 M    Total params
207.972   Total estimated model params size (MB)
20        Modules in train mode
0   

Epoch 14: 100%|████| 750/750 [02:36<00:00,  4.78it/s, train_loss=2.130, train_acc=0.438, val_loss=2.310, val_acc=0.371]

`Trainer.fit` stopped: `max_epochs=15` reached.


Epoch 14: 100%|████| 750/750 [02:37<00:00,  4.77it/s, train_loss=2.130, train_acc=0.438, val_loss=2.310, val_acc=0.371]


[I 2025-06-10 16:54:10,256] Trial 3 finished with value: 2.307535409927368 and parameters: {'conv1_out': 64, 'conv2_out': 96, 'conv3_out': 128, 'conv4_out': 0, 'kernel_size': 5, 'fc1_size': 512, 'fc2_size': 256, 'fc3_size': 0, 'dropout1': 0.210032093430855, 'dropout2': 0.44888417232129874, 'dropout3': 0.3357748630896292, 'learning_rate': 0.0004963560749658938}. Best is trial 2 with value: 2.0992777347564697.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 876 K  | train
1 | head   | Sequential | 13.0 M | train
2 | output | Linear     | 12.8 K | train
----------------------------------------------
13.9 M    Trainable params
0         Non-trainable params
13.9 M    Total params
55.471    Total estimated model params size (MB)
23        Modules in train mode
0        

Epoch 14: 100%|████| 750/750 [02:33<00:00,  4.88it/s, train_loss=2.460, train_acc=0.281, val_loss=2.200, val_acc=0.403]

`Trainer.fit` stopped: `max_epochs=15` reached.


Epoch 14: 100%|████| 750/750 [02:33<00:00,  4.88it/s, train_loss=2.460, train_acc=0.281, val_loss=2.200, val_acc=0.403]


[I 2025-06-10 17:33:04,208] Trial 4 finished with value: 2.198068857192993 and parameters: {'conv1_out': 64, 'conv2_out': 96, 'conv3_out': 128, 'conv4_out': 128, 'kernel_size': 5, 'fc1_size': 512, 'fc2_size': 256, 'fc3_size': 0, 'dropout1': 0.2557415564172411, 'dropout2': 0.38070354576141086, 'dropout3': 0.2514728560483357, 'learning_rate': 0.00018401733020757555}. Best is trial 2 with value: 2.0992777347564697.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 466 K  | train
1 | head   | Sequential | 51.5 M | train
2 | output | Linear     | 12.8 K | train
----------------------------------------------
52.0 M    Trainable params
0         Non-trainable params
52.0 M    Total params
207.972   Total estimated model params size (MB)
20        Modules in train mode
0    

Epoch 2: 100%|█████| 750/750 [02:38<00:00,  4.73it/s, train_loss=2.780, train_acc=0.250, val_loss=2.960, val_acc=0.240]

[I 2025-06-10 17:41:32,312] Trial 5 pruned. Trial was pruned at epoch 2.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 279 K  | train
1 | head   | Sequential | 9.8 M  | train
2 | output | Linear     | 12.8 K | train
----------------------------------------------
10.1 M    Trainable params
0         Non-trainable params
10.1 M    Total params
40.238    Total estimated model params size (MB)
23        Modules in train mode
0         Modules in eval mode


Epoch 2: 100%|█████| 750/750 [05:43<00:00,  2.18it/s, train_loss=2.780, train_acc=0.250, val_loss=2.960, val_acc=0.240]
Epoch 5: 100%|█████| 750/750 [02:21<00:00,  5.29it/s, train_loss=3.260, train_acc=0.188, val_loss=2.550, val_acc=0.302]

[I 2025-06-10 17:56:50,807] Trial 6 pruned. Trial was pruned at epoch 5.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 773 K  | train
1 | head   | Sequential | 9.8 M  | train
2 | output | Linear     | 12.8 K | train
----------------------------------------------
10.6 M    Trainable params
0         Non-trainable params
10.6 M    Total params
42.216    Total estimated model params size (MB)
23        Modules in train mode
0         Modules in eval mode


Epoch 5: 100%|█████| 750/750 [05:23<00:00,  2.32it/s, train_loss=3.260, train_acc=0.188, val_loss=2.550, val_acc=0.302]
Epoch 2: 100%|████| 750/750 [02:30<00:00,  4.98it/s, train_loss=3.110, train_acc=0.0625, val_loss=2.870, val_acc=0.244]

[I 2025-06-10 18:05:14,155] Trial 7 pruned. Trial was pruned at epoch 2.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 876 K  | train
1 | head   | Sequential | 13.0 M | train
2 | output | Linear     | 12.8 K | train
----------------------------------------------
13.9 M    Trainable params
0         Non-trainable params
13.9 M    Total params
55.471    Total estimated model params size (MB)
23        Modules in train mode
0         Modules in eval mode


Epoch 2: 100%|████| 750/750 [05:43<00:00,  2.18it/s, train_loss=3.110, train_acc=0.0625, val_loss=2.870, val_acc=0.244]
Epoch 3: 100%|█████| 750/750 [02:32<00:00,  4.93it/s, train_loss=2.690, train_acc=0.188, val_loss=2.760, val_acc=0.268]

[I 2025-06-10 18:16:25,049] Trial 8 pruned. Trial was pruned at epoch 3.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 316 K  | train
1 | head   | Sequential | 13.0 M | train
2 | output | Linear     | 12.8 K | train
----------------------------------------------
13.3 M    Trainable params
0         Non-trainable params
13.3 M    Total params
53.230    Total estimated model params size (MB)
23        Modules in train mode
0         Modules in eval mode


Epoch 3: 100%|█████| 750/750 [05:30<00:00,  2.27it/s, train_loss=2.690, train_acc=0.188, val_loss=2.760, val_acc=0.268]
Epoch 2: 100%|█████| 750/750 [02:26<00:00,  5.14it/s, train_loss=3.060, train_acc=0.250, val_loss=2.860, val_acc=0.259]

[I 2025-06-10 18:24:33,966] Trial 9 pruned. Trial was pruned at epoch 2.


Best trial: {'conv1_out': 64, 'conv2_out': 96, 'conv3_out': 128, 'conv4_out': 128, 'kernel_size': 3, 'fc1_size': 512, 'fc2_size': 256, 'fc3_size': 0, 'dropout1': 0.29052582909653063, 'dropout2': 0.3342236306486829, 'dropout3': 0.34840841032862135, 'learning_rate': 0.0004632153364643866}


In [17]:
# Collect all trials into a list of dicts
rows = []
for trial in study.trials:
    row = {
        "trial_number": trial.number,
        "value": trial.value,
    }
    # Add all hyperparameters
    row.update(trial.params)
    
    # Add intermediate values (e.g., val_loss at each epoch)
    for step, intermediate in trial.intermediate_values.items():
        row[f"epoch_{step}_val_loss"] = intermediate
    
    rows.append(row)

# Convert to DataFrame and save
df = pd.DataFrame(rows)
df.to_csv("optuna_trials_log_2.csv", index=False)
print("Saved trial data to optuna_trials_log.csv")


Saved trial data to optuna_trials_log.csv
