In [1]:
import os
import random
from tqdm import tqdm
from datetime import datetime
import pandas as pd
import gc

In [2]:
import optuna
import torch
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning as pl
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from optuna.integration import PyTorchLightningPruningCallback
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint, ProgressBar, LearningRateMonitor, Timer, StochasticWeightAveraging, TQDMProgressBar, Callback
from pytorch_lightning.callbacks.callback import Callback
from pytorch_lightning.profilers import SimpleProfiler, AdvancedProfiler
from pytorch_lightning.utilities import seed
from pytorch_lightning.tuner.tuning import Tuner
from pytorch_lightning import Trainer, seed_everything
from optuna.integration import PyTorchLightningPruningCallback

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from PIL import Image
import matplotlib.pyplot as plt

In [4]:
torch.__version__

'2.5.1'

In [5]:
pl.__version__

'2.5.1.post0'

In [6]:
# Setting dataset path
path = "E:\\mini_ImageNet\\archive\\"

In [7]:
#Using the previously calculated values (first notebook) of mean and std of the images in this dataset
mean = [0.4764, 0.4491, 0.4001]
std = [0.2264, 0.2224, 0.2212]

print("Mean:", mean)
print("Std:", std)

Mean: [0.4764, 0.4491, 0.4001]
Std: [0.2264, 0.2224, 0.2212]


### Image Transforms

In [8]:
transform=transforms.Compose([
        transforms.Resize(224),             # resize shortest side to 224 pixels
        transforms.CenterCrop(224),         # crop longest side to 224 pixels at center
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    
        transforms.RandomRotation(20),      # rotate +/- 20 degrees
        transforms.RandomHorizontalFlip(p=0.25),  # flip 24% of images
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),  
        transforms.RandomAffine(degrees=0, translate=(0.2, 0.2), scale=(0.8, 1.2)),
        transforms.RandomApply([transforms.ColorJitter(brightness=0.2, contrast=0.2, 
                                                       saturation=0.2)], p=0.5), # jitters by +/- given value
        transforms.RandomApply([transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 0.7))], p=0.3)
    ])
#https://docs.pytorch.org/vision/main/transforms.html

In [9]:
#Getting class names from the folder names of images
class_names=sorted(os.listdir(path))
class_names = ['_'.join(c.split('_')[:-1]) for c in class_names]

### Data Module

In [10]:
class DataModule(pl.LightningDataModule):
    def __init__(self, root_dir, batch_size, transform):
        super().__init__()
        self.data_dir = root_dir
        self.batch_size = batch_size
        self.transform = transform
        self.num_w = 4
        
    def setup(self, stage=None):
        full_dataset = datasets.ImageFolder(root=self.data_dir, transform=self.transform)
        train_size = int(0.8 * len(full_dataset))
        val_size = int(0.2 * len(full_dataset))
        self.train_dataset, self.val_dataset = torch.utils.data.random_split(full_dataset, [train_size, val_size])
    
    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True, 
                         num_workers=self.num_w, persistent_workers=True)        
    
    def val_dataloader(self):
        return DataLoader(self.val_dataset, batch_size=self.batch_size, 
                         num_workers=self.num_w, persistent_workers=True)        


### CNN Model

In [11]:
class CNNModel(pl.LightningModule):
    def __init__(self, trial, num_classes=50):
        super().__init__()
        self.save_hyperparameters()

        # Trial suggestions
        c1 = trial.suggest_categorical("conv1_out", [16, 32, 64])
        c2 = trial.suggest_categorical("conv2_out", [32, 64, 96])
        c3 = trial.suggest_categorical("conv3_out", [64, 128])
        c4 = trial.suggest_categorical("conv4_out", [64, 96, 128, 0])
        ksize = trial.suggest_categorical("kernel_size", [3, 5])

        fc1_size = trial.suggest_categorical("fc1_size", [256, 512])
        fc2_size = trial.suggest_categorical("fc2_size", [128, 256, 512, 0])
        fc3_size = trial.suggest_categorical("fc3_size", [64, 128, 256, 0])

        # Constraint 1: Avoid both conv3 and conv4 having 128 channels
        if c3 == 128 and c4 == 128:
            raise optuna.TrialPruned("Avoiding trial: conv3 and conv4 both 128")
        
        # Constraint 2: Avoid both fc1 and fc2 being 512
        if fc1_size == 512 and fc2_size == 512:
            raise optuna.TrialPruned("Avoiding trial: fc1 and fc2 both 512")

        # Constraint 3: Avoid fc2=0 and fc3>0
        if fc2_size == 0 and fc3_size>0:
            raise optuna.TrialPruned("Avoiding trial: fc2=0 and fc3>0")
        
        
        dropout1 = trial.suggest_float("dropout1", 0.2, 0.5)
        dropout2 = trial.suggest_float("dropout2", 0.2, 0.5)
        dropout3 = trial.suggest_float("dropout3", 0.2, 0.5)
        
        self.learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-3, log=True)
        
        act_fn = nn.LeakyReLU(0.05)
        
        # Layers
        conv_layers = [
            nn.Conv2d(3, c1, kernel_size=ksize, padding=ksize // 2),
            nn.BatchNorm2d(c1),
            act_fn,
            nn.MaxPool2d(2),

            nn.Conv2d(c1, c2, kernel_size=ksize, padding=ksize // 2),
            nn.BatchNorm2d(c2),
            act_fn,
            nn.MaxPool2d(2),

            nn.Conv2d(c2, c3, kernel_size=ksize, padding=ksize // 2),
            nn.BatchNorm2d(c3),
            act_fn,
            nn.MaxPool2d(2),
        ]
        
        # Optionally add conv4
        if c4 > 0:
            conv_layers.extend([
                nn.Conv2d(c3, c4, kernel_size=ksize, padding=ksize // 2),
                nn.BatchNorm2d(c4),
                act_fn,
                nn.MaxPool2d(2),
            ])
            conv_out = c4
        else:
            conv_out = c3
        
        # Add flatten after convs
        conv_layers.append(nn.Flatten())
        
        # Wrap as Sequential
        self.model = nn.Sequential(*conv_layers)
        
        
        # Use dummy input to calculate flattened output size
        with torch.no_grad():
            dummy_input = torch.zeros(1, 3, 224, 224)
            n_features = self.model(dummy_input).shape[1]
        
        layers = [
            nn.Linear(n_features, fc1_size),
            nn.BatchNorm1d(fc1_size),
            act_fn,
            nn.Dropout(dropout1),
        ]

        final_in = fc1_size

        if fc2_size > 0:
            layers.extend([
                nn.Linear(fc1_size, fc2_size),
                nn.BatchNorm1d(fc2_size),
                act_fn,
                nn.Dropout(dropout2)
            ])
            final_in = fc2_size

        if fc3_size > 0:
            layers.extend([
                nn.Linear(final_in, fc3_size),
                nn.BatchNorm1d(fc3_size),
                act_fn,
                nn.Dropout(dropout3)
            ])
            final_in = fc3_size

        self.head = nn.Sequential(*layers)
        self.output = nn.Linear(final_in, num_classes)
        
    
    def forward(self, x):
        x = self.model(x)
        x = self.head(x)
        return self.output(x)
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        acc = (logits.argmax(dim=1) == y).float().mean()
        self.log("train_loss", loss, prog_bar=True)
        self.log("train_acc", acc, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        acc = (logits.argmax(dim=1) == y).float().mean()
        self.log("val_loss", loss, prog_bar=True, on_epoch=True)
        self.log("val_acc", acc, prog_bar=True, on_epoch=True)
        return {"val_loss": loss, "val_acc": acc}

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)

In [12]:
#Removing the Validation DataLoader progress bar between epochs
class MinimalProgressBar(TQDMProgressBar):
    def init_validation_tqdm(self):
        # Return a dummy tqdm with no visible output to disable validation progress bar
        # Set total=0 to avoid progress display
        return tqdm(disable=True)

    def init_test_tqdm(self):
        # Same for test dataloader
        return tqdm(disable=True)

In [13]:
class ClearCacheCallback(Callback):
    def on_train_epoch_end(self, trainer, pl_module):
        torch.cuda.empty_cache()
        gc.collect()

In [14]:
class FixedPruningCallback(PyTorchLightningPruningCallback, Callback):
    def on_validation_end(self, trainer, pl_module):
        # Only start pruning after 3 epochs
        if trainer.current_epoch < 2:
            return  # Skip pruning

        # Call the original pruning logic
        super().on_validation_end(trainer, pl_module)


### Objective Function for Hyperparameter Tuning

In [15]:
def objective(trial):
    try:
        model = CNNModel(trial)
        datamodule = DataModule(root_dir=path, batch_size=32, transform=transform)
        datamodule.setup()
        
        trainer = pl.Trainer(
            logger=False,
            max_epochs=15,
            enable_checkpointing=False,
            callbacks=[MinimalProgressBar(), 
                       FixedPruningCallback(trial, monitor="val_loss"),
                       ClearCacheCallback()],
        )
        
        trainer.fit(model, datamodule)
        return trainer.callback_metrics["val_loss"].item()
    
    except RuntimeError as e:
        if "DefaultCPUAllocator" in str(e) or "out of memory" in str(e):
            print(f"Trial {trial.number} failed due to memory error.")
            return float('inf')
        raise  e
    
    finally:
        if 'model' in locals():
            del model
        if 'trainer' in locals():
            del trainer
        
        gc.collect()
        torch.cuda.empty_cache()


In [16]:
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=25, timeout=54000)
print("Best trial:", study.best_trial.params)

[I 2025-06-09 23:45:39,309] A new study created in memory with name: no-name-598ea8c7-84fe-40b8-8b17-680ccdcccf04
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 566 K  | train
1 | head   | Sequential | 9.8 M  | train
2 | output | Linear     | 12.8 K | train
----------------------------------------------
10.4 M    Trainable params
0         Non-trainable params
10.4 M    Total params
41.652    Total estimated model params size (MB)
26        Modules in train mode
0         Modules in eval mode


Epoch 14: 100%|████| 750/750 [02:26<00:00,  5.10it/s, train_loss=1.950, train_acc=0.438, val_loss=2.270, val_acc=0.381]

`Trainer.fit` stopped: `max_epochs=15` reached.


Epoch 14: 100%|████| 750/750 [02:27<00:00,  5.10it/s, train_loss=1.950, train_acc=0.438, val_loss=2.270, val_acc=0.381]


[I 2025-06-10 00:23:33,501] Trial 0 finished with value: 2.2656238079071045 and parameters: {'conv1_out': 32, 'conv2_out': 64, 'conv3_out': 128, 'conv4_out': 96, 'kernel_size': 5, 'fc1_size': 512, 'fc2_size': 256, 'fc3_size': 256, 'dropout1': 0.26199962483529415, 'dropout2': 0.2682733693525525, 'dropout3': 0.32604225068418363, 'learning_rate': 0.00011542742572824158}. Best is trial 0 with value: 2.2656238079071045.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 168 K  | train
1 | head   | Sequential | 51.4 M | train
2 | output | Linear     | 25.7 K | train
----------------------------------------------
51.6 M    Trainable params
0         Non-trainable params
51.6 M    Total params
206.304   Total estimated model params size (MB)
17        Modules in train mode
0 

Epoch 14: 100%|████| 750/750 [02:26<00:00,  5.13it/s, train_loss=2.570, train_acc=0.344, val_loss=2.280, val_acc=0.394]

`Trainer.fit` stopped: `max_epochs=15` reached.


Epoch 14: 100%|████| 750/750 [02:26<00:00,  5.12it/s, train_loss=2.570, train_acc=0.344, val_loss=2.280, val_acc=0.394]


[I 2025-06-10 01:01:00,784] Trial 1 finished with value: 2.2806146144866943 and parameters: {'conv1_out': 64, 'conv2_out': 96, 'conv3_out': 128, 'conv4_out': 0, 'kernel_size': 3, 'fc1_size': 512, 'fc2_size': 0, 'fc3_size': 0, 'dropout1': 0.3065172188662013, 'dropout2': 0.22344681479126122, 'dropout3': 0.3720363418504191, 'learning_rate': 0.00021900524281016052}. Best is trial 0 with value: 2.2656238079071045.
[I 2025-06-10 01:01:00,934] Trial 2 pruned. Avoiding trial: fc2=0 and fc3>0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 312 K  | train
1 | head   | Sequential | 51.5 M | train
2 | output | Linear     | 12.8 K | train
----------------------------------------------
51.8 M    Trainable params
0         Non-trainable params
51.8 M    Total params
207.357   Tot

Epoch 14: 100%|████| 750/750 [02:33<00:00,  4.90it/s, train_loss=2.410, train_acc=0.406, val_loss=2.220, val_acc=0.410]

`Trainer.fit` stopped: `max_epochs=15` reached.


Epoch 14: 100%|████| 750/750 [02:33<00:00,  4.89it/s, train_loss=2.410, train_acc=0.406, val_loss=2.220, val_acc=0.410]


[I 2025-06-10 01:39:55,678] Trial 3 finished with value: 2.2202320098876953 and parameters: {'conv1_out': 64, 'conv2_out': 64, 'conv3_out': 128, 'conv4_out': 0, 'kernel_size': 5, 'fc1_size': 512, 'fc2_size': 256, 'fc3_size': 0, 'dropout1': 0.23779195094934324, 'dropout2': 0.26330769965130246, 'dropout3': 0.21606656009558822, 'learning_rate': 0.0008472593038299467}. Best is trial 3 with value: 2.2202320098876953.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 76.1 K | train
1 | head   | Sequential | 3.5 M  | train
2 | output | Linear     | 12.8 K | train
----------------------------------------------
3.6 M     Trainable params
0         Non-trainable params
3.6 M     Total params
14.262    Total estimated model params size (MB)
26        Modules in train mode
0    

Epoch 14: 100%|████| 750/750 [02:34<00:00,  4.84it/s, train_loss=2.790, train_acc=0.281, val_loss=2.390, val_acc=0.354]

`Trainer.fit` stopped: `max_epochs=15` reached.


Epoch 14: 100%|████| 750/750 [02:34<00:00,  4.84it/s, train_loss=2.790, train_acc=0.281, val_loss=2.390, val_acc=0.354]


[I 2025-06-10 02:19:29,935] Trial 4 finished with value: 2.3927574157714844 and parameters: {'conv1_out': 64, 'conv2_out': 32, 'conv3_out': 64, 'conv4_out': 64, 'kernel_size': 3, 'fc1_size': 256, 'fc2_size': 512, 'fc3_size': 256, 'dropout1': 0.3575261683718324, 'dropout2': 0.4811557721410208, 'dropout3': 0.46839249970500096, 'learning_rate': 0.00037689752416906574}. Best is trial 3 with value: 2.2202320098876953.
[I 2025-06-10 02:19:30,098] Trial 5 pruned. Avoiding trial: fc1 and fc2 both 512
[I 2025-06-10 02:19:30,223] Trial 6 pruned. Avoiding trial: fc2=0 and fc3>0
[I 2025-06-10 02:19:30,343] Trial 7 pruned. Avoiding trial: conv3 and conv4 both 128
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 168 K  | train
1 | head   | Sequential | 51.5 M | train
2 | output |

Epoch 14: 100%|████| 750/750 [02:30<00:00,  4.98it/s, train_loss=2.220, train_acc=0.281, val_loss=2.580, val_acc=0.307]

`Trainer.fit` stopped: `max_epochs=15` reached.


Epoch 14: 100%|████| 750/750 [02:30<00:00,  4.97it/s, train_loss=2.220, train_acc=0.281, val_loss=2.580, val_acc=0.307]


[I 2025-06-10 02:58:01,268] Trial 8 finished with value: 2.5786826610565186 and parameters: {'conv1_out': 64, 'conv2_out': 96, 'conv3_out': 128, 'conv4_out': 0, 'kernel_size': 3, 'fc1_size': 512, 'fc2_size': 256, 'fc3_size': 128, 'dropout1': 0.2561433494543277, 'dropout2': 0.4511093232775636, 'dropout3': 0.3285082065819789, 'learning_rate': 0.00044749807171203085}. Best is trial 3 with value: 2.2202320098876953.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 210 K  | train
1 | head   | Sequential | 6.6 M  | train
2 | output | Linear     | 3.2 K  | train
----------------------------------------------
6.8 M     Trainable params
0         Non-trainable params
6.8 M     Total params
27.144    Total estimated model params size (MB)
26        Modules in train mode
0    

Epoch 2: 100%|█████| 750/750 [02:30<00:00,  4.97it/s, train_loss=3.480, train_acc=0.156, val_loss=3.250, val_acc=0.161]

[I 2025-06-10 03:06:08,901] Trial 9 pruned. Trial was pruned at epoch 2.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 116 K  | train
1 | head   | Sequential | 25.7 M | train
2 | output | Linear     | 6.5 K  | train
----------------------------------------------
25.8 M    Trainable params
0         Non-trainable params
25.8 M    Total params
103.390   Total estimated model params size (MB)
20        Modules in train mode
0         Modules in eval mode


Epoch 2: 100%|█████| 750/750 [05:37<00:00,  2.22it/s, train_loss=3.480, train_acc=0.156, val_loss=3.250, val_acc=0.161]
Epoch 4: 100%|█████| 750/750 [02:29<00:00,  5.02it/s, train_loss=2.330, train_acc=0.406, val_loss=2.820, val_acc=0.253]

[I 2025-06-10 03:19:34,992] Trial 10 pruned. Trial was pruned at epoch 4.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 566 K  | train
1 | head   | Sequential | 9.8 M  | train
2 | output | Linear     | 12.8 K | train
----------------------------------------------
10.4 M    Trainable params
0         Non-trainable params
10.4 M    Total params
41.652    Total estimated model params size (MB)
26        Modules in train mode
0         Modules in eval mode


Epoch 4: 100%|█████| 750/750 [05:29<00:00,  2.28it/s, train_loss=2.330, train_acc=0.406, val_loss=2.820, val_acc=0.253]
Epoch 2: 100%|████| 750/750 [02:28<00:00,  5.04it/s, train_loss=3.310, train_acc=0.0938, val_loss=3.050, val_acc=0.211]

[I 2025-06-10 03:27:53,298] Trial 11 pruned. Trial was pruned at epoch 2.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 566 K  | train
1 | head   | Sequential | 9.8 M  | train
2 | output | Linear     | 12.8 K | train
----------------------------------------------
10.3 M    Trainable params
0         Non-trainable params
10.3 M    Total params
41.386    Total estimated model params size (MB)
23        Modules in train mode
0         Modules in eval mode


Epoch 2: 100%|████| 750/750 [05:39<00:00,  2.21it/s, train_loss=3.310, train_acc=0.0938, val_loss=3.050, val_acc=0.211]
Epoch 14: 100%|████| 750/750 [02:30<00:00,  4.99it/s, train_loss=1.880, train_acc=0.500, val_loss=2.080, val_acc=0.432]

`Trainer.fit` stopped: `max_epochs=15` reached.


Epoch 14: 100%|████| 750/750 [02:30<00:00,  4.98it/s, train_loss=1.880, train_acc=0.500, val_loss=2.080, val_acc=0.432]


[I 2025-06-10 04:06:36,860] Trial 12 finished with value: 2.0809056758880615 and parameters: {'conv1_out': 32, 'conv2_out': 64, 'conv3_out': 128, 'conv4_out': 96, 'kernel_size': 5, 'fc1_size': 512, 'fc2_size': 256, 'fc3_size': 0, 'dropout1': 0.3044326250278261, 'dropout2': 0.3024301614756779, 'dropout3': 0.2936390586560871, 'learning_rate': 0.0009792923216661608}. Best is trial 12 with value: 2.0809056758880615.
[I 2025-06-10 04:06:36,994] Trial 13 pruned. Avoiding trial: conv3 and conv4 both 128
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 232 K  | train
1 | head   | Sequential | 25.8 M | train
2 | output | Linear     | 12.8 K | train
----------------------------------------------
26.0 M    Trainable params
0         Non-trainable params
26.0 M    Total params


Epoch 2: 100%|█████| 750/750 [02:30<00:00,  4.99it/s, train_loss=3.090, train_acc=0.156, val_loss=2.960, val_acc=0.225]

[I 2025-06-10 04:14:40,599] Trial 14 pruned. Trial was pruned at epoch 2.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 773 K  | train
1 | head   | Sequential | 9.8 M  | train
2 | output | Linear     | 12.8 K | train
----------------------------------------------
10.6 M    Trainable params
0         Non-trainable params
10.6 M    Total params
42.216    Total estimated model params size (MB)
23        Modules in train mode
0         Modules in eval mode


Epoch 2: 100%|█████| 750/750 [05:33<00:00,  2.25it/s, train_loss=3.090, train_acc=0.156, val_loss=2.960, val_acc=0.225]
Epoch 14: 100%|████| 750/750 [02:29<00:00,  5.02it/s, train_loss=2.210, train_acc=0.406, val_loss=2.070, val_acc=0.435]

`Trainer.fit` stopped: `max_epochs=15` reached.


Epoch 14: 100%|████| 750/750 [02:29<00:00,  5.02it/s, train_loss=2.210, train_acc=0.406, val_loss=2.070, val_acc=0.435]


[I 2025-06-10 04:52:57,670] Trial 15 finished with value: 2.0743117332458496 and parameters: {'conv1_out': 64, 'conv2_out': 96, 'conv3_out': 128, 'conv4_out': 96, 'kernel_size': 5, 'fc1_size': 512, 'fc2_size': 256, 'fc3_size': 0, 'dropout1': 0.29883094495102974, 'dropout2': 0.3061692504447138, 'dropout3': 0.2669174336821721, 'learning_rate': 0.0006189829762524606}. Best is trial 15 with value: 2.0743117332458496.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 694 K  | train
1 | head   | Sequential | 9.8 M  | train
2 | output | Linear     | 12.8 K | train
----------------------------------------------
10.5 M    Trainable params
0         Non-trainable params
10.5 M    Total params
41.899    Total estimated model params size (MB)
23        Modules in train mode
0   

Epoch 5: 100%|█████| 750/750 [02:25<00:00,  5.14it/s, train_loss=2.680, train_acc=0.219, val_loss=2.660, val_acc=0.288]

[I 2025-06-10 05:08:08,456] Trial 16 pruned. Trial was pruned at epoch 5.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 773 K  | train
1 | head   | Sequential | 9.7 M  | train
2 | output | Linear     | 6.5 K  | train
----------------------------------------------
10.5 M    Trainable params
0         Non-trainable params
10.5 M    Total params
41.927    Total estimated model params size (MB)
23        Modules in train mode
0         Modules in eval mode


Epoch 5: 100%|█████| 750/750 [05:26<00:00,  2.30it/s, train_loss=2.680, train_acc=0.219, val_loss=2.660, val_acc=0.288]
Epoch 2: 100%|█████| 750/750 [02:29<00:00,  5.02it/s, train_loss=2.700, train_acc=0.281, val_loss=2.970, val_acc=0.216]

[I 2025-06-10 05:16:27,627] Trial 17 pruned. Trial was pruned at epoch 2.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 694 K  | train
1 | head   | Sequential | 5.0 M  | train
2 | output | Linear     | 3.2 K  | train
----------------------------------------------
5.7 M     Trainable params
0         Non-trainable params
5.7 M     Total params
22.725    Total estimated model params size (MB)
26        Modules in train mode
0         Modules in eval mode


Epoch 2: 100%|█████| 750/750 [05:31<00:00,  2.26it/s, train_loss=2.700, train_acc=0.281, val_loss=2.970, val_acc=0.216]
Epoch 2: 100%|█████| 750/750 [02:29<00:00,  5.01it/s, train_loss=3.020, train_acc=0.188, val_loss=3.160, val_acc=0.173]

[I 2025-06-10 05:24:49,099] Trial 18 pruned. Trial was pruned at epoch 2.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 655 K  | train
1 | head   | Sequential | 9.8 M  | train
2 | output | Linear     | 6.5 K  | train
----------------------------------------------
10.5 M    Trainable params
0         Non-trainable params
10.5 M    Total params
41.847    Total estimated model params size (MB)
26        Modules in train mode
0         Modules in eval mode


Epoch 2: 100%|█████| 750/750 [05:29<00:00,  2.28it/s, train_loss=3.020, train_acc=0.188, val_loss=3.160, val_acc=0.173]
Epoch 2: 100%|████| 750/750 [02:27<00:00,  5.10it/s, train_loss=3.300, train_acc=0.0625, val_loss=3.040, val_acc=0.211]

[I 2025-06-10 05:33:02,387] Trial 19 pruned. Trial was pruned at epoch 2.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 773 K  | train
1 | head   | Sequential | 9.8 M  | train
2 | output | Linear     | 12.8 K | train
----------------------------------------------
10.6 M    Trainable params
0         Non-trainable params
10.6 M    Total params
42.216    Total estimated model params size (MB)
23        Modules in train mode
0         Modules in eval mode


Epoch 2: 100%|████| 750/750 [05:27<00:00,  2.29it/s, train_loss=3.300, train_acc=0.0625, val_loss=3.040, val_acc=0.211]
Epoch 3: 100%|█████| 750/750 [02:29<00:00,  5.02it/s, train_loss=2.960, train_acc=0.156, val_loss=2.850, val_acc=0.242]

[I 2025-06-10 05:43:50,594] Trial 20 pruned. Trial was pruned at epoch 3.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 312 K  | train
1 | head   | Sequential | 51.5 M | train
2 | output | Linear     | 12.8 K | train
----------------------------------------------
51.8 M    Trainable params
0         Non-trainable params
51.8 M    Total params
207.357   Total estimated model params size (MB)
20        Modules in train mode
0         Modules in eval mode


Epoch 3: 100%|█████| 750/750 [05:22<00:00,  2.33it/s, train_loss=2.960, train_acc=0.156, val_loss=2.850, val_acc=0.242]
Epoch 8: 100%|█████| 750/750 [02:22<00:00,  5.27it/s, train_loss=2.590, train_acc=0.375, val_loss=2.490, val_acc=0.330]

[I 2025-06-10 06:06:09,819] Trial 21 pruned. Trial was pruned at epoch 8.


Epoch 8: 100%|█████| 750/750 [02:23<00:00,  5.23it/s, train_loss=2.590, train_acc=0.375, val_loss=2.490, val_acc=0.330]


[I 2025-06-10 06:06:31,332] Trial 22 pruned. Avoiding trial: conv3 and conv4 both 128
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 159 K  | train
1 | head   | Sequential | 51.5 M | train
2 | output | Linear     | 12.8 K | train
----------------------------------------------
51.7 M    Trainable params
0         Non-trainable params
51.7 M    Total params
206.742   Total estimated model params size (MB)
20        Modules in train mode
0         Modules in eval mode


Epoch 3: 100%|█████| 750/750 [02:27<00:00,  5.09it/s, train_loss=2.580, train_acc=0.281, val_loss=2.870, val_acc=0.248]

[I 2025-06-10 06:16:55,523] Trial 23 pruned. Trial was pruned at epoch 3.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | model  | Sequential | 620 K  | train
1 | head   | Sequential | 9.8 M  | train
2 | output | Linear     | 12.8 K | train
----------------------------------------------
10.4 M    Trainable params
0         Non-trainable params
10.4 M    Total params
41.601    Total estimated model params size (MB)
23        Modules in train mode
0         Modules in eval mode


Epoch 3: 100%|█████| 750/750 [05:30<00:00,  2.27it/s, train_loss=2.580, train_acc=0.281, val_loss=2.870, val_acc=0.248]
Epoch 14: 100%|████| 750/750 [02:25<00:00,  5.15it/s, train_loss=1.800, train_acc=0.406, val_loss=2.080, val_acc=0.429]

`Trainer.fit` stopped: `max_epochs=15` reached.


Epoch 14: 100%|████| 750/750 [02:25<00:00,  5.15it/s, train_loss=1.800, train_acc=0.406, val_loss=2.080, val_acc=0.429]


[I 2025-06-10 06:54:15,061] Trial 24 finished with value: 2.0770821571350098 and parameters: {'conv1_out': 64, 'conv2_out': 64, 'conv3_out': 128, 'conv4_out': 96, 'kernel_size': 5, 'fc1_size': 512, 'fc2_size': 256, 'fc3_size': 0, 'dropout1': 0.29230946924019013, 'dropout2': 0.2356787685263997, 'dropout3': 0.24057023643387784, 'learning_rate': 0.0007884227357931359}. Best is trial 15 with value: 2.0743117332458496.


Best trial: {'conv1_out': 64, 'conv2_out': 96, 'conv3_out': 128, 'conv4_out': 96, 'kernel_size': 5, 'fc1_size': 512, 'fc2_size': 256, 'fc3_size': 0, 'dropout1': 0.29883094495102974, 'dropout2': 0.3061692504447138, 'dropout3': 0.2669174336821721, 'learning_rate': 0.0006189829762524606}


In [17]:
# Collect all trials into a list of dicts
rows = []
for trial in study.trials:
    row = {
        "trial_number": trial.number,
        "value": trial.value,
    }
    # Add all hyperparameters
    row.update(trial.params)
    
    # Add intermediate values (e.g., val_loss at each epoch)
    for step, intermediate in trial.intermediate_values.items():
        row[f"epoch_{step}_val_loss"] = intermediate
    
    rows.append(row)

# Convert to DataFrame and save
df = pd.DataFrame(rows)
df.to_csv("optuna_trials_log.csv", index=False)
print("Saved trial data to optuna_trials_log.csv")


Saved trial data to optuna_trials_log.csv
