# Project : Optimization for Machine Learning

### Initial Setup

In [91]:
!pip install torch -q 
!pip install torchvision -q
!pip install optuna -q
!pip install matplotlib -q

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.9/79.9 MB[0m [31m23.4 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25h

In [85]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
import random
import optuna
import optuna.visualization as vis
import time
import itertools
import os

In [3]:
# Code reproducibility
def set_seed(seed=1):
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    set_seed(worker_seed)

In [4]:
BATCH_SIZE = 256
EPOCHS = 10
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### Dataset Preprocessing and CNN benchmark model

In [5]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

In [6]:
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

gen = torch.Generator()
gen.manual_seed(1)

test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# For reproductibility the train_loader will be initialized in the main function

100%|██████████| 170M/170M [00:03<00:00, 48.7MB/s] 


In [7]:
class CNN_model(nn.Module):
    def __init__(self):
        super(CNN_model, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 8 * 8, 256),
            nn.ReLU(),
            nn.Linear(256, 10)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

### Training and testing function

In [1]:
def train(model, optimizer, criterion, train_loader, epochs, trial=None):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(train_loader)
        print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss:.4f}")
        ### For the optuna study we make a report at each epoch and we prune trials with non-optimal hyperparameters
        if trial is not None:
            trial.report(avg_loss, step=epoch)
            if trial.should_prune():
                raise optuna.TrialPruned()
    return avg_loss

In [9]:
def test(model):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    return correct / total

### Comparison of optimizers

In [10]:
optimizers_list = {

    'SGD': lambda model, trial: optim.SGD(
        model.parameters(),
        lr=trial.suggest_float("lr", 1e-5, 1e-1, log=True),
        weight_decay=trial.suggest_float("weight_decay", 1e-5, 1e-2, log=True),
    ),

    'AdaGrad': lambda model, trial: optim.Adagrad(
        model.parameters(),
        lr=trial.suggest_float("lr", 1e-5, 1e-1, log=True),
        weight_decay=trial.suggest_float("weight_decay", 1e-5, 1e-2, log=True),
    ),

    'RMSprop': lambda model, trial: optim.RMSprop(
        model.parameters(),
        lr=trial.suggest_float("lr", 1e-5, 1e-1, log=True),
        alpha=trial.suggest_float("alpha", 0.8, 0.99),
        weight_decay=trial.suggest_float("weight_decay", 1e-5, 1e-2, log=True)
    ),

    'Adam': lambda model, trial: optim.Adam(
        model.parameters(),
        lr=trial.suggest_float("lr", 1e-5, 1e-1, log=True),
        betas=(
            trial.suggest_float("beta1", 0.85, 0.99),
            trial.suggest_float("beta2", 0.9, 0.999)
        ),
        weight_decay=trial.suggest_float("weight_decay", 1e-5, 1e-2, log=True)
    ),

    'AMSGrad': lambda model, trial: optim.Adam(
        model.parameters(),
        lr=trial.suggest_float("lr", 1e-5, 1e-1, log=True),
        betas=(
            trial.suggest_float("beta1", 0.85, 0.99),
            trial.suggest_float("beta2", 0.9, 0.999)
        ),
        weight_decay=trial.suggest_float("weight_decay", 1e-5, 1e-2, log=True),
        amsgrad=True
    ),

    'AdamW': lambda model, trial: optim.AdamW(
        model.parameters(),
        lr=trial.suggest_float("lr", 1e-5, 1e-1, log=True),
        betas=(
            trial.suggest_float("beta1", 0.85, 0.99),
            trial.suggest_float("beta2", 0.9, 0.999)
        ),
        weight_decay=trial.suggest_float("weight_decay", 1e-5, 1e-2, log=True)
    ),

    'Nadam': lambda model, trial: optim.NAdam(
        model.parameters(),
        lr=trial.suggest_float("lr", 1e-5, 1e-1, log=True),
        betas=(
            trial.suggest_float("beta1", 0.85, 0.99),
            trial.suggest_float("beta2", 0.9, 0.999)
        ),
        weight_decay=trial.suggest_float("weight_decay", 1e-5, 1e-2, log=True),
    ),

    'RAdam': lambda model, trial: optim.RAdam(
        model.parameters(),
        lr=trial.suggest_float("lr", 1e-5, 1e-1, log=True),
        betas=(
            trial.suggest_float("beta1", 0.85, 0.99),
            trial.suggest_float("beta2", 0.9, 0.999)
        ),
        weight_decay=trial.suggest_float("weight_decay", 1e-5, 1e-2, log=True)
    ),
}

In [None]:
def run_optuna_study(optimizer_name, n_trials):

    # The objective function that we seek to optimize in the optuna study is the accuracy
    def objective(trial):
        set_seed(1)
        model = CNN_model().to(DEVICE)
        optimizer = optimizers_list[optimizer_name](model, trial)
        criterion = torch.nn.CrossEntropyLoss()
        # For reproductibility we set the training
        gen = torch.Generator()
        gen.manual_seed(1)
        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, generator=gen, worker_init_fn=seed_worker)
        train(model, optimizer, criterion, train_loader, epochs=EPOCHS, trial=trial)
        acc = test(model)
        return acc
        
    study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=1))
    start_time = time.time()
    
    study.optimize(objective, n_trials=n_trials)
    elapsed_time = time.time() - start_time

    print(f"\n Results for {optimizer_name} ")
    print("Best hyperparameters :", study.best_params)
    print(f"Best accuracy : {study.best_value:.2%}")
    print(f"Execution time : {elapsed_time:.2f} secondes")
    return study

In [12]:
SGD_study=run_optuna_study("SGD", 20)

[I 2025-06-13 01:16:28,733] A new study created in memory with name: no-name-407229f1-ffa9-4b76-b5f9-6f0b3e213509


Epoch 1/10 - Loss: 2.3019
Epoch 2/10 - Loss: 2.2967
Epoch 3/10 - Loss: 2.2917
Epoch 4/10 - Loss: 2.2865
Epoch 5/10 - Loss: 2.2811
Epoch 6/10 - Loss: 2.2754
Epoch 7/10 - Loss: 2.2694
Epoch 8/10 - Loss: 2.2628
Epoch 9/10 - Loss: 2.2557
Epoch 10/10 - Loss: 2.2477


[I 2025-06-13 01:19:01,333] Trial 0 finished with value: 0.2233 and parameters: {'lr': 0.00046568046379195655, 'weight_decay': 0.0014486833887239336}. Best is trial 0 with value: 0.2233.


Epoch 1/10 - Loss: 2.3045
Epoch 2/10 - Loss: 2.3044
Epoch 3/10 - Loss: 2.3043
Epoch 4/10 - Loss: 2.3042
Epoch 5/10 - Loss: 2.3040
Epoch 6/10 - Loss: 2.3039
Epoch 7/10 - Loss: 2.3038
Epoch 8/10 - Loss: 2.3037
Epoch 9/10 - Loss: 2.3036
Epoch 10/10 - Loss: 2.3035


[I 2025-06-13 01:21:33,526] Trial 1 finished with value: 0.1027 and parameters: {'lr': 1.0010539860510485e-05, 'weight_decay': 8.072307884499753e-05}. Best is trial 0 with value: 0.2233.


Epoch 1/10 - Loss: 2.3044
Epoch 2/10 - Loss: 2.3039
Epoch 3/10 - Loss: 2.3035
Epoch 4/10 - Loss: 2.3030
Epoch 5/10 - Loss: 2.3025
Epoch 6/10 - Loss: 2.3021
Epoch 7/10 - Loss: 2.3016
Epoch 8/10 - Loss: 2.3012
Epoch 9/10 - Loss: 2.3008
Epoch 10/10 - Loss: 2.3003


[I 2025-06-13 01:24:04,098] Trial 2 finished with value: 0.1258 and parameters: {'lr': 3.863879405158732e-05, 'weight_decay': 1.8924123965722247e-05}. Best is trial 0 with value: 0.2233.


Epoch 1/10 - Loss: 2.3043
Epoch 2/10 - Loss: 2.3036
Epoch 3/10 - Loss: 2.3030
Epoch 4/10 - Loss: 2.3023
Epoch 5/10 - Loss: 2.3016
Epoch 6/10 - Loss: 2.3010
Epoch 7/10 - Loss: 2.3004
Epoch 8/10 - Loss: 2.2998
Epoch 9/10 - Loss: 2.2992
Epoch 10/10 - Loss: 2.2986


[I 2025-06-13 01:26:34,517] Trial 3 finished with value: 0.1438 and parameters: {'lr': 5.5595654267125665e-05, 'weight_decay': 0.00010881335003635877}. Best is trial 0 with value: 0.2233.


Epoch 1/10 - Loss: 2.3024
Epoch 2/10 - Loss: 2.2980
Epoch 3/10 - Loss: 2.2938
Epoch 4/10 - Loss: 2.2896
Epoch 5/10 - Loss: 2.2852
Epoch 6/10 - Loss: 2.2807
Epoch 7/10 - Loss: 2.2760
Epoch 8/10 - Loss: 2.2710
Epoch 9/10 - Loss: 2.2657
Epoch 10/10 - Loss: 2.2599


[I 2025-06-13 01:29:05,481] Trial 4 finished with value: 0.2108 and parameters: {'lr': 0.00038642916535526494, 'weight_decay': 0.0004134759007834719}. Best is trial 0 with value: 0.2233.
[I 2025-06-13 01:29:20,313] Trial 5 pruned. 


Epoch 1/10 - Loss: 2.3019


[I 2025-06-13 01:29:35,123] Trial 6 pruned. 


Epoch 1/10 - Loss: 2.3042
Epoch 1/10 - Loss: 2.3045
Epoch 2/10 - Loss: 2.3044
Epoch 3/10 - Loss: 2.3042
Epoch 4/10 - Loss: 2.3040
Epoch 5/10 - Loss: 2.3039
Epoch 6/10 - Loss: 2.3037
Epoch 7/10 - Loss: 2.3036
Epoch 8/10 - Loss: 2.3034
Epoch 9/10 - Loss: 2.3033
Epoch 10/10 - Loss: 2.3031


[I 2025-06-13 01:32:05,888] Trial 7 finished with value: 0.1048 and parameters: {'lr': 1.2869165107815022e-05, 'weight_decay': 0.001026603003516424}. Best is trial 0 with value: 0.2233.
[I 2025-06-13 01:32:20,880] Trial 8 pruned. 


Epoch 1/10 - Loss: 2.3019
Epoch 1/10 - Loss: 2.3044
Epoch 2/10 - Loss: 2.3039
Epoch 3/10 - Loss: 2.3035
Epoch 4/10 - Loss: 2.3031
Epoch 5/10 - Loss: 2.3026
Epoch 6/10 - Loss: 2.3022
Epoch 7/10 - Loss: 2.3018
Epoch 8/10 - Loss: 2.3014
Epoch 9/10 - Loss: 2.3010
Epoch 10/10 - Loss: 2.3006


[I 2025-06-13 01:34:51,432] Trial 9 finished with value: 0.1243 and parameters: {'lr': 3.643743139281254e-05, 'weight_decay': 3.9292030026239486e-05}. Best is trial 0 with value: 0.2233.
[I 2025-06-13 01:35:06,200] Trial 10 pruned. 


Epoch 1/10 - Loss: 2.0048


[I 2025-06-13 01:35:20,993] Trial 11 pruned. 


Epoch 1/10 - Loss: 2.2819


[I 2025-06-13 01:35:35,881] Trial 12 pruned. 


Epoch 1/10 - Loss: 2.2913


[I 2025-06-13 01:35:50,651] Trial 13 pruned. 


Epoch 1/10 - Loss: 2.3030


[I 2025-06-13 01:36:05,410] Trial 14 pruned. 


Epoch 1/10 - Loss: 2.2286


[I 2025-06-13 01:36:20,144] Trial 15 pruned. 


Epoch 1/10 - Loss: 2.3033


[I 2025-06-13 01:36:34,920] Trial 16 pruned. 


Epoch 1/10 - Loss: 2.2968


[I 2025-06-13 01:36:49,747] Trial 17 pruned. 


Epoch 1/10 - Loss: 2.3037


[I 2025-06-13 01:37:04,642] Trial 18 pruned. 


Epoch 1/10 - Loss: 2.2453


[I 2025-06-13 01:37:19,444] Trial 19 pruned. 


Epoch 1/10 - Loss: 1.8518

 Results for SGD 
Best hyperparameters : {'lr': 0.00046568046379195655, 'weight_decay': 0.0014486833887239336}
Best accuracy : 22.33%
Execution time : 1250.71 secondes


In [13]:
AdaGrad_study=run_optuna_study("AdaGrad", 20)

[I 2025-06-13 01:37:19,451] A new study created in memory with name: no-name-519eacc6-5536-4b3b-94b4-3f4381407986


Epoch 1/10 - Loss: 1.7700
Epoch 2/10 - Loss: 1.5703
Epoch 3/10 - Loss: 1.4967
Epoch 4/10 - Loss: 1.4487
Epoch 5/10 - Loss: 1.4172
Epoch 6/10 - Loss: 1.3913
Epoch 7/10 - Loss: 1.3707
Epoch 8/10 - Loss: 1.3519
Epoch 9/10 - Loss: 1.3369
Epoch 10/10 - Loss: 1.3228


[I 2025-06-13 01:39:50,479] Trial 0 finished with value: 0.5263 and parameters: {'lr': 0.00046568046379195655, 'weight_decay': 0.0014486833887239336}. Best is trial 0 with value: 0.5263.


Epoch 1/10 - Loss: 2.2811
Epoch 2/10 - Loss: 2.2570
Epoch 3/10 - Loss: 2.2390
Epoch 4/10 - Loss: 2.2233
Epoch 5/10 - Loss: 2.2092
Epoch 6/10 - Loss: 2.1960
Epoch 7/10 - Loss: 2.1837
Epoch 8/10 - Loss: 2.1722
Epoch 9/10 - Loss: 2.1615
Epoch 10/10 - Loss: 2.1512


[I 2025-06-13 01:42:20,872] Trial 1 finished with value: 0.3051 and parameters: {'lr': 1.0010539860510485e-05, 'weight_decay': 8.072307884499753e-05}. Best is trial 0 with value: 0.5263.


Epoch 1/10 - Loss: 2.1906
Epoch 2/10 - Loss: 2.0681
Epoch 3/10 - Loss: 2.0017
Epoch 4/10 - Loss: 1.9576
Epoch 5/10 - Loss: 1.9251
Epoch 6/10 - Loss: 1.8992
Epoch 7/10 - Loss: 1.8779
Epoch 8/10 - Loss: 1.8593
Epoch 9/10 - Loss: 1.8437
Epoch 10/10 - Loss: 1.8289


[I 2025-06-13 01:44:50,598] Trial 2 finished with value: 0.3813 and parameters: {'lr': 3.863879405158732e-05, 'weight_decay': 1.8924123965722247e-05}. Best is trial 0 with value: 0.5263.


Epoch 1/10 - Loss: 2.1306
Epoch 2/10 - Loss: 1.9776
Epoch 3/10 - Loss: 1.9099
Epoch 4/10 - Loss: 1.8660
Epoch 5/10 - Loss: 1.8346
Epoch 6/10 - Loss: 1.8098
Epoch 7/10 - Loss: 1.7895
Epoch 8/10 - Loss: 1.7719
Epoch 9/10 - Loss: 1.7575
Epoch 10/10 - Loss: 1.7439


[I 2025-06-13 01:47:21,681] Trial 3 finished with value: 0.4028 and parameters: {'lr': 5.5595654267125665e-05, 'weight_decay': 0.00010881335003635877}. Best is trial 0 with value: 0.5263.


Epoch 1/10 - Loss: 1.7930
Epoch 2/10 - Loss: 1.6004
Epoch 3/10 - Loss: 1.5287
Epoch 4/10 - Loss: 1.4818
Epoch 5/10 - Loss: 1.4506
Epoch 6/10 - Loss: 1.4248
Epoch 7/10 - Loss: 1.4047
Epoch 8/10 - Loss: 1.3862
Epoch 9/10 - Loss: 1.3715
Epoch 10/10 - Loss: 1.3576


[I 2025-06-13 01:49:52,167] Trial 4 finished with value: 0.5155 and parameters: {'lr': 0.00038642916535526494, 'weight_decay': 0.0004134759007834719}. Best is trial 0 with value: 0.5263.
[I 2025-06-13 01:50:06,931] Trial 5 pruned. 


Epoch 1/10 - Loss: 1.7658


[I 2025-06-13 01:50:21,856] Trial 6 pruned. 


Epoch 1/10 - Loss: 2.1081
Epoch 1/10 - Loss: 2.2737
Epoch 2/10 - Loss: 2.2406
Epoch 3/10 - Loss: 2.2154
Epoch 4/10 - Loss: 2.1939
Epoch 5/10 - Loss: 2.1749
Epoch 6/10 - Loss: 2.1575
Epoch 7/10 - Loss: 2.1417
Epoch 8/10 - Loss: 2.1271
Epoch 9/10 - Loss: 2.1137
Epoch 10/10 - Loss: 2.1009


[I 2025-06-13 01:52:52,650] Trial 7 finished with value: 0.3119 and parameters: {'lr': 1.2869165107815022e-05, 'weight_decay': 0.001026603003516424}. Best is trial 0 with value: 0.5263.
[I 2025-06-13 01:53:07,334] Trial 8 pruned. 


Epoch 1/10 - Loss: 1.7674
Epoch 1/10 - Loss: 2.1982
Epoch 2/10 - Loss: 2.0811
Epoch 3/10 - Loss: 2.0150
Epoch 4/10 - Loss: 1.9709
Epoch 5/10 - Loss: 1.9386
Epoch 6/10 - Loss: 1.9129
Epoch 7/10 - Loss: 1.8917
Epoch 8/10 - Loss: 1.8732
Epoch 9/10 - Loss: 1.8576
Epoch 10/10 - Loss: 1.8429


[I 2025-06-13 01:55:37,998] Trial 9 finished with value: 0.378 and parameters: {'lr': 3.643743139281254e-05, 'weight_decay': 3.9292030026239486e-05}. Best is trial 0 with value: 0.5263.


Epoch 1/10 - Loss: 3.1625
Epoch 2/10 - Loss: 1.5857
Epoch 3/10 - Loss: 1.4925
Epoch 4/10 - Loss: 1.4363
Epoch 5/10 - Loss: 1.3958
Epoch 6/10 - Loss: 1.3496
Epoch 7/10 - Loss: 1.3264
Epoch 8/10 - Loss: 1.2926
Epoch 9/10 - Loss: 1.2723
Epoch 10/10 - Loss: 1.2503


[I 2025-06-13 01:58:08,680] Trial 10 finished with value: 0.5165 and parameters: {'lr': 0.03574529313620667, 'weight_decay': 0.008043846949906706}. Best is trial 0 with value: 0.5263.


Epoch 1/10 - Loss: 9.9781
Epoch 2/10 - Loss: 1.9989
Epoch 3/10 - Loss: 1.8111
Epoch 4/10 - Loss: 1.6979
Epoch 5/10 - Loss: 1.6384
Epoch 6/10 - Loss: 1.5935
Epoch 7/10 - Loss: 1.5700
Epoch 8/10 - Loss: 1.5434
Epoch 9/10 - Loss: 1.5300
Epoch 10/10 - Loss: 1.5133


[I 2025-06-13 02:00:38,999] Trial 11 finished with value: 0.3874 and parameters: {'lr': 0.07392326287467453, 'weight_decay': 0.00918266086818645}. Best is trial 0 with value: 0.5263.
[I 2025-06-13 02:00:53,740] Trial 12 pruned. 


Epoch 1/10 - Loss: 1.9769


[I 2025-06-13 02:01:08,481] Trial 13 pruned. 


Epoch 1/10 - Loss: 1.6311


[I 2025-06-13 02:01:23,331] Trial 14 pruned. 


Epoch 1/10 - Loss: 1.6724
Epoch 1/10 - Loss: 81.4793
Epoch 2/10 - Loss: 1.8026
Epoch 3/10 - Loss: 1.6458
Epoch 4/10 - Loss: 1.5631
Epoch 5/10 - Loss: 1.5051
Epoch 6/10 - Loss: 1.4579
Epoch 7/10 - Loss: 1.4281
Epoch 8/10 - Loss: 1.3798
Epoch 9/10 - Loss: 1.3562
Epoch 10/10 - Loss: 1.3239


[I 2025-06-13 02:03:53,781] Trial 15 finished with value: 0.4446 and parameters: {'lr': 0.0968027817138971, 'weight_decay': 0.001098599164943717}. Best is trial 0 with value: 0.5263.


Epoch 1/10 - Loss: 2.4635
Epoch 2/10 - Loss: 1.4466
Epoch 3/10 - Loss: 1.3288
Epoch 4/10 - Loss: 1.2500
Epoch 5/10 - Loss: 1.2000
Epoch 6/10 - Loss: 1.1617
Epoch 7/10 - Loss: 1.1318
Epoch 8/10 - Loss: 1.1008
Epoch 9/10 - Loss: 1.0766
Epoch 10/10 - Loss: 1.0493


[I 2025-06-13 02:06:23,718] Trial 16 finished with value: 0.6012 and parameters: {'lr': 0.02284910284964674, 'weight_decay': 0.0021752997681340367}. Best is trial 16 with value: 0.6012.
[I 2025-06-13 02:06:38,550] Trial 17 pruned. 


Epoch 1/10 - Loss: 1.5580


[I 2025-06-13 02:06:53,381] Trial 18 pruned. 


Epoch 1/10 - Loss: 1.7939


[I 2025-06-13 02:07:08,195] Trial 19 pruned. 


Epoch 1/10 - Loss: 1.6232

 Results for AdaGrad 
Best hyperparameters : {'lr': 0.02284910284964674, 'weight_decay': 0.0021752997681340367}
Best accuracy : 60.12%
Execution time : 1788.74 secondes


In [14]:
RMSprop_study=run_optuna_study("RMSprop", 20)

[I 2025-06-13 02:07:08,201] A new study created in memory with name: no-name-d62a04da-ef24-4818-a69e-f81a803e2b68


Epoch 1/10 - Loss: 1.5665
Epoch 2/10 - Loss: 1.2106
Epoch 3/10 - Loss: 1.0464
Epoch 4/10 - Loss: 0.9253
Epoch 5/10 - Loss: 0.8326
Epoch 6/10 - Loss: 0.7492
Epoch 7/10 - Loss: 0.6786
Epoch 8/10 - Loss: 0.6048
Epoch 9/10 - Loss: 0.5375
Epoch 10/10 - Loss: 0.4765


[I 2025-06-13 02:09:38,983] Trial 0 finished with value: 0.7225 and parameters: {'lr': 0.00046568046379195655, 'alpha': 0.93686165375401, 'weight_decay': 1.0007903854383775e-05}. Best is trial 0 with value: 0.7225.


Epoch 1/10 - Loss: 1.7251
Epoch 2/10 - Loss: 1.4396
Epoch 3/10 - Loss: 1.3174
Epoch 4/10 - Loss: 1.2273
Epoch 5/10 - Loss: 1.1576
Epoch 6/10 - Loss: 1.1000
Epoch 7/10 - Loss: 1.0519
Epoch 8/10 - Loss: 1.0059
Epoch 9/10 - Loss: 0.9676
Epoch 10/10 - Loss: 0.9315


[I 2025-06-13 02:12:07,898] Trial 1 finished with value: 0.6318 and parameters: {'lr': 0.0001619311091244073, 'alpha': 0.8278836192552516, 'weight_decay': 1.8924123965722247e-05}. Best is trial 0 with value: 0.7225.


Epoch 1/10 - Loss: 1.8442
Epoch 2/10 - Loss: 1.5813
Epoch 3/10 - Loss: 1.4710
Epoch 4/10 - Loss: 1.3947
Epoch 5/10 - Loss: 1.3391
Epoch 6/10 - Loss: 1.2929
Epoch 7/10 - Loss: 1.2549
Epoch 8/10 - Loss: 1.2201
Epoch 9/10 - Loss: 1.1912
Epoch 10/10 - Loss: 1.1643


[I 2025-06-13 02:14:37,543] Trial 2 finished with value: 0.5608 and parameters: {'lr': 5.5595654267125665e-05, 'alpha': 0.8656565381381791, 'weight_decay': 0.00015498955191377127}. Best is trial 0 with value: 0.7225.


Epoch 1/10 - Loss: 1.6622
Epoch 2/10 - Loss: 1.2369
Epoch 3/10 - Loss: 1.0738
Epoch 4/10 - Loss: 0.9688
Epoch 5/10 - Loss: 0.8924
Epoch 6/10 - Loss: 0.8200
Epoch 7/10 - Loss: 0.7657
Epoch 8/10 - Loss: 0.7133
Epoch 9/10 - Loss: 0.6588
Epoch 10/10 - Loss: 0.6163


[I 2025-06-13 02:17:08,354] Trial 3 finished with value: 0.6993 and parameters: {'lr': 0.0014297724879798392, 'alpha': 0.879646957736626, 'weight_decay': 0.0011367330868956233}. Best is trial 0 with value: 0.7225.


Epoch 1/10 - Loss: 1.7759
Epoch 2/10 - Loss: 1.5342
Epoch 3/10 - Loss: 1.4329
Epoch 4/10 - Loss: 1.3613
Epoch 5/10 - Loss: 1.3087
Epoch 6/10 - Loss: 1.2635
Epoch 7/10 - Loss: 1.2274
Epoch 8/10 - Loss: 1.1927
Epoch 9/10 - Loss: 1.1632
Epoch 10/10 - Loss: 1.1361


[I 2025-06-13 02:19:39,381] Trial 4 finished with value: 0.5685 and parameters: {'lr': 6.573686655138327e-05, 'alpha': 0.9668423129142796, 'weight_decay': 1.2082666253305453e-05}. Best is trial 0 with value: 0.7225.


Epoch 1/10 - Loss: 2.0241
Epoch 2/10 - Loss: 1.4438
Epoch 3/10 - Loss: 1.2929
Epoch 4/10 - Loss: 1.1933
Epoch 5/10 - Loss: 1.1079
Epoch 6/10 - Loss: 1.0533
Epoch 7/10 - Loss: 1.0022
Epoch 8/10 - Loss: 0.9599
Epoch 9/10 - Loss: 0.9439
Epoch 10/10 - Loss: 0.9032


[I 2025-06-13 02:22:09,650] Trial 5 finished with value: 0.61 and parameters: {'lr': 0.0048069548249615995, 'alpha': 0.8792879124497541, 'weight_decay': 0.00047431786810463216}. Best is trial 0 with value: 0.7225.


Epoch 1/10 - Loss: 1.9155
Epoch 2/10 - Loss: 1.6514
Epoch 3/10 - Loss: 1.5436
Epoch 4/10 - Loss: 1.4688
Epoch 5/10 - Loss: 1.4156
Epoch 6/10 - Loss: 1.3727
Epoch 7/10 - Loss: 1.3367
Epoch 8/10 - Loss: 1.3042
Epoch 9/10 - Loss: 1.2770
Epoch 10/10 - Loss: 1.2527


[I 2025-06-13 02:24:39,655] Trial 6 finished with value: 0.5322 and parameters: {'lr': 3.643743139281254e-05, 'alpha': 0.837639282926127, 'weight_decay': 0.0025248390936171007}. Best is trial 0 with value: 0.7225.


Epoch 1/10 - Loss: 1516.3142
Epoch 2/10 - Loss: 314.9837
Epoch 3/10 - Loss: 460.0846
Epoch 4/10 - Loss: 320.2748
Epoch 5/10 - Loss: 363.2704
Epoch 6/10 - Loss: 299.3923
Epoch 7/10 - Loss: 373.3758
Epoch 8/10 - Loss: 288.8888
Epoch 9/10 - Loss: 328.2672
Epoch 10/10 - Loss: 376.7276


[I 2025-06-13 02:27:09,380] Trial 7 finished with value: 0.0959 and parameters: {'lr': 0.0746528346269155, 'alpha': 0.8595505938502561, 'weight_decay': 0.0011938997108268642}. Best is trial 0 with value: 0.7225.


Epoch 1/10 - Loss: 1042.6793
Epoch 2/10 - Loss: 5.6304
Epoch 3/10 - Loss: 13.3415
Epoch 4/10 - Loss: 14.9540
Epoch 5/10 - Loss: 3.1558
Epoch 6/10 - Loss: 4.8222
Epoch 7/10 - Loss: 11.3660
Epoch 8/10 - Loss: 3.4211
Epoch 9/10 - Loss: 7.7326
Epoch 10/10 - Loss: 6.9922


[I 2025-06-13 02:29:39,432] Trial 8 finished with value: 0.0901 and parameters: {'lr': 0.032029975700290474, 'alpha': 0.969975266065731, 'weight_decay': 1.7994203765849863e-05}. Best is trial 0 with value: 0.7225.


Epoch 1/10 - Loss: 2.0925
Epoch 2/10 - Loss: 1.8355
Epoch 3/10 - Loss: 1.7239
Epoch 4/10 - Loss: 1.6507
Epoch 5/10 - Loss: 1.5968
Epoch 6/10 - Loss: 1.5514
Epoch 7/10 - Loss: 1.5141
Epoch 8/10 - Loss: 1.4808
Epoch 9/10 - Loss: 1.4542
Epoch 10/10 - Loss: 1.4304


[I 2025-06-13 02:32:09,538] Trial 9 finished with value: 0.4875 and parameters: {'lr': 1.432910723846784e-05, 'alpha': 0.8322677797172682, 'weight_decay': 0.004309506204261083}. Best is trial 0 with value: 0.7225.
[I 2025-06-13 02:32:24,567] Trial 10 pruned. 


Epoch 1/10 - Loss: 1.5617


[I 2025-06-13 02:32:39,455] Trial 11 pruned. 


Epoch 1/10 - Loss: 1.7258


[I 2025-06-13 02:32:54,185] Trial 12 pruned. 


Epoch 1/10 - Loss: 1.5599
Epoch 1/10 - Loss: 3.3564
Epoch 2/10 - Loss: 1.6939
Epoch 3/10 - Loss: 1.5009
Epoch 4/10 - Loss: 1.4010
Epoch 5/10 - Loss: 1.3498
Epoch 6/10 - Loss: 1.2928
Epoch 7/10 - Loss: 1.2923
Epoch 8/10 - Loss: 1.2399
Epoch 9/10 - Loss: 1.2284
Epoch 10/10 - Loss: 1.1911


[I 2025-06-13 02:35:23,579] Trial 13 finished with value: 0.5429 and parameters: {'lr': 0.006899083042418461, 'alpha': 0.9401932197827124, 'weight_decay': 0.0017887561505059819}. Best is trial 0 with value: 0.7225.
[I 2025-06-13 02:35:38,371] Trial 14 pruned. 


Epoch 1/10 - Loss: 1.6416


[I 2025-06-13 02:35:53,186] Trial 15 pruned. 


Epoch 1/10 - Loss: 1.7903
Epoch 1/10 - Loss: 6.3631
Epoch 2/10 - Loss: 1.6944
Epoch 3/10 - Loss: 1.4087
Epoch 4/10 - Loss: 1.2253
Epoch 5/10 - Loss: 1.1147
Epoch 6/10 - Loss: 1.0359
Epoch 7/10 - Loss: 0.9599
Epoch 8/10 - Loss: 0.9127
Epoch 9/10 - Loss: 0.8366
Epoch 10/10 - Loss: 0.7894


[I 2025-06-13 02:38:25,123] Trial 16 finished with value: 0.6206 and parameters: {'lr': 0.009689400090737979, 'alpha': 0.9448383992692024, 'weight_decay': 3.98626849678047e-05}. Best is trial 0 with value: 0.7225.
[I 2025-06-13 02:38:40,218] Trial 17 pruned. 


Epoch 1/10 - Loss: 1.6531


[I 2025-06-13 02:38:55,066] Trial 18 pruned. 


Epoch 1/10 - Loss: 1.7671
Epoch 1/10 - Loss: 2.0666
Epoch 2/10 - Loss: 1.5696
Epoch 3/10 - Loss: 1.4391
Epoch 4/10 - Loss: 1.3664
Epoch 5/10 - Loss: 1.3365
Epoch 6/10 - Loss: 1.3026
Epoch 7/10 - Loss: 1.2799
Epoch 8/10 - Loss: 1.2595
Epoch 9/10 - Loss: 1.2486
Epoch 10/10 - Loss: 1.2198


[I 2025-06-13 02:41:26,363] Trial 19 finished with value: 0.467 and parameters: {'lr': 0.0033533627242136807, 'alpha': 0.948356002096577, 'weight_decay': 0.004810170043087906}. Best is trial 0 with value: 0.7225.



 Results for RMSprop 
Best hyperparameters : {'lr': 0.00046568046379195655, 'alpha': 0.93686165375401, 'weight_decay': 1.0007903854383775e-05}
Best accuracy : 72.25%
Execution time : 2058.16 secondes


In [15]:
Adam_study=run_optuna_study("Adam", 20)

[I 2025-06-13 02:41:26,369] A new study created in memory with name: no-name-c56bf2b0-94a9-4815-a638-88ee66d9f588


Epoch 1/10 - Loss: 1.5251
Epoch 2/10 - Loss: 1.1734
Epoch 3/10 - Loss: 1.0173
Epoch 4/10 - Loss: 0.9055
Epoch 5/10 - Loss: 0.8234
Epoch 6/10 - Loss: 0.7478
Epoch 7/10 - Loss: 0.6769
Epoch 8/10 - Loss: 0.6056
Epoch 9/10 - Loss: 0.5424
Epoch 10/10 - Loss: 0.4802


[I 2025-06-13 02:43:56,434] Trial 0 finished with value: 0.7264 and parameters: {'lr': 0.00046568046379195655, 'beta1': 0.9508454290819022, 'beta2': 0.9000113231069171, 'weight_decay': 8.072307884499753e-05}. Best is trial 0 with value: 0.7264.


Epoch 1/10 - Loss: 1.9210
Epoch 2/10 - Loss: 1.6318
Epoch 3/10 - Loss: 1.5178
Epoch 4/10 - Loss: 1.4392
Epoch 5/10 - Loss: 1.3856
Epoch 6/10 - Loss: 1.3372
Epoch 7/10 - Loss: 1.2991
Epoch 8/10 - Loss: 1.2664
Epoch 9/10 - Loss: 1.2371
Epoch 10/10 - Loss: 1.2086


[I 2025-06-13 02:46:26,622] Trial 1 finished with value: 0.5691 and parameters: {'lr': 3.863879405158732e-05, 'beta1': 0.8629274032676316, 'beta2': 0.9184397609263895, 'weight_decay': 0.00010881335003635877}. Best is trial 0 with value: 0.7264.


Epoch 1/10 - Loss: 1.5555
Epoch 2/10 - Loss: 1.2248
Epoch 3/10 - Loss: 1.0981
Epoch 4/10 - Loss: 1.0032
Epoch 5/10 - Loss: 0.9342
Epoch 6/10 - Loss: 0.8656
Epoch 7/10 - Loss: 0.8175
Epoch 8/10 - Loss: 0.7742
Epoch 9/10 - Loss: 0.7256
Epoch 10/10 - Loss: 0.6805


[I 2025-06-13 02:48:57,309] Trial 2 finished with value: 0.7079 and parameters: {'lr': 0.00038642916535526494, 'beta1': 0.92543434276047, 'beta2': 0.9415002569259262, 'weight_decay': 0.0011367330868956233}. Best is trial 0 with value: 0.7264.


Epoch 1/10 - Loss: 1.8771
Epoch 2/10 - Loss: 1.5706
Epoch 3/10 - Loss: 1.4446
Epoch 4/10 - Loss: 1.3577
Epoch 5/10 - Loss: 1.2998
Epoch 6/10 - Loss: 1.2451
Epoch 7/10 - Loss: 1.2046
Epoch 8/10 - Loss: 1.1717
Epoch 9/10 - Loss: 1.1411
Epoch 10/10 - Loss: 1.1102


[I 2025-06-13 02:51:27,703] Trial 3 finished with value: 0.5977 and parameters: {'lr': 6.573686655138327e-05, 'beta1': 0.9729364410947323, 'beta2': 0.9027113717265947, 'weight_decay': 0.001026603003516424}. Best is trial 0 with value: 0.7264.


Epoch 1/10 - Loss: 1.5206
Epoch 2/10 - Loss: 1.1700
Epoch 3/10 - Loss: 1.0235
Epoch 4/10 - Loss: 0.9102
Epoch 5/10 - Loss: 0.8276
Epoch 6/10 - Loss: 0.7472
Epoch 7/10 - Loss: 0.6824
Epoch 8/10 - Loss: 0.6221
Epoch 9/10 - Loss: 0.5529
Epoch 10/10 - Loss: 0.4866


[I 2025-06-13 02:53:57,862] Trial 4 finished with value: 0.7204 and parameters: {'lr': 0.00046689498537088925, 'beta1': 0.9282165759824053, 'beta2': 0.9138983069209282, 'weight_decay': 3.9292030026239486e-05}. Best is trial 0 with value: 0.7264.


Epoch 1/10 - Loss: 2.3632
Epoch 2/10 - Loss: 4.0627
Epoch 3/10 - Loss: 4.1658
Epoch 4/10 - Loss: 9.9100
Epoch 5/10 - Loss: 20.3032
Epoch 6/10 - Loss: 21.2937
Epoch 7/10 - Loss: 289.0522
Epoch 8/10 - Loss: 505.1177
Epoch 9/10 - Loss: 181.4046
Epoch 10/10 - Loss: 428.6608


[I 2025-06-13 02:56:28,259] Trial 5 finished with value: 0.1 and parameters: {'lr': 0.015957993164212966, 'beta1': 0.9855566206007156, 'beta2': 0.931028993637765, 'weight_decay': 0.0011938997108268642}. Best is trial 0 with value: 0.7264.


Epoch 1/10 - Loss: 5.2529
Epoch 2/10 - Loss: 31.7799
Epoch 3/10 - Loss: 21.0025
Epoch 4/10 - Loss: 20.9307
Epoch 5/10 - Loss: 6.0482
Epoch 6/10 - Loss: 4.6108
Epoch 7/10 - Loss: 10.3891
Epoch 8/10 - Loss: 3.1590
Epoch 9/10 - Loss: 5.0684
Epoch 10/10 - Loss: 17.4122


[I 2025-06-13 02:58:58,649] Trial 6 finished with value: 0.1001 and parameters: {'lr': 0.032029975700290474, 'beta1': 0.9752449328905386, 'beta2': 0.908419376925608, 'weight_decay': 1.3096774493777596e-05}. Best is trial 0 with value: 0.7264.


Epoch 1/10 - Loss: 1.9236
Epoch 2/10 - Loss: 1.6312
Epoch 3/10 - Loss: 1.5024
Epoch 4/10 - Loss: 1.4189
Epoch 5/10 - Loss: 1.3586
Epoch 6/10 - Loss: 1.3091
Epoch 7/10 - Loss: 1.2647
Epoch 8/10 - Loss: 1.2328
Epoch 9/10 - Loss: 1.1987
Epoch 10/10 - Loss: 1.1714


[I 2025-06-13 03:01:29,880] Trial 7 finished with value: 0.5762 and parameters: {'lr': 4.7788310658576985e-05, 'beta1': 0.9729399504801178, 'beta2': 0.909736336549472, 'weight_decay': 0.0001833677157890362}. Best is trial 0 with value: 0.7264.


Epoch 1/10 - Loss: 27.3436
Epoch 2/10 - Loss: 3.3177
Epoch 3/10 - Loss: 2.6496
Epoch 4/10 - Loss: 2.3754
Epoch 5/10 - Loss: 2.5201
Epoch 6/10 - Loss: 3.3143
Epoch 7/10 - Loss: 5.5829
Epoch 8/10 - Loss: 4.5493
Epoch 9/10 - Loss: 15.6340
Epoch 10/10 - Loss: 2.8652


[I 2025-06-13 03:04:01,394] Trial 8 finished with value: 0.1 and parameters: {'lr': 0.06785129181994787, 'beta1': 0.9246431398962224, 'beta2': 0.9684958342810969, 'weight_decay': 8.841926348917726e-05}. Best is trial 0 with value: 0.7264.
[I 2025-06-13 03:04:16,111] Trial 9 pruned. 


Epoch 1/10 - Loss: 1.6050


[I 2025-06-13 03:04:31,052] Trial 10 pruned. 


Epoch 1/10 - Loss: 1.5142


[I 2025-06-13 03:04:45,845] Trial 11 pruned. 


Epoch 1/10 - Loss: 1.5381


[I 2025-06-13 03:05:00,832] Trial 12 pruned. 


Epoch 1/10 - Loss: 1.5818


[I 2025-06-13 03:05:15,618] Trial 13 pruned. 


Epoch 1/10 - Loss: 1.4170
Epoch 1/10 - Loss: 2.1362
Epoch 2/10 - Loss: 1.8804
Epoch 3/10 - Loss: 1.7599
Epoch 4/10 - Loss: 1.6845
Epoch 5/10 - Loss: 1.6291
Epoch 6/10 - Loss: 1.5829
Epoch 7/10 - Loss: 1.5438
Epoch 8/10 - Loss: 1.5105
Epoch 9/10 - Loss: 1.4810
Epoch 10/10 - Loss: 1.4545


[I 2025-06-13 03:07:47,660] Trial 14 finished with value: 0.4912 and parameters: {'lr': 1.1472979063615213e-05, 'beta1': 0.8942510984728449, 'beta2': 0.9432130318618027, 'weight_decay': 0.0003160859989111197}. Best is trial 0 with value: 0.7264.
[I 2025-06-13 03:08:02,794] Trial 15 pruned. 


Epoch 1/10 - Loss: 1.6987


[I 2025-06-13 03:08:17,868] Trial 16 pruned. 


Epoch 1/10 - Loss: 1.6151


[I 2025-06-13 03:08:32,919] Trial 17 pruned. 


Epoch 1/10 - Loss: 1.3971


[I 2025-06-13 03:08:47,923] Trial 18 pruned. 


Epoch 1/10 - Loss: 1.4751


[I 2025-06-13 03:09:02,839] Trial 19 pruned. 


Epoch 1/10 - Loss: 1.7038

 Results for Adam 
Best hyperparameters : {'lr': 0.00046568046379195655, 'beta1': 0.9508454290819022, 'beta2': 0.9000113231069171, 'weight_decay': 8.072307884499753e-05}
Best accuracy : 72.64%
Execution time : 1656.47 secondes


In [16]:
AMSGrad_study=run_optuna_study("AMSGrad", 20)

[I 2025-06-13 03:09:02,846] A new study created in memory with name: no-name-68987d89-4ff4-4106-96a6-299e970364e3


Epoch 1/10 - Loss: 1.5494
Epoch 2/10 - Loss: 1.2275
Epoch 3/10 - Loss: 1.1024
Epoch 4/10 - Loss: 1.0125
Epoch 5/10 - Loss: 0.9496
Epoch 6/10 - Loss: 0.8919
Epoch 7/10 - Loss: 0.8476
Epoch 8/10 - Loss: 0.7977
Epoch 9/10 - Loss: 0.7589
Epoch 10/10 - Loss: 0.7225


[I 2025-06-13 03:11:34,315] Trial 0 finished with value: 0.7034 and parameters: {'lr': 0.00046568046379195655, 'beta1': 0.9508454290819022, 'beta2': 0.9000113231069171, 'weight_decay': 8.072307884499753e-05}. Best is trial 0 with value: 0.7034.


Epoch 1/10 - Loss: 1.9325
Epoch 2/10 - Loss: 1.6561
Epoch 3/10 - Loss: 1.5505
Epoch 4/10 - Loss: 1.4784
Epoch 5/10 - Loss: 1.4284
Epoch 6/10 - Loss: 1.3853
Epoch 7/10 - Loss: 1.3521
Epoch 8/10 - Loss: 1.3224
Epoch 9/10 - Loss: 1.2963
Epoch 10/10 - Loss: 1.2714


[I 2025-06-13 03:14:04,616] Trial 1 finished with value: 0.5525 and parameters: {'lr': 3.863879405158732e-05, 'beta1': 0.8629274032676316, 'beta2': 0.9184397609263895, 'weight_decay': 0.00010881335003635877}. Best is trial 0 with value: 0.7034.


Epoch 1/10 - Loss: 1.5642
Epoch 2/10 - Loss: 1.2550
Epoch 3/10 - Loss: 1.1432
Epoch 4/10 - Loss: 1.0562
Epoch 5/10 - Loss: 0.9962
Epoch 6/10 - Loss: 0.9395
Epoch 7/10 - Loss: 0.8975
Epoch 8/10 - Loss: 0.8643
Epoch 9/10 - Loss: 0.8249
Epoch 10/10 - Loss: 0.7891


[I 2025-06-13 03:16:35,071] Trial 2 finished with value: 0.6841 and parameters: {'lr': 0.00038642916535526494, 'beta1': 0.92543434276047, 'beta2': 0.9415002569259262, 'weight_decay': 0.0011367330868956233}. Best is trial 0 with value: 0.7034.


Epoch 1/10 - Loss: 1.8866
Epoch 2/10 - Loss: 1.6057
Epoch 3/10 - Loss: 1.4888
Epoch 4/10 - Loss: 1.4139
Epoch 5/10 - Loss: 1.3607
Epoch 6/10 - Loss: 1.3174
Epoch 7/10 - Loss: 1.2804
Epoch 8/10 - Loss: 1.2523
Epoch 9/10 - Loss: 1.2240
Epoch 10/10 - Loss: 1.1999


[I 2025-06-13 03:19:06,465] Trial 3 finished with value: 0.5724 and parameters: {'lr': 6.573686655138327e-05, 'beta1': 0.9729364410947323, 'beta2': 0.9027113717265947, 'weight_decay': 0.001026603003516424}. Best is trial 0 with value: 0.7034.


Epoch 1/10 - Loss: 1.5345
Epoch 2/10 - Loss: 1.2181
Epoch 3/10 - Loss: 1.0967
Epoch 4/10 - Loss: 0.9994
Epoch 5/10 - Loss: 0.9355
Epoch 6/10 - Loss: 0.8716
Epoch 7/10 - Loss: 0.8235
Epoch 8/10 - Loss: 0.7866
Epoch 9/10 - Loss: 0.7373
Epoch 10/10 - Loss: 0.6962


[I 2025-06-13 03:21:37,697] Trial 4 finished with value: 0.6994 and parameters: {'lr': 0.00046689498537088925, 'beta1': 0.9282165759824053, 'beta2': 0.9138983069209282, 'weight_decay': 3.9292030026239486e-05}. Best is trial 0 with value: 0.7034.


Epoch 1/10 - Loss: 2.1131
Epoch 2/10 - Loss: 1.4254
Epoch 3/10 - Loss: 1.3144
Epoch 4/10 - Loss: 1.2592
Epoch 5/10 - Loss: 1.1866
Epoch 6/10 - Loss: 1.1473
Epoch 7/10 - Loss: 1.1165
Epoch 8/10 - Loss: 1.0891
Epoch 9/10 - Loss: 1.0607
Epoch 10/10 - Loss: 1.0231


[I 2025-06-13 03:24:08,238] Trial 5 finished with value: 0.6155 and parameters: {'lr': 0.015957993164212966, 'beta1': 0.9855566206007156, 'beta2': 0.931028993637765, 'weight_decay': 0.0011938997108268642}. Best is trial 0 with value: 0.7034.


Epoch 1/10 - Loss: 4.4600
Epoch 2/10 - Loss: 2.1964
Epoch 3/10 - Loss: 2.0080
Epoch 4/10 - Loss: 1.8978
Epoch 5/10 - Loss: 1.8265
Epoch 6/10 - Loss: 1.7580
Epoch 7/10 - Loss: 1.7693
Epoch 8/10 - Loss: 1.7177
Epoch 9/10 - Loss: 1.7403
Epoch 10/10 - Loss: 1.6931


[I 2025-06-13 03:26:38,428] Trial 6 finished with value: 0.3805 and parameters: {'lr': 0.032029975700290474, 'beta1': 0.9752449328905386, 'beta2': 0.908419376925608, 'weight_decay': 1.3096774493777596e-05}. Best is trial 0 with value: 0.7034.


Epoch 1/10 - Loss: 1.9337
Epoch 2/10 - Loss: 1.6589
Epoch 3/10 - Loss: 1.5450
Epoch 4/10 - Loss: 1.4697
Epoch 5/10 - Loss: 1.4152
Epoch 6/10 - Loss: 1.3728
Epoch 7/10 - Loss: 1.3348
Epoch 8/10 - Loss: 1.3061
Epoch 9/10 - Loss: 1.2771
Epoch 10/10 - Loss: 1.2537


[I 2025-06-13 03:29:09,218] Trial 7 finished with value: 0.5491 and parameters: {'lr': 4.7788310658576985e-05, 'beta1': 0.9729399504801178, 'beta2': 0.909736336549472, 'weight_decay': 0.0001833677157890362}. Best is trial 0 with value: 0.7034.


Epoch 1/10 - Loss: 26.6806
Epoch 2/10 - Loss: 2.3036
Epoch 3/10 - Loss: 2.3034
Epoch 4/10 - Loss: 2.3034
Epoch 5/10 - Loss: 2.3032
Epoch 6/10 - Loss: 2.3034
Epoch 7/10 - Loss: 2.3034
Epoch 8/10 - Loss: 2.3035
Epoch 9/10 - Loss: 2.3034
Epoch 10/10 - Loss: 2.3033


[I 2025-06-13 03:31:39,926] Trial 8 finished with value: 0.1 and parameters: {'lr': 0.06785129181994787, 'beta1': 0.9246431398962224, 'beta2': 0.9684958342810969, 'weight_decay': 8.841926348917726e-05}. Best is trial 0 with value: 0.7034.
[I 2025-06-13 03:31:54,875] Trial 9 pruned. 


Epoch 1/10 - Loss: 1.6325


[I 2025-06-13 03:32:10,037] Trial 10 pruned. 


Epoch 1/10 - Loss: 1.5139


[I 2025-06-13 03:32:24,908] Trial 11 pruned. 


Epoch 1/10 - Loss: 1.5531


[I 2025-06-13 03:32:39,871] Trial 12 pruned. 


Epoch 1/10 - Loss: 1.5864


[I 2025-06-13 03:32:54,918] Trial 13 pruned. 


Epoch 1/10 - Loss: 1.4294
Epoch 1/10 - Loss: 2.1435
Epoch 2/10 - Loss: 1.9005
Epoch 3/10 - Loss: 1.7848
Epoch 4/10 - Loss: 1.7120
Epoch 5/10 - Loss: 1.6589
Epoch 6/10 - Loss: 1.6160
Epoch 7/10 - Loss: 1.5796
Epoch 8/10 - Loss: 1.5484
Epoch 9/10 - Loss: 1.5211
Epoch 10/10 - Loss: 1.4965


[I 2025-06-13 03:35:26,405] Trial 14 finished with value: 0.4786 and parameters: {'lr': 1.1472979063615213e-05, 'beta1': 0.8942510984728449, 'beta2': 0.9432130318618027, 'weight_decay': 0.0003160859989111197}. Best is trial 0 with value: 0.7034.
[I 2025-06-13 03:35:41,229] Trial 15 pruned. 


Epoch 1/10 - Loss: 1.7014


[I 2025-06-13 03:35:56,075] Trial 16 pruned. 


Epoch 1/10 - Loss: 1.6449


[I 2025-06-13 03:36:10,980] Trial 17 pruned. 


Epoch 1/10 - Loss: 1.4122


[I 2025-06-13 03:36:25,891] Trial 18 pruned. 


Epoch 1/10 - Loss: 1.4816


[I 2025-06-13 03:36:40,819] Trial 19 pruned. 


Epoch 1/10 - Loss: 1.7153

 Results for AMSGrad 
Best hyperparameters : {'lr': 0.00046568046379195655, 'beta1': 0.9508454290819022, 'beta2': 0.9000113231069171, 'weight_decay': 8.072307884499753e-05}
Best accuracy : 70.34%
Execution time : 1657.97 secondes


In [17]:
AdamW_study=run_optuna_study("AdamW", 20)

[I 2025-06-13 03:36:40,826] A new study created in memory with name: no-name-6a5e4b02-45d9-4af7-967b-b248d5977840


Epoch 1/10 - Loss: 1.5271
Epoch 2/10 - Loss: 1.1728
Epoch 3/10 - Loss: 1.0220
Epoch 4/10 - Loss: 0.9208
Epoch 5/10 - Loss: 0.8368
Epoch 6/10 - Loss: 0.7640
Epoch 7/10 - Loss: 0.6949
Epoch 8/10 - Loss: 0.6287
Epoch 9/10 - Loss: 0.5720
Epoch 10/10 - Loss: 0.5134


[I 2025-06-13 03:39:11,569] Trial 0 finished with value: 0.7223 and parameters: {'lr': 0.00046568046379195655, 'beta1': 0.9508454290819022, 'beta2': 0.9000113231069171, 'weight_decay': 8.072307884499753e-05}. Best is trial 0 with value: 0.7223.


Epoch 1/10 - Loss: 1.9214
Epoch 2/10 - Loss: 1.6321
Epoch 3/10 - Loss: 1.5182
Epoch 4/10 - Loss: 1.4407
Epoch 5/10 - Loss: 1.3880
Epoch 6/10 - Loss: 1.3407
Epoch 7/10 - Loss: 1.3031
Epoch 8/10 - Loss: 1.2712
Epoch 9/10 - Loss: 1.2425
Epoch 10/10 - Loss: 1.2148


[I 2025-06-13 03:41:41,453] Trial 1 finished with value: 0.566 and parameters: {'lr': 3.863879405158732e-05, 'beta1': 0.8629274032676316, 'beta2': 0.9184397609263895, 'weight_decay': 0.00010881335003635877}. Best is trial 0 with value: 0.7223.


Epoch 1/10 - Loss: 1.5485
Epoch 2/10 - Loss: 1.2084
Epoch 3/10 - Loss: 1.0699
Epoch 4/10 - Loss: 0.9642
Epoch 5/10 - Loss: 0.8840
Epoch 6/10 - Loss: 0.8110
Epoch 7/10 - Loss: 0.7528
Epoch 8/10 - Loss: 0.7028
Epoch 9/10 - Loss: 0.6442
Epoch 10/10 - Loss: 0.5893


[I 2025-06-13 03:44:11,750] Trial 2 finished with value: 0.7124 and parameters: {'lr': 0.00038642916535526494, 'beta1': 0.92543434276047, 'beta2': 0.9415002569259262, 'weight_decay': 0.0011367330868956233}. Best is trial 0 with value: 0.7223.


Epoch 1/10 - Loss: 1.8766
Epoch 2/10 - Loss: 1.5704
Epoch 3/10 - Loss: 1.4412
Epoch 4/10 - Loss: 1.3575
Epoch 5/10 - Loss: 1.3004
Epoch 6/10 - Loss: 1.2488
Epoch 7/10 - Loss: 1.2058
Epoch 8/10 - Loss: 1.1754
Epoch 9/10 - Loss: 1.1437
Epoch 10/10 - Loss: 1.1139


[I 2025-06-13 03:46:41,483] Trial 3 finished with value: 0.5979 and parameters: {'lr': 6.573686655138327e-05, 'beta1': 0.9729364410947323, 'beta2': 0.9027113717265947, 'weight_decay': 0.001026603003516424}. Best is trial 0 with value: 0.7223.


Epoch 1/10 - Loss: 1.5214
Epoch 2/10 - Loss: 1.1738
Epoch 3/10 - Loss: 1.0251
Epoch 4/10 - Loss: 0.9130
Epoch 5/10 - Loss: 0.8307
Epoch 6/10 - Loss: 0.7535
Epoch 7/10 - Loss: 0.6867
Epoch 8/10 - Loss: 0.6282
Epoch 9/10 - Loss: 0.5620
Epoch 10/10 - Loss: 0.4981


[I 2025-06-13 03:49:12,288] Trial 4 finished with value: 0.7173 and parameters: {'lr': 0.00046689498537088925, 'beta1': 0.9282165759824053, 'beta2': 0.9138983069209282, 'weight_decay': 3.9292030026239486e-05}. Best is trial 0 with value: 0.7223.


Epoch 1/10 - Loss: 2.6525
Epoch 2/10 - Loss: 3.1362
Epoch 3/10 - Loss: 28162.5241
Epoch 4/10 - Loss: 52083.0217
Epoch 5/10 - Loss: 20282253.1983
Epoch 6/10 - Loss: 24185552.0775
Epoch 7/10 - Loss: 749027928.5252
Epoch 8/10 - Loss: 13747743535.1540
Epoch 9/10 - Loss: 7054809864.2272
Epoch 10/10 - Loss: 111535657.5543


[I 2025-06-13 03:51:42,903] Trial 5 finished with value: 0.1 and parameters: {'lr': 0.015957993164212966, 'beta1': 0.9855566206007156, 'beta2': 0.931028993637765, 'weight_decay': 0.0011938997108268642}. Best is trial 0 with value: 0.7223.


Epoch 1/10 - Loss: 4.5071
Epoch 2/10 - Loss: 2442.1911
Epoch 3/10 - Loss: 57.9023
Epoch 4/10 - Loss: 58.9122
Epoch 5/10 - Loss: 2.6165
Epoch 6/10 - Loss: 2.3131
Epoch 7/10 - Loss: 2.3038
Epoch 8/10 - Loss: 2.3049
Epoch 9/10 - Loss: 2.3040
Epoch 10/10 - Loss: 2.3042


[I 2025-06-13 03:54:12,852] Trial 6 finished with value: 0.1 and parameters: {'lr': 0.032029975700290474, 'beta1': 0.9752449328905386, 'beta2': 0.908419376925608, 'weight_decay': 1.3096774493777596e-05}. Best is trial 0 with value: 0.7223.


Epoch 1/10 - Loss: 1.9240
Epoch 2/10 - Loss: 1.6317
Epoch 3/10 - Loss: 1.5041
Epoch 4/10 - Loss: 1.4229
Epoch 5/10 - Loss: 1.3654
Epoch 6/10 - Loss: 1.3176
Epoch 7/10 - Loss: 1.2748
Epoch 8/10 - Loss: 1.2435
Epoch 9/10 - Loss: 1.2105
Epoch 10/10 - Loss: 1.1853


[I 2025-06-13 03:56:43,279] Trial 7 finished with value: 0.5697 and parameters: {'lr': 4.7788310658576985e-05, 'beta1': 0.9729399504801178, 'beta2': 0.909736336549472, 'weight_decay': 0.0001833677157890362}. Best is trial 0 with value: 0.7223.


Epoch 1/10 - Loss: 26.3989
Epoch 2/10 - Loss: 2.3062
Epoch 3/10 - Loss: 2.3057
Epoch 4/10 - Loss: 2.3062
Epoch 5/10 - Loss: 2.3053
Epoch 6/10 - Loss: 2.3058
Epoch 7/10 - Loss: 2.3047
Epoch 8/10 - Loss: 2.3058
Epoch 9/10 - Loss: 2.3054
Epoch 10/10 - Loss: 2.3053


[I 2025-06-13 03:59:14,088] Trial 8 finished with value: 0.1 and parameters: {'lr': 0.06785129181994787, 'beta1': 0.9246431398962224, 'beta2': 0.9684958342810969, 'weight_decay': 8.841926348917726e-05}. Best is trial 0 with value: 0.7223.
[I 2025-06-13 03:59:28,721] Trial 9 pruned. 


Epoch 1/10 - Loss: 1.8728


[I 2025-06-13 03:59:43,576] Trial 10 pruned. 


Epoch 1/10 - Loss: 1.4320


[I 2025-06-13 03:59:58,335] Trial 11 pruned. 


Epoch 1/10 - Loss: 1.5369


[I 2025-06-13 04:00:13,324] Trial 12 pruned. 


Epoch 1/10 - Loss: 1.5814


[I 2025-06-13 04:00:28,135] Trial 13 pruned. 


Epoch 1/10 - Loss: 1.4215
Epoch 1/10 - Loss: 2.1360
Epoch 2/10 - Loss: 1.8801
Epoch 3/10 - Loss: 1.7599
Epoch 4/10 - Loss: 1.6844
Epoch 5/10 - Loss: 1.6285
Epoch 6/10 - Loss: 1.5818
Epoch 7/10 - Loss: 1.5427
Epoch 8/10 - Loss: 1.5097
Epoch 9/10 - Loss: 1.4805
Epoch 10/10 - Loss: 1.4545


[I 2025-06-13 04:02:59,585] Trial 14 finished with value: 0.49 and parameters: {'lr': 1.1472979063615213e-05, 'beta1': 0.8942510984728449, 'beta2': 0.9432130318618027, 'weight_decay': 0.0003160859989111197}. Best is trial 0 with value: 0.7223.
[I 2025-06-13 04:03:14,334] Trial 15 pruned. 


Epoch 1/10 - Loss: 1.6982


[I 2025-06-13 04:03:29,177] Trial 16 pruned. 


Epoch 1/10 - Loss: 1.6409


[I 2025-06-13 04:03:43,994] Trial 17 pruned. 


Epoch 1/10 - Loss: 1.3991


[I 2025-06-13 04:03:58,866] Trial 18 pruned. 


Epoch 1/10 - Loss: 1.4762


[I 2025-06-13 04:04:13,672] Trial 19 pruned. 


Epoch 1/10 - Loss: 1.7036

 Results for AdamW 
Best hyperparameters : {'lr': 0.00046568046379195655, 'beta1': 0.9508454290819022, 'beta2': 0.9000113231069171, 'weight_decay': 8.072307884499753e-05}
Best accuracy : 72.23%
Execution time : 1652.85 secondes


In [18]:
Nadam_study=run_optuna_study("Nadam", 20)

[I 2025-06-13 04:04:13,679] A new study created in memory with name: no-name-cd88e9fe-e3a5-47db-ba62-e09235bea69d


Epoch 1/10 - Loss: 1.5773
Epoch 2/10 - Loss: 1.2148
Epoch 3/10 - Loss: 1.0488
Epoch 4/10 - Loss: 0.9260
Epoch 5/10 - Loss: 0.8301
Epoch 6/10 - Loss: 0.7461
Epoch 7/10 - Loss: 0.6732
Epoch 8/10 - Loss: 0.5991
Epoch 9/10 - Loss: 0.5302
Epoch 10/10 - Loss: 0.4640


[I 2025-06-13 04:06:44,188] Trial 0 finished with value: 0.7232 and parameters: {'lr': 0.00046568046379195655, 'beta1': 0.9508454290819022, 'beta2': 0.9000113231069171, 'weight_decay': 8.072307884499753e-05}. Best is trial 0 with value: 0.7232.


Epoch 1/10 - Loss: 1.9087
Epoch 2/10 - Loss: 1.6227
Epoch 3/10 - Loss: 1.5090
Epoch 4/10 - Loss: 1.4321
Epoch 5/10 - Loss: 1.3778
Epoch 6/10 - Loss: 1.3316
Epoch 7/10 - Loss: 1.2929
Epoch 8/10 - Loss: 1.2582
Epoch 9/10 - Loss: 1.2296
Epoch 10/10 - Loss: 1.2032


[I 2025-06-13 04:09:15,389] Trial 1 finished with value: 0.5604 and parameters: {'lr': 3.863879405158732e-05, 'beta1': 0.8629274032676316, 'beta2': 0.9184397609263895, 'weight_decay': 0.00010881335003635877}. Best is trial 0 with value: 0.7232.


Epoch 1/10 - Loss: 1.5991
Epoch 2/10 - Loss: 1.2632
Epoch 3/10 - Loss: 1.1205
Epoch 4/10 - Loss: 1.0146
Epoch 5/10 - Loss: 0.9358
Epoch 6/10 - Loss: 0.8685
Epoch 7/10 - Loss: 0.8120
Epoch 8/10 - Loss: 0.7556
Epoch 9/10 - Loss: 0.7059
Epoch 10/10 - Loss: 0.6577


[I 2025-06-13 04:11:46,309] Trial 2 finished with value: 0.691 and parameters: {'lr': 0.00038642916535526494, 'beta1': 0.92543434276047, 'beta2': 0.9415002569259262, 'weight_decay': 0.0011367330868956233}. Best is trial 0 with value: 0.7232.


Epoch 1/10 - Loss: 1.8573
Epoch 2/10 - Loss: 1.5427
Epoch 3/10 - Loss: 1.4197
Epoch 4/10 - Loss: 1.3391
Epoch 5/10 - Loss: 1.2798
Epoch 6/10 - Loss: 1.2307
Epoch 7/10 - Loss: 1.1901
Epoch 8/10 - Loss: 1.1539
Epoch 9/10 - Loss: 1.1235
Epoch 10/10 - Loss: 1.0948


[I 2025-06-13 04:14:16,867] Trial 3 finished with value: 0.5967 and parameters: {'lr': 6.573686655138327e-05, 'beta1': 0.9729364410947323, 'beta2': 0.9027113717265947, 'weight_decay': 0.001026603003516424}. Best is trial 0 with value: 0.7232.


Epoch 1/10 - Loss: 1.5696
Epoch 2/10 - Loss: 1.2104
Epoch 3/10 - Loss: 1.0436
Epoch 4/10 - Loss: 0.9200
Epoch 5/10 - Loss: 0.8217
Epoch 6/10 - Loss: 0.7352
Epoch 7/10 - Loss: 0.6587
Epoch 8/10 - Loss: 0.5813
Epoch 9/10 - Loss: 0.5092
Epoch 10/10 - Loss: 0.4396


[I 2025-06-13 04:16:47,113] Trial 4 finished with value: 0.7137 and parameters: {'lr': 0.00046689498537088925, 'beta1': 0.9282165759824053, 'beta2': 0.9138983069209282, 'weight_decay': 3.9292030026239486e-05}. Best is trial 0 with value: 0.7232.


Epoch 1/10 - Loss: 2.3200
Epoch 2/10 - Loss: 1.6358
Epoch 3/10 - Loss: 1.4756
Epoch 4/10 - Loss: 1.3772
Epoch 5/10 - Loss: 1.3068
Epoch 6/10 - Loss: 1.2558
Epoch 7/10 - Loss: 1.2111
Epoch 8/10 - Loss: 1.1844
Epoch 9/10 - Loss: 1.1448
Epoch 10/10 - Loss: 1.1268


[I 2025-06-13 04:19:17,694] Trial 5 finished with value: 0.5594 and parameters: {'lr': 0.015957993164212966, 'beta1': 0.9855566206007156, 'beta2': 0.931028993637765, 'weight_decay': 0.0011938997108268642}. Best is trial 0 with value: 0.7232.


Epoch 1/10 - Loss: 7.5861
Epoch 2/10 - Loss: 5.3945
Epoch 3/10 - Loss: 9.7455
Epoch 4/10 - Loss: 10.8444
Epoch 5/10 - Loss: 3.4702
Epoch 6/10 - Loss: 5.6973
Epoch 7/10 - Loss: 5.0917
Epoch 8/10 - Loss: 5.2431
Epoch 9/10 - Loss: 9.8825
Epoch 10/10 - Loss: 11.8911


[I 2025-06-13 04:21:48,976] Trial 6 finished with value: 0.1 and parameters: {'lr': 0.032029975700290474, 'beta1': 0.9752449328905386, 'beta2': 0.908419376925608, 'weight_decay': 1.3096774493777596e-05}. Best is trial 0 with value: 0.7232.


Epoch 1/10 - Loss: 1.9089
Epoch 2/10 - Loss: 1.5999
Epoch 3/10 - Loss: 1.4763
Epoch 4/10 - Loss: 1.3955
Epoch 5/10 - Loss: 1.3382
Epoch 6/10 - Loss: 1.2894
Epoch 7/10 - Loss: 1.2488
Epoch 8/10 - Loss: 1.2130
Epoch 9/10 - Loss: 1.1832
Epoch 10/10 - Loss: 1.1553


[I 2025-06-13 04:24:19,705] Trial 7 finished with value: 0.5785 and parameters: {'lr': 4.7788310658576985e-05, 'beta1': 0.9729399504801178, 'beta2': 0.909736336549472, 'weight_decay': 0.0001833677157890362}. Best is trial 0 with value: 0.7232.


Epoch 1/10 - Loss: 44.4505
Epoch 2/10 - Loss: 111.5583
Epoch 3/10 - Loss: 84.4509
Epoch 4/10 - Loss: 30.6925
Epoch 5/10 - Loss: 80.0778
Epoch 6/10 - Loss: 19.5910
Epoch 7/10 - Loss: 490.5848
Epoch 8/10 - Loss: 58.5300
Epoch 9/10 - Loss: 37.1266
Epoch 10/10 - Loss: 263.8037


[I 2025-06-13 04:26:49,632] Trial 8 finished with value: 0.0978 and parameters: {'lr': 0.06785129181994787, 'beta1': 0.9246431398962224, 'beta2': 0.9684958342810969, 'weight_decay': 8.841926348917726e-05}. Best is trial 0 with value: 0.7232.
[I 2025-06-13 04:27:04,365] Trial 9 pruned. 


Epoch 1/10 - Loss: 1.6757


[I 2025-06-13 04:27:19,157] Trial 10 pruned. 


Epoch 1/10 - Loss: 1.6981


[I 2025-06-13 04:27:33,916] Trial 11 pruned. 


Epoch 1/10 - Loss: 1.5882


[I 2025-06-13 04:27:48,707] Trial 12 pruned. 


Epoch 1/10 - Loss: 1.6171


[I 2025-06-13 04:28:03,446] Trial 13 pruned. 


Epoch 1/10 - Loss: 1.4596
Epoch 1/10 - Loss: 2.1302
Epoch 2/10 - Loss: 1.8727
Epoch 3/10 - Loss: 1.7548
Epoch 4/10 - Loss: 1.6797
Epoch 5/10 - Loss: 1.6246
Epoch 6/10 - Loss: 1.5783
Epoch 7/10 - Loss: 1.5398
Epoch 8/10 - Loss: 1.5055
Epoch 9/10 - Loss: 1.4771
Epoch 10/10 - Loss: 1.4512


[I 2025-06-13 04:30:33,683] Trial 14 finished with value: 0.4915 and parameters: {'lr': 1.1472979063615213e-05, 'beta1': 0.8942510984728449, 'beta2': 0.9432130318618027, 'weight_decay': 0.0003160859989111197}. Best is trial 0 with value: 0.7232.
[I 2025-06-13 04:30:48,456] Trial 15 pruned. 


Epoch 1/10 - Loss: 1.6963


[I 2025-06-13 04:31:03,281] Trial 16 pruned. 


Epoch 1/10 - Loss: 1.6352


[I 2025-06-13 04:31:18,153] Trial 17 pruned. 


Epoch 1/10 - Loss: 1.4592


[I 2025-06-13 04:31:33,002] Trial 18 pruned. 


Epoch 1/10 - Loss: 1.5424


[I 2025-06-13 04:31:47,892] Trial 19 pruned. 


Epoch 1/10 - Loss: 1.7048

 Results for Nadam 
Best hyperparameters : {'lr': 0.00046568046379195655, 'beta1': 0.9508454290819022, 'beta2': 0.9000113231069171, 'weight_decay': 8.072307884499753e-05}
Best accuracy : 72.32%
Execution time : 1654.21 secondes


In [19]:
RAdam_study=run_optuna_study("RAdam", 20)

[I 2025-06-13 04:31:47,899] A new study created in memory with name: no-name-4102d839-1453-45ef-9e72-82469c8090e0


Epoch 1/10 - Loss: 1.5793
Epoch 2/10 - Loss: 1.1768
Epoch 3/10 - Loss: 1.0148
Epoch 4/10 - Loss: 0.9024
Epoch 5/10 - Loss: 0.8163
Epoch 6/10 - Loss: 0.7424
Epoch 7/10 - Loss: 0.6664
Epoch 8/10 - Loss: 0.5972
Epoch 9/10 - Loss: 0.5310
Epoch 10/10 - Loss: 0.4736


[I 2025-06-13 04:34:18,645] Trial 0 finished with value: 0.7313 and parameters: {'lr': 0.00046568046379195655, 'beta1': 0.9508454290819022, 'beta2': 0.9000113231069171, 'weight_decay': 8.072307884499753e-05}. Best is trial 0 with value: 0.7313.


Epoch 1/10 - Loss: 1.9601
Epoch 2/10 - Loss: 1.6374
Epoch 3/10 - Loss: 1.5181
Epoch 4/10 - Loss: 1.4380
Epoch 5/10 - Loss: 1.3834
Epoch 6/10 - Loss: 1.3346
Epoch 7/10 - Loss: 1.2962
Epoch 8/10 - Loss: 1.2639
Epoch 9/10 - Loss: 1.2347
Epoch 10/10 - Loss: 1.2061


[I 2025-06-13 04:36:48,636] Trial 1 finished with value: 0.5686 and parameters: {'lr': 3.863879405158732e-05, 'beta1': 0.8629274032676316, 'beta2': 0.9184397609263895, 'weight_decay': 0.00010881335003635877}. Best is trial 0 with value: 0.7313.


Epoch 1/10 - Loss: 1.6196
Epoch 2/10 - Loss: 1.2313
Epoch 3/10 - Loss: 1.0896
Epoch 4/10 - Loss: 0.9861
Epoch 5/10 - Loss: 0.9224
Epoch 6/10 - Loss: 0.8549
Epoch 7/10 - Loss: 0.8023
Epoch 8/10 - Loss: 0.7582
Epoch 9/10 - Loss: 0.7101
Epoch 10/10 - Loss: 0.6643


[I 2025-06-13 04:39:19,787] Trial 2 finished with value: 0.7173 and parameters: {'lr': 0.00038642916535526494, 'beta1': 0.92543434276047, 'beta2': 0.9415002569259262, 'weight_decay': 0.0011367330868956233}. Best is trial 0 with value: 0.7313.


Epoch 1/10 - Loss: 1.9246
Epoch 2/10 - Loss: 1.5866
Epoch 3/10 - Loss: 1.4521
Epoch 4/10 - Loss: 1.3610
Epoch 5/10 - Loss: 1.3014
Epoch 6/10 - Loss: 1.2482
Epoch 7/10 - Loss: 1.2066
Epoch 8/10 - Loss: 1.1706
Epoch 9/10 - Loss: 1.1401
Epoch 10/10 - Loss: 1.1109


[I 2025-06-13 04:41:50,161] Trial 3 finished with value: 0.6051 and parameters: {'lr': 6.573686655138327e-05, 'beta1': 0.9729364410947323, 'beta2': 0.9027113717265947, 'weight_decay': 0.001026603003516424}. Best is trial 0 with value: 0.7313.


Epoch 1/10 - Loss: 1.5692
Epoch 2/10 - Loss: 1.1725
Epoch 3/10 - Loss: 1.0185
Epoch 4/10 - Loss: 0.9024
Epoch 5/10 - Loss: 0.8196
Epoch 6/10 - Loss: 0.7379
Epoch 7/10 - Loss: 0.6687
Epoch 8/10 - Loss: 0.6066
Epoch 9/10 - Loss: 0.5346
Epoch 10/10 - Loss: 0.4666


[I 2025-06-13 04:44:19,720] Trial 4 finished with value: 0.7269 and parameters: {'lr': 0.00046689498537088925, 'beta1': 0.9282165759824053, 'beta2': 0.9138983069209282, 'weight_decay': 3.9292030026239486e-05}. Best is trial 0 with value: 0.7313.
[I 2025-06-13 04:44:34,523] Trial 5 pruned. 


Epoch 1/10 - Loss: 1.4979
Epoch 1/10 - Loss: 2.7988
Epoch 2/10 - Loss: 79.3583
Epoch 3/10 - Loss: 17.8398
Epoch 4/10 - Loss: 15.8073
Epoch 5/10 - Loss: 16.4938
Epoch 6/10 - Loss: 12.8358
Epoch 7/10 - Loss: 21.5907
Epoch 8/10 - Loss: 16.7766
Epoch 9/10 - Loss: 10.3321
Epoch 10/10 - Loss: 19.1743


[I 2025-06-13 04:47:05,680] Trial 6 finished with value: 0.0996 and parameters: {'lr': 0.032029975700290474, 'beta1': 0.9752449328905386, 'beta2': 0.908419376925608, 'weight_decay': 1.3096774493777596e-05}. Best is trial 0 with value: 0.7313.


Epoch 1/10 - Loss: 1.9673
Epoch 2/10 - Loss: 1.6416
Epoch 3/10 - Loss: 1.5040
Epoch 4/10 - Loss: 1.4173
Epoch 5/10 - Loss: 1.3561
Epoch 6/10 - Loss: 1.3061
Epoch 7/10 - Loss: 1.2635
Epoch 8/10 - Loss: 1.2306
Epoch 9/10 - Loss: 1.1972
Epoch 10/10 - Loss: 1.1707


[I 2025-06-13 04:49:36,396] Trial 7 finished with value: 0.5741 and parameters: {'lr': 4.7788310658576985e-05, 'beta1': 0.9729399504801178, 'beta2': 0.909736336549472, 'weight_decay': 0.0001833677157890362}. Best is trial 0 with value: 0.7313.


Epoch 1/10 - Loss: 2.6181
Epoch 2/10 - Loss: 2.5719
Epoch 3/10 - Loss: 2.2616
Epoch 4/10 - Loss: 2.7221
Epoch 5/10 - Loss: 3.0123
Epoch 6/10 - Loss: 2.2131
Epoch 7/10 - Loss: 2.0706
Epoch 8/10 - Loss: 2.6567
Epoch 9/10 - Loss: 2.5887
Epoch 10/10 - Loss: 2.4322


[I 2025-06-13 04:52:06,715] Trial 8 finished with value: 0.1181 and parameters: {'lr': 0.06785129181994787, 'beta1': 0.9246431398962224, 'beta2': 0.9684958342810969, 'weight_decay': 8.841926348917726e-05}. Best is trial 0 with value: 0.7313.
[I 2025-06-13 04:52:21,600] Trial 9 pruned. 


Epoch 1/10 - Loss: 1.4095


[I 2025-06-13 04:52:36,353] Trial 10 pruned. 


Epoch 1/10 - Loss: 1.5848


[I 2025-06-13 04:52:51,094] Trial 11 pruned. 


Epoch 1/10 - Loss: 1.6046


[I 2025-06-13 04:53:05,705] Trial 12 pruned. 


Epoch 1/10 - Loss: 1.6628


[I 2025-06-13 04:53:20,487] Trial 13 pruned. 


Epoch 1/10 - Loss: 1.4400
Epoch 1/10 - Loss: 2.1675
Epoch 2/10 - Loss: 1.8973
Epoch 3/10 - Loss: 1.7689
Epoch 4/10 - Loss: 1.6905
Epoch 5/10 - Loss: 1.6335
Epoch 6/10 - Loss: 1.5860
Epoch 7/10 - Loss: 1.5462
Epoch 8/10 - Loss: 1.5123
Epoch 9/10 - Loss: 1.4823
Epoch 10/10 - Loss: 1.4556


[I 2025-06-13 04:55:51,533] Trial 14 finished with value: 0.4908 and parameters: {'lr': 1.1472979063615213e-05, 'beta1': 0.8942510984728449, 'beta2': 0.9432130318618027, 'weight_decay': 0.0003160859989111197}. Best is trial 0 with value: 0.7313.
[I 2025-06-13 04:56:06,439] Trial 15 pruned. 


Epoch 1/10 - Loss: 1.7938


[I 2025-06-13 04:56:21,236] Trial 16 pruned. 


Epoch 1/10 - Loss: 1.3506


[I 2025-06-13 04:56:36,106] Trial 17 pruned. 


Epoch 1/10 - Loss: 1.4138


[I 2025-06-13 04:56:51,018] Trial 18 pruned. 


Epoch 1/10 - Loss: 1.5608


[I 2025-06-13 04:57:05,928] Trial 19 pruned. 


Epoch 1/10 - Loss: 1.7639

 Results for RAdam 
Best hyperparameters : {'lr': 0.00046568046379195655, 'beta1': 0.9508454290819022, 'beta2': 0.9000113231069171, 'weight_decay': 8.072307884499753e-05}
Best accuracy : 73.13%
Execution time : 1518.03 secondes


### Visualisation and saves

In [111]:
def visualization (study, params) :
    vis.plot_optimization_history(study).show()
    vis.plot_param_importances(study).show()
    vis.plot_parallel_coordinate(study).show()
    vis.plot_intermediate_values(study).show()
    contour_params = list(itertools.combinations(params, 2))        
    for p1, p2 in contour_params:
            vis.plot_contour(study, params=[p1, p2])

In [112]:
def save_optuna_plots(study, params, save_dir="optuna_plots"):
    os.makedirs(save_dir, exist_ok=True)
    
    def save_fig(fig, name):
        path = os.path.join(save_dir, f"{name}.html")
        fig.write_html(path)
        print(f"Saved {name} plot to {path}")
        
    save_fig(vis.plot_optimization_history(study), "optimization_history")
    save_fig(vis.plot_param_importances(study), "param_importances")
    save_fig(vis.plot_parallel_coordinate(study), "parallel_coordinate")
    save_fig(vis.plot_intermediate_values(study), "intermediate_values")
    contour_params = list(itertools.combinations(params, 2))        
    for p1, p2 in contour_params:
        save_fig(vis.plot_contour(study, params=[p1, p2]), f"contour_{p1}_{p2}")

In [114]:
visualization(SGD_study, ["lr", "weight_decay"])
save_optuna_plots(SGD_study, ["lr", "weight_decay"], "figs_optuna/SGD")

Saved optimization_history plot to figs_optuna/SGD/optimization_history.html
Saved param_importances plot to figs_optuna/SGD/param_importances.html
Saved parallel_coordinate plot to figs_optuna/SGD/parallel_coordinate.html
Saved intermediate_values plot to figs_optuna/SGD/intermediate_values.html
Saved contour_lr_weight_decay plot to figs_optuna/SGD/contour_lr_weight_decay.html


In [115]:
visualization(AdaGrad_study, ["lr", "weight_decay"])
save_optuna_plots(SGD_study, ["lr", "weight_decay"], "figs_optuna/AdaGrad")

Saved optimization_history plot to figs_optuna/AdaGrad/optimization_history.html
Saved param_importances plot to figs_optuna/AdaGrad/param_importances.html
Saved parallel_coordinate plot to figs_optuna/AdaGrad/parallel_coordinate.html
Saved intermediate_values plot to figs_optuna/AdaGrad/intermediate_values.html
Saved contour_lr_weight_decay plot to figs_optuna/AdaGrad/contour_lr_weight_decay.html


In [116]:
visualization(RMSprop_study, ["lr", "weight_decay", "alpha"])
save_optuna_plots(RMSprop_study, ["lr", "weight_decay", "alpha"], "figs_optuna/RMSprop")

Saved optimization_history plot to figs_optuna/RMSprop/optimization_history.html
Saved param_importances plot to figs_optuna/RMSprop/param_importances.html
Saved parallel_coordinate plot to figs_optuna/RMSprop/parallel_coordinate.html
Saved intermediate_values plot to figs_optuna/RMSprop/intermediate_values.html
Saved contour_lr_weight_decay plot to figs_optuna/RMSprop/contour_lr_weight_decay.html
Saved contour_lr_alpha plot to figs_optuna/RMSprop/contour_lr_alpha.html
Saved contour_weight_decay_alpha plot to figs_optuna/RMSprop/contour_weight_decay_alpha.html


In [171]:
visualization(Adam_study, ["lr", "weight_decay", "beta1", "beta2"])
save_optuna_plots(Adam_study, ["lr", "weight_decay", "beta1", "beta2"], "figs_optuna/Adam")

Saved optimization_history plot to figs_optuna/Adam_vrai/optimization_history.html
Saved param_importances plot to figs_optuna/Adam_vrai/param_importances.html
Saved parallel_coordinate plot to figs_optuna/Adam_vrai/parallel_coordinate.html
Saved intermediate_values plot to figs_optuna/Adam_vrai/intermediate_values.html
Saved contour_lr_weight_decay plot to figs_optuna/Adam_vrai/contour_lr_weight_decay.html
Saved contour_lr_beta1 plot to figs_optuna/Adam_vrai/contour_lr_beta1.html
Saved contour_lr_beta2 plot to figs_optuna/Adam_vrai/contour_lr_beta2.html
Saved contour_weight_decay_beta1 plot to figs_optuna/Adam_vrai/contour_weight_decay_beta1.html
Saved contour_weight_decay_beta2 plot to figs_optuna/Adam_vrai/contour_weight_decay_beta2.html
Saved contour_beta1_beta2 plot to figs_optuna/Adam_vrai/contour_beta1_beta2.html


In [170]:
visualization(AMSGrad_study, ["lr", "weight_decay", "beta1", "beta2"])
save_optuna_plots(AMSGrad_study, ["lr", "weight_decay", "beta1", "beta2"], "figs_optuna/AMSGrad")

Saved optimization_history plot to figs_optuna/AMSGrad/optimization_history.html
Saved param_importances plot to figs_optuna/AMSGrad/param_importances.html
Saved parallel_coordinate plot to figs_optuna/AMSGrad/parallel_coordinate.html
Saved intermediate_values plot to figs_optuna/AMSGrad/intermediate_values.html
Saved contour_lr_weight_decay plot to figs_optuna/AMSGrad/contour_lr_weight_decay.html
Saved contour_lr_beta1 plot to figs_optuna/AMSGrad/contour_lr_beta1.html
Saved contour_lr_beta2 plot to figs_optuna/AMSGrad/contour_lr_beta2.html
Saved contour_weight_decay_beta1 plot to figs_optuna/AMSGrad/contour_weight_decay_beta1.html
Saved contour_weight_decay_beta2 plot to figs_optuna/AMSGrad/contour_weight_decay_beta2.html
Saved contour_beta1_beta2 plot to figs_optuna/AMSGrad/contour_beta1_beta2.html


In [125]:
visualization(AdamW_study, ["lr", "weight_decay", "beta1", "beta2"])
save_optuna_plots(AdamW_study, ["lr", "weight_decay", "beta1", "beta2"], "figs_optuna/AdamW")

Saved optimization_history plot to figs_optuna/AdamW/optimization_history.html
Saved param_importances plot to figs_optuna/AdamW/param_importances.html
Saved parallel_coordinate plot to figs_optuna/AdamW/parallel_coordinate.html
Saved intermediate_values plot to figs_optuna/AdamW/intermediate_values.html
Saved contour_lr_weight_decay plot to figs_optuna/AdamW/contour_lr_weight_decay.html
Saved contour_lr_beta1 plot to figs_optuna/AdamW/contour_lr_beta1.html
Saved contour_lr_beta2 plot to figs_optuna/AdamW/contour_lr_beta2.html
Saved contour_weight_decay_beta1 plot to figs_optuna/AdamW/contour_weight_decay_beta1.html
Saved contour_weight_decay_beta2 plot to figs_optuna/AdamW/contour_weight_decay_beta2.html
Saved contour_beta1_beta2 plot to figs_optuna/AdamW/contour_beta1_beta2.html


In [126]:
visualization(Nadam_study, ["lr", "weight_decay", "beta1", "beta2"])
save_optuna_plots(Nadam_study, ["lr", "weight_decay", "beta1", "beta2"], "figs_optuna/Nadam")

Saved optimization_history plot to figs_optuna/Nadam/optimization_history.html
Saved param_importances plot to figs_optuna/Nadam/param_importances.html
Saved parallel_coordinate plot to figs_optuna/Nadam/parallel_coordinate.html
Saved intermediate_values plot to figs_optuna/Nadam/intermediate_values.html
Saved contour_lr_weight_decay plot to figs_optuna/Nadam/contour_lr_weight_decay.html
Saved contour_lr_beta1 plot to figs_optuna/Nadam/contour_lr_beta1.html
Saved contour_lr_beta2 plot to figs_optuna/Nadam/contour_lr_beta2.html
Saved contour_weight_decay_beta1 plot to figs_optuna/Nadam/contour_weight_decay_beta1.html
Saved contour_weight_decay_beta2 plot to figs_optuna/Nadam/contour_weight_decay_beta2.html
Saved contour_beta1_beta2 plot to figs_optuna/Nadam/contour_beta1_beta2.html


In [143]:
visualization(RAdam_study, ["lr", "weight_decay", "beta1", "beta2"])
save_optuna_plots(RAdam_study, ["lr", "weight_decay", "beta1", "beta2"], "figs_optuna/RAdam")

Saved optimization_history plot to figs_optuna/RAdam/optimization_history.html
Saved param_importances plot to figs_optuna/RAdam/param_importances.html
Saved parallel_coordinate plot to figs_optuna/RAdam/parallel_coordinate.html
Saved intermediate_values plot to figs_optuna/RAdam/intermediate_values.html
Saved contour_lr_weight_decay plot to figs_optuna/RAdam/contour_lr_weight_decay.html
Saved contour_lr_beta1 plot to figs_optuna/RAdam/contour_lr_beta1.html
Saved contour_lr_beta2 plot to figs_optuna/RAdam/contour_lr_beta2.html
Saved contour_weight_decay_beta1 plot to figs_optuna/RAdam/contour_weight_decay_beta1.html
Saved contour_weight_decay_beta2 plot to figs_optuna/RAdam/contour_weight_decay_beta2.html
Saved contour_beta1_beta2 plot to figs_optuna/RAdam/contour_beta1_beta2.html
