In [25]:
# import time
# time.sleep(5000)

### Import packages

In [23]:
%load_ext autoreload
%autoreload 2

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, Subset
import optuna
from torch.optim import lr_scheduler
import numpy as np
import pandas as pd
from datetime import datetime
import os
import json

from data_loader import get_cifar10_dataloaders, get_test_dataloader, get_kaggle_test_dataloader
from helper import optimizer_map, scheduler_map, num_params, update_study_details, cifar_10_mean_std
from models import BaseResNet, EfficientNetB0, SmallResNet0, LargeResNet0, SmallResNet1
from trainer import train_model
from run import single_run

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Configure the device

In [24]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device:", device)

Using device: cuda


In [25]:
# Define the objective function for Optuna
def objective(trial):
    study_dir = f"studies/{trial.study.study_name}"
    os.makedirs(study_dir, exist_ok=True) # Create a directory for checkpoints if it doesn't exist

    # Suggest hyperparameters
    num_epochs = trial.suggest_int("num_epochs", 50, 125)
    model_type = trial.suggest_categorical("model_type", ["base","smallresnet", "efficientnet", "largeresnet"])
    batch_size = trial.suggest_categorical("batch_size", [64, 128, 256, 512])
    optimizer_type = trial.suggest_categorical("optimizer_type", ["Adam", "SGD", "AdamW"])
    scheduler_type = trial.suggest_categorical("scheduler_type", ["CosineAnnealingLR", "OneCycleLR", "ReduceLROnPlateau"])
    
    optimizer_params = {}
    if optimizer_type == "SGD":
        optimizer_params["lr"] = trial.suggest_float("lr", 0.001, 0.1, log=True)
        optimizer_params["momentum"] = trial.suggest_float("momentum", 0.8, 0.99)
        optimizer_params["weight_decay"] = trial.suggest_float("weight_decay", 1e-5, 5e-4, log=True)
        scheduler_type = trial.suggest_categorical("scheduler_type", ["CosineAnnealingLR", "OneCycleLR", "ReduceLROnPlateau"])
        optimizer_params["nesterov"] = scheduler_type != "ReduceLROnPlateau"
    
    elif optimizer_type == "Adam":
        optimizer_params["betas"] = (
            trial.suggest_float("beta1", 0.85, 0.95), 
            trial.suggest_float("beta2", 0.99, 0.999)
        )
        optimizer_params["lr"] = trial.suggest_float("lr", 1e-5, 1e-2, log=True)
        optimizer_params["weight_decay"] = trial.suggest_float("weight_decay", 1e-6, 1e-3, log=True)
        if scheduler_type == "OneCycleLR":
            scheduler_type = None

    elif optimizer_type == "AdamW":
        optimizer_params["betas"] = (
            trial.suggest_float("beta1", 0.85, 0.95), 
            trial.suggest_float("beta2", 0.99, 0.999)
        )
        optimizer_params["lr"] = trial.suggest_float("lr", 1e-5, 5e-3, log=True)
        optimizer_params["weight_decay"] = trial.suggest_float("weight_decay", 1e-3, 1e-1, log=True)
        scheduler_type = trial.suggest_categorical("scheduler_type", ["CosineAnnealingLR", "OneCycleLR", "ReduceLROnPlateau"])

    
    # Realistic tranformation for better generalization
    train_transform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.05),  # Mild color variations
        transforms.ToTensor(),
        transforms.Normalize(**cifar_10_mean_std),
    ])
        
    train_loader, valid_loader = get_cifar10_dataloaders(
        train_transform,
        subset_percent=1, 
        valid_size=0.1,
        batch_size=batch_size,
        num_workers=8,
        use_kaggle=True
    )

    scheduler_params = {}
    if scheduler_type == "CosineAnnealingLR":
        # num_epochs = trial.suggest_int("num_epochs", 100, 150)
        scheduler_params["T_max"] = num_epochs
        scheduler_params["eta_min"] = trial.suggest_float("eta_min", 1e-6, 1e-3, log=True)
        
    elif scheduler_type == "ReduceLROnPlateau":
        # num_epochs = trial.suggest_int("num_epochs", 75, 125)
        scheduler_params["factor"] = trial.suggest_float("factor", 0.1, 0.5)
        scheduler_params["patience"] = trial.suggest_int("patience", 5, 20)
        scheduler_params["threshold"] = trial.suggest_float("threshold", 0.01, 0.1)
        scheduler_params["mode"] = "min"
        
    elif scheduler_type == "OneCycleLR":
        # num_epochs = trial.suggest_int("num_epochs", 50, 75)
        if optimizer_type == "SGD":
            scheduler_params["max_lr"] = trial.suggest_float("max_lr", 0.01, 0.3)
        else: # AdamW
            scheduler_params["max_lr"] = trial.suggest_float("max_lr", 0.001, 0.01)
        scheduler_params["steps_per_epoch"] = len(train_loader)
        scheduler_params["epochs"] = num_epochs
        scheduler_params["anneal_strategy"] = "cos"

                
    # Select Model
    if model_type == "smallresnet":
        model = SmallResNet0()
    elif model_type == "efficientnet":
        model = EfficientNetB0()
    elif model_type == "largeresnet":
        model = LargeResNet0()
    else:
        model = BaseResNet()
        
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    
    trial_details = trial.params.copy()
    trial_details["trainable_parameters"] = num_params(model)
    
    # Print trial details
    print("-" * 50)
    print(f"{trial.number=}")
    for param, val in trial_details.items():
        print(f"{param}: {val}")
    print("- " * 25)
    update_study_details(study_dir, trial.number, trial_details)

    optimizer = optimizer_map[optimizer_type](model.parameters(), **optimizer_params)
    scheduler = scheduler_map[scheduler_type](optimizer, **scheduler_params) if scheduler_type else None
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

    # Checkpoint the model with the best validation accuracy
    chkpt_dir = os.path.join(study_dir, "checkpoint")
    plot_dir = os.path.join(study_dir, "plots")
    os.makedirs(chkpt_dir, exist_ok=True)
    os.makedirs(plot_dir, exist_ok=True)

    # Training
    best_val_accuracy = train_model(
        model, train_loader, criterion, optimizer, valid_loader=valid_loader, num_epochs=num_epochs, 
        device=device, scheduler=scheduler, trial=trial, chkpt_dir=chkpt_dir, plot_dir=plot_dir
    )
    
    trial_details["best_val_accuracy"] = best_val_accuracy
    update_study_details(study_dir, trial.number, trial_details)
    return best_val_accuracy

### Start new study

In [None]:
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
study_name = f"study_{timestamp}"

study = optuna.create_study(
    study_name=study_name,
    storage="sqlite:///study.db",
    direction="maximize",
    load_if_exists=True,
)

study.optimize(objective, n_trials=100)

print("Best trial:", study.best_trial.number)
print("Best hyperparameters:", study.best_params)
print("Best validation accuracy:", study.best_value)

[I 2025-03-12 02:29:53,436] A new study created in RDB with name: study_2025-03-12_02-29-51


--------------------------------------------------
trial.number=0
num_epochs: 113
model_type: smallresnet
batch_size: 512
optimizer_type: AdamW
scheduler_type: CosineAnnealingLR
beta1: 0.9202155036052446
beta2: 0.9977411320853791
lr: 3.494686994613835e-05
weight_decay: 0.01796373082906369
eta_min: 2.4823405413306104e-06
trainable_parameters: 2998402
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/113], Batch [80/88], Train Acc: 16.5234 Loss: 2.1038
  Validation Accuracy after Epoch 1: 24.0200
  Cidar10.1 Accuracy: 22.45
  Epoch [2/113], Batch [80/88], Train Acc: 21.4917 Loss: 2.0582
  Validation Accuracy after Epoch 2: 24.5000
  Cidar10.1 Accuracy: 23.35
  Epoch [3/113], Batch [80/88], Train Acc: 27.2290 Loss: 1.9905
  Validation Accuracy after Epoch 3: 32.2800
  Cidar10.1 Accuracy: 27.3
  Epoch [4/113], Batch [80/88], Train Acc: 32.4854 Loss: 1.9102
  Validation Accuracy after Epoch 4: 35.9600
  Cidar10.1 Accuracy: 29.4
  Epoch [5/113], Batch [80/88], Train Acc: 33.6157 

[I 2025-03-12 02:48:33,594] Trial 0 finished with value: 77.5 and parameters: {'num_epochs': 113, 'model_type': 'smallresnet', 'batch_size': 512, 'optimizer_type': 'AdamW', 'scheduler_type': 'CosineAnnealingLR', 'beta1': 0.9202155036052446, 'beta2': 0.9977411320853791, 'lr': 3.494686994613835e-05, 'weight_decay': 0.01796373082906369, 'eta_min': 2.4823405413306104e-06}. Best is trial 0 with value: 77.5.


--------------------------------------------------
trial.number=1
num_epochs: 99
model_type: efficientnet
batch_size: 128
optimizer_type: Adam
scheduler_type: ReduceLROnPlateau
beta1: 0.879263309837687
beta2: 0.9902483573125087
lr: 1.0489767423709725e-05
weight_decay: 6.32754805913792e-06
factor: 0.11188403904979145
patience: 6
threshold: 0.092594865117007
trainable_parameters: 3599686
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/99], Batch [350/352], Train Acc: 13.0625 Loss: 2.3211
  Validation Accuracy after Epoch 1: 14.4400
  Cidar10.1 Accuracy: 13.95
  Epoch [2/99], Batch [350/352], Train Acc: 14.7612 Loss: 2.2246
  Validation Accuracy after Epoch 2: 15.4200
  Cidar10.1 Accuracy: 15.3
  Epoch [3/99], Batch [350/352], Train Acc: 16.6272 Loss: 2.2201
  Validation Accuracy after Epoch 3: 19.8200
  Cidar10.1 Accuracy: 17.75
  Epoch [4/99], Batch [350/352], Train Acc: 19.0781 Loss: 2.1436
  Validation Accuracy after Epoch 4: 20.7400
  Cidar10.1 Accuracy: 18.95
  Epoch [

[I 2025-03-12 03:16:02,770] Trial 1 finished with value: 37.0 and parameters: {'num_epochs': 99, 'model_type': 'efficientnet', 'batch_size': 128, 'optimizer_type': 'Adam', 'scheduler_type': 'ReduceLROnPlateau', 'beta1': 0.879263309837687, 'beta2': 0.9902483573125087, 'lr': 1.0489767423709725e-05, 'weight_decay': 6.32754805913792e-06, 'factor': 0.11188403904979145, 'patience': 6, 'threshold': 0.092594865117007}. Best is trial 0 with value: 77.5.


--------------------------------------------------
trial.number=2
num_epochs: 78
model_type: base
batch_size: 256
optimizer_type: Adam
scheduler_type: ReduceLROnPlateau
beta1: 0.8554047923562503
beta2: 0.9900537807605059
lr: 0.0008874696628000282
weight_decay: 1.6390033154606387e-05
factor: 0.10502947244187953
patience: 20
threshold: 0.021875882097500743
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/78], Batch [170/176], Train Acc: 41.3649 Loss: 1.5172
  Validation Accuracy after Epoch 1: 51.7600
  Cidar10.1 Accuracy: 41.5
  Epoch [2/78], Batch [170/176], Train Acc: 60.7973 Loss: 1.3195
  Validation Accuracy after Epoch 2: 64.1800
  Cidar10.1 Accuracy: 51.2
  Epoch [3/78], Batch [170/176], Train Acc: 68.4903 Loss: 1.1378
  Validation Accuracy after Epoch 3: 71.2400
  Cidar10.1 Accuracy: 59.3
  Epoch [4/78], Batch [170/176], Train Acc: 73.5547 Loss: 1.1219
  Validation Accuracy after Epoch 4: 72.0200
  Cidar10.1 Accuracy: 61.2
  Epoch [5/78]

[I 2025-03-12 03:28:18,313] Trial 2 finished with value: 92.52 and parameters: {'num_epochs': 78, 'model_type': 'base', 'batch_size': 256, 'optimizer_type': 'Adam', 'scheduler_type': 'ReduceLROnPlateau', 'beta1': 0.8554047923562503, 'beta2': 0.9900537807605059, 'lr': 0.0008874696628000282, 'weight_decay': 1.6390033154606387e-05, 'factor': 0.10502947244187953, 'patience': 20, 'threshold': 0.021875882097500743}. Best is trial 2 with value: 92.52.


--------------------------------------------------
trial.number=3
num_epochs: 87
model_type: smallresnet
batch_size: 512
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.9467467502610648
beta2: 0.9986599029303749
lr: 0.0005757323449046855
weight_decay: 0.008336338695006893
max_lr: 0.0019789098923762923
trainable_parameters: 2998402
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/87], Batch [80/88], Train Acc: 22.1118 Loss: 1.9481
  Validation Accuracy after Epoch 1: 33.1800
  Cidar10.1 Accuracy: 28.45
  Epoch [2/87], Batch [80/88], Train Acc: 35.7568 Loss: 1.7400
  Validation Accuracy after Epoch 2: 41.7000
  Cidar10.1 Accuracy: 33.45
  Epoch [3/87], Batch [80/88], Train Acc: 42.9541 Loss: 1.7105
  Validation Accuracy after Epoch 3: 45.1200
  Cidar10.1 Accuracy: 37.65
  Epoch [4/87], Batch [80/88], Train Acc: 49.0894 Loss: 1.5621
  Validation Accuracy after Epoch 4: 50.6400
  Cidar10.1 Accuracy: 41.35
  Epoch [5/87], Batch [80/88], Train Acc: 53.8623 Loss: 1.4621

[I 2025-03-12 03:42:40,992] Trial 3 finished with value: 91.84 and parameters: {'num_epochs': 87, 'model_type': 'smallresnet', 'batch_size': 512, 'optimizer_type': 'AdamW', 'scheduler_type': 'OneCycleLR', 'beta1': 0.9467467502610648, 'beta2': 0.9986599029303749, 'lr': 0.0005757323449046855, 'weight_decay': 0.008336338695006893, 'max_lr': 0.0019789098923762923}. Best is trial 2 with value: 92.52.


--------------------------------------------------
trial.number=4
num_epochs: 121
model_type: efficientnet
batch_size: 64
optimizer_type: SGD
scheduler_type: CosineAnnealingLR
lr: 0.08528869231926088
momentum: 0.8889096409901536
weight_decay: 0.00016615937852276983
eta_min: 0.00019452924876717267
trainable_parameters: 3599686
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/121], Batch [700/704], Train Acc: 24.6094 Loss: 1.8851
  Validation Accuracy after Epoch 1: 26.7800
  Cidar10.1 Accuracy: 22.35
  Epoch [2/121], Batch [700/704], Train Acc: 38.3326 Loss: 1.6546
  Validation Accuracy after Epoch 2: 41.8400
  Cidar10.1 Accuracy: 32.1
  Epoch [3/121], Batch [700/704], Train Acc: 48.1161 Loss: 1.5427
  Validation Accuracy after Epoch 3: 48.0200
  Cidar10.1 Accuracy: 38.95
  Epoch [4/121], Batch [700/704], Train Acc: 54.5647 Loss: 1.2488
  Validation Accuracy after Epoch 4: 60.0800
  Cidar10.1 Accuracy: 49.3
  Epoch [5/121], Batch [700/704], Train Acc: 59.8862 Loss: 1.4566
 

[I 2025-03-12 04:38:19,025] Trial 4 finished with value: 88.16 and parameters: {'num_epochs': 121, 'model_type': 'efficientnet', 'batch_size': 64, 'optimizer_type': 'SGD', 'scheduler_type': 'CosineAnnealingLR', 'lr': 0.08528869231926088, 'momentum': 0.8889096409901536, 'weight_decay': 0.00016615937852276983, 'eta_min': 0.00019452924876717267}. Best is trial 2 with value: 92.52.


--------------------------------------------------
trial.number=5
num_epochs: 108
model_type: base
batch_size: 512
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8971607284559575
beta2: 0.9963589435517188
lr: 0.0004240424765580441
weight_decay: 0.001461400798408547
max_lr: 0.00941063463217609
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/108], Batch [80/88], Train Acc: 36.2036 Loss: 1.6910
  Validation Accuracy after Epoch 1: 45.6200
  Cidar10.1 Accuracy: 38.45
  Epoch [2/108], Batch [80/88], Train Acc: 52.0239 Loss: 1.3593
  Validation Accuracy after Epoch 2: 54.1800
  Cidar10.1 Accuracy: 42.95
  Epoch [3/108], Batch [80/88], Train Acc: 60.8398 Loss: 1.3085
  Validation Accuracy after Epoch 3: 61.8200
  Cidar10.1 Accuracy: 49.65
  Epoch [4/108], Batch [80/88], Train Acc: 66.4868 Loss: 1.3033
  Validation Accuracy after Epoch 4: 69.7400
  Cidar10.1 Accuracy: 58.4
  Epoch [5/108], Batch [80/88], Train Acc: 69.9438 Loss: 1.1213
  V

[I 2025-03-12 04:55:42,942] Trial 5 finished with value: 93.28 and parameters: {'num_epochs': 108, 'model_type': 'base', 'batch_size': 512, 'optimizer_type': 'AdamW', 'scheduler_type': 'OneCycleLR', 'beta1': 0.8971607284559575, 'beta2': 0.9963589435517188, 'lr': 0.0004240424765580441, 'weight_decay': 0.001461400798408547, 'max_lr': 0.00941063463217609}. Best is trial 5 with value: 93.28.


--------------------------------------------------
trial.number=6
num_epochs: 81
model_type: largeresnet
batch_size: 256
optimizer_type: Adam
scheduler_type: CosineAnnealingLR
beta1: 0.863700704711385
beta2: 0.9948206675016444
lr: 2.2989649318469157e-05
weight_decay: 3.9733422288800845e-05
eta_min: 0.00010769406377503403
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/81], Batch [170/176], Train Acc: 37.4494 Loss: 1.6510
  Validation Accuracy after Epoch 1: 48.2800
  Cidar10.1 Accuracy: 37.7
  Epoch [2/81], Batch [170/176], Train Acc: 51.3419 Loss: 1.4049
  Validation Accuracy after Epoch 2: 55.8200
  Cidar10.1 Accuracy: 44.25
  Epoch [3/81], Batch [170/176], Train Acc: 58.0905 Loss: 1.3460
  Validation Accuracy after Epoch 3: 60.2200
  Cidar10.1 Accuracy: 47.85
  Epoch [4/81], Batch [170/176], Train Acc: 62.2013 Loss: 1.2643
  Validation Accuracy after Epoch 4: 61.8000
  Cidar10.1 Accuracy: 51.5
  Epoch [5/81], Batch [170/176], Train Acc: 65

[I 2025-03-12 05:04:58,595] Trial 6 pruned. 


--------------------------------------------------
trial.number=7
num_epochs: 89
model_type: smallresnet
batch_size: 256
optimizer_type: AdamW
scheduler_type: ReduceLROnPlateau
beta1: 0.8859498453506164
beta2: 0.9967813605196271
lr: 1.0059573326543053e-05
weight_decay: 0.04875703995157717
factor: 0.12647946217796507
patience: 19
threshold: 0.06502352547749993
trainable_parameters: 2998402
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/89], Batch [170/176], Train Acc: 13.9614 Loss: 2.2219
  Validation Accuracy after Epoch 1: 19.6000
  Trial pruned due to no improvement.


[I 2025-03-12 05:05:10,136] Trial 7 pruned. 


--------------------------------------------------
trial.number=8
num_epochs: 92
model_type: base
batch_size: 128
optimizer_type: Adam
scheduler_type: OneCycleLR
beta1: 0.8597063166954072
beta2: 0.9963855278011335
lr: 0.008386110732810157
weight_decay: 0.0006982151077958207
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/92], Batch [350/352], Train Acc: 30.1473 Loss: 1.8962
  Validation Accuracy after Epoch 1: 32.2600
  Cidar10.1 Accuracy: 29.9
  Epoch [2/92], Batch [350/352], Train Acc: 44.7366 Loss: 1.5528
  Validation Accuracy after Epoch 2: 49.9600
  Cidar10.1 Accuracy: 41.45
  Epoch [3/92], Batch [350/352], Train Acc: 51.6808 Loss: 1.4989
  Validation Accuracy after Epoch 3: 53.7200
  Cidar10.1 Accuracy: 42.15
  Epoch [4/92], Batch [350/352], Train Acc: 54.7879 Loss: 1.4957
  Validation Accuracy after Epoch 4: 56.0000
  Cidar10.1 Accuracy: 44.0
  Epoch [5/92], Batch [350/352], Train Acc: 56.1384 Loss: 1.4127
  Validation Accuracy after E

[I 2025-03-12 05:05:59,909] Trial 8 pruned. 


--------------------------------------------------
trial.number=9
num_epochs: 114
model_type: efficientnet
batch_size: 256
optimizer_type: SGD
scheduler_type: ReduceLROnPlateau
lr: 0.0052294740757562914
momentum: 0.80681824073355
weight_decay: 0.00011864881536734163
factor: 0.1520927056129971
patience: 18
threshold: 0.07781767567531292
trainable_parameters: 3599686
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/114], Batch [170/176], Train Acc: 19.3796 Loss: 2.0004
  Validation Accuracy after Epoch 1: 28.4200
  Trial pruned due to no improvement.


[I 2025-03-12 05:06:12,235] Trial 9 pruned. 


--------------------------------------------------
trial.number=10
num_epochs: 57
model_type: base
batch_size: 512
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.9128690632714375
beta2: 0.9930747172712939
lr: 0.00014377927960525607
weight_decay: 0.0045845774091663625
max_lr: 0.009086002505118474
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/57], Batch [80/88], Train Acc: 36.2842 Loss: 1.7012
  Validation Accuracy after Epoch 1: 45.1200
  Cidar10.1 Accuracy: 38.35
  Epoch [2/57], Batch [80/88], Train Acc: 51.9385 Loss: 1.4978
  Validation Accuracy after Epoch 2: 50.7800
  Cidar10.1 Accuracy: 46.05
  Epoch [3/57], Batch [80/88], Train Acc: 61.5894 Loss: 1.3164
  Validation Accuracy after Epoch 3: 63.5200
  Cidar10.1 Accuracy: 52.1
  Epoch [4/57], Batch [80/88], Train Acc: 66.5186 Loss: 1.2203
  Validation Accuracy after Epoch 4: 67.5800
  Cidar10.1 Accuracy: 55.55
  Epoch [5/57], Batch [80/88], Train Acc: 70.8447 Loss: 1.0925
  Val

[I 2025-03-12 05:15:23,487] Trial 10 finished with value: 93.24 and parameters: {'num_epochs': 57, 'model_type': 'base', 'batch_size': 512, 'optimizer_type': 'AdamW', 'scheduler_type': 'OneCycleLR', 'beta1': 0.9128690632714375, 'beta2': 0.9930747172712939, 'lr': 0.00014377927960525607, 'weight_decay': 0.0045845774091663625, 'max_lr': 0.009086002505118474}. Best is trial 5 with value: 93.28.


--------------------------------------------------
trial.number=11
num_epochs: 51
model_type: base
batch_size: 512
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.912756498839181
beta2: 0.9928518894897843
lr: 0.00012490446662229772
weight_decay: 0.004260164450297314
max_lr: 0.009923399724315335
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/51], Batch [80/88], Train Acc: 36.4868 Loss: 1.6885
  Validation Accuracy after Epoch 1: 42.2000
  Cidar10.1 Accuracy: 35.65
  Epoch [2/51], Batch [80/88], Train Acc: 52.7905 Loss: 1.4218
  Validation Accuracy after Epoch 2: 55.7600
  Cidar10.1 Accuracy: 43.35
  Epoch [3/51], Batch [80/88], Train Acc: 61.3965 Loss: 1.2951
  Validation Accuracy after Epoch 3: 64.1800
  Cidar10.1 Accuracy: 52.3
  Epoch [4/51], Batch [80/88], Train Acc: 66.8823 Loss: 1.2550
  Validation Accuracy after Epoch 4: 61.7400
  Cidar10.1 Accuracy: 51.35
  Epoch [5/51], Batch [80/88], Train Acc: 70.5615 Loss: 1.1575
  Valid

[I 2025-03-12 05:23:36,742] Trial 11 finished with value: 92.64 and parameters: {'num_epochs': 51, 'model_type': 'base', 'batch_size': 512, 'optimizer_type': 'AdamW', 'scheduler_type': 'OneCycleLR', 'beta1': 0.912756498839181, 'beta2': 0.9928518894897843, 'lr': 0.00012490446662229772, 'weight_decay': 0.004260164450297314, 'max_lr': 0.009923399724315335}. Best is trial 5 with value: 93.28.


--------------------------------------------------
trial.number=12
num_epochs: 59
model_type: base
batch_size: 512
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.9030418653359881
beta2: 0.9931820503572736
lr: 0.00014374931533466727
weight_decay: 0.0033257058352376477
max_lr: 0.00962143667834775
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/59], Batch [80/88], Train Acc: 36.3232 Loss: 1.6782
  Validation Accuracy after Epoch 1: 44.0600
  Cidar10.1 Accuracy: 36.5
  Epoch [2/59], Batch [80/88], Train Acc: 52.3633 Loss: 1.4337
  Validation Accuracy after Epoch 2: 57.7200
  Cidar10.1 Accuracy: 46.7
  Epoch [3/59], Batch [80/88], Train Acc: 61.3428 Loss: 1.3655
  Validation Accuracy after Epoch 3: 51.0000
  Cidar10.1 Accuracy: 43.4
  Epoch [4/59], Batch [80/88], Train Acc: 66.4478 Loss: 1.1959
  Validation Accuracy after Epoch 4: 60.0200
  Trial pruned due to no improvement.


[I 2025-03-12 05:24:17,062] Trial 12 pruned. 


--------------------------------------------------
trial.number=13
num_epochs: 69
model_type: base
batch_size: 512
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.9329524789313232
beta2: 0.9946991943427499
lr: 0.0003926871765026719
weight_decay: 0.0034737743539454947
max_lr: 0.007444671068341157
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/69], Batch [80/88], Train Acc: 35.8521 Loss: 1.7002
  Validation Accuracy after Epoch 1: 46.7400
  Cidar10.1 Accuracy: 37.6
  Epoch [2/69], Batch [80/88], Train Acc: 50.8984 Loss: 1.4651
  Validation Accuracy after Epoch 2: 51.7800
  Cidar10.1 Accuracy: 40.9
  Epoch [3/69], Batch [80/88], Train Acc: 60.2246 Loss: 1.3522
  Validation Accuracy after Epoch 3: 61.0200
  Cidar10.1 Accuracy: 51.15
  Epoch [4/69], Batch [80/88], Train Acc: 65.5469 Loss: 1.2422
  Validation Accuracy after Epoch 4: 67.4200
  Cidar10.1 Accuracy: 53.7
  Epoch [5/69], Batch [80/88], Train Acc: 68.8452 Loss: 1.1366
  Valida

[I 2025-03-12 05:35:23,347] Trial 13 finished with value: 93.18 and parameters: {'num_epochs': 69, 'model_type': 'base', 'batch_size': 512, 'optimizer_type': 'AdamW', 'scheduler_type': 'OneCycleLR', 'beta1': 0.9329524789313232, 'beta2': 0.9946991943427499, 'lr': 0.0003926871765026719, 'weight_decay': 0.0034737743539454947, 'max_lr': 0.007444671068341157}. Best is trial 5 with value: 93.28.


--------------------------------------------------
trial.number=14
num_epochs: 104
model_type: largeresnet
batch_size: 64
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8954429170116952
beta2: 0.9925876389184227
lr: 7.144800659348217e-05
weight_decay: 0.006984521915656169
max_lr: 0.006740961536442372
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/104], Batch [700/704], Train Acc: 47.6295 Loss: 1.3209
  Validation Accuracy after Epoch 1: 57.7000
  Cidar10.1 Accuracy: 44.55
  Epoch [2/104], Batch [700/704], Train Acc: 65.3304 Loss: 1.2309
  Validation Accuracy after Epoch 2: 68.0000
  Cidar10.1 Accuracy: 58.25
  Epoch [3/104], Batch [700/704], Train Acc: 72.3438 Loss: 1.2083
  Validation Accuracy after Epoch 3: 70.9800
  Cidar10.1 Accuracy: 59.95
  Epoch [4/104], Batch [700/704], Train Acc: 75.5603 Loss: 1.0982
  Validation Accuracy after Epoch 4: 75.5800
  Cidar10.1 Accuracy: 63.25
  Epoch [5/104], Batch [700/704], Train Acc: 77.69

[I 2025-03-12 05:43:48,472] Trial 14 pruned. 


--------------------------------------------------
trial.number=15
num_epochs: 70
model_type: base
batch_size: 512
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.9169275626977532
beta2: 0.995929131948824
lr: 0.00027823650102328873
weight_decay: 0.0022394883480038293
max_lr: 0.008180380869424356
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/70], Batch [80/88], Train Acc: 36.0913 Loss: 1.6928
  Validation Accuracy after Epoch 1: 45.4800
  Cidar10.1 Accuracy: 37.4
  Epoch [2/70], Batch [80/88], Train Acc: 51.8945 Loss: 1.4526
  Validation Accuracy after Epoch 2: 53.2800
  Cidar10.1 Accuracy: 42.0
  Epoch [3/70], Batch [80/88], Train Acc: 60.0684 Loss: 1.3156
  Validation Accuracy after Epoch 3: 63.7200
  Cidar10.1 Accuracy: 51.0
  Epoch [4/70], Batch [80/88], Train Acc: 65.2979 Loss: 1.2572
  Validation Accuracy after Epoch 4: 63.3400
  Cidar10.1 Accuracy: 52.55
  Epoch [5/70], Batch [80/88], Train Acc: 69.4507 Loss: 1.1640
  Valida

[I 2025-03-12 05:44:38,253] Trial 15 pruned. 


--------------------------------------------------
trial.number=16
num_epochs: 105
model_type: base
batch_size: 512
optimizer_type: SGD
scheduler_type: OneCycleLR
lr: 0.0024165960179776754
momentum: 0.9876208747698934
weight_decay: 0.00025215382997647565
max_lr: 0.15886158536574177
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/105], Batch [80/88], Train Acc: 29.4849 Loss: 1.7922
  Validation Accuracy after Epoch 1: 41.0800
  Trial pruned due to no improvement.


[I 2025-03-12 05:44:49,390] Trial 16 pruned. 


--------------------------------------------------
trial.number=17
num_epochs: 125
model_type: base
batch_size: 512
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8776815504096155
beta2: 0.9918481155052591
lr: 6.989331356093778e-05
weight_decay: 0.01892142649855459
max_lr: 0.004318334586906785
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/125], Batch [80/88], Train Acc: 33.9014 Loss: 1.7378
  Validation Accuracy after Epoch 1: 43.0400
  Cidar10.1 Accuracy: 36.1
  Epoch [2/125], Batch [80/88], Train Acc: 46.9360 Loss: 1.5142
  Validation Accuracy after Epoch 2: 52.4800
  Cidar10.1 Accuracy: 44.4
  Epoch [3/125], Batch [80/88], Train Acc: 55.5688 Loss: 1.3599
  Validation Accuracy after Epoch 3: 60.6000
  Trial pruned due to no improvement.


[I 2025-03-12 05:45:19,876] Trial 17 pruned. 


--------------------------------------------------
trial.number=18
num_epochs: 62
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.9034976339196583
beta2: 0.9939339592034316
lr: 0.0007950305001671975
weight_decay: 0.0020546792129433615
max_lr: 0.008925590026308522
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/62], Batch [350/352], Train Acc: 45.7009 Loss: 1.5209
  Validation Accuracy after Epoch 1: 54.7000
  Cidar10.1 Accuracy: 43.2
  Epoch [2/62], Batch [350/352], Train Acc: 63.4442 Loss: 1.3998
  Validation Accuracy after Epoch 2: 59.4400
  Cidar10.1 Accuracy: 45.0
  Epoch [3/62], Batch [350/352], Train Acc: 69.7857 Loss: 1.1611
  Validation Accuracy after Epoch 3: 71.0600
  Cidar10.1 Accuracy: 58.4
  Epoch [4/62], Batch [350/352], Train Acc: 73.8616 Loss: 1.2149
  Validation Accuracy after Epoch 4: 74.1800
  Cidar10.1 Accuracy: 62.5
  Epoch [5/62], Batch [350/352], Train Acc: 76.5871 Loss:

[I 2025-03-12 05:55:17,999] Trial 18 finished with value: 93.66 and parameters: {'num_epochs': 62, 'model_type': 'largeresnet', 'batch_size': 128, 'optimizer_type': 'AdamW', 'scheduler_type': 'OneCycleLR', 'beta1': 0.9034976339196583, 'beta2': 0.9939339592034316, 'lr': 0.0007950305001671975, 'weight_decay': 0.0020546792129433615, 'max_lr': 0.008925590026308522}. Best is trial 18 with value: 93.66.


--------------------------------------------------
trial.number=19
num_epochs: 67
model_type: largeresnet
batch_size: 128
optimizer_type: SGD
scheduler_type: CosineAnnealingLR
lr: 0.044514508068941326
momentum: 0.9856837213372819
weight_decay: 0.00028048928502502624
eta_min: 1.3617921042441039e-06
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/67], Batch [350/352], Train Acc: 39.7277 Loss: 1.6286
  Validation Accuracy after Epoch 1: 48.0800
  Cidar10.1 Accuracy: 38.9
  Epoch [2/67], Batch [350/352], Train Acc: 53.8170 Loss: 1.3418
  Validation Accuracy after Epoch 2: 57.5600
  Cidar10.1 Accuracy: 44.75
  Epoch [3/67], Batch [350/352], Train Acc: 62.6272 Loss: 1.3476
  Validation Accuracy after Epoch 3: 60.0400
  Trial pruned due to no improvement.


[I 2025-03-12 05:55:47,885] Trial 19 pruned. 


--------------------------------------------------
trial.number=20
num_epochs: 79
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8966741749809605
beta2: 0.9953296889087532
lr: 0.0009949003229868505
weight_decay: 0.002220705473846143
max_lr: 0.005605644909963183
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/79], Batch [350/352], Train Acc: 45.2031 Loss: 1.5746
  Validation Accuracy after Epoch 1: 53.5200
  Cidar10.1 Accuracy: 44.55
  Epoch [2/79], Batch [350/352], Train Acc: 62.1406 Loss: 1.2866
  Validation Accuracy after Epoch 2: 65.4200
  Cidar10.1 Accuracy: 53.9
  Epoch [3/79], Batch [350/352], Train Acc: 69.5134 Loss: 1.1916
  Validation Accuracy after Epoch 3: 67.0800
  Cidar10.1 Accuracy: 54.7
  Epoch [4/79], Batch [350/352], Train Acc: 74.0692 Loss: 0.9409
  Validation Accuracy after Epoch 4: 70.8600
  Cidar10.1 Accuracy: 55.4
  Epoch [5/79], Batch [350/352], Train Acc: 76.2545 Loss:

[I 2025-03-12 06:08:29,591] Trial 20 finished with value: 93.92 and parameters: {'num_epochs': 79, 'model_type': 'largeresnet', 'batch_size': 128, 'optimizer_type': 'AdamW', 'scheduler_type': 'OneCycleLR', 'beta1': 0.8966741749809605, 'beta2': 0.9953296889087532, 'lr': 0.0009949003229868505, 'weight_decay': 0.002220705473846143, 'max_lr': 0.005605644909963183}. Best is trial 20 with value: 93.92.


--------------------------------------------------
trial.number=21
num_epochs: 76
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8989450645955427
beta2: 0.9958117445521014
lr: 0.0010497858203851631
weight_decay: 0.0021788986817215493
max_lr: 0.005119536702611704
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/76], Batch [350/352], Train Acc: 45.1473 Loss: 1.5743
  Validation Accuracy after Epoch 1: 51.9600
  Cidar10.1 Accuracy: 43.2
  Epoch [2/76], Batch [350/352], Train Acc: 61.7522 Loss: 1.3614
  Validation Accuracy after Epoch 2: 64.5600
  Cidar10.1 Accuracy: 51.2
  Epoch [3/76], Batch [350/352], Train Acc: 69.1027 Loss: 1.1643
  Validation Accuracy after Epoch 3: 61.0400
  Cidar10.1 Accuracy: 46.05
  Epoch [4/76], Batch [350/352], Train Acc: 73.3103 Loss: 1.1182
  Validation Accuracy after Epoch 4: 71.6800
  Cidar10.1 Accuracy: 58.6
  Epoch [5/76], Batch [350/352], Train Acc: 75.6451 Loss

[I 2025-03-12 06:20:40,150] Trial 21 finished with value: 93.64 and parameters: {'num_epochs': 76, 'model_type': 'largeresnet', 'batch_size': 128, 'optimizer_type': 'AdamW', 'scheduler_type': 'OneCycleLR', 'beta1': 0.8989450645955427, 'beta2': 0.9958117445521014, 'lr': 0.0010497858203851631, 'weight_decay': 0.0021788986817215493, 'max_lr': 0.005119536702611704}. Best is trial 20 with value: 93.92.


--------------------------------------------------
trial.number=22
num_epochs: 76
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8892095972869166
beta2: 0.9952335347814336
lr: 0.0012216922003737806
weight_decay: 0.002300171785168988
max_lr: 0.004790854126159991
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/76], Batch [350/352], Train Acc: 44.9598 Loss: 1.5482
  Validation Accuracy after Epoch 1: 54.5600
  Cidar10.1 Accuracy: 44.25
  Epoch [2/76], Batch [350/352], Train Acc: 61.6786 Loss: 1.2348
  Validation Accuracy after Epoch 2: 63.5000
  Cidar10.1 Accuracy: 51.15
  Epoch [3/76], Batch [350/352], Train Acc: 68.9241 Loss: 1.2612
  Validation Accuracy after Epoch 3: 68.2600
  Cidar10.1 Accuracy: 56.15
  Epoch [4/76], Batch [350/352], Train Acc: 73.3058 Loss: 1.1454
  Validation Accuracy after Epoch 4: 68.9600
  Cidar10.1 Accuracy: 54.65
  Epoch [5/76], Batch [350/352], Train Acc: 75.8192 Lo

[I 2025-03-12 06:32:54,216] Trial 22 finished with value: 93.5 and parameters: {'num_epochs': 76, 'model_type': 'largeresnet', 'batch_size': 128, 'optimizer_type': 'AdamW', 'scheduler_type': 'OneCycleLR', 'beta1': 0.8892095972869166, 'beta2': 0.9952335347814336, 'lr': 0.0012216922003737806, 'weight_decay': 0.002300171785168988, 'max_lr': 0.004790854126159991}. Best is trial 20 with value: 93.92.


--------------------------------------------------
trial.number=23
num_epochs: 63
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.9066341681534535
beta2: 0.9945001521636125
lr: 0.0012798428035638134
weight_decay: 0.0997729796430048
max_lr: 0.0033205714453662747
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/63], Batch [350/352], Train Acc: 43.8281 Loss: 1.6026
  Validation Accuracy after Epoch 1: 52.2600
  Cidar10.1 Accuracy: 44.2
  Epoch [2/63], Batch [350/352], Train Acc: 59.7076 Loss: 1.3059
  Validation Accuracy after Epoch 2: 61.9600
  Cidar10.1 Accuracy: 49.8
  Epoch [3/63], Batch [350/352], Train Acc: 66.3884 Loss: 1.3199
  Validation Accuracy after Epoch 3: 65.2200
  Cidar10.1 Accuracy: 53.1
  Epoch [4/63], Batch [350/352], Train Acc: 71.2656 Loss: 1.1055
  Validation Accuracy after Epoch 4: 70.0600
  Cidar10.1 Accuracy: 55.5
  Epoch [5/63], Batch [350/352], Train Acc: 74.0692 Loss: 1

[I 2025-03-12 06:35:01,339] Trial 23 pruned. 


--------------------------------------------------
trial.number=24
num_epochs: 74
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.9237793377344584
beta2: 0.9936891185794973
lr: 0.0006419363282453342
weight_decay: 0.002131817133368106
max_lr: 0.005485628581173591
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/74], Batch [350/352], Train Acc: 45.0446 Loss: 1.5614
  Validation Accuracy after Epoch 1: 53.8400
  Cidar10.1 Accuracy: 43.7
  Epoch [2/74], Batch [350/352], Train Acc: 61.8237 Loss: 1.3453
  Validation Accuracy after Epoch 2: 63.2400
  Cidar10.1 Accuracy: 51.8
  Epoch [3/74], Batch [350/352], Train Acc: 69.4308 Loss: 1.0852
  Validation Accuracy after Epoch 3: 68.4400
  Cidar10.1 Accuracy: 53.8
  Epoch [4/74], Batch [350/352], Train Acc: 73.6317 Loss: 1.1833
  Validation Accuracy after Epoch 4: 67.6400
  Cidar10.1 Accuracy: 55.85
  Epoch [5/74], Batch [350/352], Train Acc: 76.1384 Loss:

[I 2025-03-12 06:46:53,918] Trial 24 finished with value: 93.76 and parameters: {'num_epochs': 74, 'model_type': 'largeresnet', 'batch_size': 128, 'optimizer_type': 'AdamW', 'scheduler_type': 'OneCycleLR', 'beta1': 0.9237793377344584, 'beta2': 0.9936891185794973, 'lr': 0.0006419363282453342, 'weight_decay': 0.002131817133368106, 'max_lr': 0.005485628581173591}. Best is trial 20 with value: 93.92.


--------------------------------------------------
trial.number=25
num_epochs: 84
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.9428400373757252
beta2: 0.9937819065658108
lr: 0.002056578257710991
weight_decay: 0.013935024052936505
max_lr: 0.006501391629359601
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/84], Batch [350/352], Train Acc: 45.3571 Loss: 1.5528
  Validation Accuracy after Epoch 1: 53.6200
  Cidar10.1 Accuracy: 44.7
  Epoch [2/84], Batch [350/352], Train Acc: 62.7031 Loss: 1.2012
  Validation Accuracy after Epoch 2: 65.3400
  Cidar10.1 Accuracy: 52.5
  Epoch [3/84], Batch [350/352], Train Acc: 70.4732 Loss: 1.0393
  Validation Accuracy after Epoch 3: 71.6800
  Cidar10.1 Accuracy: 57.55
  Epoch [4/84], Batch [350/352], Train Acc: 74.1228 Loss: 0.9924
  Validation Accuracy after Epoch 4: 68.2000
  Cidar10.1 Accuracy: 58.65
  Epoch [5/84], Batch [350/352], Train Acc: 76.9888 Loss:

[I 2025-03-12 06:50:55,214] Trial 25 pruned. 


--------------------------------------------------
trial.number=26
num_epochs: 94
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.9237005066109784
beta2: 0.9916430427463933
lr: 0.0005442520140501909
weight_decay: 0.004998604466921799
max_lr: 0.005958193621535537
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/94], Batch [350/352], Train Acc: 45.0156 Loss: 1.5414
  Validation Accuracy after Epoch 1: 55.3200
  Cidar10.1 Accuracy: 45.25
  Epoch [2/94], Batch [350/352], Train Acc: 62.5893 Loss: 1.3104
  Validation Accuracy after Epoch 2: 65.1400
  Cidar10.1 Accuracy: 54.2
  Epoch [3/94], Batch [350/352], Train Acc: 69.6451 Loss: 1.2034
  Validation Accuracy after Epoch 3: 72.7200
  Cidar10.1 Accuracy: 60.1
  Epoch [4/94], Batch [350/352], Train Acc: 74.0089 Loss: 1.1189
  Validation Accuracy after Epoch 4: 72.0000
  Cidar10.1 Accuracy: 59.15
  Epoch [5/94], Batch [350/352], Train Acc: 76.5737 Loss

[I 2025-03-12 06:55:25,446] Trial 26 pruned. 


--------------------------------------------------
trial.number=27
num_epochs: 51
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.9302203578127347
beta2: 0.9940021142253973
lr: 0.0007164495594447045
weight_decay: 0.006508624466658578
max_lr: 0.0036585600818756277
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/51], Batch [350/352], Train Acc: 44.0045 Loss: 1.5973
  Validation Accuracy after Epoch 1: 52.3000
  Cidar10.1 Accuracy: 43.3
  Epoch [2/51], Batch [350/352], Train Acc: 60.0982 Loss: 1.4598
  Validation Accuracy after Epoch 2: 60.1400
  Cidar10.1 Accuracy: 47.95
  Epoch [3/51], Batch [350/352], Train Acc: 67.4732 Loss: 1.1577
  Validation Accuracy after Epoch 3: 63.9600
  Cidar10.1 Accuracy: 51.9
  Epoch [4/51], Batch [350/352], Train Acc: 72.2455 Loss: 1.1026
  Validation Accuracy after Epoch 4: 72.4000
  Cidar10.1 Accuracy: 58.35
  Epoch [5/51], Batch [350/352], Train Acc: 75.1071 Los

[I 2025-03-12 07:03:38,311] Trial 27 finished with value: 93.46 and parameters: {'num_epochs': 51, 'model_type': 'largeresnet', 'batch_size': 128, 'optimizer_type': 'AdamW', 'scheduler_type': 'OneCycleLR', 'beta1': 0.9302203578127347, 'beta2': 0.9940021142253973, 'lr': 0.0007164495594447045, 'weight_decay': 0.006508624466658578, 'max_lr': 0.0036585600818756277}. Best is trial 20 with value: 93.92.


--------------------------------------------------
trial.number=28
num_epochs: 73
model_type: largeresnet
batch_size: 128
optimizer_type: SGD
scheduler_type: CosineAnnealingLR
lr: 0.00238369333690372
momentum: 0.8081171763424195
weight_decay: 0.0003351100948064262
eta_min: 2.2358247100084492e-05
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/73], Batch [350/352], Train Acc: 31.0871 Loss: 1.8207
  Validation Accuracy after Epoch 1: 38.4200
  Trial pruned due to no improvement.


[I 2025-03-12 07:03:49,277] Trial 28 pruned. 


--------------------------------------------------
trial.number=29
num_epochs: 62
model_type: smallresnet
batch_size: 64
optimizer_type: Adam
scheduler_type: ReduceLROnPlateau
beta1: 0.9340475997279718
beta2: 0.9937511673456696
lr: 0.0002704457631765823
weight_decay: 0.0005039225923947062
factor: 0.4808583638157734
patience: 5
threshold: 0.01247080576255135
trainable_parameters: 2998402
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/62], Batch [700/704], Train Acc: 38.0603 Loss: 1.5746
  Validation Accuracy after Epoch 1: 47.7200
  Cidar10.1 Accuracy: 39.2
  Epoch [2/62], Batch [700/704], Train Acc: 56.1830 Loss: 1.2488
  Validation Accuracy after Epoch 2: 58.2600
  Cidar10.1 Accuracy: 50.5
  Epoch [3/62], Batch [700/704], Train Acc: 65.6205 Loss: 1.2196
  Validation Accuracy after Epoch 3: 68.0400
  Cidar10.1 Accuracy: 55.25
  Epoch [4/62], Batch [700/704], Train Acc: 71.1897 Loss: 1.0720
  Validation Accuracy after Epoch 4: 72.7400
  Cidar10.1 Accuracy: 58.55
  Epoch [

[I 2025-03-12 07:08:56,614] Trial 29 pruned. 


--------------------------------------------------
trial.number=30
num_epochs: 65
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: CosineAnnealingLR
beta1: 0.9079294319315979
beta2: 0.9973713639817936
lr: 0.0017555866700141353
weight_decay: 0.012525383077033568
eta_min: 0.0009628102373520947
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/65], Batch [350/352], Train Acc: 42.1138 Loss: 1.5540
  Validation Accuracy after Epoch 1: 54.0400
  Cidar10.1 Accuracy: 43.1
  Epoch [2/65], Batch [350/352], Train Acc: 60.7946 Loss: 1.2801
  Validation Accuracy after Epoch 2: 63.9600
  Cidar10.1 Accuracy: 52.85
  Epoch [3/65], Batch [350/352], Train Acc: 68.3259 Loss: 1.1169
  Validation Accuracy after Epoch 3: 65.4200
  Cidar10.1 Accuracy: 53.4
  Epoch [4/65], Batch [350/352], Train Acc: 73.8304 Loss: 1.1034
  Validation Accuracy after Epoch 4: 71.6800
  Cidar10.1 Accuracy: 59.75
  Epoch [5/65], Batch [350/352], Train Acc: 77.

[I 2025-03-12 07:15:02,435] Trial 30 pruned. 


--------------------------------------------------
trial.number=31
num_epochs: 74
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8921002719215918
beta2: 0.9954717049767702
lr: 0.000988724043189535
weight_decay: 0.002128337349619262
max_lr: 0.005342362606133308
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/74], Batch [350/352], Train Acc: 45.1205 Loss: 1.5639
  Validation Accuracy after Epoch 1: 53.1200
  Cidar10.1 Accuracy: 44.9
  Epoch [2/74], Batch [350/352], Train Acc: 62.0112 Loss: 1.2907
  Validation Accuracy after Epoch 2: 64.2800
  Cidar10.1 Accuracy: 50.85
  Epoch [3/74], Batch [350/352], Train Acc: 69.3839 Loss: 1.1386
  Validation Accuracy after Epoch 3: 67.6400
  Cidar10.1 Accuracy: 57.0
  Epoch [4/74], Batch [350/352], Train Acc: 73.4353 Loss: 1.1005
  Validation Accuracy after Epoch 4: 72.0400
  Cidar10.1 Accuracy: 56.4
  Epoch [5/74], Batch [350/352], Train Acc: 76.0223 Loss: 

[I 2025-03-12 07:26:55,772] Trial 31 finished with value: 93.5 and parameters: {'num_epochs': 74, 'model_type': 'largeresnet', 'batch_size': 128, 'optimizer_type': 'AdamW', 'scheduler_type': 'OneCycleLR', 'beta1': 0.8921002719215918, 'beta2': 0.9954717049767702, 'lr': 0.000988724043189535, 'weight_decay': 0.002128337349619262, 'max_lr': 0.005342362606133308}. Best is trial 20 with value: 93.92.


--------------------------------------------------
trial.number=32
num_epochs: 83
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8763810986600915
beta2: 0.9955351356527492
lr: 0.0014115182488918117
weight_decay: 0.001647284097663436
max_lr: 0.005371114620981091
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/83], Batch [350/352], Train Acc: 45.0446 Loss: 1.5581
  Validation Accuracy after Epoch 1: 54.1000
  Cidar10.1 Accuracy: 45.85
  Epoch [2/83], Batch [350/352], Train Acc: 62.0603 Loss: 1.3650
  Validation Accuracy after Epoch 2: 62.7400
  Cidar10.1 Accuracy: 48.55
  Epoch [3/83], Batch [350/352], Train Acc: 69.3348 Loss: 1.1520
  Validation Accuracy after Epoch 3: 70.6600
  Cidar10.1 Accuracy: 57.7
  Epoch [4/83], Batch [350/352], Train Acc: 73.6004 Loss: 1.0338
  Validation Accuracy after Epoch 4: 72.8800
  Cidar10.1 Accuracy: 60.15
  Epoch [5/83], Batch [350/352], Train Acc: 75.9241 Los

[I 2025-03-12 07:33:31,171] Trial 32 pruned. 


--------------------------------------------------
trial.number=33
num_epochs: 79
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.9001718422710838
beta2: 0.9940176430135621
lr: 0.0006715050944127252
weight_decay: 0.002985920992930216
max_lr: 0.007472250594440746
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/79], Batch [350/352], Train Acc: 45.3906 Loss: 1.5493
  Validation Accuracy after Epoch 1: 54.6000
  Cidar10.1 Accuracy: 44.35
  Epoch [2/79], Batch [350/352], Train Acc: 63.0179 Loss: 1.2564
  Validation Accuracy after Epoch 2: 63.3200
  Cidar10.1 Accuracy: 51.65
  Epoch [3/79], Batch [350/352], Train Acc: 70.1406 Loss: 1.1126
  Validation Accuracy after Epoch 3: 71.6800
  Cidar10.1 Accuracy: 57.95
  Epoch [4/79], Batch [350/352], Train Acc: 74.2812 Loss: 1.0171
  Validation Accuracy after Epoch 4: 70.1600
  Cidar10.1 Accuracy: 55.2
  Epoch [5/79], Batch [350/352], Train Acc: 76.6942 Los

[I 2025-03-12 07:37:22,741] Trial 33 pruned. 


--------------------------------------------------
trial.number=34
num_epochs: 72
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.884271023376763
beta2: 0.9951349307077931
lr: 0.0004005652168834438
weight_decay: 0.0015319351676706355
max_lr: 0.0026813770655853665
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/72], Batch [350/352], Train Acc: 43.1585 Loss: 1.6426
  Validation Accuracy after Epoch 1: 50.4000
  Cidar10.1 Accuracy: 41.9
  Epoch [2/72], Batch [350/352], Train Acc: 58.1853 Loss: 1.4540
  Validation Accuracy after Epoch 2: 59.6600
  Cidar10.1 Accuracy: 45.6
  Epoch [3/72], Batch [350/352], Train Acc: 65.2254 Loss: 1.1853
  Validation Accuracy after Epoch 3: 64.3800
  Cidar10.1 Accuracy: 53.75
  Epoch [4/72], Batch [350/352], Train Acc: 70.2991 Loss: 1.1232
  Validation Accuracy after Epoch 4: 71.7200
  Cidar10.1 Accuracy: 60.25
  Epoch [5/72], Batch [350/352], Train Acc: 73.7254 Los

[I 2025-03-12 07:48:54,056] Trial 34 finished with value: 93.62 and parameters: {'num_epochs': 72, 'model_type': 'largeresnet', 'batch_size': 128, 'optimizer_type': 'AdamW', 'scheduler_type': 'OneCycleLR', 'beta1': 0.884271023376763, 'beta2': 0.9951349307077931, 'lr': 0.0004005652168834438, 'weight_decay': 0.0015319351676706355, 'max_lr': 0.0026813770655853665}. Best is trial 20 with value: 93.92.


--------------------------------------------------
trial.number=35
num_epochs: 56
model_type: efficientnet
batch_size: 128
optimizer_type: Adam
scheduler_type: ReduceLROnPlateau
beta1: 0.9236113193432982
beta2: 0.9971087219192833
lr: 0.0036197521756823913
weight_decay: 1.9304968623112836e-06
factor: 0.395420447090195
patience: 12
threshold: 0.04073536840989944
trainable_parameters: 3599686
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/56], Batch [350/352], Train Acc: 28.8973 Loss: 1.6696
  Validation Accuracy after Epoch 1: 38.5400
  Trial pruned due to no improvement.


[I 2025-03-12 07:49:12,593] Trial 35 pruned. 


--------------------------------------------------
trial.number=36
num_epochs: 77
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8691036942242727
beta2: 0.9980585970032295
lr: 0.0007645024828404152
weight_decay: 0.0026090938989054946
max_lr: 0.004431443918776483
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/77], Batch [350/352], Train Acc: 44.5915 Loss: 1.5845
  Validation Accuracy after Epoch 1: 53.6600
  Cidar10.1 Accuracy: 45.15
  Epoch [2/77], Batch [350/352], Train Acc: 61.3036 Loss: 1.2755
  Validation Accuracy after Epoch 2: 61.1400
  Cidar10.1 Accuracy: 48.3
  Epoch [3/77], Batch [350/352], Train Acc: 68.7388 Loss: 1.1205
  Validation Accuracy after Epoch 3: 68.8800
  Cidar10.1 Accuracy: 56.05
  Epoch [4/77], Batch [350/352], Train Acc: 72.8705 Loss: 1.0422
  Validation Accuracy after Epoch 4: 68.2600
  Trial pruned due to no improvement.


[I 2025-03-12 07:49:52,643] Trial 36 pruned. 


--------------------------------------------------
trial.number=37
num_epochs: 86
model_type: smallresnet
batch_size: 64
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.9076854929173471
beta2: 0.9920098939691865
lr: 0.000940856322310039
weight_decay: 0.0016902872483510808
max_lr: 0.00621611095613139
trainable_parameters: 2998402
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/86], Batch [700/704], Train Acc: 38.1272 Loss: 1.5431
  Validation Accuracy after Epoch 1: 46.7200
  Trial pruned due to no improvement.


[I 2025-03-12 07:50:11,136] Trial 37 pruned. 


--------------------------------------------------
trial.number=38
num_epochs: 98
model_type: largeresnet
batch_size: 128
optimizer_type: Adam
scheduler_type: CosineAnnealingLR
beta1: 0.9196180293656578
beta2: 0.9961911456860105
lr: 0.003991954007558586
weight_decay: 0.0005701230415544971
eta_min: 1.0625331144193528e-05
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/98], Batch [350/352], Train Acc: 33.4799 Loss: 1.7667
  Validation Accuracy after Epoch 1: 37.9000
  Trial pruned due to no improvement.


[I 2025-03-12 07:50:22,186] Trial 38 pruned. 


--------------------------------------------------
trial.number=39
num_epochs: 80
model_type: efficientnet
batch_size: 256
optimizer_type: AdamW
scheduler_type: ReduceLROnPlateau
beta1: 0.9010629419283134
beta2: 0.9943807084798203
lr: 0.00020755762816292655
weight_decay: 0.0057415202671418685
factor: 0.2839614991187505
patience: 12
threshold: 0.04758901736319455
trainable_parameters: 3599686
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/80], Batch [170/176], Train Acc: 20.9858 Loss: 2.0030
  Validation Accuracy after Epoch 1: 31.1800
  Trial pruned due to no improvement.


[I 2025-03-12 07:50:34,645] Trial 39 pruned. 


--------------------------------------------------
trial.number=40
num_epochs: 90
model_type: largeresnet
batch_size: 128
optimizer_type: SGD
scheduler_type: OneCycleLR
lr: 0.027964770773100004
momentum: 0.9042583323913435
weight_decay: 6.123421436856628e-05
max_lr: 0.05758463112165749
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/90], Batch [350/352], Train Acc: 40.6696 Loss: 1.6255
  Validation Accuracy after Epoch 1: 49.1800
  Trial pruned due to no improvement.


[I 2025-03-12 07:50:45,745] Trial 40 pruned. 


--------------------------------------------------
trial.number=41
num_epochs: 71
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8837644722680488
beta2: 0.9951729922333051
lr: 0.0004160919351921743
weight_decay: 0.0017044795021567002
max_lr: 0.00183825401141263
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/71], Batch [350/352], Train Acc: 41.4509 Loss: 1.7010
  Validation Accuracy after Epoch 1: 47.2400
  Trial pruned due to no improvement.


[I 2025-03-12 07:50:56,999] Trial 41 pruned. 


--------------------------------------------------
trial.number=42
num_epochs: 74
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8938022965540827
beta2: 0.9956319331245538
lr: 0.00046342202431285637
weight_decay: 0.0019033866361377423
max_lr: 0.0010529073568140606
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/74], Batch [350/352], Train Acc: 38.7121 Loss: 1.6966
  Validation Accuracy after Epoch 1: 44.0400
  Trial pruned due to no improvement.


[I 2025-03-12 07:51:07,944] Trial 42 pruned. 


--------------------------------------------------
trial.number=43
num_epochs: 66
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8827833613868782
beta2: 0.9948819985082877
lr: 0.0003217088697463318
weight_decay: 0.00140822873912098
max_lr: 0.0028305892300322334
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/66], Batch [350/352], Train Acc: 43.3527 Loss: 1.6379
  Validation Accuracy after Epoch 1: 49.4200
  Trial pruned due to no improvement.


[I 2025-03-12 07:51:19,038] Trial 43 pruned. 


--------------------------------------------------
trial.number=44
num_epochs: 82
model_type: smallresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8696683153987115
beta2: 0.9933488909899162
lr: 0.0009424375622591268
weight_decay: 0.0038466011240915064
max_lr: 0.003845667803896496
trainable_parameters: 2998402
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/82], Batch [350/352], Train Acc: 33.5781 Loss: 1.7974
  Validation Accuracy after Epoch 1: 41.1600
  Trial pruned due to no improvement.


[I 2025-03-12 07:51:32,056] Trial 44 pruned. 


--------------------------------------------------
trial.number=45
num_epochs: 60
model_type: largeresnet
batch_size: 256
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8886740104738021
beta2: 0.9942850551333878
lr: 0.0005842473726413349
weight_decay: 0.0028001051402299806
max_lr: 0.0050481093738457905
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/60], Batch [170/176], Train Acc: 42.4724 Loss: 1.5731
  Validation Accuracy after Epoch 1: 50.1800
  Trial pruned due to no improvement.


[I 2025-03-12 07:51:43,129] Trial 45 pruned. 


--------------------------------------------------
trial.number=46
num_epochs: 69
model_type: efficientnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8981082613493693
beta2: 0.990535915134037
lr: 0.00020903391918654964
weight_decay: 0.001345224847402742
max_lr: 0.0024101471229715115
trainable_parameters: 3599686
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/69], Batch [350/352], Train Acc: 19.5625 Loss: 2.0142
  Validation Accuracy after Epoch 1: 29.1400
  Trial pruned due to no improvement.


[I 2025-03-12 07:52:00,518] Trial 46 pruned. 


--------------------------------------------------
trial.number=47
num_epochs: 54
model_type: largeresnet
batch_size: 64
optimizer_type: Adam
scheduler_type: ReduceLROnPlateau
beta1: 0.8502254994861959
beta2: 0.9967344893434553
lr: 0.0014869594084288497
weight_decay: 0.0006995646411842861
factor: 0.2548562311613708
patience: 15
threshold: 0.09236393370963508
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/54], Batch [700/704], Train Acc: 43.0022 Loss: 1.4216
  Validation Accuracy after Epoch 1: 45.6800
  Trial pruned due to no improvement.


[I 2025-03-12 07:52:12,922] Trial 47 pruned. 


--------------------------------------------------
trial.number=48
num_epochs: 77
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.9124125557788891
beta2: 0.9959659941716346
lr: 2.3400960978663865e-05
weight_decay: 0.02777045789100667
max_lr: 0.006012142952119021
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/77], Batch [350/352], Train Acc: 45.1897 Loss: 1.5633
  Validation Accuracy after Epoch 1: 53.6800
  Cidar10.1 Accuracy: 45.0
  Epoch [2/77], Batch [350/352], Train Acc: 62.2545 Loss: 1.2840
  Validation Accuracy after Epoch 2: 66.2600
  Cidar10.1 Accuracy: 51.75
  Epoch [3/77], Batch [350/352], Train Acc: 69.9621 Loss: 1.2777
  Validation Accuracy after Epoch 3: 66.6200
  Cidar10.1 Accuracy: 54.7
  Epoch [4/77], Batch [350/352], Train Acc: 73.8772 Loss: 1.0304
  Validation Accuracy after Epoch 4: 75.7600
  Cidar10.1 Accuracy: 62.8
  Epoch [5/77], Batch [350/352], Train Acc: 76.4576 Loss:

[I 2025-03-12 07:55:54,199] Trial 48 pruned. 


--------------------------------------------------
trial.number=49
num_epochs: 72
model_type: largeresnet
batch_size: 256
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8747378195132446
beta2: 0.9949716424990954
lr: 0.000489195494827169
weight_decay: 0.0032382862422623135
max_lr: 0.007359554936660896
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/72], Batch [170/176], Train Acc: 43.4145 Loss: 1.5123
  Validation Accuracy after Epoch 1: 52.3800
  Cidar10.1 Accuracy: 42.45
  Epoch [2/72], Batch [170/176], Train Acc: 60.0483 Loss: 1.2626
  Validation Accuracy after Epoch 2: 62.2000
  Cidar10.1 Accuracy: 49.25
  Epoch [3/72], Batch [170/176], Train Acc: 67.2082 Loss: 1.1931
  Validation Accuracy after Epoch 3: 68.0800
  Cidar10.1 Accuracy: 55.4
  Epoch [4/72], Batch [170/176], Train Acc: 72.4449 Loss: 1.1066
  Validation Accuracy after Epoch 4: 69.9600
  Cidar10.1 Accuracy: 58.65
  Epoch [5/72], Batch [170/176], Train Acc: 75.4527 Los

[I 2025-03-12 07:56:42,929] Trial 49 pruned. 


--------------------------------------------------
trial.number=50
num_epochs: 64
model_type: largeresnet
batch_size: 128
optimizer_type: SGD
scheduler_type: OneCycleLR
lr: 0.0017464490337696342
momentum: 0.8863135872978273
weight_decay: 0.00041514290082108615
max_lr: 0.0476661958017129
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/64], Batch [350/352], Train Acc: 39.7902 Loss: 1.6639
  Validation Accuracy after Epoch 1: 47.1200
  Trial pruned due to no improvement.


[I 2025-03-12 07:56:54,053] Trial 50 pruned. 


--------------------------------------------------
trial.number=51
num_epochs: 76
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8885776387771799
beta2: 0.9951495644701408
lr: 0.0011159502593613038
weight_decay: 0.002247646316542258
max_lr: 0.004346873790834831
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/76], Batch [350/352], Train Acc: 44.6897 Loss: 1.5811
  Validation Accuracy after Epoch 1: 54.8400
  Cidar10.1 Accuracy: 45.55
  Epoch [2/76], Batch [350/352], Train Acc: 61.2500 Loss: 1.3459
  Validation Accuracy after Epoch 2: 63.1800
  Cidar10.1 Accuracy: 51.95
  Epoch [3/76], Batch [350/352], Train Acc: 68.4576 Loss: 1.1335
  Validation Accuracy after Epoch 3: 70.0600
  Cidar10.1 Accuracy: 57.25
  Epoch [4/76], Batch [350/352], Train Acc: 72.7746 Loss: 1.0476
  Validation Accuracy after Epoch 4: 70.2400
  Cidar10.1 Accuracy: 57.7
  Epoch [5/76], Batch [350/352], Train Acc: 75.3549 Los

[I 2025-03-12 08:01:25,492] Trial 51 pruned. 


--------------------------------------------------
trial.number=52
num_epochs: 86
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8896254073135439
beta2: 0.993515144656502
lr: 0.0008102148743091755
weight_decay: 0.0023995388962418734
max_lr: 0.004840680956060926
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/86], Batch [350/352], Train Acc: 45.1362 Loss: 1.5498
  Validation Accuracy after Epoch 1: 54.1400
  Cidar10.1 Accuracy: 44.65
  Epoch [2/86], Batch [350/352], Train Acc: 61.8616 Loss: 1.1796
  Validation Accuracy after Epoch 2: 63.3600
  Cidar10.1 Accuracy: 50.55
  Epoch [3/86], Batch [350/352], Train Acc: 68.9554 Loss: 1.2551
  Validation Accuracy after Epoch 3: 67.3000
  Cidar10.1 Accuracy: 54.25
  Epoch [4/86], Batch [350/352], Train Acc: 73.2946 Loss: 1.0155
  Validation Accuracy after Epoch 4: 72.2400
  Cidar10.1 Accuracy: 61.4
  Epoch [5/86], Batch [350/352], Train Acc: 75.8862 Los

[I 2025-03-12 08:02:34,716] Trial 52 pruned. 


--------------------------------------------------
trial.number=53
num_epochs: 68
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8805357481275744
beta2: 0.9955924937929359
lr: 0.0012276282152372906
weight_decay: 0.001972683877992677
max_lr: 0.005663524668808249
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/68], Batch [350/352], Train Acc: 45.1830 Loss: 1.5523
  Validation Accuracy after Epoch 1: 52.1200
  Cidar10.1 Accuracy: 43.9
  Epoch [2/68], Batch [350/352], Train Acc: 62.4353 Loss: 1.2947
  Validation Accuracy after Epoch 2: 66.3200
  Cidar10.1 Accuracy: 53.8
  Epoch [3/68], Batch [350/352], Train Acc: 69.9844 Loss: 1.1308
  Validation Accuracy after Epoch 3: 67.9800
  Cidar10.1 Accuracy: 53.6
  Epoch [4/68], Batch [350/352], Train Acc: 74.0379 Loss: 1.0700
  Validation Accuracy after Epoch 4: 70.0600
  Cidar10.1 Accuracy: 57.9
  Epoch [5/68], Batch [350/352], Train Acc: 76.1540 Loss: 

[I 2025-03-12 08:13:31,906] Trial 53 finished with value: 93.46 and parameters: {'num_epochs': 68, 'model_type': 'largeresnet', 'batch_size': 128, 'optimizer_type': 'AdamW', 'scheduler_type': 'OneCycleLR', 'beta1': 0.8805357481275744, 'beta2': 0.9955924937929359, 'lr': 0.0012276282152372906, 'weight_decay': 0.001972683877992677, 'max_lr': 0.005663524668808249}. Best is trial 20 with value: 93.92.


--------------------------------------------------
trial.number=54
num_epochs: 80
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.903403689050069
beta2: 0.9944854353997653
lr: 0.0006508055442592868
weight_decay: 0.00133983275181981
max_lr: 0.004827358361331066
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/80], Batch [350/352], Train Acc: 45.1116 Loss: 1.5649
  Validation Accuracy after Epoch 1: 53.6600
  Cidar10.1 Accuracy: 44.65
  Epoch [2/80], Batch [350/352], Train Acc: 61.7656 Loss: 1.3281
  Validation Accuracy after Epoch 2: 63.2600
  Cidar10.1 Accuracy: 51.1
  Epoch [3/80], Batch [350/352], Train Acc: 68.6920 Loss: 1.2910
  Validation Accuracy after Epoch 3: 63.5200
  Trial pruned due to no improvement.


[I 2025-03-12 08:14:02,098] Trial 54 pruned. 


--------------------------------------------------
trial.number=55
num_epochs: 75
model_type: smallresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8936407149473528
beta2: 0.9966856937168362
lr: 0.0003744134239293919
weight_decay: 0.003928592864932334
max_lr: 0.008498398639597823
trainable_parameters: 2998402
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/75], Batch [350/352], Train Acc: 36.8460 Loss: 1.7281
  Validation Accuracy after Epoch 1: 45.0800
  Trial pruned due to no improvement.


[I 2025-03-12 08:14:15,464] Trial 55 pruned. 


--------------------------------------------------
trial.number=56
num_epochs: 70
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: CosineAnnealingLR
beta1: 0.8965602798454313
beta2: 0.9926179784685416
lr: 0.0011127033334149478
weight_decay: 0.0026495466031555013
eta_min: 0.0009446330554198705
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/70], Batch [350/352], Train Acc: 44.5290 Loss: 1.4570
  Validation Accuracy after Epoch 1: 55.6200
  Cidar10.1 Accuracy: 43.1
  Epoch [2/70], Batch [350/352], Train Acc: 62.9464 Loss: 1.1584
  Validation Accuracy after Epoch 2: 65.4200
  Cidar10.1 Accuracy: 52.7
  Epoch [3/70], Batch [350/352], Train Acc: 70.3750 Loss: 1.2020
  Validation Accuracy after Epoch 3: 72.2000
  Cidar10.1 Accuracy: 59.3
  Epoch [4/70], Batch [350/352], Train Acc: 75.7545 Loss: 1.0654
  Validation Accuracy after Epoch 4: 74.2200
  Cidar10.1 Accuracy: 62.15
  Epoch [5/70], Batch [350/352], Train Acc: 78.

### Resume study
Helps run more studies since we only have 4 hour time limit

In [4]:
# study_2025-03-11_16-30-52 = peaks at 93% without SWA or Lookahead
study_name = "study_2025-03-12_02-29-51"

# Load and continue running trials
study = optuna.create_study(
    study_name=study_name,
    storage="sqlite:///study.db",
    direction="maximize",
    load_if_exists=True
)

study.optimize(objective, n_trials=25)  # Run another batch
print("Continued Study:")
print("Best trial:", study.best_trial.number)
print("Best hyperparameters:", study.best_params)
print("Best validation accuracy:", study.best_value)

[I 2025-03-12 10:04:13,481] Using an existing study with name 'study_2025-03-12_02-29-51' instead of creating a new one.


--------------------------------------------------
trial.number=98
num_epochs: 72
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8797085934596846
beta2: 0.9949971606374951
lr: 0.0004585314299323415
weight_decay: 0.0014826433974480398
max_lr: 0.007995567499122606
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/72], Batch [350/352], Train Acc: 45.5223 Loss: 1.5296
  Validation Accuracy after Epoch 1: 54.1200
  Cidar10.1 Accuracy: 45.0
  Epoch [2/72], Batch [350/352], Train Acc: 63.0960 Loss: 1.4150
  Validation Accuracy after Epoch 2: 62.1200
  Cidar10.1 Accuracy: 47.45
  Epoch [3/72], Batch [350/352], Train Acc: 70.1987 Loss: 1.0942
  Validation Accuracy after Epoch 3: 68.2400
  Cidar10.1 Accuracy: 52.65
  Epoch [4/72], Batch [350/352], Train Acc: 74.2098 Loss: 1.1175
  Validation Accuracy after Epoch 4: 70.5200
  Cidar10.1 Accuracy: 57.95
  Epoch [5/72], Batch [350/352], Train Acc: 76.7701 Lo

[I 2025-03-12 10:05:05,184] Trial 98 pruned. 


--------------------------------------------------
trial.number=99
num_epochs: 122
model_type: largeresnet
batch_size: 256
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.9356739822937291
beta2: 0.9965169612969706
lr: 0.0009038583428644764
weight_decay: 0.002498746736724739
max_lr: 0.009283908511003962
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/122], Batch [170/176], Train Acc: 43.5179 Loss: 1.5090
  Validation Accuracy after Epoch 1: 52.4200
  Cidar10.1 Accuracy: 40.5
  Epoch [2/122], Batch [170/176], Train Acc: 60.9122 Loss: 1.2876
  Validation Accuracy after Epoch 2: 60.6000
  Cidar10.1 Accuracy: 47.85
  Epoch [3/122], Batch [170/176], Train Acc: 68.2100 Loss: 1.0749
  Validation Accuracy after Epoch 3: 69.4600
  Cidar10.1 Accuracy: 56.6
  Epoch [4/122], Batch [170/176], Train Acc: 72.9710 Loss: 1.0929
  Validation Accuracy after Epoch 4: 71.5600
  Cidar10.1 Accuracy: 58.55
  Epoch [5/122], Batch [170/176], Train Acc: 76.296

[I 2025-03-12 10:05:51,497] Trial 99 pruned. 


--------------------------------------------------
trial.number=100
num_epochs: 97
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.9054578308096426
beta2: 0.9928351988376284
lr: 0.00039836359566732514
weight_decay: 0.002020023957157541
max_lr: 0.005111543238136063
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/97], Batch [350/352], Train Acc: 44.9397 Loss: 1.5705
  Validation Accuracy after Epoch 1: 53.4800
  Cidar10.1 Accuracy: 44.45
  Epoch [2/97], Batch [350/352], Train Acc: 62.2388 Loss: 1.3454
  Validation Accuracy after Epoch 2: 62.5400
  Cidar10.1 Accuracy: 47.75
  Epoch [3/97], Batch [350/352], Train Acc: 69.3594 Loss: 1.2327
  Validation Accuracy after Epoch 3: 70.8400
  Cidar10.1 Accuracy: 58.25
  Epoch [4/97], Batch [350/352], Train Acc: 73.8371 Loss: 0.9862
  Validation Accuracy after Epoch 4: 64.9400
  Cidar10.1 Accuracy: 53.2
  Epoch [5/97], Batch [350/352], Train Acc: 75.9487 L

[I 2025-03-12 10:07:24,955] Trial 100 pruned. 


--------------------------------------------------
trial.number=101
num_epochs: 76
model_type: smallresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8918593244155517
beta2: 0.9944024312986199
lr: 0.0007119518884413643
weight_decay: 0.002912324293743115
max_lr: 0.003537885816041659
trainable_parameters: 2998402
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/76], Batch [350/352], Train Acc: 32.8504 Loss: 1.8230
  Validation Accuracy after Epoch 1: 42.5200
  Trial pruned due to no improvement.


[I 2025-03-12 10:07:38,232] Trial 101 pruned. 


--------------------------------------------------
trial.number=102
num_epochs: 58
model_type: base
batch_size: 512
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8983088740571669
beta2: 0.9935912919091471
lr: 0.0001480248733571373
weight_decay: 0.002144226779334068
max_lr: 0.009264621780957272
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/58], Batch [80/88], Train Acc: 36.2451 Loss: 1.6828
  Validation Accuracy after Epoch 1: 44.4400
  Trial pruned due to no improvement.


[I 2025-03-12 10:07:49,131] Trial 102 pruned. 


--------------------------------------------------
trial.number=103
num_epochs: 54
model_type: base
batch_size: 512
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.9084553535104866
beta2: 0.9932877699895527
lr: 1.1717074401882927e-05
weight_decay: 0.00676551637520512
max_lr: 0.009987731930500862
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/54], Batch [80/88], Train Acc: 36.5186 Loss: 1.6868
  Validation Accuracy after Epoch 1: 42.5200
  Trial pruned due to no improvement.


[I 2025-03-12 10:07:59,824] Trial 103 pruned. 


--------------------------------------------------
trial.number=104
num_epochs: 56
model_type: base
batch_size: 512
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.9124646207127496
beta2: 0.9921437608581041
lr: 0.00011898030805969763
weight_decay: 0.008481889158182696
max_lr: 0.0061521201369953
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/56], Batch [80/88], Train Acc: 35.0806 Loss: 1.7035
  Validation Accuracy after Epoch 1: 44.5600
  Trial pruned due to no improvement.


[I 2025-03-12 10:08:10,743] Trial 104 pruned. 


--------------------------------------------------
trial.number=105
num_epochs: 60
model_type: base
batch_size: 512
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.9025263265570616
beta2: 0.9953365586550577
lr: 8.855991039224038e-05
weight_decay: 0.0044659714296329915
max_lr: 0.00980716033445798
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/60], Batch [80/88], Train Acc: 36.3330 Loss: 1.6863
  Validation Accuracy after Epoch 1: 44.5000
  Trial pruned due to no improvement.


[I 2025-03-12 10:08:21,563] Trial 105 pruned. 


--------------------------------------------------
trial.number=106
num_epochs: 74
model_type: largeresnet
batch_size: 512
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8879282347377033
beta2: 0.9924882304230533
lr: 0.0010471332932495206
weight_decay: 0.0036487773053808137
max_lr: 0.008466196862411854
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/74], Batch [80/88], Train Acc: 40.8325 Loss: 1.6142
  Validation Accuracy after Epoch 1: 48.2200
  Trial pruned due to no improvement.


[I 2025-03-12 10:08:32,319] Trial 106 pruned. 


--------------------------------------------------
trial.number=107
num_epochs: 62
model_type: largeresnet
batch_size: 128
optimizer_type: Adam
scheduler_type: ReduceLROnPlateau
beta1: 0.8748728773927489
beta2: 0.9947434024815887
lr: 4.264504258358601e-05
weight_decay: 0.0008373199051726582
factor: 0.3560793810460522
patience: 16
threshold: 0.05161227131799969
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/62], Batch [350/352], Train Acc: 38.9978 Loss: 1.6984
  Validation Accuracy after Epoch 1: 47.4400
  Trial pruned due to no improvement.


[I 2025-03-12 10:08:43,233] Trial 107 pruned. 


--------------------------------------------------
trial.number=108
num_epochs: 67
model_type: base
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.9172077528010634
beta2: 0.9976842212947235
lr: 0.0020705947007980657
weight_decay: 0.0016383963794087523
max_lr: 0.0041080471185484125
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/67], Batch [350/352], Train Acc: 38.8482 Loss: 1.6959
  Validation Accuracy after Epoch 1: 48.0600
  Trial pruned due to no improvement.


[I 2025-03-12 10:08:53,918] Trial 108 pruned. 


--------------------------------------------------
trial.number=109
num_epochs: 88
model_type: efficientnet
batch_size: 64
optimizer_type: SGD
scheduler_type: OneCycleLR
lr: 0.0012196273260883152
momentum: 0.841851006744877
weight_decay: 0.0004675516902296319
max_lr: 0.022229194832149635
trainable_parameters: 3599686
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/88], Batch [700/704], Train Acc: 27.7567 Loss: 1.6920
  Validation Accuracy after Epoch 1: 38.2400
  Trial pruned due to no improvement.


[I 2025-03-12 10:09:24,921] Trial 109 pruned. 


--------------------------------------------------
trial.number=110
num_epochs: 52
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.894764889043421
beta2: 0.9912308584069558
lr: 0.0006205569699526801
weight_decay: 0.010380439919807818
max_lr: 0.004554572280395164
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/52], Batch [350/352], Train Acc: 44.7567 Loss: 1.5697
  Validation Accuracy after Epoch 1: 52.8400
  Cidar10.1 Accuracy: 43.35
  Epoch [2/52], Batch [350/352], Train Acc: 61.3237 Loss: 1.3875
  Validation Accuracy after Epoch 2: 59.2600
  Trial pruned due to no improvement.


[I 2025-03-12 10:09:45,084] Trial 110 pruned. 


--------------------------------------------------
trial.number=111
num_epochs: 71
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: CosineAnnealingLR
beta1: 0.9219296936193148
beta2: 0.9958910529184268
lr: 0.0007810414334461005
weight_decay: 0.002699577040237585
eta_min: 0.00025590242923997366
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/71], Batch [350/352], Train Acc: 44.0045 Loss: 1.4632
  Validation Accuracy after Epoch 1: 57.3800
  Cidar10.1 Accuracy: 45.05
  Epoch [2/71], Batch [350/352], Train Acc: 62.4911 Loss: 1.3459
  Validation Accuracy after Epoch 2: 61.2800
  Cidar10.1 Accuracy: 49.25
  Epoch [3/71], Batch [350/352], Train Acc: 69.8638 Loss: 1.0482
  Validation Accuracy after Epoch 3: 73.4000
  Cidar10.1 Accuracy: 60.0
  Epoch [4/71], Batch [350/352], Train Acc: 75.2455 Loss: 1.0049
  Validation Accuracy after Epoch 4: 72.1800
  Cidar10.1 Accuracy: 59.7
  Epoch [5/71], Batch [350/352], Train Acc: 7

[I 2025-03-12 10:14:52,104] Trial 111 pruned. 


--------------------------------------------------
trial.number=112
num_epochs: 63
model_type: base
batch_size: 512
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.9261927258194593
beta2: 0.995586028892556
lr: 0.00018659042034161868
weight_decay: 0.00401967742922052
max_lr: 0.007067838477091528
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/63], Batch [80/88], Train Acc: 35.4761 Loss: 1.7060
  Validation Accuracy after Epoch 1: 46.0600
  Trial pruned due to no improvement.


[I 2025-03-12 10:15:03,458] Trial 112 pruned. 


--------------------------------------------------
trial.number=113
num_epochs: 73
model_type: base
batch_size: 512
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.9333367137503523
beta2: 0.9937453061933577
lr: 0.0002505885135661564
weight_decay: 0.0053155588087843535
max_lr: 0.007813890183324118
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/73], Batch [80/88], Train Acc: 36.0522 Loss: 1.6936
  Validation Accuracy after Epoch 1: 46.4200
  Trial pruned due to no improvement.


[I 2025-03-12 10:15:14,214] Trial 113 pruned. 


--------------------------------------------------
trial.number=114
num_epochs: 50
model_type: base
batch_size: 512
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.9281776992951007
beta2: 0.9950517011364965
lr: 0.00029937858921139787
weight_decay: 0.003318797420316735
max_lr: 0.006582491994335012
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/50], Batch [80/88], Train Acc: 35.2686 Loss: 1.6987
  Validation Accuracy after Epoch 1: 46.5600
  Trial pruned due to no improvement.


[I 2025-03-12 10:15:24,814] Trial 114 pruned. 


--------------------------------------------------
trial.number=115
num_epochs: 69
model_type: base
batch_size: 512
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8816564919608195
beta2: 0.9941393895855996
lr: 0.001488581403996354
weight_decay: 0.001827951035585541
max_lr: 0.0053472603373523045
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/69], Batch [80/88], Train Acc: 34.4092 Loss: 1.7223
  Validation Accuracy after Epoch 1: 44.9000
  Trial pruned due to no improvement.


[I 2025-03-12 10:15:35,730] Trial 115 pruned. 


--------------------------------------------------
trial.number=116
num_epochs: 80
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8901217548120502
beta2: 0.9929318441137307
lr: 0.0007035074009781589
weight_decay: 0.0022201852674045607
max_lr: 0.008805814542417086
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/80], Batch [350/352], Train Acc: 45.6451 Loss: 1.4993
  Validation Accuracy after Epoch 1: 53.7800
  Cidar10.1 Accuracy: 44.3
  Epoch [2/80], Batch [350/352], Train Acc: 63.1763 Loss: 1.1626
  Validation Accuracy after Epoch 2: 64.3000
  Cidar10.1 Accuracy: 51.4
  Epoch [3/80], Batch [350/352], Train Acc: 70.6585 Loss: 1.1948
  Validation Accuracy after Epoch 3: 68.1800
  Cidar10.1 Accuracy: 54.1
  Epoch [4/80], Batch [350/352], Train Acc: 74.6786 Loss: 1.0813
  Validation Accuracy after Epoch 4: 73.6800
  Cidar10.1 Accuracy: 61.0
  Epoch [5/80], Batch [350/352], Train Acc: 76.6741 Loss

[I 2025-03-12 10:19:45,807] Trial 116 pruned. 


--------------------------------------------------
trial.number=117
num_epochs: 78
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.9376129182505581
beta2: 0.9952771388873903
lr: 0.00042859287357707634
weight_decay: 0.0013625885487977437
max_lr: 0.004859676126701241
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/78], Batch [350/352], Train Acc: 45.0112 Loss: 1.5606
  Validation Accuracy after Epoch 1: 54.4600
  Cidar10.1 Accuracy: 44.05
  Epoch [2/78], Batch [350/352], Train Acc: 61.4040 Loss: 1.2901
  Validation Accuracy after Epoch 2: 65.9600
  Cidar10.1 Accuracy: 53.6
  Epoch [3/78], Batch [350/352], Train Acc: 68.7433 Loss: 1.0609
  Validation Accuracy after Epoch 3: 69.5800
  Cidar10.1 Accuracy: 57.35
  Epoch [4/78], Batch [350/352], Train Acc: 73.2433 Loss: 1.0600
  Validation Accuracy after Epoch 4: 67.9200
  Cidar10.1 Accuracy: 55.8
  Epoch [5/78], Batch [350/352], Train Acc: 75.8326 L

[I 2025-03-12 10:23:38,083] Trial 117 pruned. 


--------------------------------------------------
trial.number=118
num_epochs: 65
model_type: largeresnet
batch_size: 512
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.9321544261242359
beta2: 0.9944390989851679
lr: 0.0009446875065050093
weight_decay: 0.003072598828103059
max_lr: 0.0026428631566659338
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/65], Batch [80/88], Train Acc: 37.6001 Loss: 1.6652
  Validation Accuracy after Epoch 1: 45.7400
  Trial pruned due to no improvement.


[I 2025-03-12 10:23:48,727] Trial 118 pruned. 


--------------------------------------------------
trial.number=119
num_epochs: 76
model_type: largeresnet
batch_size: 256
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8967238158133813
beta2: 0.9947476765978296
lr: 0.0003908780278443123
weight_decay: 0.004287796775144126
max_lr: 0.005925043949960674
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/76], Batch [170/176], Train Acc: 42.7413 Loss: 1.5579
  Validation Accuracy after Epoch 1: 49.5000
  Trial pruned due to no improvement.


[I 2025-03-12 10:23:59,398] Trial 119 pruned. 


--------------------------------------------------
trial.number=120
num_epochs: 55
model_type: base
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.9245389031714633
beta2: 0.9939510968796879
lr: 0.00011864304543827257
weight_decay: 0.0058673512915748405
max_lr: 0.009061768585712691
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/55], Batch [350/352], Train Acc: 41.0112 Loss: 1.5671
  Validation Accuracy after Epoch 1: 49.1000
  Trial pruned due to no improvement.


[I 2025-03-12 10:24:10,305] Trial 120 pruned. 


--------------------------------------------------
trial.number=121
num_epochs: 59
model_type: largeresnet
batch_size: 128
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.9029821077514644
beta2: 0.9962220109314475
lr: 0.0005211576578906421
weight_decay: 0.004782487092338308
max_lr: 0.0017541946511082217
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/59], Batch [350/352], Train Acc: 41.1719 Loss: 1.7003
  Validation Accuracy after Epoch 1: 47.2200
  Trial pruned due to no improvement.


[I 2025-03-12 10:24:21,127] Trial 121 pruned. 


--------------------------------------------------
trial.number=122
num_epochs: 51
model_type: base
batch_size: 512
optimizer_type: AdamW
scheduler_type: OneCycleLR
beta1: 0.8998613394428597
beta2: 0.9926581307024805
lr: 0.00011348728256968825
weight_decay: 0.003559142099931068
max_lr: 0.009872217499931642
trainable_parameters: 4903242
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/51], Batch [80/88], Train Acc: 36.3477 Loss: 1.6744
  Validation Accuracy after Epoch 1: 42.6800
  Trial pruned due to no improvement.


[I 2025-03-12 10:24:32,291] Trial 122 pruned. 


Continued Study:
Best trial: 20
Best hyperparameters: {'num_epochs': 79, 'model_type': 'largeresnet', 'batch_size': 128, 'optimizer_type': 'AdamW', 'scheduler_type': 'OneCycleLR', 'beta1': 0.8966741749809605, 'beta2': 0.9953296889087532, 'lr': 0.0009949003229868505, 'weight_decay': 0.002220705473846143, 'max_lr': 0.005605644909963183}
Best validation accuracy: 93.92


### Single Run

In [76]:
# Realistic tranformation for better generalization
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.05),  # Mild color variations
    transforms.ToTensor(),
    transforms.Normalize(**cifar_10_mean_std),
])

model = LargeResNet0()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

num_epochs = 79
batch_size = 128
dataset_size = 50_000 * 0.9


# Compute steps per epoch
steps_per_epoch = dataset_size // batch_size  # Integer division
if dataset_size % batch_size != 0:
    steps_per_epoch += 1  # Add 1 step if there's a remainder

optimizer_type = "AdamW"
optimizer_params = {
    "lr":0.0009949003229868505, "weight_decay": 0.002220705473846143, "betas": (0.9267890327351337, 0.9979342136957566)
}
scheduler_type = "OneCycleLR"
scheduler_params = {
    "max_lr": 0.005605644909963183, "steps_per_epoch": int(steps_per_epoch), "epochs": num_epochs, "anneal_strategy": "cos"
}
criterion_params = {"label_smoothing": 0.1}

checkpoint_fp = "studies/study_2025-03-12_02-29-51/checkpoint/trial_20_val_acc_ResNetLarge_93.9200_2025-03-12_06-08-28.pth"
checkpoint = torch.load(checkpoint_fp)
model.load_state_dict(checkpoint)

# single_run(
#     model,
#     train_transform,
#     num_epochs=num_epochs,
#     batch_size=batch_size,
#     optimizer_type=optimizer_type,
#     optimizer_params=optimizer_params,
#     scheduler_type=scheduler_type,
#     scheduler_params=scheduler_params,
#     criterion_params=criterion_params
# )

<All keys matched successfully>

### Train with pseudo labels cifar10.1 test

In [77]:
from cifar10_1_dataloader import get_dataloader_10_1
from data_loader import get_kaggle_test_dataloader
import torch.utils.data as data
from pseudo_labels import generate_pseudo_labels, PseudoDataset, CustomCIFAR10Dataset

pseudo_transform = transforms.Compose([
    transforms.RandAugment(num_ops=2, magnitude=9),  # Stronger augmentation
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(**cifar_10_mean_std)
])

train_loader, valid_loader = get_cifar10_dataloaders(
    transforms.ToTensor(), 
    batch_size=batch_size, 
    num_workers=8, 
    valid_size=0.1, 
    subset_percent=1.0
)
train_dataset = train_loader.dataset

# Convert original dataset into tensors
train_images = torch.stack([train_dataset[i][0] for i in range(len(train_dataset))])  # Stack images
train_labels = torch.tensor([train_dataset[i][1] for i in range(len(train_dataset))], dtype=torch.long)  # Convert labels to tensor
custom_train_dataset = CustomCIFAR10Dataset(train_images, train_labels, transform=train_transform)

# dataloader_10_1 = get_dataloader_10_1()
kaggle_test_data_loader = get_kaggle_test_dataloader()
pseudo_images, pseudo_labels = generate_pseudo_labels(model, kaggle_test_data_loader, threshold=0.85, device=device)
pseudo_images.shape

torch.Size([6623, 3, 32, 32])

In [78]:
# Fine-tune Model with Pseudo Labels
if pseudo_images is not None:
    pseudo_dataset = PseudoDataset(pseudo_images, pseudo_labels, transform=pseudo_transform)
    pseudo_loader = DataLoader(pseudo_dataset, batch_size=batch_size, shuffle=True)

    # Combine CIFAR-10 and pseudo-labeled data
    combined_loader = data.DataLoader(
        data.ConcatDataset([custom_train_dataset, pseudo_dataset]), batch_size=batch_size, shuffle=True)

    optimizer_params["lr"] = 5e-5
    optimizer = optimizer_map[optimizer_type](model.parameters(), **optimizer_params)
    scheduler = scheduler_map[scheduler_type](optimizer, **scheduler_params)
    criterion = nn.CrossEntropyLoss(**criterion_params)
    
    # Training
    best_val_accuracy = train_model(
        model, combined_loader, criterion, optimizer, valid_loader=valid_loader, 
        num_epochs=3, device=device, scheduler=scheduler
    )
    print(best_val_accuracy)

  Epoch [1/5], Batch [400/404], Train Acc: 90.8145 Loss: 0.6419
  Validation Accuracy after Epoch 1: 70.0600
  Cidar10.1 Accuracy: 85.3
  Epoch [2/5], Batch [400/404], Train Acc: 91.6133 Loss: 0.7215
  Validation Accuracy after Epoch 2: 74.8400
  Cidar10.1 Accuracy: 85.6
  Epoch [3/5], Batch [400/404], Train Acc: 92.0957 Loss: 0.6678
  Validation Accuracy after Epoch 3: 75.4600
  Cidar10.1 Accuracy: 85.55
  Epoch [4/5], Batch [400/404], Train Acc: 92.5078 Loss: 0.6540
  Validation Accuracy after Epoch 4: 76.7600
  Cidar10.1 Accuracy: 85.1
  Epoch [5/5], Batch [400/404], Train Acc: 92.8125 Loss: 0.6381
  Validation Accuracy after Epoch 5: 76.3000
  Cidar10.1 Accuracy: 85.15
Best Validation Accuracy: 76.7600

76.76


### Test on CIFAR10.1 data

In [79]:
from cifar10_1_dataloader import get_dataloader_10_1
dataloader_10_1 = get_dataloader_10_1(num_samples=2000)

acc, _ = evaluate_model(model, dataloader_10_1, device)
print("Acc:", acc)

Acc: 85.15


### TTA - on CIFAR10.1

In [80]:
from tta import tta_predict_batched

dataloader_10_1 = get_dataloader_10_1(num_samples=2000)

# Load test data
model.eval()

# Run TTA only on low-confidence predictions
correct, total = 0, 0
for images, labels in dataloader_10_1:
    images, labels = images.to("cuda"), labels.to("cuda")
    preds = tta_predict_batched(model, images, conf_threshold=0.8)  # Apply selective TTA
    predicted_classes = preds.argmax(dim=1)

    total += labels.size(0)
    correct += (predicted_classes == labels).sum().item()

print(f"TTA Accuracy: {100 * correct / total:.2f}%")

TTA Accuracy: 85.75%


### Test on test data

In [81]:
from trainer import evaluate_model
from data_loader import get_test_dataloader

test_loader = get_test_dataloader(use_kaggle=True)
acc, _ = evaluate_model(model, test_loader, device=device)
print("Acc:", acc)

Acc: 93.33


### Load Checkpoint

In [5]:
# model = SmallResNet0()
# model.to(device)

# # best_checkpoint_fp = "checkpoints_study_2025-03-10_19-00-59/model_trial_0_val_acc_0.8604.pth"
# best_checkpoint_fp = "studies/study_2025-03-11_01-51-41/checkpoint/trial_0_val_acc_SmallResNet_86.7000_2025-03-11_02-15-35.pth"

# if not best_checkpoint_fp:
#     checkpoint_dir = f"checkpoints_{study_name}"
#     with open(os.path.join(checkpoint_dir, "study_details.json"), "r") as f:
#         study_details = json.load(f)
#     best_checkpoint_fp = study_details[str(study.best_trial.number)]["checkpoint_path"]

# # Load the latest checkpoint
# checkpoint = torch.load(best_checkpoint_fp)
# model.load_state_dict(checkpoint)

<All keys matched successfully>

### Run model on Kaggle test data

In [31]:
from data_loader import get_kaggle_test_dataloader

In [33]:
# Generate submission file with test data
kaggle_test_loader = get_kaggle_test_dataloader()

model.eval()
predictions = []

with torch.no_grad():
    for images, in kaggle_test_loader:
        images = images.to(device)
        outputs = model(images) 
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.cpu().numpy())

submission = pd.DataFrame({'ID': np.arange(len(predictions)), 'Labels': predictions})
submission.to_csv('submission.csv', index=False)
print("submission file saved.")

submission file saved.


### Run model on Kaggle test data with TTA

In [None]:
from tta import tta_predict_batched

# dataloader_10_1 = get_dataloader_10_1()
kaggle_test_data_loader = get_kaggle_test_dataloader()

# Load test data
model.eval()

# Run TTA only on low-confidence predictions
correct, total = 0, 0
for images, labels in kaggle_test_data_loader:
    images, labels = images.to("cuda"), labels.to("cuda")
    preds = tta_predict_batched(model, images, conf_threshold=0.8)  # Apply selective TTA
    predicted_classes = preds.argmax(dim=1)

    total += labels.size(0)
    correct += (predicted_classes == labels).sum().item()

print(f"TTA Accuracy: {100 * correct / total:.2f}%")

In [6]:
from data_loader import get_kaggle_test_dataloader
from tta import tta_predict_batched

kaggle_test_loader = get_kaggle_test_dataloader()

In [7]:
model.eval()
predictions = []

with torch.no_grad():
    for images, in kaggle_test_loader:
        images = images.to(device)
        preds = tta_predict_batched(model, images, conf_threshold=0.8)  # Apply selective TTA
        predicted = preds.argmax(dim=1)
        predictions.extend(predicted.cpu().numpy())

submission = pd.DataFrame({'ID': np.arange(len(predictions)), 'Labels': predictions})
submission.to_csv('submission.csv', index=False)
print("submission file saved.")

submission file saved.


In [35]:
# import kaggle
# kaggle.api.competition_submit(
#     file_name="submission.csv",
#     message="0.9365",
#     competition="deep-learning-spring-2025-project-1"
# )

100%|██████████| 67.3k/67.3k [00:00<00:00, 330kB/s]


Successfully submitted to Deep Learning Spring 2025: CIFAR 10 classification