In [1]:
import sys
sys.path.insert(0, 'src/')
from utils import *
from template_attacks import *
from traces_simulator import *
from metrics import *
from scoop import Scoop
from custom_cnn import CNNModel
from custom_mlp import MLPModel

import os
import h5py
import matplotlib.pyplot as plt
from tqdm import tqdm, trange
import torch.nn.functional as F
import random
import optuna
from load_ascad_data import load_ascad_data
from train_model import train_model_h, train_model_classic
import torch
import numpy as np

# --- Reproducibility ---------------------------------------------------------
# Seed every random number generator used by the experiment so that the
# hyper-parameter search below draws the same values on every fresh run.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
# Documented by PyTorch as silently ignored when CUDA is not available.
torch.cuda.manual_seed(seed)
# Ask cuDNN for deterministic kernels and disable its auto-tuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
# NOTE: PYTHONHASHSEED is read when an interpreter starts, so this assignment
# does not change hashing in the running kernel; it is only inherited by
# child processes started afterwards.
os.environ['PYTHONHASHSEED']=str(seed)
#torch.use_deterministic_algorithms(True)

# --- Environment report ------------------------------------------------------
from subprocess import call


def _run_cli(cmd):
    """Run an external diagnostic command, reporting (not raising) if it is missing."""
    try:
        call(cmd)
    except OSError as err:
        # e.g. FileNotFoundError when the CUDA toolkit / driver tools are not on PATH
        print(f"{cmd[0]} could not be run: {err}")


# Device used by every later cell; falls back to the CPU when no GPU is visible.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print('__Python VERSION:', sys.version)
print('__pyTorch VERSION:', torch.__version__)
print('__CUDA VERSION')
_run_cli(["nvcc", "--version"])
print('__CUDNN VERSION:', torch.backends.cudnn.version())
print('__Number CUDA Devices:', torch.cuda.device_count())
print('__Devices')
if torch.cuda.is_available():
    # These queries need a working CUDA runtime, so they are only issued when
    # a GPU is actually available.
    _run_cli(["nvidia-smi", "--format=csv", "--query-gpu=index,name,driver_version,memory.total,memory.used,memory.free"])
    print('Active CUDA Device: GPU', torch.cuda.current_device())
    print ('Available devices ', torch.cuda.device_count())
    print ('Current cuda device', torch.cuda.current_device())
else:
    print('No CUDA device available; running on CPU')

__Python VERSION: 3.11.10 (main, Oct  3 2024, 07:29:13) [GCC 11.2.0]
__pyTorch VERSION: 2.5.1
__CUDA VERSION
/bin/bash: ligne 1: nvcc : commande introuvable


  from .autonotebook import tqdm as notebook_tqdm


__CUDNN VERSION: 90100
__Number CUDA Devices: 1
__Devices
index, name, driver_version, memory.total [MiB], memory.used [MiB], memory.free [MiB]
0, NVIDIA RTX 4500 Ada Generation, 535.183.01, 24570 MiB, 7 MiB, 24216 MiB
Active CUDA Device: GPU 0
Available devices  1
Current cuda device 0


In [2]:
def objective(trial):
    """Optuna objective: train one MLP with Adam and return its best validation loss.

    Hyper-parameters are drawn from ``trial``, an ``MLPModel`` is built and
    trained with ``torch.optim.Adam`` through ``train_model_classic``, and the
    per-epoch loss curves are attached to the trial as user attributes
    (``'train_loss'`` and ``'val_loss'``).

    Args:
        trial: the ``optuna`` trial used to sample hyper-parameters and to
            store the loss curves.

    Returns:
        float: the smallest value in the validation-loss curve returned by
        ``train_model_classic`` (NaN entries are ignored; NaN is returned when
        the curve is empty or entirely NaN).
    """
    # Fixed settings (not part of the search).
    n_classes = 256
    signal_length = 700
    n_epochs = 60

    # Search space. The order of the suggest_* calls is kept unchanged so that
    # a seeded sampler draws the same values as before.
    lr = trial.suggest_float('lr', 1e-4, 1e-2, log=True)
    beta1 = trial.suggest_float('beta1', 0.9, 0.999, step=0.001)
    beta2 = trial.suggest_float('beta2', 0.9, 0.999, step=0.001)
    activation = trial.suggest_categorical('activation', ['ReLU', 'SeLU', 'ELU', 'Tanh'])
    n_linear = trial.suggest_int('n_linear', 2, 8)
    # Single-choice categorical: the width is fixed but still recorded per trial.
    linear_size = trial.suggest_categorical('linear_size', [256])
    input_bn = trial.suggest_categorical('input_bn', [True, False])
    dense_bn = trial.suggest_categorical('dense_bn', [True, False])
    batch_size = trial.suggest_int('batch_size', 32, 512, step=32)
    weight_decay = trial.suggest_float('weight_decay', 1e-7, 1, log=True)
    # Dropout rates and Scoop's `rho` are intentionally not searched here.

    # Loaders are rebuilt per trial because the batch size is a hyper-parameter.
    train_loader, attack_loader, _, _, _, _ = load_ascad_data(device, batch_size=batch_size)

    model = MLPModel(signal_length=signal_length,
                     n_classes=n_classes, n_linear=n_linear,
                     linear_size=linear_size, activation=activation,
                     input_bn=input_bn, dense_bn=dense_bn).to(device)

    # Only Adam is trained in this study; the Scoop optimizer that used to be
    # instantiated here was never passed to the training routine.
    adam_optimizer = torch.optim.Adam(model.parameters(), lr=lr,
                                      betas=(beta1, beta2), weight_decay=weight_decay)

    path_model = 'scoop_vs_adam/ascadv1_fintuning_adam_' + str(trial.number) + '.pt'
    # Make sure the checkpoint directory exists before training writes to it.
    os.makedirs(os.path.dirname(path_model), exist_ok=True)

    # NOTE(review): MLP=False is passed although the model is an MLPModel;
    # confirm against train_model_classic that this flag is intended.
    train_loss, val_loss, _ = train_model_classic(model, adam_optimizer, n_epochs, train_loader, attack_loader, verbose=True, path=path_model, device=device, MLP=False, finetuning=True, entropy=8)
    trial.set_user_attr('val_loss', val_loss)
    trial.set_user_attr('train_loss', train_loss)

    # Score the trial by its best epoch. np.nanmin matches the previous
    # "sort ascending and take the first element" behaviour, which also
    # skipped NaN entries because np.sort places them last.
    val_curve = np.array(val_loss)
    if val_curve.size == 0:
        return float('nan')
    return float(np.nanmin(val_curve))

In [3]:
# Trials are persisted in a local SQLite file so the study can be reloaded.
storage_name = "sqlite:///ascadv1_finetuning_scoop_comp.db"

# Two seeded samplers are prepared; only the random one is given to the study.
sampler = optuna.samplers.RandomSampler(seed=seed)
tpe_sampler = optuna.samplers.TPESampler(seed=seed, n_startup_trials=10)

optuna.logging.set_verbosity(optuna.logging.INFO)

# NOTE(review): with load_if_exists=True an existing "adam" study is reused,
# and each execution of this cell asks for another 167 trials on top of the
# stored ones, drawn by a sampler re-seeded with the same value. Confirm this
# is the intended resume behaviour.
study = optuna.create_study(
    study_name="adam",
    storage=storage_name,
    sampler=sampler,
    direction="minimize",
    load_if_exists=True,
)
study.optimize(
    objective,
    n_trials=167,
    n_jobs=1,
    show_progress_bar=True,
)

[I 2025-01-09 17:16:39,666] A new study created in RDB with name: adam
  0%|          | 0/167 [00:00<?, ?it/s]

Epoch 57/60 | Train loss: 7.9960 | Valid loss: 8.0063 | Expected time left: 0.24 s

Best trial: 0. Best value: 8.002:   1%|          | 1/167 [00:05<15:10,  5.49s/it]

[I 2025-01-09 17:16:45,150] Trial 0 finished with value: 8.001999440400496 and parameters: {'lr': 0.0005611516415334506, 'beta1': 0.995, 'beta2': 0.973, 'activation': 'ReLU', 'n_linear': 8, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 448, 'weight_decay': 3.064599841241146e-06}. Best is trial 0 with value: 8.001999440400496.
Epoch 58/60 | Train loss: 8.0011 | Valid loss: 8.0053 | Expected time left: 0.28 s

Best trial: 0. Best value: 8.002:   1%|          | 2/167 [00:14<20:13,  7.35s/it]

[I 2025-01-09 17:16:53,813] Trial 1 finished with value: 8.004759293568286 and parameters: {'lr': 0.0002310201887845295, 'beta1': 0.918, 'beta2': 0.93, 'activation': 'Tanh', 'n_linear': 2, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 128, 'weight_decay': 0.0003977782830811189}. Best is trial 0 with value: 8.001999440400496.
Epoch 59/60 | Train loss: 7.9981 | Valid loss: 8.0053 | Expected time left: 0.79 ss

Best trial: 0. Best value: 8.002:   2%|▏         | 3/167 [01:01<1:10:04, 25.64s/it]

[I 2025-01-09 17:17:41,204] Trial 2 finished with value: 8.004587452882415 and parameters: {'lr': 0.0015304852121831463, 'beta1': 0.904, 'beta2': 0.96, 'activation': 'Tanh', 'n_linear': 7, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 64, 'weight_decay': 0.00029257577949824405}. Best is trial 0 with value: 8.001999440400496.
Epoch 59/60 | Train loss: 7.9999 | Valid loss: 8.0000 | Expected time left: 0.11 s

Best trial: 3. Best value: 7.99991:   2%|▏         | 4/167 [01:08<49:16, 18.14s/it]  

[I 2025-01-09 17:17:47,843] Trial 3 finished with value: 7.999912515282631 and parameters: {'lr': 0.00011715937392307068, 'beta1': 0.99, 'beta2': 0.925, 'activation': 'ReLU', 'n_linear': 3, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 320, 'weight_decay': 0.2838700963443626}. Best is trial 3 with value: 7.999912515282631.
Epoch 59/60 | Train loss: 8.0000 | Valid loss: 8.0000 | Expected time left: 0.35 ss

Best trial: 3. Best value: 7.99991:   3%|▎         | 5/167 [01:29<51:58, 19.25s/it]

[I 2025-01-09 17:18:09,070] Trial 4 finished with value: 7.999981661511075 and parameters: {'lr': 0.00015030900645056822, 'beta1': 0.919, 'beta2': 0.904, 'activation': 'Tanh', 'n_linear': 4, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 64, 'weight_decay': 0.8094845352286136}. Best is trial 3 with value: 7.999912515282631.
Epoch 6/60 | Train loss: 7.8349 | Valid loss: 8.0560 | Expected time left: 7.42 s

Best trial: 5. Best value: 7.98307:   4%|▎         | 6/167 [01:30<35:11, 13.12s/it]

[I 2025-01-09 17:18:10,278] Trial 5 finished with value: 7.983070409522866 and parameters: {'lr': 0.0035033984911586884, 'beta1': 0.919, 'beta2': 0.9, 'activation': 'ReLU', 'n_linear': 2, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 192, 'weight_decay': 2.7855339243891e-07}. Best is trial 5 with value: 7.983070409522866.
Epoch 8/60 | Train loss: 7.9791 | Valid loss: 8.0805 | Expected time left: 8.01 s

Best trial: 5. Best value: 7.98307:   4%|▍         | 7/167 [01:32<24:57,  9.36s/it]

[I 2025-01-09 17:18:11,905] Trial 6 finished with value: 8.045209966387068 and parameters: {'lr': 0.0004187594718900631, 'beta1': 0.932, 'beta2': 0.972, 'activation': 'SeLU', 'n_linear': 6, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 288, 'weight_decay': 9.835289062589965e-05}. Best is trial 5 with value: 7.983070409522866.
Epoch 10/60 | Train loss: 7.8270 | Valid loss: 8.0955 | Expected time left: 9.39 s

Best trial: 5. Best value: 7.98307:   5%|▍         | 8/167 [01:34<18:51,  7.12s/it]

[I 2025-01-09 17:18:14,218] Trial 7 finished with value: 8.025481420849998 and parameters: {'lr': 0.0001124186209579306, 'beta1': 0.91, 'beta2': 0.903, 'activation': 'Tanh', 'n_linear': 3, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 160, 'weight_decay': 1.3444634828135528e-06}. Best is trial 5 with value: 7.983070409522866.
Epoch 59/60 | Train loss: 8.0124 | Valid loss: 8.0155 | Expected time left: 0.29 ss

Best trial: 5. Best value: 7.98307:   5%|▌         | 9/167 [01:52<27:26, 10.42s/it]

[I 2025-01-09 17:18:31,911] Trial 8 finished with value: 8.010720989372157 and parameters: {'lr': 0.007234279845665418, 'beta1': 0.98, 'beta2': 0.9630000000000001, 'activation': 'Tanh', 'n_linear': 5, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 128, 'weight_decay': 9.766850057130267e-05}. Best is trial 5 with value: 7.983070409522866.
Epoch 58/60 | Train loss: 8.0000 | Valid loss: 8.0002 | Expected time left: 0.30 s

Best trial: 5. Best value: 7.98307:   6%|▌         | 10/167 [02:01<26:21, 10.08s/it]

[I 2025-01-09 17:18:41,208] Trial 9 finished with value: 7.999916517509605 and parameters: {'lr': 0.00432543242796456, 'beta1': 0.986, 'beta2': 0.9, 'activation': 'ReLU', 'n_linear': 4, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 192, 'weight_decay': 0.6345627564561446}. Best is trial 5 with value: 7.983070409522866.
Epoch 13/60 | Train loss: 8.0126 | Valid loss: 8.0508 | Expected time left: 18.47 s

Best trial: 5. Best value: 7.98307:   7%|▋         | 11/167 [02:07<22:44,  8.75s/it]

[I 2025-01-09 17:18:46,946] Trial 10 finished with value: 8.01189280010405 and parameters: {'lr': 0.008411909465645722, 'beta1': 0.925, 'beta2': 0.9490000000000001, 'activation': 'Tanh', 'n_linear': 5, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 96, 'weight_decay': 0.0002667898617242807}. Best is trial 5 with value: 7.983070409522866.
Epoch 60/60 | Train loss: 8.0019 | Valid loss: 8.0067 | Expected time left: 0.00 s

Best trial: 5. Best value: 7.98307:   7%|▋         | 12/167 [02:17<24:05,  9.33s/it]

[I 2025-01-09 17:18:57,603] Trial 11 finished with value: 8.004942489120195 and parameters: {'lr': 0.009360540102485375, 'beta1': 0.924, 'beta2': 0.9670000000000001, 'activation': 'ReLU', 'n_linear': 6, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 192, 'weight_decay': 2.0212944000653444e-06}. Best is trial 5 with value: 7.983070409522866.
Epoch 8/60 | Train loss: 7.8789 | Valid loss: 8.0787 | Expected time left: 8.59 ss

Best trial: 5. Best value: 7.98307:   8%|▊         | 13/167 [02:19<18:09,  7.07s/it]

[I 2025-01-09 17:18:59,485] Trial 12 finished with value: 8.025387889934036 and parameters: {'lr': 0.00012065637774378946, 'beta1': 0.9590000000000001, 'beta2': 0.9670000000000001, 'activation': 'Tanh', 'n_linear': 3, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 192, 'weight_decay': 6.227513084319219e-07}. Best is trial 5 with value: 7.983070409522866.
Epoch 42/60 | Train loss: 8.0053 | Valid loss: 8.0952 | Expected time left: 2.81 s

Best trial: 5. Best value: 7.98307:   8%|▊         | 14/167 [02:26<17:57,  7.04s/it]

[I 2025-01-09 17:19:06,451] Trial 13 finished with value: 8.02888732586267 and parameters: {'lr': 0.007069476197217995, 'beta1': 0.987, 'beta2': 0.925, 'activation': 'SeLU', 'n_linear': 3, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 192, 'weight_decay': 2.7827040640167687e-05}. Best is trial 5 with value: 7.983070409522866.
Epoch 59/60 | Train loss: 8.0006 | Valid loss: 8.0062 | Expected time left: 1.21 ss

Best trial: 5. Best value: 7.98307:   9%|▉         | 15/167 [03:39<1:08:13, 26.93s/it]

[I 2025-01-09 17:20:19,474] Trial 14 finished with value: 8.005094869449115 and parameters: {'lr': 0.0028308141496998056, 'beta1': 0.989, 'beta2': 0.988, 'activation': 'ReLU', 'n_linear': 8, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 32, 'weight_decay': 1.33553833958957e-06}. Best is trial 5 with value: 7.983070409522866.


Best trial: 5. Best value: 7.98307:  10%|▉         | 16/167 [03:40<48:06, 19.12s/it]  

[I 2025-01-09 17:20:20,448] Trial 15 finished with value: 8.216689781018882 and parameters: {'lr': 0.0012516058337570376, 'beta1': 0.9690000000000001, 'beta2': 0.9650000000000001, 'activation': 'SeLU', 'n_linear': 7, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 64, 'weight_decay': 3.74982172965185e-05}. Best is trial 5 with value: 7.983070409522866.
Epoch 60/60 | Train loss: 7.9906 | Valid loss: 8.0113 | Expected time left: 0.00 s

Best trial: 5. Best value: 7.98307:  10%|█         | 17/167 [03:51<41:47, 16.72s/it]

[I 2025-01-09 17:20:31,588] Trial 16 finished with value: 8.003842225150457 and parameters: {'lr': 0.0003391600849438615, 'beta1': 0.924, 'beta2': 0.997, 'activation': 'SeLU', 'n_linear': 5, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 160, 'weight_decay': 1.4798297876059036e-07}. Best is trial 5 with value: 7.983070409522866.
Epoch 34/60 | Train loss: 7.9640 | Valid loss: 8.0312 | Expected time left: 8.53 ss

Best trial: 5. Best value: 7.98307:  11%|█         | 18/167 [04:03<37:47, 15.22s/it]

[I 2025-01-09 17:20:43,306] Trial 17 finished with value: 8.004270084320552 and parameters: {'lr': 0.001954090133022007, 'beta1': 0.917, 'beta2': 0.994, 'activation': 'ReLU', 'n_linear': 8, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 160, 'weight_decay': 4.9623123841813285e-05}. Best is trial 5 with value: 7.983070409522866.


Best trial: 5. Best value: 7.98307:  11%|█▏        | 19/167 [04:03<26:28, 10.73s/it]

[I 2025-01-09 17:20:43,588] Trial 18 finished with value: 8.723096120925177 and parameters: {'lr': 0.005038176096019999, 'beta1': 0.931, 'beta2': 0.916, 'activation': 'SeLU', 'n_linear': 2, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 480, 'weight_decay': 0.015324288666666576}. Best is trial 5 with value: 7.983070409522866.
Epoch 58/60 | Train loss: 7.9996 | Valid loss: 8.0001 | Expected time left: 0.29 s

Best trial: 5. Best value: 7.98307:  12%|█▏        | 20/167 [04:12<24:56, 10.18s/it]

[I 2025-01-09 17:20:52,482] Trial 19 finished with value: 8.000018737934253 and parameters: {'lr': 0.0024776016523736957, 'beta1': 0.97, 'beta2': 0.935, 'activation': 'Tanh', 'n_linear': 8, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 384, 'weight_decay': 0.03720049219821375}. Best is trial 5 with value: 7.983070409522866.


Best trial: 5. Best value: 7.98307:  13%|█▎        | 21/167 [04:13<17:33,  7.22s/it]

[I 2025-01-09 17:20:52,790] Trial 20 finished with value: 8.526460564654807 and parameters: {'lr': 0.0060257440920984265, 'beta1': 0.933, 'beta2': 0.937, 'activation': 'SeLU', 'n_linear': 5, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 448, 'weight_decay': 3.321501782767483e-05}. Best is trial 5 with value: 7.983070409522866.
Epoch 60/60 | Train loss: 7.9957 | Valid loss: 8.0112 | Expected time left: 0.00 s

Best trial: 5. Best value: 7.98307:  13%|█▎        | 22/167 [04:18<16:10,  6.69s/it]

[I 2025-01-09 17:20:58,269] Trial 21 finished with value: 8.007553768157958 and parameters: {'lr': 0.00017952338368491265, 'beta1': 0.9520000000000001, 'beta2': 0.976, 'activation': 'SeLU', 'n_linear': 5, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 288, 'weight_decay': 1.822616554559887e-05}. Best is trial 5 with value: 7.983070409522866.
Epoch 58/60 | Train loss: 7.9992 | Valid loss: 8.0015 | Expected time left: 0.22 s

Best trial: 5. Best value: 7.98307:  14%|█▍        | 23/167 [04:25<16:06,  6.71s/it]

[I 2025-01-09 17:21:05,012] Trial 22 finished with value: 8.000870963505337 and parameters: {'lr': 0.003893788781594572, 'beta1': 0.927, 'beta2': 0.9430000000000001, 'activation': 'ELU', 'n_linear': 6, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 288, 'weight_decay': 0.010050124895976774}. Best is trial 5 with value: 7.983070409522866.
Epoch 59/60 | Train loss: 7.9998 | Valid loss: 8.0105 | Expected time left: 0.69 ss

Best trial: 5. Best value: 7.98307:  14%|█▍        | 24/167 [05:07<41:04, 17.23s/it]

[I 2025-01-09 17:21:46,794] Trial 23 finished with value: 8.003519384244 and parameters: {'lr': 0.0020911960669713066, 'beta1': 0.927, 'beta2': 0.995, 'activation': 'ReLU', 'n_linear': 3, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 32, 'weight_decay': 1.9279759682810136e-07}. Best is trial 5 with value: 7.983070409522866.
Epoch 59/60 | Train loss: 8.0019 | Valid loss: 8.0188 | Expected time left: 0.20 s

Best trial: 5. Best value: 7.98307:  15%|█▍        | 25/167 [05:19<37:20, 15.78s/it]

[I 2025-01-09 17:21:59,177] Trial 24 finished with value: 8.004370419484264 and parameters: {'lr': 0.005139503522154862, 'beta1': 0.97, 'beta2': 0.9470000000000001, 'activation': 'SeLU', 'n_linear': 5, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 192, 'weight_decay': 0.0024044702610177535}. Best is trial 5 with value: 7.983070409522866.
Epoch 58/60 | Train loss: 7.9964 | Valid loss: 8.0048 | Expected time left: 0.24 s

Best trial: 5. Best value: 7.98307:  16%|█▌        | 26/167 [05:27<31:17, 13.32s/it]

[I 2025-01-09 17:22:06,753] Trial 25 finished with value: 8.002299177646638 and parameters: {'lr': 0.0010145478086525616, 'beta1': 0.985, 'beta2': 0.9650000000000001, 'activation': 'ELU', 'n_linear': 6, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 256, 'weight_decay': 0.000659656012499137}. Best is trial 5 with value: 7.983070409522866.
Epoch 57/60 | Train loss: 7.9995 | Valid loss: 8.0063 | Expected time left: 0.14 s

Best trial: 5. Best value: 7.98307:  16%|█▌        | 27/167 [05:30<23:53, 10.24s/it]

[I 2025-01-09 17:22:09,817] Trial 26 finished with value: 8.003018938976785 and parameters: {'lr': 0.007637120051791047, 'beta1': 0.9380000000000001, 'beta2': 0.996, 'activation': 'ReLU', 'n_linear': 2, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 448, 'weight_decay': 1.455135882319205e-07}. Best is trial 5 with value: 7.983070409522866.
Epoch 57/60 | Train loss: 8.0033 | Valid loss: 8.0087 | Expected time left: 0.25 s

Best trial: 5. Best value: 7.98307:  16%|█▌        | 27/167 [05:35<23:53, 10.24s/it]

[I 2025-01-09 17:22:14,986] Trial 27 finished with value: 8.00651228427887 and parameters: {'lr': 0.004255366449682059, 'beta1': 0.928, 'beta2': 0.911, 'activation': 'ELU', 'n_linear': 7, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 512, 'weight_decay': 7.732567831097477e-05}. Best is trial 5 with value: 7.983070409522866.


Best trial: 5. Best value: 7.98307:  17%|█▋        | 28/167 [05:35<20:11,  8.72s/it]

Epoch 58/60 | Train loss: 7.9999 | Valid loss: 8.0000 | Expected time left: 0.32 s

Best trial: 5. Best value: 7.98307:  17%|█▋        | 29/167 [05:45<20:52,  9.07s/it]

[I 2025-01-09 17:22:24,886] Trial 28 finished with value: 7.999958470182599 and parameters: {'lr': 0.0005546719086332962, 'beta1': 0.977, 'beta2': 0.934, 'activation': 'ReLU', 'n_linear': 7, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 192, 'weight_decay': 0.18563616995357024}. Best is trial 5 with value: 7.983070409522866.
Epoch 59/60 | Train loss: 7.9985 | Valid loss: 8.0007 | Expected time left: 0.09 s

Best trial: 5. Best value: 7.98307:  18%|█▊        | 30/167 [05:50<18:25,  8.07s/it]

[I 2025-01-09 17:22:30,614] Trial 29 finished with value: 8.000325219384555 and parameters: {'lr': 0.0006003483999219653, 'beta1': 0.901, 'beta2': 0.99, 'activation': 'Tanh', 'n_linear': 6, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 352, 'weight_decay': 0.0184765850753926}. Best is trial 5 with value: 7.983070409522866.


Best trial: 5. Best value: 7.98307:  19%|█▊        | 31/167 [05:51<13:02,  5.76s/it]

[I 2025-01-09 17:22:30,969] Trial 30 finished with value: 8.123968869447708 and parameters: {'lr': 0.0038296412064459167, 'beta1': 0.978, 'beta2': 0.909, 'activation': 'ELU', 'n_linear': 8, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 320, 'weight_decay': 5.103389701117353e-07}. Best is trial 5 with value: 7.983070409522866.
Epoch 60/60 | Train loss: 7.9983 | Valid loss: 8.0006 | Expected time left: 0.00 s

Best trial: 5. Best value: 7.98307:  19%|█▉        | 32/167 [05:56<12:40,  5.63s/it]

[I 2025-01-09 17:22:36,320] Trial 31 finished with value: 8.000064873695374 and parameters: {'lr': 0.00014730368526805275, 'beta1': 0.97, 'beta2': 0.907, 'activation': 'ReLU', 'n_linear': 8, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 512, 'weight_decay': 0.018777911885126744}. Best is trial 5 with value: 7.983070409522866.
Epoch 59/60 | Train loss: 7.9971 | Valid loss: 8.0027 | Expected time left: 0.42 ss

Best trial: 5. Best value: 7.98307:  20%|█▉        | 33/167 [06:21<25:42, 11.51s/it]

[I 2025-01-09 17:23:01,540] Trial 32 finished with value: 8.001682181267222 and parameters: {'lr': 0.0005656127243812504, 'beta1': 0.908, 'beta2': 0.977, 'activation': 'ELU', 'n_linear': 5, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 64, 'weight_decay': 0.0035032579385482626}. Best is trial 5 with value: 7.983070409522866.
Epoch 58/60 | Train loss: 8.0000 | Valid loss: 8.0001 | Expected time left: 0.22 s

Best trial: 5. Best value: 7.98307:  20%|██        | 34/167 [06:28<22:27, 10.13s/it]

[I 2025-01-09 17:23:08,461] Trial 33 finished with value: 7.9999275888715475 and parameters: {'lr': 0.003105201294430152, 'beta1': 0.9580000000000001, 'beta2': 0.996, 'activation': 'ELU', 'n_linear': 8, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 288, 'weight_decay': 0.8927987485470283}. Best is trial 5 with value: 7.983070409522866.
Epoch 56/60 | Train loss: 7.9998 | Valid loss: 8.0001 | Expected time left: 0.24 s

Best trial: 5. Best value: 7.98307:  21%|██        | 35/167 [06:32<18:09,  8.25s/it]

[I 2025-01-09 17:23:12,322] Trial 34 finished with value: 8.000051832199096 and parameters: {'lr': 0.00014047308797093114, 'beta1': 0.9550000000000001, 'beta2': 0.996, 'activation': 'ELU', 'n_linear': 6, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 512, 'weight_decay': 0.1705479436052674}. Best is trial 5 with value: 7.983070409522866.
Epoch 58/60 | Train loss: 7.9997 | Valid loss: 8.0003 | Expected time left: 0.30 s

Best trial: 5. Best value: 7.98307:  22%|██▏       | 36/167 [06:41<18:38,  8.54s/it]

[I 2025-01-09 17:23:21,527] Trial 35 finished with value: 8.000143808031838 and parameters: {'lr': 0.0008152926095771768, 'beta1': 0.962, 'beta2': 0.927, 'activation': 'Tanh', 'n_linear': 2, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 160, 'weight_decay': 0.04972389468490117}. Best is trial 5 with value: 7.983070409522866.
Epoch 58/60 | Train loss: 7.9998 | Valid loss: 8.0003 | Expected time left: 0.34 s

Best trial: 5. Best value: 7.98307:  22%|██▏       | 37/167 [06:52<19:39,  9.07s/it]

[I 2025-01-09 17:23:31,845] Trial 36 finished with value: 7.999932320912679 and parameters: {'lr': 0.0023413284647769823, 'beta1': 0.916, 'beta2': 0.991, 'activation': 'SeLU', 'n_linear': 4, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 224, 'weight_decay': 0.047192535965772234}. Best is trial 5 with value: 7.983070409522866.
Epoch 57/60 | Train loss: 8.0001 | Valid loss: 7.9999 | Expected time left: 0.23 s

Best trial: 5. Best value: 7.98307:  23%|██▎       | 38/167 [06:57<16:45,  7.80s/it]

[I 2025-01-09 17:23:36,666] Trial 37 finished with value: 7.99985468905905 and parameters: {'lr': 0.00943088086367435, 'beta1': 0.915, 'beta2': 0.9590000000000001, 'activation': 'SeLU', 'n_linear': 5, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 448, 'weight_decay': 0.9954597691726136}. Best is trial 5 with value: 7.983070409522866.
Epoch 57/60 | Train loss: 8.0003 | Valid loss: 8.0067 | Expected time left: 0.23 s

Best trial: 5. Best value: 7.98307:  23%|██▎       | 39/167 [07:01<14:42,  6.90s/it]

[I 2025-01-09 17:23:41,462] Trial 38 finished with value: 8.005013823509216 and parameters: {'lr': 0.009846313836604142, 'beta1': 0.9550000000000001, 'beta2': 0.976, 'activation': 'ReLU', 'n_linear': 2, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 320, 'weight_decay': 3.214689459446608e-05}. Best is trial 5 with value: 7.983070409522866.
Epoch 5/60 | Train loss: 7.8480 | Valid loss: 8.0163 | Expected time left: 5.53 s

Best trial: 5. Best value: 7.98307:  24%|██▍       | 40/167 [07:02<10:49,  5.11s/it]

[I 2025-01-09 17:23:42,410] Trial 39 finished with value: 8.016325702667237 and parameters: {'lr': 0.0001687000395477544, 'beta1': 0.9670000000000001, 'beta2': 0.9520000000000001, 'activation': 'ELU', 'n_linear': 5, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 416, 'weight_decay': 0.0021987044706745194}. Best is trial 5 with value: 7.983070409522866.
Epoch 58/60 | Train loss: 7.9980 | Valid loss: 8.0034 | Expected time left: 0.26 s

Best trial: 5. Best value: 7.98307:  25%|██▍       | 41/167 [07:10<12:33,  5.98s/it]

[I 2025-01-09 17:23:50,414] Trial 40 finished with value: 8.002056079440647 and parameters: {'lr': 0.0025595259450110724, 'beta1': 0.921, 'beta2': 0.913, 'activation': 'ELU', 'n_linear': 5, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 224, 'weight_decay': 0.002262595409133795}. Best is trial 5 with value: 7.983070409522866.


Best trial: 5. Best value: 7.98307:  25%|██▌       | 42/167 [07:11<09:24,  4.51s/it]

[I 2025-01-09 17:23:51,504] Trial 41 finished with value: 107.60934558795516 and parameters: {'lr': 0.005305513421197475, 'beta1': 0.994, 'beta2': 0.914, 'activation': 'ReLU', 'n_linear': 8, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 64, 'weight_decay': 7.982851553991398e-07}. Best is trial 5 with value: 7.983070409522866.
Epoch 10/60 | Train loss: 7.8944 | Valid loss: 8.0866 | Expected time left: 21.10 s

Best trial: 5. Best value: 7.98307:  26%|██▌       | 43/167 [07:16<09:33,  4.62s/it]

[I 2025-01-09 17:23:56,388] Trial 42 finished with value: 8.024587331499372 and parameters: {'lr': 0.00018033983912317568, 'beta1': 0.915, 'beta2': 0.913, 'activation': 'Tanh', 'n_linear': 5, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 96, 'weight_decay': 8.915226333691352e-06}. Best is trial 5 with value: 7.983070409522866.
Epoch 60/60 | Train loss: 7.9999 | Valid loss: 8.0000 | Expected time left: 0.00 s

Best trial: 5. Best value: 7.98307:  26%|██▋       | 44/167 [07:22<10:15,  5.00s/it]

[I 2025-01-09 17:24:02,270] Trial 43 finished with value: 7.999920077945875 and parameters: {'lr': 0.00022595448626583305, 'beta1': 0.908, 'beta2': 0.912, 'activation': 'Tanh', 'n_linear': 6, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 448, 'weight_decay': 0.27932306810832924}. Best is trial 5 with value: 7.983070409522866.
Epoch 59/60 | Train loss: 7.9963 | Valid loss: 8.0059 | Expected time left: 0.97 ss

Best trial: 5. Best value: 7.98307:  27%|██▋       | 45/167 [08:21<42:54, 21.10s/it]

[I 2025-01-09 17:25:00,934] Trial 44 finished with value: 8.001600844410662 and parameters: {'lr': 0.00013248170835329819, 'beta1': 0.927, 'beta2': 0.98, 'activation': 'ReLU', 'n_linear': 5, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 32, 'weight_decay': 5.84869010080286e-06}. Best is trial 5 with value: 7.983070409522866.


Best trial: 5. Best value: 7.98307:  28%|██▊       | 46/167 [08:21<30:01, 14.89s/it]

[I 2025-01-09 17:25:01,333] Trial 45 finished with value: 8.376058324982848 and parameters: {'lr': 0.0026711555047527883, 'beta1': 0.989, 'beta2': 0.9510000000000001, 'activation': 'Tanh', 'n_linear': 3, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 128, 'weight_decay': 1.9610174816809012e-05}. Best is trial 5 with value: 7.983070409522866.
Epoch 24/60 | Train loss: 7.4933 | Valid loss: 8.0653 | Expected time left: 8.33 ss

Best trial: 5. Best value: 7.98307:  28%|██▊       | 47/167 [08:27<24:27, 12.23s/it]

[I 2025-01-09 17:25:07,360] Trial 46 finished with value: 7.994542470054021 and parameters: {'lr': 0.00017358982432215278, 'beta1': 0.989, 'beta2': 0.9590000000000001, 'activation': 'SeLU', 'n_linear': 5, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 160, 'weight_decay': 3.479974605656827e-05}. Best is trial 5 with value: 7.983070409522866.
Epoch 59/60 | Train loss: 7.9999 | Valid loss: 8.0000 | Expected time left: 0.25 ss

Best trial: 5. Best value: 7.98307:  29%|██▊       | 48/167 [08:43<26:07, 13.17s/it]

[I 2025-01-09 17:25:22,738] Trial 47 finished with value: 7.999744901202974 and parameters: {'lr': 0.0019580982652705864, 'beta1': 0.9570000000000001, 'beta2': 0.935, 'activation': 'ReLU', 'n_linear': 3, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 96, 'weight_decay': 0.1893904048361686}. Best is trial 5 with value: 7.983070409522866.
Epoch 60/60 | Train loss: 7.9962 | Valid loss: 8.0046 | Expected time left: 0.00 s

Best trial: 5. Best value: 7.98307:  29%|██▉       | 49/167 [08:48<21:25, 10.89s/it]

[I 2025-01-09 17:25:28,296] Trial 48 finished with value: 8.001123292105538 and parameters: {'lr': 0.00014469965355704128, 'beta1': 0.9520000000000001, 'beta2': 0.9410000000000001, 'activation': 'ReLU', 'n_linear': 8, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 480, 'weight_decay': 0.000789478397848947}. Best is trial 5 with value: 7.983070409522866.


Best trial: 5. Best value: 7.98307:  30%|██▉       | 50/167 [08:49<15:07,  7.76s/it]

[I 2025-01-09 17:25:28,745] Trial 49 finished with value: 8.109415930793398 and parameters: {'lr': 0.001390674140149984, 'beta1': 0.927, 'beta2': 0.976, 'activation': 'Tanh', 'n_linear': 3, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 96, 'weight_decay': 0.0002333354435718439}. Best is trial 5 with value: 7.983070409522866.
Epoch 4/60 | Train loss: 8.0054 | Valid loss: 8.5394 | Expected time left: 10.32 s

Best trial: 5. Best value: 7.98307:  31%|███       | 51/167 [08:50<11:03,  5.72s/it]

[I 2025-01-09 17:25:29,720] Trial 50 finished with value: 8.03410564661026 and parameters: {'lr': 0.0011619270574317214, 'beta1': 0.905, 'beta2': 0.933, 'activation': 'ELU', 'n_linear': 7, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 256, 'weight_decay': 7.636360086759311e-05}. Best is trial 5 with value: 7.983070409522866.
Epoch 59/60 | Train loss: 7.9993 | Valid loss: 8.0003 | Expected time left: 0.22 ss

Best trial: 5. Best value: 7.98307:  31%|███       | 52/167 [09:03<15:31,  8.10s/it]

[I 2025-01-09 17:25:43,355] Trial 51 finished with value: 8.000196248009091 and parameters: {'lr': 0.0004985819284806489, 'beta1': 0.992, 'beta2': 0.983, 'activation': 'ReLU', 'n_linear': 3, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 96, 'weight_decay': 0.03688681715941594}. Best is trial 5 with value: 7.983070409522866.
Epoch 13/60 | Train loss: 7.2884 | Valid loss: 8.1089 | Expected time left: 9.41 ss

Best trial: 52. Best value: 7.95182:  32%|███▏      | 53/167 [09:06<12:23,  6.52s/it]

[I 2025-01-09 17:25:46,202] Trial 52 finished with value: 7.951817572116852 and parameters: {'lr': 0.0002530781435824948, 'beta1': 0.916, 'beta2': 0.916, 'activation': 'ReLU', 'n_linear': 8, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 256, 'weight_decay': 1.2871972971093775e-05}. Best is trial 52 with value: 7.951817572116852.
Epoch 59/60 | Train loss: 8.0000 | Valid loss: 8.0000 | Expected time left: 0.43 ss

Best trial: 52. Best value: 7.95182:  32%|███▏      | 54/167 [09:32<23:21, 12.40s/it]

[I 2025-01-09 17:26:12,331] Trial 53 finished with value: 7.999938419886997 and parameters: {'lr': 0.0031276544147410185, 'beta1': 0.9500000000000001, 'beta2': 0.923, 'activation': 'Tanh', 'n_linear': 6, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 96, 'weight_decay': 0.03615566259438131}. Best is trial 52 with value: 7.951817572116852.
Epoch 59/60 | Train loss: 7.9998 | Valid loss: 8.0003 | Expected time left: 0.16 s

Best trial: 52. Best value: 7.95182:  33%|███▎      | 55/167 [09:42<21:40, 11.61s/it]

[I 2025-01-09 17:26:22,084] Trial 54 finished with value: 7.999937219439812 and parameters: {'lr': 0.0017383831824370022, 'beta1': 0.9530000000000001, 'beta2': 0.989, 'activation': 'ReLU', 'n_linear': 7, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 192, 'weight_decay': 0.05607911259986052}. Best is trial 52 with value: 7.951817572116852.
Epoch 25/60 | Train loss: 7.6392 | Valid loss: 7.9933 | Expected time left: 10.90 s

Best trial: 55. Best value: 7.92227:  34%|███▎      | 56/167 [09:50<19:39, 10.63s/it]

[I 2025-01-09 17:26:30,429] Trial 55 finished with value: 7.922266195690821 and parameters: {'lr': 0.0001664422100921329, 'beta1': 0.984, 'beta2': 0.912, 'activation': 'SeLU', 'n_linear': 7, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 160, 'weight_decay': 2.629492609574197e-05}. Best is trial 55 with value: 7.922266195690821.
Epoch 60/60 | Train loss: 7.9963 | Valid loss: 8.0057 | Expected time left: 0.00 s

Best trial: 55. Best value: 7.92227:  34%|███▍      | 57/167 [09:54<15:44,  8.59s/it]

[I 2025-01-09 17:26:34,252] Trial 56 finished with value: 8.002816608973912 and parameters: {'lr': 0.0002307785464319026, 'beta1': 0.99, 'beta2': 0.9580000000000001, 'activation': 'ELU', 'n_linear': 6, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 480, 'weight_decay': 0.0009035085883460172}. Best is trial 55 with value: 7.922266195690821.
Epoch 59/60 | Train loss: 7.9997 | Valid loss: 8.0002 | Expected time left: 0.10 s

Best trial: 55. Best value: 7.92227:  35%|███▍      | 58/167 [10:00<14:16,  7.86s/it]

[I 2025-01-09 17:26:40,412] Trial 57 finished with value: 8.00003478527069 and parameters: {'lr': 0.0024734415258704863, 'beta1': 0.992, 'beta2': 0.97, 'activation': 'ELU', 'n_linear': 7, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 512, 'weight_decay': 0.07452236090789476}. Best is trial 55 with value: 7.922266195690821.
Epoch 58/60 | Train loss: 7.9999 | Valid loss: 8.0000 | Expected time left: 0.19 s

Best trial: 55. Best value: 7.92227:  35%|███▌      | 59/167 [10:06<13:11,  7.33s/it]

[I 2025-01-09 17:26:46,491] Trial 58 finished with value: 7.999997779301235 and parameters: {'lr': 0.00017755191566504067, 'beta1': 0.992, 'beta2': 0.986, 'activation': 'SeLU', 'n_linear': 4, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 288, 'weight_decay': 0.2747826546459476}. Best is trial 55 with value: 7.922266195690821.
Epoch 3/60 | Train loss: 8.0109 | Valid loss: 8.0374 | Expected time left: 5.66 s

Best trial: 55. Best value: 7.92227:  36%|███▌      | 60/167 [10:07<09:32,  5.35s/it]

[I 2025-01-09 17:26:47,225] Trial 59 finished with value: 8.02920551598072 and parameters: {'lr': 0.0004928241581701508, 'beta1': 0.934, 'beta2': 0.973, 'activation': 'ELU', 'n_linear': 3, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 320, 'weight_decay': 0.0026659378179796646}. Best is trial 55 with value: 7.922266195690821.
Epoch 59/60 | Train loss: 7.9998 | Valid loss: 8.0000 | Expected time left: 0.25 ss

Best trial: 55. Best value: 7.92227:  37%|███▋      | 61/167 [10:23<14:50,  8.40s/it]

[I 2025-01-09 17:27:02,740] Trial 60 finished with value: 8.000036052510708 and parameters: {'lr': 0.00010621574831902212, 'beta1': 0.966, 'beta2': 0.917, 'activation': 'ReLU', 'n_linear': 8, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 128, 'weight_decay': 0.19336596110754986}. Best is trial 55 with value: 7.922266195690821.
Epoch 59/60 | Train loss: 7.9916 | Valid loss: 8.0124 | Expected time left: 0.31 ss

Best trial: 55. Best value: 7.92227:  37%|███▋      | 62/167 [10:41<20:09, 11.51s/it]

[I 2025-01-09 17:27:21,526] Trial 61 finished with value: 8.004611937204997 and parameters: {'lr': 0.0002572049254108753, 'beta1': 0.919, 'beta2': 0.903, 'activation': 'Tanh', 'n_linear': 5, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 96, 'weight_decay': 1.877141077476386e-06}. Best is trial 55 with value: 7.922266195690821.


Best trial: 55. Best value: 7.92227:  38%|███▊      | 63/167 [10:42<14:11,  8.19s/it]

[I 2025-01-09 17:27:21,959] Trial 62 finished with value: 8.717814529998392 and parameters: {'lr': 0.005291407907847561, 'beta1': 0.994, 'beta2': 0.937, 'activation': 'SeLU', 'n_linear': 3, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 128, 'weight_decay': 0.005049509634699893}. Best is trial 55 with value: 7.922266195690821.
Epoch 6/60 | Train loss: 7.9450 | Valid loss: 8.0827 | Expected time left: 4.37 s

Best trial: 55. Best value: 7.92227:  38%|███▊      | 64/167 [10:43<10:20,  6.03s/it]

[I 2025-01-09 17:27:22,933] Trial 63 finished with value: 8.046744708357187 and parameters: {'lr': 0.00010950175393552293, 'beta1': 0.91, 'beta2': 0.979, 'activation': 'SeLU', 'n_linear': 3, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 352, 'weight_decay': 2.7220122033880667e-06}. Best is trial 55 with value: 7.922266195690821.
Epoch 29/60 | Train loss: 7.9433 | Valid loss: 8.0326 | Expected time left: 8.23 ss

Best trial: 55. Best value: 7.92227:  39%|███▉      | 65/167 [10:51<11:21,  6.68s/it]

[I 2025-01-09 17:27:31,149] Trial 64 finished with value: 7.995650215754433 and parameters: {'lr': 0.0003857406966835365, 'beta1': 0.989, 'beta2': 0.901, 'activation': 'SeLU', 'n_linear': 6, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 160, 'weight_decay': 4.5482764854458234e-05}. Best is trial 55 with value: 7.922266195690821.


Best trial: 55. Best value: 7.92227:  40%|███▉      | 66/167 [10:51<08:03,  4.79s/it]

[I 2025-01-09 17:27:31,512] Trial 65 finished with value: 8.10413392384847 and parameters: {'lr': 0.0015156184556529744, 'beta1': 0.926, 'beta2': 0.962, 'activation': 'SeLU', 'n_linear': 8, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 480, 'weight_decay': 0.03971794697104095}. Best is trial 55 with value: 7.922266195690821.
Epoch 59/60 | Train loss: 8.0000 | Valid loss: 8.0000 | Expected time left: 1.07 ss

Best trial: 55. Best value: 7.92227:  40%|████      | 67/167 [11:56<37:50, 22.70s/it]

[I 2025-01-09 17:28:36,019] Trial 66 finished with value: 7.999932244943735 and parameters: {'lr': 0.0007086421962732367, 'beta1': 0.902, 'beta2': 0.926, 'activation': 'SeLU', 'n_linear': 7, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 32, 'weight_decay': 0.25124165520894154}. Best is trial 55 with value: 7.922266195690821.
Epoch 6/60 | Train loss: 7.9225 | Valid loss: 8.0835 | Expected time left: 4.05 s

Best trial: 55. Best value: 7.92227:  41%|████      | 68/167 [11:57<26:40, 16.17s/it]

[I 2025-01-09 17:28:36,945] Trial 67 finished with value: 8.027886033058167 and parameters: {'lr': 0.00017198959181544463, 'beta1': 0.9570000000000001, 'beta2': 0.927, 'activation': 'ELU', 'n_linear': 2, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 320, 'weight_decay': 0.004521231261589039}. Best is trial 55 with value: 7.922266195690821.
Epoch 59/60 | Train loss: 8.0077 | Valid loss: 8.0197 | Expected time left: 0.20 ss

Best trial: 55. Best value: 7.92227:  41%|████▏     | 69/167 [12:09<24:35, 15.06s/it]

[I 2025-01-09 17:28:49,404] Trial 68 finished with value: 8.012640256730338 and parameters: {'lr': 0.0002242552180990901, 'beta1': 0.991, 'beta2': 0.9410000000000001, 'activation': 'SeLU', 'n_linear': 7, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 160, 'weight_decay': 3.088225575112976e-05}. Best is trial 55 with value: 7.922266195690821.


Best trial: 55. Best value: 7.92227:  42%|████▏     | 70/167 [12:10<17:12, 10.65s/it]

[I 2025-01-09 17:28:49,762] Trial 69 finished with value: 8.104733610153199 and parameters: {'lr': 0.0027421538052244043, 'beta1': 0.929, 'beta2': 0.9560000000000001, 'activation': 'ELU', 'n_linear': 3, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 256, 'weight_decay': 0.001506259997315488}. Best is trial 55 with value: 7.922266195690821.
Epoch 27/60 | Train loss: 7.9144 | Valid loss: 7.9620 | Expected time left: 8.22 ss

Best trial: 55. Best value: 7.92227:  43%|████▎     | 71/167 [12:17<15:23,  9.62s/it]

[I 2025-01-09 17:28:56,980] Trial 70 finished with value: 7.9504274110945445 and parameters: {'lr': 0.0004661093143783299, 'beta1': 0.977, 'beta2': 0.91, 'activation': 'SeLU', 'n_linear': 5, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 160, 'weight_decay': 0.0013539875408994607}. Best is trial 55 with value: 7.922266195690821.
Epoch 58/60 | Train loss: 7.9997 | Valid loss: 8.0001 | Expected time left: 0.22 s

Best trial: 55. Best value: 7.92227:  43%|████▎     | 72/167 [12:24<13:52,  8.77s/it]

[I 2025-01-09 17:29:03,764] Trial 71 finished with value: 8.000010207847312 and parameters: {'lr': 0.0005271664874398606, 'beta1': 0.909, 'beta2': 0.991, 'activation': 'SeLU', 'n_linear': 5, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 384, 'weight_decay': 0.0879770835474937}. Best is trial 55 with value: 7.922266195690821.
Epoch 12/60 | Train loss: 7.3831 | Valid loss: 8.0129 | Expected time left: 6.95 s

Best trial: 55. Best value: 7.92227:  44%|████▎     | 73/167 [12:26<10:40,  6.81s/it]

[I 2025-01-09 17:29:06,022] Trial 72 finished with value: 7.96382059874358 and parameters: {'lr': 0.00031574466246528734, 'beta1': 0.9480000000000001, 'beta2': 0.922, 'activation': 'ReLU', 'n_linear': 8, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 384, 'weight_decay': 1.2062659019648327e-05}. Best is trial 55 with value: 7.922266195690821.


Best trial: 55. Best value: 7.92227:  44%|████▍     | 74/167 [12:27<07:51,  5.07s/it]

[I 2025-01-09 17:29:07,007] Trial 73 finished with value: 8.132045250789375 and parameters: {'lr': 0.007059799883532963, 'beta1': 0.997, 'beta2': 0.994, 'activation': 'SeLU', 'n_linear': 7, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 64, 'weight_decay': 0.0074820642137205145}. Best is trial 55 with value: 7.922266195690821.
Epoch 10/60 | Train loss: 7.9121 | Valid loss: 8.0229 | Expected time left: 5.49 s

Best trial: 55. Best value: 7.92227:  45%|████▍     | 75/167 [12:28<06:06,  3.98s/it]

[I 2025-01-09 17:29:08,458] Trial 74 finished with value: 8.013435633286186 and parameters: {'lr': 0.0004783545701645277, 'beta1': 0.972, 'beta2': 0.906, 'activation': 'ELU', 'n_linear': 6, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 448, 'weight_decay': 1.4089185137002015e-07}. Best is trial 55 with value: 7.922266195690821.


Best trial: 55. Best value: 7.92227:  46%|████▌     | 76/167 [12:29<04:22,  2.88s/it]

[I 2025-01-09 17:29:08,770] Trial 75 finished with value: 8.44196132659912 and parameters: {'lr': 0.0056156928410646505, 'beta1': 0.9520000000000001, 'beta2': 0.993, 'activation': 'SeLU', 'n_linear': 4, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 416, 'weight_decay': 8.401705901798714e-05}. Best is trial 55 with value: 7.922266195690821.


Best trial: 55. Best value: 7.92227:  46%|████▌     | 77/167 [12:29<03:11,  2.13s/it]

[I 2025-01-09 17:29:09,149] Trial 76 finished with value: 8.11158727010091 and parameters: {'lr': 0.0006961679164698046, 'beta1': 0.973, 'beta2': 0.923, 'activation': 'SeLU', 'n_linear': 3, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 224, 'weight_decay': 0.005721777674617165}. Best is trial 55 with value: 7.922266195690821.
Epoch 15/60 | Train loss: 7.9057 | Valid loss: 8.0690 | Expected time left: 3.96 s

Best trial: 55. Best value: 7.92227:  47%|████▋     | 78/167 [12:31<02:59,  2.01s/it]

[I 2025-01-09 17:29:10,891] Trial 77 finished with value: 8.020309782028198 and parameters: {'lr': 0.00027321703609844934, 'beta1': 0.994, 'beta2': 0.978, 'activation': 'Tanh', 'n_linear': 5, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 512, 'weight_decay': 0.003527983480753074}. Best is trial 55 with value: 7.922266195690821.
Epoch 60/60 | Train loss: 8.0358 | Valid loss: 8.0420 | Expected time left: 0.00 s

Best trial: 55. Best value: 7.92227:  47%|████▋     | 79/167 [12:34<03:23,  2.31s/it]

[I 2025-01-09 17:29:13,897] Trial 78 finished with value: 8.02504997253418 and parameters: {'lr': 0.0016423797805174508, 'beta1': 0.9510000000000001, 'beta2': 0.923, 'activation': 'Tanh', 'n_linear': 4, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 512, 'weight_decay': 1.6491750070948784e-06}. Best is trial 55 with value: 7.922266195690821.
Epoch 60/60 | Train loss: 7.9980 | Valid loss: 8.0070 | Expected time left: 0.00 s

Best trial: 55. Best value: 7.92227:  48%|████▊     | 80/167 [12:45<07:09,  4.93s/it]

[I 2025-01-09 17:29:24,945] Trial 79 finished with value: 8.003566764649891 and parameters: {'lr': 0.0006201318507156901, 'beta1': 0.975, 'beta2': 0.9690000000000001, 'activation': 'SeLU', 'n_linear': 5, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 160, 'weight_decay': 8.065001434645899e-07}. Best is trial 55 with value: 7.922266195690821.
Epoch 59/60 | Train loss: 7.9955 | Valid loss: 8.0133 | Expected time left: 0.59 ss

Best trial: 55. Best value: 7.92227:  49%|████▊     | 81/167 [13:21<20:21, 14.21s/it]

[I 2025-01-09 17:30:00,798] Trial 80 finished with value: 8.006932018668788 and parameters: {'lr': 0.005990956410666696, 'beta1': 0.995, 'beta2': 0.986, 'activation': 'ReLU', 'n_linear': 4, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 64, 'weight_decay': 3.477622312613441e-06}. Best is trial 55 with value: 7.922266195690821.
Epoch 57/60 | Train loss: 7.9977 | Valid loss: 8.0021 | Expected time left: 0.23 s

Best trial: 55. Best value: 7.92227:  49%|████▉     | 82/167 [13:25<16:07, 11.38s/it]

[I 2025-01-09 17:30:05,586] Trial 81 finished with value: 8.00105417251587 and parameters: {'lr': 0.0023255063370989084, 'beta1': 0.907, 'beta2': 0.985, 'activation': 'Tanh', 'n_linear': 4, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 416, 'weight_decay': 0.004065957573101796}. Best is trial 55 with value: 7.922266195690821.


Best trial: 55. Best value: 7.92227:  50%|████▉     | 83/167 [13:26<11:18,  8.07s/it]

[I 2025-01-09 17:30:05,940] Trial 82 finished with value: 8.102133211882219 and parameters: {'lr': 0.005025316949970213, 'beta1': 0.986, 'beta2': 0.97, 'activation': 'ReLU', 'n_linear': 7, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 448, 'weight_decay': 7.98158370036148e-07}. Best is trial 55 with value: 7.922266195690821.
Epoch 16/60 | Train loss: 7.9776 | Valid loss: 8.0860 | Expected time left: 4.49 s

Best trial: 55. Best value: 7.92227:  50%|█████     | 84/167 [13:28<08:38,  6.25s/it]

[I 2025-01-09 17:30:07,920] Trial 83 finished with value: 8.02766620701757 and parameters: {'lr': 0.0004679912234289147, 'beta1': 0.974, 'beta2': 0.916, 'activation': 'SeLU', 'n_linear': 4, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 352, 'weight_decay': 0.0006024176089512067}. Best is trial 55 with value: 7.922266195690821.
Epoch 59/60 | Train loss: 7.9996 | Valid loss: 8.0016 | Expected time left: 0.32 ss

Best trial: 55. Best value: 7.92227:  51%|█████     | 85/167 [13:47<14:01, 10.26s/it]

[I 2025-01-09 17:30:27,548] Trial 84 finished with value: 8.000945621066624 and parameters: {'lr': 0.0036281964186455234, 'beta1': 0.91, 'beta2': 0.976, 'activation': 'SeLU', 'n_linear': 8, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 160, 'weight_decay': 0.00906159442931998}. Best is trial 55 with value: 7.922266195690821.
Epoch 59/60 | Train loss: 7.9924 | Valid loss: 8.0232 | Expected time left: 1.74 ss

Best trial: 55. Best value: 7.92227:  51%|█████▏    | 86/167 [15:32<51:59, 38.51s/it]

[I 2025-01-09 17:32:11,968] Trial 85 finished with value: 8.01597065666613 and parameters: {'lr': 0.00013636446185547795, 'beta1': 0.9580000000000001, 'beta2': 0.934, 'activation': 'Tanh', 'n_linear': 8, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 32, 'weight_decay': 1.8419232676703592e-05}. Best is trial 55 with value: 7.922266195690821.


Best trial: 55. Best value: 7.92227:  52%|█████▏    | 87/167 [15:33<36:17, 27.22s/it]

[I 2025-01-09 17:32:12,833] Trial 86 finished with value: 8.198648252304952 and parameters: {'lr': 0.0009490440720289569, 'beta1': 0.977, 'beta2': 0.968, 'activation': 'ELU', 'n_linear': 5, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 64, 'weight_decay': 0.005800896469261536}. Best is trial 55 with value: 7.922266195690821.
Epoch 5/60 | Train loss: 7.9927 | Valid loss: 8.4010 | Expected time left: 5.60 s

Best trial: 55. Best value: 7.92227:  53%|█████▎    | 88/167 [15:33<25:22, 19.28s/it]

[I 2025-01-09 17:32:13,582] Trial 87 finished with value: 8.025574343545097 and parameters: {'lr': 0.0020371472807574694, 'beta1': 0.927, 'beta2': 0.995, 'activation': 'ELU', 'n_linear': 6, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 480, 'weight_decay': 4.070823202817118e-06}. Best is trial 55 with value: 7.922266195690821.
Epoch 57/60 | Train loss: 7.9967 | Valid loss: 8.0024 | Expected time left: 0.21 s

Best trial: 55. Best value: 7.92227:  53%|█████▎    | 89/167 [15:38<19:18, 14.85s/it]

[I 2025-01-09 17:32:18,097] Trial 88 finished with value: 8.001608791351318 and parameters: {'lr': 0.000266814488011346, 'beta1': 0.903, 'beta2': 0.9650000000000001, 'activation': 'Tanh', 'n_linear': 3, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 416, 'weight_decay': 0.0024184786141986203}. Best is trial 55 with value: 7.922266195690821.
Epoch 59/60 | Train loss: 7.9964 | Valid loss: 8.0043 | Expected time left: 0.38 ss

Best trial: 55. Best value: 7.92227:  54%|█████▍    | 90/167 [16:01<22:10, 17.27s/it]

[I 2025-01-09 17:32:41,030] Trial 89 finished with value: 8.002077379044454 and parameters: {'lr': 0.00018301629338242243, 'beta1': 0.903, 'beta2': 0.992, 'activation': 'SeLU', 'n_linear': 2, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 64, 'weight_decay': 0.0011852222979523287}. Best is trial 55 with value: 7.922266195690821.
Epoch 60/60 | Train loss: 7.9961 | Valid loss: 8.0048 | Expected time left: 0.00 s

Best trial: 55. Best value: 7.92227:  54%|█████▍    | 91/167 [16:06<17:18, 13.66s/it]

[I 2025-01-09 17:32:46,253] Trial 90 finished with value: 8.001002063751221 and parameters: {'lr': 0.0003451332562030653, 'beta1': 0.979, 'beta2': 0.931, 'activation': 'ReLU', 'n_linear': 5, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 416, 'weight_decay': 1.8937898189134362e-07}. Best is trial 55 with value: 7.922266195690821.
Epoch 59/60 | Train loss: 7.9971 | Valid loss: 8.0063 | Expected time left: 0.55 ss

Best trial: 55. Best value: 7.92227:  55%|█████▌    | 92/167 [16:39<24:18, 19.45s/it]

[I 2025-01-09 17:33:19,205] Trial 91 finished with value: 8.001416434148314 and parameters: {'lr': 0.0009141424874414852, 'beta1': 0.91, 'beta2': 0.924, 'activation': 'ReLU', 'n_linear': 6, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 64, 'weight_decay': 2.8470068918766612e-05}. Best is trial 55 with value: 7.922266195690821.
Epoch 59/60 | Train loss: 8.0000 | Valid loss: 8.0000 | Expected time left: 0.74 ss

Best trial: 55. Best value: 7.92227:  56%|█████▌    | 93/167 [17:24<33:21, 27.05s/it]

[I 2025-01-09 17:34:03,996] Trial 92 finished with value: 7.999980591356564 and parameters: {'lr': 0.00011652154047274468, 'beta1': 0.907, 'beta2': 0.9390000000000001, 'activation': 'Tanh', 'n_linear': 3, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 32, 'weight_decay': 0.35767264283142663}. Best is trial 55 with value: 7.922266195690821.
Epoch 58/60 | Train loss: 7.9966 | Valid loss: 8.0025 | Expected time left: 0.24 s

Best trial: 55. Best value: 7.92227:  56%|█████▋    | 94/167 [17:31<25:43, 21.15s/it]

[I 2025-01-09 17:34:11,369] Trial 93 finished with value: 8.000341165065766 and parameters: {'lr': 0.00012704060909328655, 'beta1': 0.9540000000000001, 'beta2': 0.97, 'activation': 'ReLU', 'n_linear': 7, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 256, 'weight_decay': 0.0031992182338099317}. Best is trial 55 with value: 7.922266195690821.
Epoch 57/60 | Train loss: 7.9963 | Valid loss: 8.0057 | Expected time left: 0.23 s

Best trial: 55. Best value: 7.92227:  57%|█████▋    | 95/167 [17:36<19:28, 16.24s/it]

[I 2025-01-09 17:34:16,145] Trial 94 finished with value: 8.003382156635153 and parameters: {'lr': 0.0011292895825110586, 'beta1': 0.973, 'beta2': 0.908, 'activation': 'Tanh', 'n_linear': 3, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 352, 'weight_decay': 0.00030716378829921367}. Best is trial 55 with value: 7.922266195690821.
Epoch 57/60 | Train loss: 7.9981 | Valid loss: 8.0011 | Expected time left: 0.24 s

Best trial: 55. Best value: 7.92227:  57%|█████▋    | 96/167 [17:41<15:15, 12.90s/it]

[I 2025-01-09 17:34:21,262] Trial 95 finished with value: 8.000549026157545 and parameters: {'lr': 0.001290408163534379, 'beta1': 0.9710000000000001, 'beta2': 0.922, 'activation': 'ReLU', 'n_linear': 6, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 448, 'weight_decay': 0.012307886662005461}. Best is trial 55 with value: 7.922266195690821.


Best trial: 55. Best value: 7.92227:  58%|█████▊    | 97/167 [17:41<10:38,  9.12s/it]

[I 2025-01-09 17:34:21,570] Trial 96 finished with value: 8.215665998912993 and parameters: {'lr': 0.0030578279246624316, 'beta1': 0.9420000000000001, 'beta2': 0.934, 'activation': 'SeLU', 'n_linear': 6, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 480, 'weight_decay': 8.954606130655998e-05}. Best is trial 55 with value: 7.922266195690821.
Epoch 56/60 | Train loss: 8.0059 | Valid loss: 8.0070 | Expected time left: 0.16 s

Best trial: 55. Best value: 7.92227:  59%|█████▊    | 98/167 [17:44<08:14,  7.17s/it]

[I 2025-01-09 17:34:24,176] Trial 97 finished with value: 8.007037806510926 and parameters: {'lr': 0.000357812570948876, 'beta1': 0.9590000000000001, 'beta2': 0.991, 'activation': 'Tanh', 'n_linear': 2, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 512, 'weight_decay': 1.848581783149663e-06}. Best is trial 55 with value: 7.922266195690821.
Epoch 26/60 | Train loss: 7.9738 | Valid loss: 8.0401 | Expected time left: 4.13 s

Best trial: 55. Best value: 7.92227:  59%|█████▉    | 99/167 [17:48<06:55,  6.12s/it]

[I 2025-01-09 17:34:27,837] Trial 98 finished with value: 8.005872512685842 and parameters: {'lr': 0.005114111432590181, 'beta1': 0.9490000000000001, 'beta2': 0.924, 'activation': 'ReLU', 'n_linear': 6, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 352, 'weight_decay': 1.889428446215504e-05}. Best is trial 55 with value: 7.922266195690821.
Epoch 58/60 | Train loss: 7.9988 | Valid loss: 8.0014 | Expected time left: 0.25 s

Best trial: 55. Best value: 7.92227:  60%|█████▉    | 100/167 [17:55<07:19,  6.57s/it]

[I 2025-01-09 17:34:35,451] Trial 99 finished with value: 8.00119473040104 and parameters: {'lr': 0.0035232726404470314, 'beta1': 0.913, 'beta2': 0.996, 'activation': 'ReLU', 'n_linear': 5, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 320, 'weight_decay': 0.008860138651974053}. Best is trial 55 with value: 7.922266195690821.
Epoch 14/60 | Train loss: 7.8982 | Valid loss: 8.0973 | Expected time left: 13.67 s

Best trial: 55. Best value: 7.92227:  60%|██████    | 101/167 [18:00<06:36,  6.01s/it]

[I 2025-01-09 17:34:40,155] Trial 100 finished with value: 8.027148859841484 and parameters: {'lr': 0.0002158160878940562, 'beta1': 0.916, 'beta2': 0.903, 'activation': 'Tanh', 'n_linear': 7, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 160, 'weight_decay': 2.509620149573303e-07}. Best is trial 55 with value: 7.922266195690821.
Epoch 59/60 | Train loss: 8.0001 | Valid loss: 8.0000 | Expected time left: 1.20 ss

Best trial: 55. Best value: 7.92227:  61%|██████    | 102/167 [19:12<27:57, 25.81s/it]

[I 2025-01-09 17:35:52,173] Trial 101 finished with value: 7.999807763023498 and parameters: {'lr': 0.0011543541164950248, 'beta1': 0.993, 'beta2': 0.903, 'activation': 'ELU', 'n_linear': 5, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 32, 'weight_decay': 0.45992021426296353}. Best is trial 55 with value: 7.922266195690821.


Best trial: 55. Best value: 7.92227:  62%|██████▏   | 103/167 [19:12<19:21, 18.15s/it]

[I 2025-01-09 17:35:52,462] Trial 102 finished with value: 8.119045495986938 and parameters: {'lr': 0.0018977538258483466, 'beta1': 0.986, 'beta2': 0.9450000000000001, 'activation': 'ELU', 'n_linear': 2, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 512, 'weight_decay': 4.542605375861925e-07}. Best is trial 55 with value: 7.922266195690821.
Epoch 59/60 | Train loss: 8.0001 | Valid loss: 8.0000 | Expected time left: 1.29 ss

Best trial: 55. Best value: 7.92227:  62%|██████▏   | 104/167 [20:30<37:47, 35.99s/it]

[I 2025-01-09 17:37:10,066] Trial 103 finished with value: 7.999820665048715 and parameters: {'lr': 0.0022141357104086042, 'beta1': 0.9440000000000001, 'beta2': 0.986, 'activation': 'Tanh', 'n_linear': 6, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 32, 'weight_decay': 0.3285798057639221}. Best is trial 55 with value: 7.922266195690821.
Epoch 58/60 | Train loss: 7.9994 | Valid loss: 8.0005 | Expected time left: 0.20 s

Best trial: 55. Best value: 7.92227:  63%|██████▎   | 105/167 [20:36<27:59, 27.09s/it]

[I 2025-01-09 17:37:16,407] Trial 104 finished with value: 8.000311747841213 and parameters: {'lr': 0.0023936106144420467, 'beta1': 0.9670000000000001, 'beta2': 0.921, 'activation': 'ReLU', 'n_linear': 6, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 448, 'weight_decay': 0.029970803646369327}. Best is trial 55 with value: 7.922266195690821.
Epoch 60/60 | Train loss: 7.9526 | Valid loss: 8.0840 | Expected time left: 0.00 s

Best trial: 55. Best value: 7.92227:  63%|██████▎   | 106/167 [20:47<22:31, 22.15s/it]

[I 2025-01-09 17:37:27,027] Trial 105 finished with value: 8.006422519683838 and parameters: {'lr': 0.0013615466375387765, 'beta1': 0.973, 'beta2': 0.987, 'activation': 'Tanh', 'n_linear': 7, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 288, 'weight_decay': 1.0972627184563681e-07}. Best is trial 55 with value: 7.922266195690821.
Epoch 60/60 | Train loss: 7.9964 | Valid loss: 8.0049 | Expected time left: 0.00 s

Best trial: 55. Best value: 7.92227:  64%|██████▍   | 107/167 [20:53<17:13, 17.23s/it]

[I 2025-01-09 17:37:32,778] Trial 106 finished with value: 8.00105294585228 and parameters: {'lr': 0.0004458324338819717, 'beta1': 0.936, 'beta2': 0.9390000000000001, 'activation': 'ReLU', 'n_linear': 4, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 320, 'weight_decay': 0.00017557323312532153}. Best is trial 55 with value: 7.922266195690821.
Epoch 57/60 | Train loss: 7.9977 | Valid loss: 8.0013 | Expected time left: 0.22 s

Best trial: 55. Best value: 7.92227:  65%|██████▍   | 108/167 [20:57<13:12, 13.44s/it]

[I 2025-01-09 17:37:37,371] Trial 107 finished with value: 8.000492863033129 and parameters: {'lr': 0.0005748725349824577, 'beta1': 0.986, 'beta2': 0.9510000000000001, 'activation': 'ReLU', 'n_linear': 4, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 448, 'weight_decay': 0.010638037908397145}. Best is trial 55 with value: 7.922266195690821.


Best trial: 55. Best value: 7.92227:  65%|██████▌   | 109/167 [20:58<09:13,  9.54s/it]

[I 2025-01-09 17:37:37,815] Trial 108 finished with value: 12.68704264163971 and parameters: {'lr': 0.008147931925848268, 'beta1': 0.901, 'beta2': 0.919, 'activation': 'ELU', 'n_linear': 8, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 256, 'weight_decay': 6.272432177024974e-06}. Best is trial 55 with value: 7.922266195690821.
Epoch 59/60 | Train loss: 7.9962 | Valid loss: 8.0050 | Expected time left: 0.27 ss

Best trial: 55. Best value: 7.92227:  66%|██████▌   | 110/167 [21:14<11:05, 11.68s/it]

[I 2025-01-09 17:37:54,482] Trial 109 finished with value: 8.002456687745594 and parameters: {'lr': 0.0003713178301486517, 'beta1': 0.93, 'beta2': 0.98, 'activation': 'Tanh', 'n_linear': 3, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 96, 'weight_decay': 0.00047439891345874734}. Best is trial 55 with value: 7.922266195690821.
Epoch 60/60 | Train loss: 7.9999 | Valid loss: 8.0001 | Expected time left: 0.00 s

Best trial: 55. Best value: 7.92227:  66%|██████▋   | 111/167 [21:20<09:08,  9.79s/it]

[I 2025-01-09 17:37:59,876] Trial 110 finished with value: 7.999946055204972 and parameters: {'lr': 0.002618740853476685, 'beta1': 0.91, 'beta2': 0.9560000000000001, 'activation': 'SeLU', 'n_linear': 5, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 448, 'weight_decay': 0.15468394635940663}. Best is trial 55 with value: 7.922266195690821.
Epoch 3/60 | Train loss: 7.9858 | Valid loss: 8.0722 | Expected time left: 5.71 s

Best trial: 55. Best value: 7.92227:  67%|██████▋   | 112/167 [21:20<06:28,  7.07s/it]

[I 2025-01-09 17:38:00,597] Trial 111 finished with value: 8.03758900741051 and parameters: {'lr': 0.0002858640510403272, 'beta1': 0.921, 'beta2': 0.9610000000000001, 'activation': 'ELU', 'n_linear': 3, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 352, 'weight_decay': 4.66387816570709e-07}. Best is trial 55 with value: 7.922266195690821.


Best trial: 55. Best value: 7.92227:  68%|██████▊   | 113/167 [21:21<04:34,  5.08s/it]

[I 2025-01-09 17:38:01,027] Trial 112 finished with value: 8.189389452149596 and parameters: {'lr': 0.0017689625932621829, 'beta1': 0.9450000000000001, 'beta2': 0.9580000000000001, 'activation': 'ELU', 'n_linear': 2, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 128, 'weight_decay': 5.4493157293786755e-05}. Best is trial 55 with value: 7.922266195690821.


Best trial: 55. Best value: 7.92227:  68%|██████▊   | 114/167 [21:21<03:14,  3.67s/it]

[I 2025-01-09 17:38:01,400] Trial 113 finished with value: 8.113557091465703 and parameters: {'lr': 0.0015450860461205145, 'beta1': 0.9490000000000001, 'beta2': 0.998, 'activation': 'SeLU', 'n_linear': 7, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 384, 'weight_decay': 0.0011186865563555292}. Best is trial 55 with value: 7.922266195690821.
Epoch 60/60 | Train loss: 7.9962 | Valid loss: 8.0047 | Expected time left: 0.00 s

Best trial: 55. Best value: 7.92227:  69%|██████▉   | 115/167 [21:33<05:10,  5.98s/it]

[I 2025-01-09 17:38:12,778] Trial 114 finished with value: 8.000695663162425 and parameters: {'lr': 0.0003534445856055918, 'beta1': 0.907, 'beta2': 0.908, 'activation': 'ReLU', 'n_linear': 4, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 128, 'weight_decay': 0.0006026753131720383}. Best is trial 55 with value: 7.922266195690821.
Epoch 58/60 | Train loss: 8.0000 | Valid loss: 8.0001 | Expected time left: 0.29 s

Best trial: 55. Best value: 7.92227:  69%|██████▉   | 116/167 [21:41<05:48,  6.82s/it]

[I 2025-01-09 17:38:21,570] Trial 115 finished with value: 7.999894033159529 and parameters: {'lr': 0.005756495574542673, 'beta1': 0.9650000000000001, 'beta2': 0.9530000000000001, 'activation': 'Tanh', 'n_linear': 6, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 288, 'weight_decay': 0.5050995615651582}. Best is trial 55 with value: 7.922266195690821.
Epoch 10/60 | Train loss: 7.8840 | Valid loss: 8.0902 | Expected time left: 7.20 s

Best trial: 55. Best value: 7.92227:  70%|███████   | 117/167 [21:43<04:28,  5.37s/it]

[I 2025-01-09 17:38:23,537] Trial 116 finished with value: 8.021958255767823 and parameters: {'lr': 0.0002241846560554654, 'beta1': 0.9690000000000001, 'beta2': 0.92, 'activation': 'Tanh', 'n_linear': 4, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 224, 'weight_decay': 1.1665734639618738e-06}. Best is trial 55 with value: 7.922266195690821.


Best trial: 55. Best value: 7.92227:  71%|███████   | 118/167 [21:45<03:29,  4.28s/it]

[I 2025-01-09 17:38:25,293] Trial 117 finished with value: 13.421851816268775 and parameters: {'lr': 0.009659774857064811, 'beta1': 0.992, 'beta2': 0.9530000000000001, 'activation': 'ReLU', 'n_linear': 7, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 32, 'weight_decay': 1.3240320307829758e-05}. Best is trial 55 with value: 7.922266195690821.
Epoch 3/60 | Train loss: 7.9966 | Valid loss: 8.7716 | Expected time left: 4.91 s

Best trial: 55. Best value: 7.92227:  71%|███████▏  | 119/167 [21:46<02:31,  3.15s/it]

[I 2025-01-09 17:38:25,790] Trial 118 finished with value: 8.005204677581787 and parameters: {'lr': 0.00033591170695115597, 'beta1': 0.936, 'beta2': 0.908, 'activation': 'ReLU', 'n_linear': 5, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 512, 'weight_decay': 0.09119577201968872}. Best is trial 55 with value: 7.922266195690821.
Epoch 59/60 | Train loss: 7.9973 | Valid loss: 8.0015 | Expected time left: 0.36 ss

Best trial: 55. Best value: 7.92227:  72%|███████▏  | 120/167 [22:07<06:51,  8.75s/it]

[I 2025-01-09 17:38:47,604] Trial 119 finished with value: 8.000923115866525 and parameters: {'lr': 0.0002612287839371927, 'beta1': 0.993, 'beta2': 0.911, 'activation': 'ELU', 'n_linear': 7, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 96, 'weight_decay': 0.007322470358371905}. Best is trial 55 with value: 7.922266195690821.
Epoch 58/60 | Train loss: 8.0420 | Valid loss: 8.0427 | Expected time left: 0.23 s

Best trial: 55. Best value: 7.92227:  72%|███████▏  | 121/167 [22:15<06:20,  8.28s/it]

[I 2025-01-09 17:38:54,777] Trial 120 finished with value: 8.010196018218995 and parameters: {'lr': 0.0004922655605142162, 'beta1': 0.997, 'beta2': 0.964, 'activation': 'Tanh', 'n_linear': 5, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 224, 'weight_decay': 1.0583222947764978e-06}. Best is trial 55 with value: 7.922266195690821.
Epoch 10/60 | Train loss: 7.9511 | Valid loss: 8.0934 | Expected time left: 5.73 s

Best trial: 55. Best value: 7.92227:  73%|███████▎  | 122/167 [22:16<04:42,  6.28s/it]

[I 2025-01-09 17:38:56,387] Trial 121 finished with value: 8.033138505343732 and parameters: {'lr': 0.0005433364546383788, 'beta1': 0.906, 'beta2': 0.902, 'activation': 'Tanh', 'n_linear': 5, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 352, 'weight_decay': 3.2528742722350166e-06}. Best is trial 55 with value: 7.922266195690821.


Best trial: 55. Best value: 7.92227:  74%|███████▎  | 123/167 [22:17<03:23,  4.63s/it]

[I 2025-01-09 17:38:57,185] Trial 122 finished with value: 8.12816294715518 and parameters: {'lr': 0.0017345064125816876, 'beta1': 0.9650000000000001, 'beta2': 0.915, 'activation': 'SeLU', 'n_linear': 8, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 96, 'weight_decay': 7.074480860267659e-07}. Best is trial 55 with value: 7.922266195690821.
Epoch 59/60 | Train loss: 7.9999 | Valid loss: 8.0001 | Expected time left: 0.45 ss

Best trial: 55. Best value: 7.92227:  74%|███████▍  | 124/167 [22:44<08:09, 11.39s/it]

[I 2025-01-09 17:39:24,349] Trial 123 finished with value: 8.000016239711217 and parameters: {'lr': 0.00040415717677256944, 'beta1': 0.91, 'beta2': 0.9690000000000001, 'activation': 'ELU', 'n_linear': 6, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 96, 'weight_decay': 0.16077906794700295}. Best is trial 55 with value: 7.922266195690821.


Best trial: 55. Best value: 7.92227:  75%|███████▍  | 125/167 [22:44<05:38,  8.06s/it]

[I 2025-01-09 17:39:24,645] Trial 124 finished with value: 8.124353332519531 and parameters: {'lr': 0.000793540867007523, 'beta1': 0.919, 'beta2': 0.936, 'activation': 'SeLU', 'n_linear': 2, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 416, 'weight_decay': 9.115236477925342e-07}. Best is trial 55 with value: 7.922266195690821.


Best trial: 55. Best value: 7.92227:  75%|███████▌  | 126/167 [22:45<03:59,  5.84s/it]

[I 2025-01-09 17:39:25,312] Trial 125 finished with value: 8.178082794140858 and parameters: {'lr': 0.0011765767520027447, 'beta1': 0.921, 'beta2': 0.901, 'activation': 'SeLU', 'n_linear': 5, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 64, 'weight_decay': 0.0002616302517793903}. Best is trial 55 with value: 7.922266195690821.
Epoch 12/60 | Train loss: 7.2741 | Valid loss: 8.0372 | Expected time left: 6.19 s

Best trial: 55. Best value: 7.92227:  76%|███████▌  | 127/167 [22:47<03:06,  4.67s/it]

[I 2025-01-09 17:39:27,229] Trial 126 finished with value: 8.012300244084111 and parameters: {'lr': 0.0001995709521715485, 'beta1': 0.932, 'beta2': 0.973, 'activation': 'ReLU', 'n_linear': 7, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 384, 'weight_decay': 0.00011567030171189666}. Best is trial 55 with value: 7.922266195690821.
Epoch 59/60 | Train loss: 7.9983 | Valid loss: 8.0008 | Expected time left: 0.39 ss

Best trial: 55. Best value: 7.92227:  77%|███████▋  | 128/167 [23:10<06:40, 10.28s/it]

[I 2025-01-09 17:39:50,605] Trial 127 finished with value: 8.000207619439989 and parameters: {'lr': 0.0003231031887061631, 'beta1': 0.984, 'beta2': 0.903, 'activation': 'ReLU', 'n_linear': 8, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 96, 'weight_decay': 0.014522332667474182}. Best is trial 55 with value: 7.922266195690821.
Epoch 58/60 | Train loss: 7.9991 | Valid loss: 8.0004 | Expected time left: 0.27 s

Best trial: 55. Best value: 7.92227:  77%|███████▋  | 129/167 [23:19<06:09,  9.72s/it]

[I 2025-01-09 17:39:59,015] Trial 128 finished with value: 8.000246983654094 and parameters: {'lr': 0.0010578981109429216, 'beta1': 0.968, 'beta2': 0.904, 'activation': 'SeLU', 'n_linear': 2, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 192, 'weight_decay': 0.026459078712388277}. Best is trial 55 with value: 7.922266195690821.


Best trial: 55. Best value: 7.92227:  78%|███████▊  | 130/167 [23:19<04:15,  6.91s/it]

[I 2025-01-09 17:39:59,368] Trial 129 finished with value: 8.099805504083633 and parameters: {'lr': 0.0021025252598625573, 'beta1': 0.918, 'beta2': 0.917, 'activation': 'ELU', 'n_linear': 2, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 320, 'weight_decay': 4.716039973715041e-05}. Best is trial 55 with value: 7.922266195690821.
Epoch 18/60 | Train loss: 7.9351 | Valid loss: 9.1022 | Expected time left: 7.30 s

Best trial: 55. Best value: 7.92227:  78%|███████▊  | 131/167 [23:23<03:30,  5.85s/it]

[I 2025-01-09 17:40:02,744] Trial 130 finished with value: 7.962071910500526 and parameters: {'lr': 0.00025785438683720324, 'beta1': 0.912, 'beta2': 0.9610000000000001, 'activation': 'ReLU', 'n_linear': 8, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 320, 'weight_decay': 0.017239007396785525}. Best is trial 55 with value: 7.922266195690821.
Epoch 59/60 | Train loss: 7.9263 | Valid loss: 7.9843 | Expected time left: 0.58 ss

Best trial: 55. Best value: 7.92227:  79%|███████▉  | 132/167 [23:57<08:28, 14.54s/it]

[I 2025-01-09 17:40:37,551] Trial 131 finished with value: 7.9607506041314195 and parameters: {'lr': 0.004202808250827565, 'beta1': 0.9650000000000001, 'beta2': 0.912, 'activation': 'SeLU', 'n_linear': 5, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 64, 'weight_decay': 2.4115342201105617e-06}. Best is trial 55 with value: 7.922266195690821.
Epoch 57/60 | Train loss: 7.9990 | Valid loss: 8.0022 | Expected time left: 0.24 s

Best trial: 55. Best value: 7.92227:  80%|███████▉  | 133/167 [24:02<06:37, 11.68s/it]

[I 2025-01-09 17:40:42,572] Trial 132 finished with value: 8.001370468139648 and parameters: {'lr': 0.005940866598045098, 'beta1': 0.964, 'beta2': 0.928, 'activation': 'Tanh', 'n_linear': 3, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 416, 'weight_decay': 0.003918617410854137}. Best is trial 55 with value: 7.922266195690821.
Epoch 60/60 | Train loss: 7.9962 | Valid loss: 8.0042 | Expected time left: 0.00 s

Best trial: 55. Best value: 7.92227:  80%|████████  | 134/167 [24:08<05:27,  9.93s/it]

[I 2025-01-09 17:40:48,425] Trial 133 finished with value: 8.000676879176387 and parameters: {'lr': 0.00022639015610599372, 'beta1': 0.9540000000000001, 'beta2': 0.998, 'activation': 'ReLU', 'n_linear': 7, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 384, 'weight_decay': 0.0006246840804307461}. Best is trial 55 with value: 7.922266195690821.
Epoch 60/60 | Train loss: 8.0000 | Valid loss: 7.9999 | Expected time left: 0.00 s

Best trial: 55. Best value: 7.92227:  81%|████████  | 135/167 [24:14<04:33,  8.56s/it]

[I 2025-01-09 17:40:53,778] Trial 134 finished with value: 7.999913427564833 and parameters: {'lr': 0.0015159866641908737, 'beta1': 0.9500000000000001, 'beta2': 0.929, 'activation': 'ELU', 'n_linear': 7, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 384, 'weight_decay': 0.8763339953012391}. Best is trial 55 with value: 7.922266195690821.
Epoch 6/60 | Train loss: 7.8890 | Valid loss: 8.0993 | Expected time left: 4.11 s

Best trial: 55. Best value: 7.92227:  81%|████████▏ | 136/167 [24:14<03:12,  6.22s/it]

[I 2025-01-09 17:40:54,549] Trial 135 finished with value: 8.036022090911866 and parameters: {'lr': 0.0001578905723959125, 'beta1': 0.9400000000000001, 'beta2': 0.98, 'activation': 'ELU', 'n_linear': 3, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 416, 'weight_decay': 1.766767837026464e-07}. Best is trial 55 with value: 7.922266195690821.
Epoch 60/60 | Train loss: 7.9972 | Valid loss: 8.0039 | Expected time left: 0.00 s

Best trial: 55. Best value: 7.92227:  82%|████████▏ | 137/167 [24:20<02:59,  5.99s/it]

[I 2025-01-09 17:40:59,980] Trial 136 finished with value: 8.001546564556303 and parameters: {'lr': 0.0031007558052625894, 'beta1': 0.92, 'beta2': 0.995, 'activation': 'ReLU', 'n_linear': 8, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 480, 'weight_decay': 0.0014168058432052566}. Best is trial 55 with value: 7.922266195690821.
Epoch 9/60 | Train loss: 7.9567 | Valid loss: 8.0880 | Expected time left: 4.42 s

Best trial: 55. Best value: 7.92227:  83%|████████▎ | 138/167 [24:21<02:11,  4.55s/it]

[I 2025-01-09 17:41:01,169] Trial 137 finished with value: 8.03504653275013 and parameters: {'lr': 0.0006068081377343336, 'beta1': 0.9410000000000001, 'beta2': 0.9690000000000001, 'activation': 'Tanh', 'n_linear': 2, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 320, 'weight_decay': 0.0008519068605722872}. Best is trial 55 with value: 7.922266195690821.
Epoch 4/60 | Train loss: 8.0224 | Valid loss: 8.0704 | Expected time left: 3.27 s

Best trial: 55. Best value: 7.92227:  83%|████████▎ | 139/167 [24:22<01:34,  3.38s/it]

[I 2025-01-09 17:41:01,817] Trial 138 finished with value: 8.059339561462401 and parameters: {'lr': 0.0013222701344779886, 'beta1': 0.96, 'beta2': 0.9670000000000001, 'activation': 'ELU', 'n_linear': 2, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 416, 'weight_decay': 0.0004177181972920523}. Best is trial 55 with value: 7.922266195690821.
Epoch 58/60 | Train loss: 7.9992 | Valid loss: 8.0004 | Expected time left: 0.29 s

Best trial: 55. Best value: 7.92227:  84%|████████▍ | 140/167 [24:31<02:16,  5.04s/it]

[I 2025-01-09 17:41:10,752] Trial 139 finished with value: 8.000346775712638 and parameters: {'lr': 0.0007592730689079476, 'beta1': 0.914, 'beta2': 0.932, 'activation': 'Tanh', 'n_linear': 7, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 352, 'weight_decay': 0.031590851101574266}. Best is trial 55 with value: 7.922266195690821.
Epoch 58/60 | Train loss: 7.9935 | Valid loss: 8.0069 | Expected time left: 0.34 s

Best trial: 55. Best value: 7.92227:  84%|████████▍ | 141/167 [24:41<02:52,  6.62s/it]

[I 2025-01-09 17:41:21,061] Trial 140 finished with value: 8.003256366366433 and parameters: {'lr': 0.00017245364648845978, 'beta1': 0.9400000000000001, 'beta2': 0.983, 'activation': 'ELU', 'n_linear': 4, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 160, 'weight_decay': 0.0004232055589503022}. Best is trial 55 with value: 7.922266195690821.


Best trial: 55. Best value: 7.92227:  85%|████████▌ | 142/167 [24:41<01:58,  4.74s/it]

[I 2025-01-09 17:41:21,422] Trial 141 finished with value: 8.103331495214391 and parameters: {'lr': 0.0003957775421942578, 'beta1': 0.994, 'beta2': 0.925, 'activation': 'SeLU', 'n_linear': 7, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 384, 'weight_decay': 0.00029869507819086944}. Best is trial 55 with value: 7.922266195690821.
Epoch 59/60 | Train loss: 8.0144 | Valid loss: 8.0229 | Expected time left: 0.20 s

Best trial: 55. Best value: 7.92227:  86%|████████▌ | 143/167 [24:53<02:45,  6.91s/it]

[I 2025-01-09 17:41:33,391] Trial 142 finished with value: 8.014065417032393 and parameters: {'lr': 0.0007476552809469429, 'beta1': 0.972, 'beta2': 0.976, 'activation': 'Tanh', 'n_linear': 6, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 160, 'weight_decay': 5.356135971168273e-06}. Best is trial 55 with value: 7.922266195690821.


Best trial: 55. Best value: 7.92227:  86%|████████▌ | 144/167 [24:54<01:53,  4.95s/it]

[I 2025-01-09 17:41:33,767] Trial 143 finished with value: 8.25453041621617 and parameters: {'lr': 0.006493953298280694, 'beta1': 0.924, 'beta2': 0.927, 'activation': 'ELU', 'n_linear': 5, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 288, 'weight_decay': 7.545173281958506e-05}. Best is trial 55 with value: 7.922266195690821.
Epoch 58/60 | Train loss: 7.9967 | Valid loss: 8.0024 | Expected time left: 0.27 s

Best trial: 55. Best value: 7.92227:  87%|████████▋ | 145/167 [25:02<02:10,  5.95s/it]

[I 2025-01-09 17:41:42,057] Trial 144 finished with value: 8.000741943480476 and parameters: {'lr': 0.0004950928478419477, 'beta1': 0.989, 'beta2': 0.902, 'activation': 'SeLU', 'n_linear': 2, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 160, 'weight_decay': 0.0037950911424326312}. Best is trial 55 with value: 7.922266195690821.
Epoch 12/60 | Train loss: 7.3428 | Valid loss: 8.0888 | Expected time left: 4.67 s

Best trial: 55. Best value: 7.92227:  87%|████████▋ | 146/167 [25:03<01:36,  4.62s/it]

[I 2025-01-09 17:41:43,560] Trial 145 finished with value: 7.939815643855503 and parameters: {'lr': 0.00024899765986501266, 'beta1': 0.9560000000000001, 'beta2': 0.9460000000000001, 'activation': 'ReLU', 'n_linear': 3, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 288, 'weight_decay': 6.358901577783479e-07}. Best is trial 55 with value: 7.922266195690821.
Epoch 40/60 | Train loss: 7.9892 | Valid loss: 8.0237 | Expected time left: 2.92 s

Best trial: 55. Best value: 7.92227:  88%|████████▊ | 147/167 [25:10<01:42,  5.15s/it]

[I 2025-01-09 17:41:49,942] Trial 146 finished with value: 8.003728222846984 and parameters: {'lr': 0.005251451836209499, 'beta1': 0.972, 'beta2': 0.906, 'activation': 'ReLU', 'n_linear': 5, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 256, 'weight_decay': 7.62708859466025e-05}. Best is trial 55 with value: 7.922266195690821.
Epoch 57/60 | Train loss: 8.0062 | Valid loss: 8.0093 | Expected time left: 0.22 s

Best trial: 55. Best value: 7.92227:  89%|████████▊ | 148/167 [25:14<01:35,  5.02s/it]

[I 2025-01-09 17:41:54,652] Trial 147 finished with value: 8.00577096939087 and parameters: {'lr': 0.0020042271193278075, 'beta1': 0.9540000000000001, 'beta2': 0.906, 'activation': 'SeLU', 'n_linear': 7, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 416, 'weight_decay': 0.00043221617589459584}. Best is trial 55 with value: 7.922266195690821.
Epoch 58/60 | Train loss: 7.9963 | Valid loss: 8.0051 | Expected time left: 0.22 s

Best trial: 55. Best value: 7.92227:  89%|████████▉ | 149/167 [25:21<01:40,  5.59s/it]

[I 2025-01-09 17:42:01,582] Trial 148 finished with value: 8.002840314592634 and parameters: {'lr': 0.00025689907959532365, 'beta1': 0.987, 'beta2': 0.987, 'activation': 'Tanh', 'n_linear': 7, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 288, 'weight_decay': 0.00030527241067667863}. Best is trial 55 with value: 7.922266195690821.
Epoch 57/60 | Train loss: 7.9893 | Valid loss: 8.0090 | Expected time left: 0.21 s

Best trial: 55. Best value: 7.92227:  90%|████████▉ | 150/167 [25:26<01:28,  5.23s/it]

[I 2025-01-09 17:42:05,971] Trial 149 finished with value: 8.00375394821167 and parameters: {'lr': 0.0001625351887424124, 'beta1': 0.9650000000000001, 'beta2': 0.982, 'activation': 'ELU', 'n_linear': 5, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 416, 'weight_decay': 5.912801625145088e-05}. Best is trial 55 with value: 7.922266195690821.
Epoch 59/60 | Train loss: 8.0002 | Valid loss: 8.0002 | Expected time left: 0.35 ss

Best trial: 55. Best value: 7.92227:  90%|█████████ | 151/167 [25:47<02:41, 10.12s/it]

[I 2025-01-09 17:42:27,500] Trial 150 finished with value: 7.999791313353039 and parameters: {'lr': 0.006763634594859098, 'beta1': 0.9530000000000001, 'beta2': 0.915, 'activation': 'Tanh', 'n_linear': 8, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 96, 'weight_decay': 0.1811039697580067}. Best is trial 55 with value: 7.922266195690821.


Best trial: 55. Best value: 7.92227:  91%|█████████ | 152/167 [25:48<01:47,  7.19s/it]

[I 2025-01-09 17:42:27,870] Trial 151 finished with value: 8.199387813436575 and parameters: {'lr': 0.0020350969557990763, 'beta1': 0.915, 'beta2': 0.9440000000000001, 'activation': 'ELU', 'n_linear': 5, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 352, 'weight_decay': 1.4510563132798571e-06}. Best is trial 55 with value: 7.922266195690821.


Best trial: 55. Best value: 7.92227:  92%|█████████▏| 153/167 [25:48<01:11,  5.13s/it]

[I 2025-01-09 17:42:28,190] Trial 152 finished with value: 8.211108083310334 and parameters: {'lr': 0.005805712378716848, 'beta1': 0.9420000000000001, 'beta2': 0.916, 'activation': 'Tanh', 'n_linear': 8, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 448, 'weight_decay': 0.0006103181492207736}. Best is trial 55 with value: 7.922266195690821.
Epoch 59/60 | Train loss: 7.9992 | Valid loss: 8.0065 | Expected time left: 0.58 ss

Best trial: 55. Best value: 7.92227:  92%|█████████▏| 154/167 [26:23<03:03, 14.15s/it]

[I 2025-01-09 17:43:03,365] Trial 153 finished with value: 8.00330559615117 and parameters: {'lr': 0.0026332106155754695, 'beta1': 0.931, 'beta2': 0.9470000000000001, 'activation': 'ReLU', 'n_linear': 7, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 64, 'weight_decay': 4.7765990650025255e-06}. Best is trial 55 with value: 7.922266195690821.
Epoch 14/60 | Train loss: 7.1709 | Valid loss: 8.0966 | Expected time left: 6.24 s

Best trial: 55. Best value: 7.92227:  93%|█████████▎| 155/167 [26:25<02:07, 10.59s/it]

[I 2025-01-09 17:43:05,653] Trial 154 finished with value: 8.045301371607287 and parameters: {'lr': 0.00019228436816187694, 'beta1': 0.934, 'beta2': 0.9450000000000001, 'activation': 'ReLU', 'n_linear': 7, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 352, 'weight_decay': 1.8868097336917643e-05}. Best is trial 55 with value: 7.922266195690821.
Epoch 59/60 | Train loss: 8.0010 | Valid loss: 8.0073 | Expected time left: 1.48 ss

Best trial: 55. Best value: 7.92227:  93%|█████████▎| 156/167 [27:54<06:15, 34.11s/it]

[I 2025-01-09 17:44:34,658] Trial 155 finished with value: 8.006967186546936 and parameters: {'lr': 0.002358600538571802, 'beta1': 0.906, 'beta2': 0.917, 'activation': 'ReLU', 'n_linear': 6, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 32, 'weight_decay': 1.341605434091491e-05}. Best is trial 55 with value: 7.922266195690821.
Epoch 58/60 | Train loss: 8.0000 | Valid loss: 8.0001 | Expected time left: 0.30 s

Best trial: 55. Best value: 7.92227:  93%|█████████▎| 156/167 [28:04<06:15, 34.11s/it]

[I 2025-01-09 17:44:43,981] Trial 156 finished with value: 7.999926161766052 and parameters: {'lr': 0.002024536360482818, 'beta1': 0.993, 'beta2': 0.987, 'activation': 'SeLU', 'n_linear': 8, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 256, 'weight_decay': 0.11270554064876723}. Best is trial 55 with value: 7.922266195690821.


Best trial: 55. Best value: 7.92227:  94%|█████████▍| 157/167 [28:04<04:26, 26.68s/it]

Epoch 60/60 | Train loss: 7.9995 | Valid loss: 8.0002 | Expected time left: 0.00 s

Best trial: 55. Best value: 7.92227:  95%|█████████▍| 158/167 [28:09<03:02, 20.30s/it]

[I 2025-01-09 17:44:49,395] Trial 157 finished with value: 8.000094184875488 and parameters: {'lr': 0.0012396424519529128, 'beta1': 0.9380000000000001, 'beta2': 0.997, 'activation': 'Tanh', 'n_linear': 8, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 416, 'weight_decay': 0.04308711718717469}. Best is trial 55 with value: 7.922266195690821.
Epoch 59/60 | Train loss: 7.9937 | Valid loss: 8.0062 | Expected time left: 0.75 ss

Best trial: 55. Best value: 7.92227:  95%|█████████▌| 159/167 [28:55<03:42, 27.86s/it]

[I 2025-01-09 17:45:34,889] Trial 158 finished with value: 8.00444726212718 and parameters: {'lr': 0.00010864584229277542, 'beta1': 0.996, 'beta2': 0.972, 'activation': 'Tanh', 'n_linear': 2, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 32, 'weight_decay': 0.0001911506407981056}. Best is trial 55 with value: 7.922266195690821.
Epoch 40/60 | Train loss: 7.8856 | Valid loss: 7.9863 | Expected time left: 16.06 s

Best trial: 55. Best value: 7.92227:  96%|█████████▌| 160/167 [29:28<03:26, 29.45s/it]

[I 2025-01-09 17:46:08,063] Trial 159 finished with value: 7.926158182180611 and parameters: {'lr': 0.0004004336091738337, 'beta1': 0.9590000000000001, 'beta2': 0.929, 'activation': 'Tanh', 'n_linear': 7, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 64, 'weight_decay': 4.7486612441536005e-06}. Best is trial 55 with value: 7.922266195690821.
Epoch 58/60 | Train loss: 8.0000 | Valid loss: 8.0004 | Expected time left: 0.22 s

Best trial: 55. Best value: 7.92227:  96%|█████████▋| 161/167 [29:35<02:15, 22.63s/it]

[I 2025-01-09 17:46:14,767] Trial 160 finished with value: 8.00003035315152 and parameters: {'lr': 0.003882903142725179, 'beta1': 0.903, 'beta2': 0.9580000000000001, 'activation': 'ReLU', 'n_linear': 7, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 352, 'weight_decay': 0.06382746486447297}. Best is trial 55 with value: 7.922266195690821.
Epoch 60/60 | Train loss: 7.9994 | Valid loss: 8.0019 | Expected time left: 0.00 s

Best trial: 55. Best value: 7.92227:  97%|█████████▋| 162/167 [29:41<01:29, 17.85s/it]

[I 2025-01-09 17:46:21,466] Trial 161 finished with value: 8.000990629196167 and parameters: {'lr': 0.005727403763647467, 'beta1': 0.9570000000000001, 'beta2': 0.9510000000000001, 'activation': 'Tanh', 'n_linear': 6, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 320, 'weight_decay': 0.00828774560119825}. Best is trial 55 with value: 7.922266195690821.
Epoch 18/60 | Train loss: 7.9137 | Valid loss: 8.0991 | Expected time left: 12.42 s

Best trial: 55. Best value: 7.92227:  98%|█████████▊| 163/167 [29:47<00:57, 14.25s/it]

[I 2025-01-09 17:46:27,324] Trial 162 finished with value: 8.022316190931532 and parameters: {'lr': 0.0002984468069270451, 'beta1': 0.9510000000000001, 'beta2': 0.91, 'activation': 'Tanh', 'n_linear': 6, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 160, 'weight_decay': 3.4117064177356475e-05}. Best is trial 55 with value: 7.922266195690821.
Epoch 59/60 | Train loss: 8.0097 | Valid loss: 8.0326 | Expected time left: 0.84 ss

Best trial: 55. Best value: 7.92227:  98%|█████████▊| 164/167 [30:38<01:15, 25.17s/it]

[I 2025-01-09 17:47:17,955] Trial 163 finished with value: 8.004708003693114 and parameters: {'lr': 0.0008810219121568485, 'beta1': 0.904, 'beta2': 0.914, 'activation': 'SeLU', 'n_linear': 3, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 32, 'weight_decay': 1.1314835087575435e-07}. Best is trial 55 with value: 7.922266195690821.
Epoch 59/60 | Train loss: 7.9918 | Valid loss: 8.0120 | Expected time left: 0.42 ss

Best trial: 55. Best value: 7.92227:  99%|█████████▉| 165/167 [31:03<00:50, 25.17s/it]

[I 2025-01-09 17:47:43,137] Trial 164 finished with value: 8.003247661955038 and parameters: {'lr': 0.000274244305309583, 'beta1': 0.903, 'beta2': 0.91, 'activation': 'SeLU', 'n_linear': 4, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 64, 'weight_decay': 2.6095282310615847e-05}. Best is trial 55 with value: 7.922266195690821.


Best trial: 55. Best value: 7.92227:  99%|█████████▉| 166/167 [31:03<00:17, 17.73s/it]

[I 2025-01-09 17:47:43,517] Trial 165 finished with value: 8.139888048171997 and parameters: {'lr': 0.0010347224631409363, 'beta1': 0.987, 'beta2': 0.9490000000000001, 'activation': 'SeLU', 'n_linear': 4, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 320, 'weight_decay': 1.4642290706482846e-05}. Best is trial 55 with value: 7.922266195690821.


Best trial: 55. Best value: 7.92227: 100%|██████████| 167/167 [31:04<00:00, 11.16s/it]

[I 2025-01-09 17:47:43,852] Trial 166 finished with value: 8.554567074775695 and parameters: {'lr': 0.0044420226473872925, 'beta1': 0.995, 'beta2': 0.982, 'activation': 'SeLU', 'n_linear': 2, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 256, 'weight_decay': 3.8315204893889454e-05}. Best is trial 55 with value: 7.922266195690821.





In [2]:
def objective(trial):
    """Optuna objective: train one sampled CNN configuration with Adam and score it.

    Hyperparameters are drawn from ``trial``, the ASCAD loaders are built with the
    sampled batch size, and a ``CNNModel`` is trained through ``train_model_classic``.
    The train/validation losses returned by the training helper are stored on the
    trial as the ``train_loss`` and ``val_loss`` user attributes.

    Args:
        trial: Optuna trial used to sample hyperparameters and to store the losses.

    Returns:
        The smallest value found in the returned ``val_loss`` sequence, or ``np.nan``
        when building or training the model raised an ``Exception``.

    Raises:
        Anything raised by ``load_ascad_data`` (it runs outside the guarded section).
        ``KeyboardInterrupt`` and ``SystemExit`` are deliberately not swallowed, so a
        running study can still be stopped by the user.
    """
    import warnings  # local import so this cell does not depend on an edit elsewhere

    n_classes = 256
    signal_length = 700

    # The order of the suggest_* calls is kept unchanged on purpose: with a seeded
    # sampler, reordering them would change which values each trial receives.
    lr = trial.suggest_float('lr', 1e-4, 1e-2, log=True)
    beta1 = trial.suggest_float('beta1', 0.9, 0.999, step=0.001)
    beta2 = trial.suggest_float('beta2', 0.9, 0.999, step=0.001)
    n_epochs = 60
    activation = trial.suggest_categorical('activation', ['ReLU', 'SeLU', 'ELU', 'Tanh'])
    n_linear = trial.suggest_int('n_linear', 2, 5)
    n_conv = trial.suggest_int('n_conv', 1, 5)
    conv_filter_type = trial.suggest_categorical('conv_filter_type', ['increasing', 'increasing_clipped', 'same'])
    kernel_size = trial.suggest_int('kernel_size', 5, 17, step=2)
    conv_filter_size = trial.suggest_int('conv_filter_size', 10, 20, step=10)
    linear_size = trial.suggest_categorical('linear_size', [256])
    input_bn = trial.suggest_categorical('input_bn', [True, False])
    dense_bn = trial.suggest_categorical('dense_bn', [True, False])
    batch_size = trial.suggest_int('batch_size', 32, 512, step=32)
    pooling_size = trial.suggest_int('pooling_size', 2, 3)
    global_pool = trial.suggest_categorical('global_pool', [True, False])
    conv_bn = trial.suggest_categorical('conv_bn', [True, False])
    weight_decay = trial.suggest_float('weight_decay', 1e-7, 1, log=True)

    # Only the first two of the six returned values are used here.
    train_loader, attack_loader, _, _, _, _ = load_ascad_data(device, batch_size=batch_size)

    try:
        model = CNNModel(n_classes, signal_length, n_linear=n_linear,
                         n_conv=n_conv, conv_filter_type=conv_filter_type,
                         kernel_size=kernel_size, conv_filter_size=conv_filter_size,
                         pooling_size=pooling_size, pooling_stride=None,
                         linear_size=linear_size, activation=activation, global_pool=global_pool,
                         input_bn=input_bn, dense_bn=dense_bn, conv_bn=conv_bn).to(device)

        # NOTE(review): this Scoop optimizer is built but never handed to the training
        # call below (Adam is). It is kept so the two optimizers can be swapped by hand.
        optimizer = Scoop(model.parameters(), lr=lr,
                          betas=(beta1, beta2),
                          weight_decay=weight_decay, estimator='low_variance',
                          hessian_iter=1)
        adam_optimizer = torch.optim.Adam(model.parameters(), lr=lr,
                                          betas=(beta1, beta2), weight_decay=weight_decay)

        path_model = 'scoop_vs_adam/ascadv1_fintuning_scoop_cnn_' + str(trial.number) + '.pt'
        train_loss, val_loss, _ = train_model_classic(model, adam_optimizer, n_epochs, train_loader, attack_loader, verbose=True, path=path_model, device=device, MLP=False, finetuning=True, entropy=8)
        trial.set_user_attr('val_loss', val_loss)
        trial.set_user_attr('train_loss', train_loss)

        # Ascending sort + [:1] keeps the single best value (NaNs sort last),
        # so the score is the smallest validation loss that was recorded.
        return np.mean(np.sort(np.array(val_loss))[:1])
    except Exception as exc:
        # A configuration that cannot be built or trained is scored as NaN instead of
        # aborting the study, but the reason is surfaced rather than silently dropped.
        # `except Exception` (not a bare `except:`) lets Ctrl-C stop the whole search.
        warnings.warn(f"Trial {trial.number} failed and is scored as NaN: {exc!r}")
        return np.nan

In [3]:
# Run the hyperparameter search for the Adam-trained CNN; trials are persisted
# in a local SQLite database so the study can be reopened later.
storage_name = "sqlite:///ascadv1_finetuning_scoop_comp.db"

# Two samplers are created from the notebook seed; only the random one is
# handed to the study in this cell.
tpe_sampler = optuna.samplers.TPESampler(
    seed=seed,
    n_startup_trials=10,
)
sampler = optuna.samplers.RandomSampler(seed=seed)

# Log one INFO line per finished trial.
optuna.logging.set_verbosity(optuna.logging.INFO)

study = optuna.create_study(
    study_name="adam_cnn1",
    storage=storage_name,
    sampler=sampler,
    direction="minimize",
    load_if_exists=True,
)
study.optimize(
    objective,
    n_trials=98,
    n_jobs=1,
    show_progress_bar=True,
)

[I 2025-01-10 07:43:54,684] A new study created in RDB with name: adam_cnn1
  0%|          | 0/98 [00:00<?, ?it/s]

Epoch 59/60 | Train loss: 7.9960 | Valid loss: 8.0056 | Expected time left: 1.05 ss

Best trial: 0. Best value: 8.00171:   1%|          | 1/98 [01:03<1:43:00, 63.72s/it]

[I 2025-01-10 07:44:58,400] Trial 0 finished with value: 8.001707490285238 and parameters: {'lr': 0.0005611516415334506, 'beta1': 0.995, 'beta2': 0.973, 'activation': 'ReLU', 'n_linear': 5, 'n_conv': 4, 'conv_filter_type': 'same', 'kernel_size': 15, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 224, 'pooling_size': 2, 'global_pool': True, 'conv_bn': False, 'weight_decay': 0.0001557721770269301}. Best is trial 0 with value: 8.001707490285238.


Best trial: 0. Best value: 8.00171:   2%|▏         | 2/98 [01:08<46:33, 29.10s/it]  

[I 2025-01-10 07:45:03,273] Trial 1 finished with value: 15.287322680155436 and parameters: {'lr': 0.0037183641805732083, 'beta1': 0.919, 'beta2': 0.9510000000000001, 'activation': 'ELU', 'n_linear': 2, 'n_conv': 5, 'conv_filter_type': 'increasing', 'kernel_size': 5, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 480, 'pooling_size': 2, 'global_pool': True, 'conv_bn': False, 'weight_decay': 1.9678010532114947e-06}. Best is trial 0 with value: 8.001707490285238.
Epoch 4/60 | Train loss: 8.0045 | Valid loss: 8.0045 | Expected time left: 21.92 s

Best trial: 0. Best value: 8.00171:   3%|▎         | 3/98 [01:10<26:39, 16.84s/it]

[I 2025-01-10 07:45:05,521] Trial 2 finished with value: 8.002767635758515 and parameters: {'lr': 0.00869299151113955, 'beta1': 0.977, 'beta2': 0.993, 'activation': 'ELU', 'n_linear': 2, 'n_conv': 1, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 15, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 64, 'pooling_size': 3, 'global_pool': True, 'conv_bn': False, 'weight_decay': 0.008871588860587615}. Best is trial 0 with value: 8.001707490285238.


Best trial: 0. Best value: 8.00171:   4%|▍         | 4/98 [01:11<16:41, 10.65s/it]

[I 2025-01-10 07:45:06,681] Trial 3 finished with value: 8.197179440089634 and parameters: {'lr': 0.0028708753481954683, 'beta1': 0.977, 'beta2': 0.907, 'activation': 'ELU', 'n_linear': 3, 'n_conv': 1, 'conv_filter_type': 'same', 'kernel_size': 13, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 288, 'pooling_size': 3, 'global_pool': False, 'conv_bn': True, 'weight_decay': 5.691673629899884e-07}. Best is trial 0 with value: 8.001707490285238.
Epoch 11/60 | Train loss: 7.8348 | Valid loss: 8.0753 | Expected time left: 17.88 s

Best trial: 0. Best value: 8.00171:   5%|▌         | 5/98 [01:16<13:09,  8.49s/it]

[I 2025-01-10 07:45:11,352] Trial 4 finished with value: 8.030732563563756 and parameters: {'lr': 0.0001155735281626988, 'beta1': 0.9630000000000001, 'beta2': 0.931, 'activation': 'SeLU', 'n_linear': 5, 'n_conv': 2, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 17, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 480, 'pooling_size': 3, 'global_pool': False, 'conv_bn': True, 'weight_decay': 3.940452872934759e-06}. Best is trial 0 with value: 8.001707490285238.
Epoch 1/60 | Train loss: 8.1082 | Valid loss: 8.0902 | Expected time left: 22.65 s

Best trial: 0. Best value: 8.00171:   6%|▌         | 6/98 [01:17<09:07,  5.95s/it]

[I 2025-01-10 07:45:12,373] Trial 5 finished with value: 8.090187890189034 and parameters: {'lr': 0.0007148510793512986, 'beta1': 0.981, 'beta2': 0.986, 'activation': 'SeLU', 'n_linear': 2, 'n_conv': 2, 'conv_filter_type': 'increasing', 'kernel_size': 13, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 160, 'pooling_size': 2, 'global_pool': False, 'conv_bn': True, 'weight_decay': 8.923294976546345e-06}. Best is trial 0 with value: 8.001707490285238.


Best trial: 0. Best value: 8.00171:   7%|▋         | 7/98 [01:19<07:02,  4.64s/it]

[I 2025-01-10 07:45:14,322] Trial 6 finished with value: 8.15932211555993 and parameters: {'lr': 0.0065543823435462835, 'beta1': 0.923, 'beta2': 0.914, 'activation': 'SeLU', 'n_linear': 5, 'n_conv': 2, 'conv_filter_type': 'increasing', 'kernel_size': 13, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 32, 'pooling_size': 3, 'global_pool': True, 'conv_bn': True, 'weight_decay': 0.0032825362654054194}. Best is trial 0 with value: 8.001707490285238.
Epoch 59/60 | Train loss: 7.9988 | Valid loss: 8.0004 | Expected time left: 1.52 ss

Best trial: 7. Best value: 8.00042:   8%|▊         | 8/98 [02:51<48:34, 32.38s/it]

[I 2025-01-10 07:46:46,092] Trial 7 finished with value: 8.000416032199201 and parameters: {'lr': 0.00022321987366901572, 'beta1': 0.9690000000000001, 'beta2': 0.9380000000000001, 'activation': 'ReLU', 'n_linear': 5, 'n_conv': 5, 'conv_filter_type': 'same', 'kernel_size': 11, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 352, 'pooling_size': 2, 'global_pool': False, 'conv_bn': True, 'weight_decay': 0.028782520390527488}. Best is trial 7 with value: 8.000416032199201.
Epoch 59/60 | Train loss: 7.9968 | Valid loss: 8.0064 | Expected time left: 0.52 ss

Best trial: 7. Best value: 8.00042:   9%|▉         | 9/98 [03:22<47:36, 32.10s/it]

[I 2025-01-10 07:47:17,564] Trial 8 finished with value: 8.001841577990302 and parameters: {'lr': 0.0019233720128877185, 'beta1': 0.908, 'beta2': 0.916, 'activation': 'ReLU', 'n_linear': 4, 'n_conv': 1, 'conv_filter_type': 'same', 'kernel_size': 13, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 352, 'pooling_size': 3, 'global_pool': True, 'conv_bn': False, 'weight_decay': 7.184831273965058e-06}. Best is trial 7 with value: 8.000416032199201.


Best trial: 7. Best value: 8.00042:  10%|█         | 10/98 [03:38<39:28, 26.91s/it]

[I 2025-01-10 07:47:32,870] Trial 9 finished with value: 9.516930022329655 and parameters: {'lr': 0.000307595010687204, 'beta1': 0.997, 'beta2': 0.9390000000000001, 'activation': 'ReLU', 'n_linear': 4, 'n_conv': 3, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 5, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 192, 'pooling_size': 2, 'global_pool': True, 'conv_bn': True, 'weight_decay': 0.09355482446969178}. Best is trial 7 with value: 8.000416032199201.
Epoch 59/60 | Train loss: 7.9962 | Valid loss: 8.0046 | Expected time left: 0.91 ss

Best trial: 7. Best value: 8.00042:  11%|█         | 11/98 [04:32<51:23, 35.44s/it]

[I 2025-01-10 07:48:27,650] Trial 10 finished with value: 8.002412742038942 and parameters: {'lr': 0.0003880590213919323, 'beta1': 0.9380000000000001, 'beta2': 0.985, 'activation': 'Tanh', 'n_linear': 4, 'n_conv': 3, 'conv_filter_type': 'same', 'kernel_size': 5, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 192, 'pooling_size': 2, 'global_pool': False, 'conv_bn': False, 'weight_decay': 0.0003796617034906455}. Best is trial 7 with value: 8.000416032199201.
Epoch 59/60 | Train loss: 7.9976 | Valid loss: 8.0152 | Expected time left: 0.22 ss

Best trial: 7. Best value: 8.00042:  12%|█▏        | 12/98 [04:46<41:18, 28.82s/it]

[I 2025-01-10 07:48:41,333] Trial 11 finished with value: 8.00486262339466 and parameters: {'lr': 0.001007007231645657, 'beta1': 0.979, 'beta2': 0.964, 'activation': 'ELU', 'n_linear': 3, 'n_conv': 1, 'conv_filter_type': 'increasing', 'kernel_size': 11, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 192, 'pooling_size': 2, 'global_pool': False, 'conv_bn': False, 'weight_decay': 3.9576032587089985e-07}. Best is trial 7 with value: 8.000416032199201.
Epoch 59/60 | Train loss: 8.0000 | Valid loss: 8.0000 | Expected time left: 20.59 sss

Best trial: 12. Best value: 7.99999:  13%|█▎        | 13/98 [25:22<9:18:48, 394.46s/it]

[I 2025-01-10 08:09:17,130] Trial 12 finished with value: 7.999992488679432 and parameters: {'lr': 0.00012687131530893567, 'beta1': 0.9530000000000001, 'beta2': 0.9540000000000001, 'activation': 'ELU', 'n_linear': 3, 'n_conv': 4, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 5, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 96, 'pooling_size': 2, 'global_pool': False, 'conv_bn': True, 'weight_decay': 0.48312216705541605}. Best is trial 12 with value: 7.999992488679432.


Best trial: 12. Best value: 7.99999:  14%|█▍        | 14/98 [25:23<6:25:52, 275.63s/it]

[I 2025-01-10 08:09:18,173] Trial 13 finished with value: 8.11689510345459 and parameters: {'lr': 0.0029908444973404313, 'beta1': 0.9550000000000001, 'beta2': 0.9610000000000001, 'activation': 'Tanh', 'n_linear': 2, 'n_conv': 1, 'conv_filter_type': 'same', 'kernel_size': 13, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 224, 'pooling_size': 2, 'global_pool': False, 'conv_bn': False, 'weight_decay': 0.0024044702610177535}. Best is trial 12 with value: 7.999992488679432.


Best trial: 12. Best value: 7.99999:  15%|█▌        | 15/98 [25:24<4:26:55, 192.96s/it]

[I 2025-01-10 08:09:19,547] Trial 14 finished with value: 8.168191414845142 and parameters: {'lr': 0.0010145478086525616, 'beta1': 0.985, 'beta2': 0.9650000000000001, 'activation': 'ELU', 'n_linear': 4, 'n_conv': 5, 'conv_filter_type': 'same', 'kernel_size': 11, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 128, 'pooling_size': 2, 'global_pool': True, 'conv_bn': False, 'weight_decay': 3.150072426599281e-07}. Best is trial 12 with value: 7.999992488679432.


Best trial: 12. Best value: 7.99999:  16%|█▋        | 16/98 [25:29<3:06:19, 136.33s/it]

[I 2025-01-10 08:09:24,382] Trial 15 finished with value: 16.08135440826416 and parameters: {'lr': 0.00043446146328325033, 'beta1': 0.984, 'beta2': 0.902, 'activation': 'ReLU', 'n_linear': 4, 'n_conv': 5, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 7, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 416, 'pooling_size': 2, 'global_pool': True, 'conv_bn': False, 'weight_decay': 0.019133751030274922}. Best is trial 12 with value: 7.999992488679432.
Epoch 59/60 | Train loss: 7.5423 | Valid loss: 7.9849 | Expected time left: 0.95 ss

Best trial: 16. Best value: 7.88937:  17%|█▋        | 17/98 [26:26<2:31:56, 112.55s/it]

[I 2025-01-10 08:10:21,628] Trial 16 finished with value: 7.88936815590694 and parameters: {'lr': 0.0001607858173371064, 'beta1': 0.99, 'beta2': 0.9500000000000001, 'activation': 'ELU', 'n_linear': 2, 'n_conv': 5, 'conv_filter_type': 'same', 'kernel_size': 17, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 352, 'pooling_size': 3, 'global_pool': True, 'conv_bn': False, 'weight_decay': 2.5287815842822377e-07}. Best is trial 16 with value: 7.88936815590694.
Epoch 3/60 | Train loss: 8.0750 | Valid loss: 8.0724 | Expected time left: 16.37 s

Best trial: 16. Best value: 7.88937:  18%|█▊        | 18/98 [26:28<1:45:33, 79.17s/it] 

[I 2025-01-10 08:10:23,083] Trial 17 finished with value: 8.072372814394393 and parameters: {'lr': 0.001256197036859384, 'beta1': 0.9440000000000001, 'beta2': 0.988, 'activation': 'Tanh', 'n_linear': 4, 'n_conv': 1, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 15, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 192, 'pooling_size': 3, 'global_pool': False, 'conv_bn': True, 'weight_decay': 3.841536968266002e-07}. Best is trial 16 with value: 7.88936815590694.
Epoch 59/60 | Train loss: 7.9988 | Valid loss: 8.0052 | Expected time left: 0.69 ss

Best trial: 16. Best value: 7.88937:  19%|█▉        | 19/98 [27:09<1:29:22, 67.88s/it]

[I 2025-01-10 08:11:04,671] Trial 18 finished with value: 8.003198183035549 and parameters: {'lr': 0.003583387974072159, 'beta1': 0.9550000000000001, 'beta2': 0.9420000000000001, 'activation': 'ReLU', 'n_linear': 3, 'n_conv': 1, 'conv_filter_type': 'same', 'kernel_size': 15, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 128, 'pooling_size': 3, 'global_pool': False, 'conv_bn': False, 'weight_decay': 0.0004942057153676933}. Best is trial 16 with value: 7.88936815590694.
Epoch 59/60 | Train loss: 8.0000 | Valid loss: 8.0001 | Expected time left: 19.59 ss

Best trial: 16. Best value: 7.88937:  20%|██        | 20/98 [46:45<8:40:38, 400.49s/it]

[I 2025-01-10 08:30:40,378] Trial 19 finished with value: 7.999836306028728 and parameters: {'lr': 0.009681208919128432, 'beta1': 0.907, 'beta2': 0.9550000000000001, 'activation': 'ReLU', 'n_linear': 3, 'n_conv': 4, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 7, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 128, 'pooling_size': 2, 'global_pool': False, 'conv_bn': False, 'weight_decay': 0.8007099243138588}. Best is trial 16 with value: 7.88936815590694.
Epoch 59/60 | Train loss: 7.9999 | Valid loss: 8.0002 | Expected time left: 4.73 sss

Best trial: 16. Best value: 7.88937:  21%|██▏       | 21/98 [51:30<7:49:14, 365.64s/it]

[I 2025-01-10 08:35:24,762] Trial 20 finished with value: 7.999938583374023 and parameters: {'lr': 0.002490711511896317, 'beta1': 0.9530000000000001, 'beta2': 0.93, 'activation': 'Tanh', 'n_linear': 5, 'n_conv': 5, 'conv_filter_type': 'increasing', 'kernel_size': 17, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 512, 'pooling_size': 2, 'global_pool': True, 'conv_bn': True, 'weight_decay': 0.07384157187150384}. Best is trial 16 with value: 7.88936815590694.
Epoch 59/60 | Train loss: 7.9990 | Valid loss: 8.0005 | Expected time left: 0.23 ss

Best trial: 16. Best value: 7.88937:  22%|██▏       | 22/98 [51:43<5:29:25, 260.07s/it]

[I 2025-01-10 08:35:38,630] Trial 21 finished with value: 8.000428199768066 and parameters: {'lr': 0.0008657376792270707, 'beta1': 0.9410000000000001, 'beta2': 0.927, 'activation': 'Tanh', 'n_linear': 5, 'n_conv': 3, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 7, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 352, 'pooling_size': 3, 'global_pool': True, 'conv_bn': True, 'weight_decay': 0.025481720169780286}. Best is trial 16 with value: 7.88936815590694.
Epoch 60/60 | Train loss: 7.9998 | Valid loss: 8.0001 | Expected time left: 0.00 s

Best trial: 16. Best value: 7.88937:  23%|██▎       | 23/98 [51:53<3:51:16, 185.02s/it]

[I 2025-01-10 08:35:48,599] Trial 22 finished with value: 8.000052883511497 and parameters: {'lr': 0.0010973041007839744, 'beta1': 0.985, 'beta2': 0.9550000000000001, 'activation': 'SeLU', 'n_linear': 2, 'n_conv': 4, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 5, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 480, 'pooling_size': 2, 'global_pool': False, 'conv_bn': False, 'weight_decay': 0.10877922521984192}. Best is trial 16 with value: 7.88936815590694.
Epoch 23/60 | Train loss: 8.0140 | Valid loss: 8.0320 | Expected time left: 9.37 ss

Best trial: 16. Best value: 7.88937:  24%|██▍       | 24/98 [52:00<2:42:04, 131.41s/it]

[I 2025-01-10 08:35:54,973] Trial 23 finished with value: 8.011829889045572 and parameters: {'lr': 0.007925766022280026, 'beta1': 0.914, 'beta2': 0.992, 'activation': 'Tanh', 'n_linear': 3, 'n_conv': 2, 'conv_filter_type': 'increasing', 'kernel_size': 5, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 192, 'pooling_size': 3, 'global_pool': False, 'conv_bn': False, 'weight_decay': 1.9323256280070807e-07}. Best is trial 16 with value: 7.88936815590694.
Epoch 59/60 | Train loss: 7.9965 | Valid loss: 8.0019 | Expected time left: 0.78 ss

Best trial: 16. Best value: 7.88937:  26%|██▌       | 25/98 [52:47<2:09:10, 106.17s/it]

[I 2025-01-10 08:36:42,248] Trial 24 finished with value: 8.000920162200927 and parameters: {'lr': 0.00021770586351835763, 'beta1': 0.927, 'beta2': 0.917, 'activation': 'ELU', 'n_linear': 3, 'n_conv': 3, 'conv_filter_type': 'same', 'kernel_size': 13, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 416, 'pooling_size': 3, 'global_pool': False, 'conv_bn': False, 'weight_decay': 0.002127077926183298}. Best is trial 16 with value: 7.88936815590694.
Epoch 5/60 | Train loss: 8.0305 | Valid loss: 8.0757 | Expected time left: 48.90 s

Best trial: 16. Best value: 7.88937:  27%|██▋       | 26/98 [52:53<1:31:12, 76.01s/it] 

[I 2025-01-10 08:36:47,884] Trial 25 finished with value: 8.03853355263764 and parameters: {'lr': 0.0005467984554504091, 'beta1': 0.9460000000000001, 'beta2': 0.974, 'activation': 'Tanh', 'n_linear': 4, 'n_conv': 3, 'conv_filter_type': 'same', 'kernel_size': 7, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 192, 'pooling_size': 2, 'global_pool': True, 'conv_bn': False, 'weight_decay': 0.0003083865244878966}. Best is trial 16 with value: 7.88936815590694.


Best trial: 16. Best value: 7.88937:  28%|██▊       | 27/98 [52:54<1:03:26, 53.61s/it]

[I 2025-01-10 08:36:49,229] Trial 26 finished with value: 8.136750230335053 and parameters: {'lr': 0.0001492246524838933, 'beta1': 0.9530000000000001, 'beta2': 0.9580000000000001, 'activation': 'ReLU', 'n_linear': 3, 'n_conv': 4, 'conv_filter_type': 'same', 'kernel_size': 13, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 96, 'pooling_size': 2, 'global_pool': False, 'conv_bn': False, 'weight_decay': 7.460669282028748e-05}. Best is trial 16 with value: 7.88936815590694.
Epoch 59/60 | Train loss: 8.0030 | Valid loss: 8.0072 | Expected time left: 19.58 ss

Best trial: 16. Best value: 7.88937:  29%|██▊       | 28/98 [1:12:29<7:35:10, 390.14s/it]

[I 2025-01-10 08:56:24,574] Trial 27 finished with value: 8.005438146711905 and parameters: {'lr': 0.009220558632540494, 'beta1': 0.911, 'beta2': 0.9390000000000001, 'activation': 'ReLU', 'n_linear': 2, 'n_conv': 4, 'conv_filter_type': 'increasing', 'kernel_size': 7, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 128, 'pooling_size': 2, 'global_pool': True, 'conv_bn': True, 'weight_decay': 0.0002333354435718439}. Best is trial 16 with value: 7.88936815590694.
Epoch 59/60 | Train loss: 7.9976 | Valid loss: 8.0043 | Expected time left: 0.57 ss

Best trial: 16. Best value: 7.88937:  30%|██▉       | 29/98 [1:13:04<5:25:53, 283.38s/it]

[I 2025-01-10 08:56:58,863] Trial 28 finished with value: 8.001778320142417 and parameters: {'lr': 0.0011619270574317214, 'beta1': 0.905, 'beta2': 0.933, 'activation': 'ELU', 'n_linear': 5, 'n_conv': 2, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 11, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 64, 'pooling_size': 3, 'global_pool': True, 'conv_bn': False, 'weight_decay': 0.001050296195310737}. Best is trial 16 with value: 7.88936815590694.
Epoch 59/60 | Train loss: 7.9988 | Valid loss: 8.0032 | Expected time left: 0.20 ss

Best trial: 16. Best value: 7.88937:  31%|███       | 30/98 [1:13:16<3:48:59, 202.05s/it]

[I 2025-01-10 08:57:11,151] Trial 29 finished with value: 8.002505370548793 and parameters: {'lr': 0.004826780544283172, 'beta1': 0.913, 'beta2': 0.979, 'activation': 'Tanh', 'n_linear': 4, 'n_conv': 3, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 15, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 288, 'pooling_size': 2, 'global_pool': True, 'conv_bn': False, 'weight_decay': 0.0023424264934977725}. Best is trial 16 with value: 7.88936815590694.
Epoch 59/60 | Train loss: 7.9990 | Valid loss: 8.0002 | Expected time left: 15.79 ss

Best trial: 16. Best value: 7.88937:  32%|███▏      | 31/98 [1:29:04<7:55:31, 425.85s/it]

[I 2025-01-10 09:12:59,174] Trial 30 finished with value: 8.000217554704198 and parameters: {'lr': 0.00017131527250893023, 'beta1': 0.993, 'beta2': 0.962, 'activation': 'ELU', 'n_linear': 4, 'n_conv': 5, 'conv_filter_type': 'increasing', 'kernel_size': 7, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 192, 'pooling_size': 3, 'global_pool': False, 'conv_bn': False, 'weight_decay': 0.03811251080146861}. Best is trial 16 with value: 7.88936815590694.
Epoch 59/60 | Train loss: 7.9999 | Valid loss: 8.0000 | Expected time left: 0.24 ss

Best trial: 16. Best value: 7.88937:  33%|███▎      | 32/98 [1:29:19<5:32:50, 302.58s/it]

[I 2025-01-10 09:13:14,129] Trial 31 finished with value: 8.000023230910301 and parameters: {'lr': 0.00019945037385407478, 'beta1': 0.922, 'beta2': 0.972, 'activation': 'ReLU', 'n_linear': 3, 'n_conv': 2, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 9, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 320, 'pooling_size': 2, 'global_pool': False, 'conv_bn': False, 'weight_decay': 0.2867448774806263}. Best is trial 16 with value: 7.88936815590694.
Epoch 59/60 | Train loss: 8.0014 | Valid loss: 8.0056 | Expected time left: 22.39 sss

Best trial: 16. Best value: 7.88937:  34%|███▎      | 33/98 [1:51:43<11:06:14, 614.99s/it]

[I 2025-01-10 09:35:38,079] Trial 32 finished with value: 8.004546639256583 and parameters: {'lr': 0.0025970318068077314, 'beta1': 0.915, 'beta2': 0.9570000000000001, 'activation': 'Tanh', 'n_linear': 5, 'n_conv': 3, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 5, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 32, 'pooling_size': 2, 'global_pool': True, 'conv_bn': False, 'weight_decay': 0.000577788934965354}. Best is trial 16 with value: 7.88936815590694.


Best trial: 16. Best value: 7.88937:  35%|███▍      | 34/98 [1:51:44<7:39:30, 430.79s/it] 

[I 2025-01-10 09:35:39,058] Trial 33 finished with value: 13.271478033065796 and parameters: {'lr': 0.006913711177998885, 'beta1': 0.934, 'beta2': 0.934, 'activation': 'ReLU', 'n_linear': 2, 'n_conv': 1, 'conv_filter_type': 'same', 'kernel_size': 9, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 512, 'pooling_size': 2, 'global_pool': True, 'conv_bn': True, 'weight_decay': 0.0014712792880877553}. Best is trial 16 with value: 7.88936815590694.
Epoch 60/60 | Train loss: 7.9791 | Valid loss: 8.0138 | Expected time left: 0.00 s

Best trial: 16. Best value: 7.88937:  36%|███▌      | 35/98 [1:51:56<5:20:22, 305.12s/it]

[I 2025-01-10 09:35:50,971] Trial 34 finished with value: 8.00253146627675 and parameters: {'lr': 0.00013619242560057148, 'beta1': 0.974, 'beta2': 0.92, 'activation': 'ReLU', 'n_linear': 3, 'n_conv': 3, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 11, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 448, 'pooling_size': 3, 'global_pool': True, 'conv_bn': True, 'weight_decay': 1.505582262500961e-07}. Best is trial 16 with value: 7.88936815590694.
Epoch 6/60 | Train loss: 7.9275 | Valid loss: 8.0965 | Expected time left: 8.61 s

Best trial: 16. Best value: 7.88937:  37%|███▋      | 36/98 [1:51:57<3:41:08, 214.01s/it]

[I 2025-01-10 09:35:52,389] Trial 35 finished with value: 8.0222826745775 and parameters: {'lr': 0.00020526041126466539, 'beta1': 0.9710000000000001, 'beta2': 0.9650000000000001, 'activation': 'Tanh', 'n_linear': 2, 'n_conv': 1, 'conv_filter_type': 'increasing', 'kernel_size': 7, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 224, 'pooling_size': 3, 'global_pool': False, 'conv_bn': True, 'weight_decay': 3.967890841276329e-07}. Best is trial 16 with value: 7.88936815590694.
Epoch 59/60 | Train loss: 7.9902 | Valid loss: 8.0261 | Expected time left: 0.23 ss

Best trial: 16. Best value: 7.88937:  38%|███▊      | 37/98 [1:52:11<2:36:36, 154.04s/it]

[I 2025-01-10 09:36:06,487] Trial 36 finished with value: 8.001644477844238 and parameters: {'lr': 0.0002604788766691677, 'beta1': 0.902, 'beta2': 0.918, 'activation': 'ELU', 'n_linear': 3, 'n_conv': 2, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 13, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 416, 'pooling_size': 2, 'global_pool': True, 'conv_bn': True, 'weight_decay': 9.47315255662579e-05}. Best is trial 16 with value: 7.88936815590694.
Epoch 59/60 | Train loss: 7.9965 | Valid loss: 8.0040 | Expected time left: 0.32 ss

Best trial: 16. Best value: 7.88937:  39%|███▉      | 38/98 [1:52:31<1:53:41, 113.69s/it]

[I 2025-01-10 09:36:26,028] Trial 37 finished with value: 8.002133410909902 and parameters: {'lr': 0.0001109018055118723, 'beta1': 0.926, 'beta2': 0.9540000000000001, 'activation': 'Tanh', 'n_linear': 5, 'n_conv': 3, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 17, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 448, 'pooling_size': 2, 'global_pool': False, 'conv_bn': True, 'weight_decay': 0.0015201684512987097}. Best is trial 16 with value: 7.88936815590694.
Epoch 8/60 | Train loss: 7.7228 | Valid loss: 8.0441 | Expected time left: 8.22 s

Best trial: 16. Best value: 7.88937:  40%|███▉      | 39/98 [1:52:33<1:18:45, 80.10s/it] 

[I 2025-01-10 09:36:27,748] Trial 38 finished with value: 8.009104758501053 and parameters: {'lr': 0.001588670096363, 'beta1': 0.966, 'beta2': 0.917, 'activation': 'ReLU', 'n_linear': 2, 'n_conv': 1, 'conv_filter_type': 'increasing', 'kernel_size': 7, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 320, 'pooling_size': 2, 'global_pool': False, 'conv_bn': True, 'weight_decay': 1.6530346055463798e-07}. Best is trial 16 with value: 7.88936815590694.
Epoch 6/60 | Train loss: 7.9126 | Valid loss: 8.1078 | Expected time left: 9.80 ss

Best trial: 16. Best value: 7.88937:  41%|████      | 40/98 [1:52:34<54:36, 56.48s/it]  

[I 2025-01-10 09:36:29,129] Trial 39 finished with value: 8.02285236120224 and parameters: {'lr': 0.0003346016579810972, 'beta1': 0.9590000000000001, 'beta2': 0.905, 'activation': 'Tanh', 'n_linear': 2, 'n_conv': 1, 'conv_filter_type': 'increasing', 'kernel_size': 11, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 320, 'pooling_size': 2, 'global_pool': False, 'conv_bn': False, 'weight_decay': 0.00013244647697007426}. Best is trial 16 with value: 7.88936815590694.
Epoch 59/60 | Train loss: 7.9999 | Valid loss: 8.0000 | Expected time left: 0.94 ss

Best trial: 16. Best value: 7.88937:  42%|████▏     | 41/98 [1:53:31<53:44, 56.57s/it]

[I 2025-01-10 09:37:25,891] Trial 40 finished with value: 7.999977906545003 and parameters: {'lr': 0.00023456642975819766, 'beta1': 0.9540000000000001, 'beta2': 0.987, 'activation': 'SeLU', 'n_linear': 5, 'n_conv': 2, 'conv_filter_type': 'same', 'kernel_size': 17, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 480, 'pooling_size': 2, 'global_pool': True, 'conv_bn': False, 'weight_decay': 0.40725406114436535}. Best is trial 16 with value: 7.88936815590694.


Best trial: 16. Best value: 7.88937:  43%|████▎     | 42/98 [1:53:31<37:04, 39.72s/it]

[I 2025-01-10 09:37:26,285] Trial 41 finished with value: 8.120577121603079 and parameters: {'lr': 0.0008880316269660902, 'beta1': 0.986, 'beta2': 0.984, 'activation': 'SeLU', 'n_linear': 2, 'n_conv': 1, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 15, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 352, 'pooling_size': 3, 'global_pool': True, 'conv_bn': False, 'weight_decay': 1.4089185137002015e-07}. Best is trial 16 with value: 7.88936815590694.
Epoch 59/60 | Train loss: 8.0000 | Valid loss: 8.0000 | Expected time left: 1.17 ss

Best trial: 16. Best value: 7.88937:  44%|████▍     | 43/98 [1:54:42<44:51, 48.94s/it]

[I 2025-01-10 09:38:36,750] Trial 42 finished with value: 7.999884749358555 and parameters: {'lr': 0.0056156928410646505, 'beta1': 0.9520000000000001, 'beta2': 0.993, 'activation': 'SeLU', 'n_linear': 3, 'n_conv': 3, 'conv_filter_type': 'same', 'kernel_size': 15, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 192, 'pooling_size': 2, 'global_pool': True, 'conv_bn': True, 'weight_decay': 0.820458706793812}. Best is trial 16 with value: 7.88936815590694.


Best trial: 16. Best value: 7.88937:  45%|████▍     | 44/98 [1:54:47<32:09, 35.74s/it]

[I 2025-01-10 09:38:41,688] Trial 43 finished with value: 8.797144562857492 and parameters: {'lr': 0.0007170447617250585, 'beta1': 0.9380000000000001, 'beta2': 0.9670000000000001, 'activation': 'SeLU', 'n_linear': 3, 'n_conv': 5, 'conv_filter_type': 'increasing', 'kernel_size': 7, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 288, 'pooling_size': 2, 'global_pool': False, 'conv_bn': False, 'weight_decay': 2.824078823025901e-05}. Best is trial 16 with value: 7.88936815590694.
Epoch 59/60 | Train loss: 7.9965 | Valid loss: 8.0029 | Expected time left: 19.15 ss

Best trial: 16. Best value: 7.88937:  46%|████▌     | 45/98 [2:13:56<5:26:42, 369.85s/it]

[I 2025-01-10 09:57:51,144] Trial 44 finished with value: 8.001900067405096 and parameters: {'lr': 0.0001305224908479318, 'beta1': 0.996, 'beta2': 0.988, 'activation': 'SeLU', 'n_linear': 5, 'n_conv': 4, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 7, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 160, 'pooling_size': 2, 'global_pool': False, 'conv_bn': True, 'weight_decay': 0.003860947217420717}. Best is trial 16 with value: 7.88936815590694.
Epoch 59/60 | Train loss: 7.9999 | Valid loss: 8.0000 | Expected time left: 0.80 ss

Best trial: 16. Best value: 7.88937:  47%|████▋     | 46/98 [2:14:44<3:56:54, 273.36s/it]

[I 2025-01-10 09:58:39,351] Trial 45 finished with value: 8.000008419156075 and parameters: {'lr': 0.0012639059001467116, 'beta1': 0.908, 'beta2': 0.9400000000000001, 'activation': 'ELU', 'n_linear': 2, 'n_conv': 2, 'conv_filter_type': 'same', 'kernel_size': 11, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 320, 'pooling_size': 2, 'global_pool': True, 'conv_bn': False, 'weight_decay': 0.11777670681111535}. Best is trial 16 with value: 7.88936815590694.
Epoch 59/60 | Train loss: 8.0014 | Valid loss: 8.0040 | Expected time left: 27.81 sss

Best trial: 16. Best value: 7.88937:  48%|████▊     | 47/98 [2:42:33<9:48:11, 692.00s/it]

[I 2025-01-10 10:26:28,167] Trial 46 finished with value: 8.00290114430193 and parameters: {'lr': 0.0026105135599207145, 'beta1': 0.983, 'beta2': 0.9690000000000001, 'activation': 'ELU', 'n_linear': 5, 'n_conv': 5, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 9, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 32, 'pooling_size': 2, 'global_pool': False, 'conv_bn': True, 'weight_decay': 0.0027418261975217803}. Best is trial 16 with value: 7.88936815590694.


Best trial: 16. Best value: 7.88937:  49%|████▉     | 48/98 [2:42:34<6:43:49, 484.60s/it]

[I 2025-01-10 10:26:28,833] Trial 47 finished with value: 8.155498892068863 and parameters: {'lr': 0.0012021834902045255, 'beta1': 0.977, 'beta2': 0.91, 'activation': 'ELU', 'n_linear': 4, 'n_conv': 5, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 5, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 320, 'pooling_size': 2, 'global_pool': False, 'conv_bn': True, 'weight_decay': 8.139611578924267e-07}. Best is trial 16 with value: 7.88936815590694.
Epoch 59/60 | Train loss: 7.9991 | Valid loss: 8.0069 | Expected time left: 0.25 ss

Best trial: 16. Best value: 7.88937:  50%|█████     | 49/98 [2:42:49<4:40:42, 343.73s/it]

[I 2025-01-10 10:26:43,879] Trial 48 finished with value: 8.004576962569665 and parameters: {'lr': 0.003284932060640571, 'beta1': 0.902, 'beta2': 0.902, 'activation': 'ELU', 'n_linear': 3, 'n_conv': 2, 'conv_filter_type': 'increasing', 'kernel_size': 7, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 352, 'pooling_size': 2, 'global_pool': True, 'conv_bn': False, 'weight_decay': 8.67143990097916e-05}. Best is trial 16 with value: 7.88936815590694.


Best trial: 16. Best value: 7.88937:  51%|█████     | 50/98 [2:42:49<3:12:39, 240.83s/it]

[I 2025-01-10 10:26:44,606] Trial 49 finished with value: 8.608999086462934 and parameters: {'lr': 0.0018925697547456676, 'beta1': 0.9390000000000001, 'beta2': 0.927, 'activation': 'ReLU', 'n_linear': 2, 'n_conv': 1, 'conv_filter_type': 'same', 'kernel_size': 11, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 448, 'pooling_size': 3, 'global_pool': True, 'conv_bn': False, 'weight_decay': 0.0020727751895550602}. Best is trial 16 with value: 7.88936815590694.


Best trial: 16. Best value: 7.88937:  52%|█████▏    | 51/98 [2:42:51<2:12:20, 168.94s/it]

[I 2025-01-10 10:26:45,808] Trial 50 finished with value: 8.648247748613358 and parameters: {'lr': 0.003918091572232347, 'beta1': 0.9480000000000001, 'beta2': 0.911, 'activation': 'SeLU', 'n_linear': 3, 'n_conv': 1, 'conv_filter_type': 'same', 'kernel_size': 9, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 320, 'pooling_size': 2, 'global_pool': False, 'conv_bn': True, 'weight_decay': 0.00023096630725796095}. Best is trial 16 with value: 7.88936815590694.
Epoch 60/60 | Train loss: 7.9975 | Valid loss: 8.0010 | Expected time left: 0.00 s

Best trial: 16. Best value: 7.88937:  53%|█████▎    | 52/98 [2:42:56<1:31:58, 119.97s/it]

[I 2025-01-10 10:26:51,519] Trial 51 finished with value: 8.000118697131121 and parameters: {'lr': 0.00016212887039362508, 'beta1': 0.924, 'beta2': 0.998, 'activation': 'Tanh', 'n_linear': 4, 'n_conv': 1, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 9, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 384, 'pooling_size': 3, 'global_pool': True, 'conv_bn': False, 'weight_decay': 0.008817187685200928}. Best is trial 16 with value: 7.88936815590694.
Epoch 59/60 | Train loss: 8.0000 | Valid loss: 8.0000 | Expected time left: 0.87 ss

Best trial: 16. Best value: 7.88937:  54%|█████▍    | 53/98 [2:43:49<1:14:44, 99.67s/it] 

[I 2025-01-10 10:27:43,802] Trial 52 finished with value: 7.999982159608488 and parameters: {'lr': 0.00011565709762960558, 'beta1': 0.993, 'beta2': 0.905, 'activation': 'ELU', 'n_linear': 5, 'n_conv': 2, 'conv_filter_type': 'same', 'kernel_size': 11, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 64, 'pooling_size': 2, 'global_pool': True, 'conv_bn': True, 'weight_decay': 0.6777301255333951}. Best is trial 16 with value: 7.88936815590694.
Epoch 58/60 | Train loss: 7.9770 | Valid loss: 8.0223 | Expected time left: 0.32 s

Best trial: 16. Best value: 7.88937:  55%|█████▌    | 54/98 [2:43:59<53:22, 72.78s/it]  

[I 2025-01-10 10:27:53,839] Trial 53 finished with value: 8.00272219269364 and parameters: {'lr': 0.00047183655415103913, 'beta1': 0.918, 'beta2': 0.978, 'activation': 'Tanh', 'n_linear': 2, 'n_conv': 5, 'conv_filter_type': 'increasing', 'kernel_size': 13, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 384, 'pooling_size': 3, 'global_pool': True, 'conv_bn': False, 'weight_decay': 1.9088172008612072e-07}. Best is trial 16 with value: 7.88936815590694.
Epoch 59/60 | Train loss: 8.0526 | Valid loss: 8.0675 | Expected time left: 0.51 ss

Best trial: 16. Best value: 7.88937:  56%|█████▌    | 55/98 [2:44:30<43:12, 60.30s/it]

[I 2025-01-10 10:28:25,012] Trial 54 finished with value: 8.028789220537458 and parameters: {'lr': 0.00542079510624578, 'beta1': 0.9570000000000001, 'beta2': 0.9430000000000001, 'activation': 'Tanh', 'n_linear': 3, 'n_conv': 2, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 13, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 96, 'pooling_size': 2, 'global_pool': True, 'conv_bn': True, 'weight_decay': 5.37795941926493e-06}. Best is trial 16 with value: 7.88936815590694.
Epoch 59/60 | Train loss: 7.9996 | Valid loss: 8.0015 | Expected time left: 0.78 ss

Best trial: 16. Best value: 7.88937:  57%|█████▋    | 56/98 [2:45:17<39:23, 56.27s/it]

[I 2025-01-10 10:29:11,887] Trial 55 finished with value: 8.000954532623291 and parameters: {'lr': 0.005514419516648056, 'beta1': 0.9440000000000001, 'beta2': 0.9510000000000001, 'activation': 'SeLU', 'n_linear': 5, 'n_conv': 2, 'conv_filter_type': 'same', 'kernel_size': 5, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 288, 'pooling_size': 2, 'global_pool': True, 'conv_bn': False, 'weight_decay': 0.008860138651974053}. Best is trial 16 with value: 7.88936815590694.
Epoch 60/60 | Train loss: 8.0000 | Valid loss: 8.0000 | Expected time left: 0.00 s

Best trial: 16. Best value: 7.88937:  58%|█████▊    | 57/98 [2:45:28<29:11, 42.71s/it]

[I 2025-01-10 10:29:22,953] Trial 56 finished with value: 7.99997445344925 and parameters: {'lr': 0.0002158160878940562, 'beta1': 0.916, 'beta2': 0.903, 'activation': 'Tanh', 'n_linear': 5, 'n_conv': 3, 'conv_filter_type': 'increasing', 'kernel_size': 7, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 256, 'pooling_size': 3, 'global_pool': False, 'conv_bn': False, 'weight_decay': 0.8061407233479758}. Best is trial 16 with value: 7.88936815590694.


Best trial: 16. Best value: 7.88937:  59%|█████▉    | 58/98 [2:45:29<20:08, 30.21s/it]

[I 2025-01-10 10:29:23,990] Trial 57 finished with value: 8.760146331787109 and parameters: {'lr': 0.00851603412148059, 'beta1': 0.9, 'beta2': 0.995, 'activation': 'SeLU', 'n_linear': 3, 'n_conv': 4, 'conv_filter_type': 'same', 'kernel_size': 13, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 256, 'pooling_size': 3, 'global_pool': False, 'conv_bn': False, 'weight_decay': 0.0060636789378003585}. Best is trial 16 with value: 7.88936815590694.
Epoch 12/60 | Train loss: 7.6311 | Valid loss: 7.8514 | Expected time left: 15.86 s

Best trial: 58. Best value: 7.85141:  60%|██████    | 59/98 [2:45:33<14:38, 22.52s/it]

[I 2025-01-10 10:29:28,581] Trial 58 finished with value: 7.85141080358754 and parameters: {'lr': 0.0009870735806235635, 'beta1': 0.9610000000000001, 'beta2': 0.986, 'activation': 'ELU', 'n_linear': 4, 'n_conv': 2, 'conv_filter_type': 'increasing', 'kernel_size': 5, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 448, 'pooling_size': 3, 'global_pool': False, 'conv_bn': True, 'weight_decay': 1.946400156950539e-05}. Best is trial 58 with value: 7.85141080358754.
Epoch 59/60 | Train loss: 8.0000 | Valid loss: 8.0001 | Expected time left: 0.74 ss

Best trial: 58. Best value: 7.85141:  61%|██████    | 60/98 [2:46:18<18:28, 29.17s/it]

[I 2025-01-10 10:30:13,253] Trial 59 finished with value: 7.999882667687289 and parameters: {'lr': 0.002163648814722611, 'beta1': 0.98, 'beta2': 0.976, 'activation': 'ELU', 'n_linear': 4, 'n_conv': 1, 'conv_filter_type': 'same', 'kernel_size': 13, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 64, 'pooling_size': 3, 'global_pool': True, 'conv_bn': True, 'weight_decay': 0.11049319888520401}. Best is trial 58 with value: 7.85141080358754.
Epoch 59/60 | Train loss: 7.9968 | Valid loss: 8.0056 | Expected time left: 0.45 ss

Best trial: 58. Best value: 7.85141:  62%|██████▏   | 61/98 [2:46:45<17:36, 28.55s/it]

[I 2025-01-10 10:30:40,377] Trial 60 finished with value: 8.002331709559959 and parameters: {'lr': 0.0010918514879245187, 'beta1': 0.9470000000000001, 'beta2': 0.902, 'activation': 'Tanh', 'n_linear': 4, 'n_conv': 4, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 17, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 128, 'pooling_size': 3, 'global_pool': False, 'conv_bn': True, 'weight_decay': 0.00025182701305725557}. Best is trial 58 with value: 7.85141080358754.
Epoch 59/60 | Train loss: 7.9965 | Valid loss: 8.0031 | Expected time left: 0.28 ss

Best trial: 58. Best value: 7.85141:  63%|██████▎   | 62/98 [2:47:02<15:01, 25.04s/it]

[I 2025-01-10 10:30:57,209] Trial 61 finished with value: 8.001070151253352 and parameters: {'lr': 0.0003262519645131201, 'beta1': 0.928, 'beta2': 0.93, 'activation': 'ReLU', 'n_linear': 4, 'n_conv': 2, 'conv_filter_type': 'increasing', 'kernel_size': 13, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 160, 'pooling_size': 3, 'global_pool': False, 'conv_bn': True, 'weight_decay': 0.00270432964635403}. Best is trial 58 with value: 7.85141080358754.
Epoch 3/60 | Train loss: 7.9968 | Valid loss: 8.0349 | Expected time left: 17.15 s

Best trial: 58. Best value: 7.85141:  64%|██████▍   | 63/98 [2:47:04<10:29, 17.98s/it]

[I 2025-01-10 10:30:58,708] Trial 62 finished with value: 8.00578011572361 and parameters: {'lr': 0.007994783075403888, 'beta1': 0.96, 'beta2': 0.981, 'activation': 'ReLU', 'n_linear': 3, 'n_conv': 5, 'conv_filter_type': 'increasing', 'kernel_size': 15, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 320, 'pooling_size': 2, 'global_pool': True, 'conv_bn': False, 'weight_decay': 3.2898689667173864e-06}. Best is trial 58 with value: 7.85141080358754.


Best trial: 58. Best value: 7.85141:  65%|██████▌   | 64/98 [2:47:05<07:19, 12.91s/it]

[I 2025-01-10 10:30:59,810] Trial 63 finished with value: 8.356313468544347 and parameters: {'lr': 0.00015539216267650773, 'beta1': 0.902, 'beta2': 0.964, 'activation': 'ReLU', 'n_linear': 4, 'n_conv': 3, 'conv_filter_type': 'increasing', 'kernel_size': 9, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 64, 'pooling_size': 3, 'global_pool': True, 'conv_bn': False, 'weight_decay': 0.18169213268243448}. Best is trial 58 with value: 7.85141080358754.
Epoch 4/60 | Train loss: 7.9959 | Valid loss: 8.1921 | Expected time left: 10.37 s

Best trial: 58. Best value: 7.85141:  66%|██████▋   | 65/98 [2:47:06<05:08,  9.35s/it]

[I 2025-01-10 10:31:00,850] Trial 64 finished with value: 8.07363474709647 and parameters: {'lr': 0.00024195502685594222, 'beta1': 0.932, 'beta2': 0.922, 'activation': 'ELU', 'n_linear': 5, 'n_conv': 2, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 11, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 288, 'pooling_size': 3, 'global_pool': True, 'conv_bn': False, 'weight_decay': 1.6870214187220567e-06}. Best is trial 58 with value: 7.85141080358754.
Epoch 59/60 | Train loss: 7.9985 | Valid loss: 8.0012 | Expected time left: 4.08 sss

Best trial: 58. Best value: 7.85141:  67%|██████▋   | 66/98 [2:51:11<42:44, 80.15s/it]

[I 2025-01-10 10:35:06,205] Trial 65 finished with value: 8.001112565398216 and parameters: {'lr': 0.0023988869486893456, 'beta1': 0.92, 'beta2': 0.9530000000000001, 'activation': 'ELU', 'n_linear': 4, 'n_conv': 4, 'conv_filter_type': 'increasing', 'kernel_size': 7, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 320, 'pooling_size': 2, 'global_pool': True, 'conv_bn': True, 'weight_decay': 0.00914561434983535}. Best is trial 58 with value: 7.85141080358754.
Epoch 59/60 | Train loss: 7.9990 | Valid loss: 8.0003 | Expected time left: 0.65 ss

Best trial: 58. Best value: 7.85141:  68%|██████▊   | 67/98 [2:51:50<35:06, 67.94s/it]

[I 2025-01-10 10:35:45,641] Trial 66 finished with value: 8.00029515005221 and parameters: {'lr': 0.00011811927184193546, 'beta1': 0.93, 'beta2': 0.926, 'activation': 'ELU', 'n_linear': 3, 'n_conv': 2, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 17, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 64, 'pooling_size': 3, 'global_pool': False, 'conv_bn': True, 'weight_decay': 0.03390104254961122}. Best is trial 58 with value: 7.85141080358754.
Epoch 4/60 | Train loss: 8.0322 | Valid loss: 8.0863 | Expected time left: 63.01 s

Best trial: 58. Best value: 7.85141:  69%|██████▉   | 68/98 [2:51:56<24:40, 49.34s/it]

[I 2025-01-10 10:35:51,574] Trial 67 finished with value: 8.051084918067568 and parameters: {'lr': 0.00040677876945069023, 'beta1': 0.908, 'beta2': 0.9400000000000001, 'activation': 'Tanh', 'n_linear': 4, 'n_conv': 5, 'conv_filter_type': 'same', 'kernel_size': 11, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 96, 'pooling_size': 2, 'global_pool': True, 'conv_bn': False, 'weight_decay': 0.0007025977444867789}. Best is trial 58 with value: 7.85141080358754.
Epoch 59/60 | Train loss: 7.9995 | Valid loss: 8.0020 | Expected time left: 0.27 ss

Best trial: 58. Best value: 7.85141:  70%|███████   | 69/98 [2:52:13<19:04, 39.46s/it]

[I 2025-01-10 10:36:07,973] Trial 68 finished with value: 8.001217865943909 and parameters: {'lr': 0.008543667186048135, 'beta1': 0.9430000000000001, 'beta2': 0.931, 'activation': 'Tanh', 'n_linear': 2, 'n_conv': 4, 'conv_filter_type': 'increasing', 'kernel_size': 15, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 512, 'pooling_size': 2, 'global_pool': True, 'conv_bn': True, 'weight_decay': 0.007000505350271644}. Best is trial 58 with value: 7.85141080358754.
Epoch 59/60 | Train loss: 7.9964 | Valid loss: 8.0055 | Expected time left: 1.24 ss

Best trial: 58. Best value: 7.85141:  71%|███████▏  | 70/98 [2:53:28<23:22, 50.10s/it]

[I 2025-01-10 10:37:22,910] Trial 69 finished with value: 8.001100654419238 and parameters: {'lr': 0.00013322434578497613, 'beta1': 0.9500000000000001, 'beta2': 0.999, 'activation': 'ReLU', 'n_linear': 4, 'n_conv': 3, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 7, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 32, 'pooling_size': 2, 'global_pool': True, 'conv_bn': False, 'weight_decay': 9.115236477925342e-07}. Best is trial 58 with value: 7.85141080358754.


Best trial: 58. Best value: 7.85141:  72%|███████▏  | 71/98 [2:53:29<15:55, 35.38s/it]

[I 2025-01-10 10:37:23,956] Trial 70 finished with value: 8.367652066548665 and parameters: {'lr': 0.0011765767520027447, 'beta1': 0.921, 'beta2': 0.901, 'activation': 'SeLU', 'n_linear': 3, 'n_conv': 1, 'conv_filter_type': 'same', 'kernel_size': 5, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 224, 'pooling_size': 2, 'global_pool': False, 'conv_bn': False, 'weight_decay': 0.03223470527632585}. Best is trial 58 with value: 7.85141080358754.
Epoch 10/60 | Train loss: 7.8793 | Valid loss: 8.0951 | Expected time left: 17.75 s

Best trial: 58. Best value: 7.85141:  73%|███████▎  | 72/98 [2:53:33<11:16, 26.04s/it]

[I 2025-01-10 10:37:28,181] Trial 71 finished with value: 8.024554565035064 and parameters: {'lr': 0.00042737960318793035, 'beta1': 0.968, 'beta2': 0.9430000000000001, 'activation': 'Tanh', 'n_linear': 3, 'n_conv': 4, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 13, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 352, 'pooling_size': 2, 'global_pool': False, 'conv_bn': True, 'weight_decay': 1.2155122124926735e-07}. Best is trial 58 with value: 7.85141080358754.


Best trial: 58. Best value: 7.85141:  74%|███████▍  | 73/98 [2:53:34<07:40, 18.44s/it]

[I 2025-01-10 10:37:28,892] Trial 72 finished with value: 8.305105298757553 and parameters: {'lr': 0.008184700585797086, 'beta1': 0.973, 'beta2': 0.935, 'activation': 'ELU', 'n_linear': 2, 'n_conv': 1, 'conv_filter_type': 'same', 'kernel_size': 7, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 320, 'pooling_size': 2, 'global_pool': True, 'conv_bn': False, 'weight_decay': 0.003216101708708016}. Best is trial 58 with value: 7.85141080358754.
Epoch 59/60 | Train loss: 7.9970 | Valid loss: 8.0029 | Expected time left: 4.48 sss

Best trial: 58. Best value: 7.85141:  76%|███████▌  | 74/98 [2:58:03<37:28, 93.68s/it]

[I 2025-01-10 10:41:58,145] Trial 73 finished with value: 8.000965878367424 and parameters: {'lr': 0.0011497522456628113, 'beta1': 0.904, 'beta2': 0.996, 'activation': 'ELU', 'n_linear': 4, 'n_conv': 4, 'conv_filter_type': 'increasing', 'kernel_size': 9, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 320, 'pooling_size': 3, 'global_pool': True, 'conv_bn': False, 'weight_decay': 0.0033164670634716646}. Best is trial 58 with value: 7.85141080358754.
Epoch 59/60 | Train loss: 7.9982 | Valid loss: 8.0009 | Expected time left: 17.09 ss

Best trial: 58. Best value: 7.85141:  77%|███████▋  | 75/98 [3:15:09<2:23:05, 373.26s/it]

[I 2025-01-10 10:59:03,755] Trial 74 finished with value: 8.000764665149507 and parameters: {'lr': 0.0003730899740186701, 'beta1': 0.981, 'beta2': 0.986, 'activation': 'SeLU', 'n_linear': 3, 'n_conv': 5, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 7, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 96, 'pooling_size': 3, 'global_pool': True, 'conv_bn': False, 'weight_decay': 0.013477545707570898}. Best is trial 58 with value: 7.85141080358754.
Epoch 59/60 | Train loss: 7.9994 | Valid loss: 8.0004 | Expected time left: 16.41 ss

Best trial: 58. Best value: 7.85141:  78%|███████▊  | 76/98 [3:31:34<3:24:10, 556.84s/it]

[I 2025-01-10 11:15:28,936] Trial 75 finished with value: 8.000134486186353 and parameters: {'lr': 0.0012147150462740695, 'beta1': 0.9590000000000001, 'beta2': 0.9500000000000001, 'activation': 'Tanh', 'n_linear': 4, 'n_conv': 4, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 7, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 128, 'pooling_size': 3, 'global_pool': True, 'conv_bn': False, 'weight_decay': 0.03065790621888019}. Best is trial 58 with value: 7.85141080358754.
Epoch 59/60 | Train loss: 7.9996 | Valid loss: 8.0003 | Expected time left: 1.03 ss

Best trial: 58. Best value: 7.85141:  79%|███████▊  | 77/98 [3:32:36<2:22:58, 408.49s/it]

[I 2025-01-10 11:16:31,275] Trial 76 finished with value: 8.000206777784559 and parameters: {'lr': 0.0012836689259299004, 'beta1': 0.9, 'beta2': 0.976, 'activation': 'Tanh', 'n_linear': 3, 'n_conv': 2, 'conv_filter_type': 'same', 'kernel_size': 17, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 224, 'pooling_size': 2, 'global_pool': True, 'conv_bn': True, 'weight_decay': 0.03625611944334442}. Best is trial 58 with value: 7.85141080358754.
Epoch 59/60 | Train loss: 7.9962 | Valid loss: 8.0046 | Expected time left: 0.21 ss

Best trial: 58. Best value: 7.85141:  80%|███████▉  | 78/98 [3:32:49<1:36:37, 289.89s/it]

[I 2025-01-10 11:16:44,431] Trial 77 finished with value: 8.000778639758074 and parameters: {'lr': 0.0001534550834241584, 'beta1': 0.9580000000000001, 'beta2': 0.9480000000000001, 'activation': 'ReLU', 'n_linear': 4, 'n_conv': 4, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 15, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 384, 'pooling_size': 3, 'global_pool': True, 'conv_bn': False, 'weight_decay': 0.00010917810781903589}. Best is trial 58 with value: 7.85141080358754.
Epoch 59/60 | Train loss: 7.5988 | Valid loss: 7.8346 | Expected time left: 0.28 ss

Best trial: 78. Best value: 7.82701:  81%|████████  | 79/98 [3:33:07<1:05:54, 208.14s/it]

[I 2025-01-10 11:17:01,827] Trial 78 finished with value: 7.827013240670258 and parameters: {'lr': 0.0001503837323673155, 'beta1': 0.922, 'beta2': 0.9590000000000001, 'activation': 'SeLU', 'n_linear': 3, 'n_conv': 4, 'conv_filter_type': 'increasing', 'kernel_size': 15, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 192, 'pooling_size': 2, 'global_pool': False, 'conv_bn': False, 'weight_decay': 1.2333292035495694e-05}. Best is trial 78 with value: 7.827013240670258.
Epoch 59/60 | Train loss: 8.0028 | Valid loss: 8.0097 | Expected time left: 0.80 ss

Best trial: 78. Best value: 7.82701:  82%|████████▏ | 80/98 [3:33:55<48:03, 160.22s/it]  

[I 2025-01-10 11:17:50,231] Trial 79 finished with value: 8.005239695594424 and parameters: {'lr': 0.007609541506101938, 'beta1': 0.925, 'beta2': 0.9420000000000001, 'activation': 'ReLU', 'n_linear': 3, 'n_conv': 3, 'conv_filter_type': 'same', 'kernel_size': 11, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 96, 'pooling_size': 3, 'global_pool': False, 'conv_bn': True, 'weight_decay': 9.447339508495081e-06}. Best is trial 78 with value: 7.827013240670258.
Epoch 59/60 | Train loss: 7.9966 | Valid loss: 8.0060 | Expected time left: 1.28 ss

Best trial: 78. Best value: 7.82701:  83%|████████▎ | 81/98 [3:35:12<38:18, 135.21s/it]

[I 2025-01-10 11:19:07,090] Trial 80 finished with value: 8.001829969997223 and parameters: {'lr': 0.0003337423867546693, 'beta1': 0.924, 'beta2': 0.99, 'activation': 'ELU', 'n_linear': 5, 'n_conv': 1, 'conv_filter_type': 'increasing', 'kernel_size': 17, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 32, 'pooling_size': 3, 'global_pool': True, 'conv_bn': True, 'weight_decay': 8.541875427902002e-05}. Best is trial 78 with value: 7.827013240670258.
Epoch 20/60 | Train loss: 7.7405 | Valid loss: 7.8005 | Expected time left: 42.89 s

Best trial: 81. Best value: 7.80051:  84%|████████▎ | 82/98 [3:35:35<27:04, 101.51s/it]

[I 2025-01-10 11:19:29,964] Trial 81 finished with value: 7.800514224228586 and parameters: {'lr': 0.0003313718267582067, 'beta1': 0.973, 'beta2': 0.998, 'activation': 'SeLU', 'n_linear': 3, 'n_conv': 5, 'conv_filter_type': 'increasing', 'kernel_size': 7, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 64, 'pooling_size': 3, 'global_pool': True, 'conv_bn': True, 'weight_decay': 3.733167017864098e-07}. Best is trial 81 with value: 7.800514224228586.
Epoch 59/60 | Train loss: 7.9999 | Valid loss: 8.0001 | Expected time left: 0.46 ss

Best trial: 81. Best value: 7.80051:  85%|████████▍ | 83/98 [3:36:03<19:51, 79.46s/it] 

[I 2025-01-10 11:19:57,978] Trial 82 finished with value: 7.999933442615327 and parameters: {'lr': 0.0008252799289286202, 'beta1': 0.9480000000000001, 'beta2': 0.916, 'activation': 'ReLU', 'n_linear': 3, 'n_conv': 4, 'conv_filter_type': 'increasing', 'kernel_size': 15, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 96, 'pooling_size': 2, 'global_pool': True, 'conv_bn': False, 'weight_decay': 0.1435730692493399}. Best is trial 81 with value: 7.800514224228586.
Epoch 1/60 | Train loss: 8.0724 | Valid loss: 8.0442 | Expected time left: 275.18 s

Best trial: 81. Best value: 7.80051:  86%|████████▌ | 84/98 [3:36:12<13:39, 58.51s/it]

[I 2025-01-10 11:20:07,601] Trial 83 finished with value: 8.044202859061105 and parameters: {'lr': 0.005510066624168155, 'beta1': 0.923, 'beta2': 0.9450000000000001, 'activation': 'ReLU', 'n_linear': 3, 'n_conv': 5, 'conv_filter_type': 'increasing', 'kernel_size': 13, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 288, 'pooling_size': 2, 'global_pool': True, 'conv_bn': False, 'weight_decay': 5.912801625145088e-05}. Best is trial 81 with value: 7.800514224228586.
Epoch 59/60 | Train loss: 8.0000 | Valid loss: 7.9998 | Expected time left: 13.91 ss

Best trial: 81. Best value: 7.80051:  87%|████████▋ | 85/98 [3:50:08<1:03:09, 291.51s/it]

[I 2025-01-10 11:34:02,769] Trial 84 finished with value: 7.999841243598111 and parameters: {'lr': 0.006763634594859098, 'beta1': 0.9530000000000001, 'beta2': 0.915, 'activation': 'Tanh', 'n_linear': 5, 'n_conv': 2, 'conv_filter_type': 'increasing', 'kernel_size': 7, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 64, 'pooling_size': 3, 'global_pool': True, 'conv_bn': True, 'weight_decay': 0.7740557360125179}. Best is trial 81 with value: 7.800514224228586.
Epoch 59/60 | Train loss: 7.9999 | Valid loss: 8.0003 | Expected time left: 0.29 ss

Best trial: 81. Best value: 7.80051:  88%|████████▊ | 86/98 [3:50:25<41:52, 209.38s/it]  

[I 2025-01-10 11:34:20,530] Trial 85 finished with value: 7.999952307287252 and parameters: {'lr': 0.00216690562826753, 'beta1': 0.9630000000000001, 'beta2': 0.916, 'activation': 'ReLU', 'n_linear': 4, 'n_conv': 3, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 15, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 192, 'pooling_size': 2, 'global_pool': True, 'conv_bn': False, 'weight_decay': 0.06276893601434352}. Best is trial 81 with value: 7.800514224228586.


Best trial: 81. Best value: 7.80051:  89%|████████▉ | 87/98 [3:50:26<26:54, 146.75s/it]

[I 2025-01-10 11:34:21,139] Trial 86 finished with value: 14.69127579393058 and parameters: {'lr': 0.00046818349005472895, 'beta1': 0.917, 'beta2': 0.9710000000000001, 'activation': 'ReLU', 'n_linear': 3, 'n_conv': 3, 'conv_filter_type': 'increasing', 'kernel_size': 9, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 352, 'pooling_size': 2, 'global_pool': True, 'conv_bn': False, 'weight_decay': 3.892753457985506e-06}. Best is trial 81 with value: 7.800514224228586.
Epoch 2/60 | Train loss: 8.0017 | Valid loss: 8.0098 | Expected time left: 23.66 s

Best trial: 81. Best value: 7.80051:  90%|████████▉ | 88/98 [3:50:27<17:11, 103.19s/it]

[I 2025-01-10 11:34:22,673] Trial 87 finished with value: 8.009773009163993 and parameters: {'lr': 0.004721524531559767, 'beta1': 0.927, 'beta2': 0.964, 'activation': 'ReLU', 'n_linear': 2, 'n_conv': 2, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 15, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 288, 'pooling_size': 2, 'global_pool': False, 'conv_bn': True, 'weight_decay': 4.6004873705119424e-05}. Best is trial 81 with value: 7.800514224228586.


Best trial: 81. Best value: 7.80051:  91%|█████████ | 89/98 [3:50:28<10:51, 72.40s/it] 

[I 2025-01-10 11:34:23,237] Trial 88 finished with value: 8.111591256183127 and parameters: {'lr': 0.008986708545984485, 'beta1': 0.911, 'beta2': 0.9420000000000001, 'activation': 'ELU', 'n_linear': 5, 'n_conv': 2, 'conv_filter_type': 'increasing', 'kernel_size': 5, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': True, 'dense_bn': True, 'batch_size': 448, 'pooling_size': 2, 'global_pool': True, 'conv_bn': False, 'weight_decay': 1.2213165829005455e-07}. Best is trial 81 with value: 7.800514224228586.
Epoch 49/60 | Train loss: 7.9667 | Valid loss: 8.0154 | Expected time left: 11.45 s

Best trial: 81. Best value: 7.80051:  92%|█████████▏| 90/98 [3:51:20<08:51, 66.39s/it]

[I 2025-01-10 11:35:15,592] Trial 89 finished with value: 8.003788471221924 and parameters: {'lr': 0.0008660359710348397, 'beta1': 0.93, 'beta2': 0.9590000000000001, 'activation': 'ELU', 'n_linear': 5, 'n_conv': 5, 'conv_filter_type': 'same', 'kernel_size': 7, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 512, 'pooling_size': 3, 'global_pool': True, 'conv_bn': True, 'weight_decay': 6.375745593554727e-07}. Best is trial 81 with value: 7.800514224228586.
Epoch 59/60 | Train loss: 8.0000 | Valid loss: 8.0000 | Expected time left: 0.26 ss

Best trial: 81. Best value: 7.80051:  93%|█████████▎| 91/98 [3:51:36<05:58, 51.20s/it]

[I 2025-01-10 11:35:31,350] Trial 90 finished with value: 7.999984952467907 and parameters: {'lr': 0.0001540687537637355, 'beta1': 0.99, 'beta2': 0.966, 'activation': 'SeLU', 'n_linear': 3, 'n_conv': 2, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 17, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 128, 'pooling_size': 3, 'global_pool': False, 'conv_bn': False, 'weight_decay': 0.45052341569107734}. Best is trial 81 with value: 7.800514224228586.
Epoch 15/60 | Train loss: 7.2332 | Valid loss: 8.0063 | Expected time left: 31.69 s

Best trial: 81. Best value: 7.80051:  94%|█████████▍| 92/98 [3:51:48<03:55, 39.31s/it]

[I 2025-01-10 11:35:42,926] Trial 91 finished with value: 7.92415531703404 and parameters: {'lr': 0.0015896522646832167, 'beta1': 0.974, 'beta2': 0.9500000000000001, 'activation': 'ReLU', 'n_linear': 3, 'n_conv': 1, 'conv_filter_type': 'same', 'kernel_size': 9, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 288, 'pooling_size': 2, 'global_pool': False, 'conv_bn': False, 'weight_decay': 2.3551823898758735e-05}. Best is trial 81 with value: 7.800514224228586.
Epoch 59/60 | Train loss: 8.0000 | Valid loss: 8.0006 | Expected time left: 0.63 ss

Best trial: 81. Best value: 7.80051:  95%|█████████▍| 93/98 [3:52:26<03:14, 38.91s/it]

[I 2025-01-10 11:36:20,911] Trial 92 finished with value: 7.999896091885037 and parameters: {'lr': 0.004028759653542734, 'beta1': 0.9570000000000001, 'beta2': 0.9510000000000001, 'activation': 'SeLU', 'n_linear': 4, 'n_conv': 1, 'conv_filter_type': 'same', 'kernel_size': 11, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 224, 'pooling_size': 2, 'global_pool': True, 'conv_bn': True, 'weight_decay': 0.0584161175523332}. Best is trial 81 with value: 7.800514224228586.


Best trial: 81. Best value: 7.80051:  96%|█████████▌| 94/98 [3:52:27<01:50, 27.53s/it]

[I 2025-01-10 11:36:21,884] Trial 93 finished with value: 1148216925441.364 and parameters: {'lr': 0.008130570557008924, 'beta1': 0.982, 'beta2': 0.9, 'activation': 'ReLU', 'n_linear': 4, 'n_conv': 4, 'conv_filter_type': 'same', 'kernel_size': 9, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 192, 'pooling_size': 3, 'global_pool': False, 'conv_bn': False, 'weight_decay': 1.1773844296998151e-07}. Best is trial 81 with value: 7.800514224228586.
Epoch 59/60 | Train loss: 7.9976 | Valid loss: 8.0019 | Expected time left: 0.89 ss

Best trial: 81. Best value: 7.80051:  97%|█████████▋| 95/98 [3:53:21<01:46, 35.41s/it]

[I 2025-01-10 11:37:15,688] Trial 94 finished with value: 8.000858117949289 and parameters: {'lr': 0.001871509744914136, 'beta1': 0.991, 'beta2': 0.9610000000000001, 'activation': 'Tanh', 'n_linear': 2, 'n_conv': 3, 'conv_filter_type': 'same', 'kernel_size': 9, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 192, 'pooling_size': 3, 'global_pool': False, 'conv_bn': True, 'weight_decay': 0.005137671789863878}. Best is trial 81 with value: 7.800514224228586.
Epoch 59/60 | Train loss: 7.9966 | Valid loss: 8.0029 | Expected time left: 17.04 ss

Best trial: 81. Best value: 7.80051:  98%|█████████▊| 96/98 [4:10:23<11:03, 331.68s/it]

[I 2025-01-10 11:54:18,674] Trial 95 finished with value: 8.001585547129313 and parameters: {'lr': 0.0003554367658035554, 'beta1': 0.93, 'beta2': 0.978, 'activation': 'Tanh', 'n_linear': 5, 'n_conv': 3, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 13, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': True, 'dense_bn': False, 'batch_size': 96, 'pooling_size': 3, 'global_pool': True, 'conv_bn': False, 'weight_decay': 0.0028728716403281537}. Best is trial 81 with value: 7.800514224228586.
Epoch 43/60 | Train loss: 7.7395 | Valid loss: 8.0333 | Expected time left: 33.40 ss

Best trial: 81. Best value: 7.80051:  99%|█████████▉| 97/98 [4:11:50<04:18, 258.19s/it]

[I 2025-01-10 11:55:45,370] Trial 96 finished with value: 7.870500777856991 and parameters: {'lr': 0.00047996363429746457, 'beta1': 0.907, 'beta2': 0.9400000000000001, 'activation': 'SeLU', 'n_linear': 2, 'n_conv': 5, 'conv_filter_type': 'increasing_clipped', 'kernel_size': 11, 'conv_filter_size': 10, 'linear_size': 256, 'input_bn': False, 'dense_bn': True, 'batch_size': 32, 'pooling_size': 2, 'global_pool': True, 'conv_bn': True, 'weight_decay': 1.0585549414012343e-05}. Best is trial 81 with value: 7.800514224228586.
Epoch 14/60 | Train loss: 8.0204 | Valid loss: 8.0300 | Expected time left: 15.58 s

Best trial: 81. Best value: 7.80051: 100%|██████████| 98/98 [4:11:56<00:00, 154.25s/it]

[I 2025-01-10 11:55:50,754] Trial 97 finished with value: 8.007002706113068 and parameters: {'lr': 0.0063119909283947005, 'beta1': 0.911, 'beta2': 0.995, 'activation': 'SeLU', 'n_linear': 5, 'n_conv': 2, 'conv_filter_type': 'increasing', 'kernel_size': 9, 'conv_filter_size': 20, 'linear_size': 256, 'input_bn': False, 'dense_bn': False, 'batch_size': 448, 'pooling_size': 2, 'global_pool': True, 'conv_bn': True, 'weight_decay': 2.4953751842497307e-05}. Best is trial 81 with value: 7.800514224228586.





In [None]:
def func(c):
    """Evaluate (e^(2c) + e^2) / (e^(2c+1) + e^2) in a numerically stable way.

    The ratio is computed in log space: log(e^a + e^b) is obtained with
    ``np.logaddexp(a, b)``, so neither the numerator nor the denominator is
    ever materialised as a (possibly overflowing) exponential. A direct
    evaluation with ``np.exp`` overflows to ``inf / inf = nan`` once ``2*c``
    exceeds the float64 exponent range (c of a few hundred), whereas the
    true value tends to 1/e as c -> +inf and to 1 as c -> -inf.

    Parameters
    ----------
    c : float or numpy.ndarray
        Point(s) at which to evaluate the ratio. Arrays are handled
        element-wise.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Value(s) of the ratio, with the same shape as ``c``.
    """
    # log of the numerator: log(e^(2c) + e^2)
    log_numerator = np.logaddexp(2 * c, 2)
    # log of the denominator: log(e^(2c+1) + e^2)
    log_denominator = np.logaddexp(2 * c + 1, 2)
    # The difference of logs is bounded (between -1 and 0), so exp is safe.
    return np.exp(log_numerator - log_denominator)