In [1]:
## Standard libraries
import os
import numpy as np
import math
import json
from functools import partial

import random as rd

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import GPUtil
import torch
import torch.nn as nn
import sys
sys.path.append('../model')
from utils import amino_acid_to_number, tokenize

device = "cuda:0"

import sys
sys.path.append('../model')
from functions import get_A2N_list, tokenize, make_train_val_test_lists_rand, prepare_data
from models import ProtDataset

outpath = "../output/"

In [2]:
# os.makedirs(outpath + study_id + "_rep_" + str(0))

In [3]:
data_name = "Faure2023_1_lenient"
train_percent = 20

In [4]:
# make folder for storing analysis outputs

study_id = "_".join([data_name, str(train_percent) + "%"])

matching_folders = [folder for folder in os.listdir(outpath) if study_id in folder and os.path.isdir(os.path.join(outpath, folder)) ]

if len(matching_folders) == 0:
    rep = 0
else: rep = np.max([int(folder.split("_")[-1]) for folder in matching_folders]) + 1

results_path = outpath + "_".join([study_id, "rep", str(rep)])
os.makedirs(results_path)

In [5]:
R2s = pd.DataFrame(columns=['Model', 'R2'])
R2s.to_csv(os.path.join(results_path, 'R2s.csv'), index=False)

### Read in data

In [6]:
in_path = "../Data/Data_prepared/" + data_name + ".csv"
datafile = pd.read_csv(in_path, index_col=None)

In [7]:
phenotypes, seqs, seqs1h = prepare_data(datafile)

  seqs = seqs[:, sites_var]


In [8]:
_, L, AA_size = seqs1h.shape
print(f"sequence length = {L}; ", f"AA_size = {AA_size}")

sequence length = 34;  AA_size = 2


In [9]:
num_train = int(.01*train_percent*len(datafile))
num_test = 2000
train_list, val_list, test_list = make_train_val_test_lists_rand(datafile, num_train, num_test)    
print(num_train)

25864


### Linear model

In [23]:
model_name = "Linear"
from models import LinearModel

In [24]:
import torch.utils.data as data

X = seqs1h.float().to(device)
y = phenotypes.to(device)

X_train, y_train = X[train_list], y[train_list]
X_val, y_val = X[val_list], y[val_list]
X_test, y_test = X[test_list], y[test_list]


train_dataset = ProtDataset(X_train, y_train)
train_loader = data.DataLoader(train_dataset,
                               batch_size=1000,
                               shuffle=True,
                               drop_last=False)

In [25]:
dropout_p = 0.0
model = LinearModel(L, AA_size, dropout_p).cuda()

In [26]:
# from models import LinearModel

In [27]:
import torch.optim as optim
import torch
import torch.nn as nn


from scipy.stats import pearsonr
learning_rate = 0.01
epochs = 300

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(epochs):
    total_loss = 0
    for batch_inputs, batch_targets in train_loader:
        model.train()
        optimizer.zero_grad()
        outputs = model(batch_inputs)
        loss = criterion(outputs, batch_targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    if epoch % 10 == 0:
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(train_loader)}")
        model.eval()
        pred, true = model(X_val.flatten(1)).flatten().detach().cpu().numpy(), y_val.flatten().detach().cpu().numpy()
        print(pearsonr(pred, true)[0]**2)

Epoch 1/300, Loss: 0.3863317109644413
0.0005189282881258626
Epoch 11/300, Loss: 0.25813640529910725
0.429036599475166
Epoch 21/300, Loss: 0.11567544937133789
0.5396034736800561
Epoch 31/300, Loss: 0.11313357794036467
0.5486051689077466
Epoch 41/300, Loss: 0.11234207358211279
0.551351779863347
Epoch 51/300, Loss: 0.11176873215784629
0.553001527850364
Epoch 61/300, Loss: 0.11201879226913054
0.5531673752149168
Epoch 71/300, Loss: 0.11191412216673295
0.5533361462018197
Epoch 81/300, Loss: 0.11170469379673402
0.5533676000131679
Epoch 91/300, Loss: 0.11179927829653025
0.5541687978371111
Epoch 101/300, Loss: 0.11167437645296256
0.5539298296527275
Epoch 111/300, Loss: 0.11180036949614684
0.5537810484803593
Epoch 121/300, Loss: 0.1115486528724432
0.5532200234380505
Epoch 131/300, Loss: 0.11186157322178285
0.5529691761211394
Epoch 141/300, Loss: 0.11165164442112048
0.5540670952023885
Epoch 151/300, Loss: 0.1119967708364129
0.5537554845040921
Epoch 161/300, Loss: 0.11165731990089019
0.55355264646

In [28]:
model.eval()
pred, true = model(X_test.flatten(1)).flatten().detach().cpu().numpy(), y_test.flatten().detach().cpu().numpy()

r2_test = pearsonr(pred, true)[0]**2

print(f"{model_name} model achieved test R2 = {r2_test}")

Linear model achieved test R2 = 0.547824029307497


In [29]:
import csv
with open(os.path.join(results_path, "R2s.csv"), mode='a', newline='') as file:
    writer = csv.writer(file)
    writer.writerows([[model_name, r2_test]])

### Transformer model

In [36]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from models import Transformer_torch_MHA, Transformer_2k

In [37]:
seqs_ex = seqs + AA_size*torch.tensor(range(L))
X = seqs_ex.to(device)
y = phenotypes.to(device)
X_train, y_train = X[train_list], y[train_list]
X_val, y_val = X[val_list], y[val_list]
X_test, y_test = X[test_list], y[test_list]
train_dataset = ProtDataset(X_train, y_train)

In [38]:
# Best Trial:
#   Value: 0.7601
#   Params: 
#     hidden_dim_h: 23
#     dropout: 0.12805161023112027
#     batch_size: 544


In [39]:
# sequence_length = L
# input_dim = AA_size*L
# output_dim = 1
# num_layers = 2
# num_heads = 4
# hidden_dim = 23*num_heads
# dropout = 0.12805161023112027

# model = Transformer_torch_MHA(L, input_dim, hidden_dim, num_layers, num_heads, dropout).to(device)

In [40]:
# from scipy.stats import pearsonr
# learning_rate = 0.001
# epochs = 500

# criterion = nn.MSELoss()
# optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# for epoch in range(epochs):
#     model.train()
#     total_loss = 0
#     for batch_inputs, batch_targets in train_loader:
#         optimizer.zero_grad()
#         outputs = model(batch_inputs)
#         loss = criterion(outputs, batch_targets)
#         loss.backward()
#         optimizer.step()
#         total_loss += loss.item()
    
#     if epoch % 20 == 0:
#         print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(train_loader)}")
#         model.eval()
#         pred, true = model(X_test.flatten(1)).flatten().detach().cpu().numpy(), y_test.flatten().detach().cpu().numpy()
#         print(pearsonr(pred, true)[0]**2)

In [41]:
import optuna
from scipy.stats import pearsonr

learning_rate = 0.001
num_heads = 4

sequence_length = L
input_dim = AA_size*L
output_dim = 1

def objective(trial):
    global criterion_best, model_best

    hidden_dim_h = trial.suggest_int('hidden_dim_h', 10, 50)
    dropout = trial.suggest_float('dropout', 0.05, 0.35)
    batch_size = trial.suggest_int('batch_size', 100, 1200)
    n_epochs = trial.suggest_int('n_epochs', 30, 300)
    
    print(f"Build model with {num_layers} layers of attention")
    model = Transformer_2k(L, input_dim, hidden_dim_h*num_heads, num_layers, num_heads, dropout).to(device)
    
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=batch_size,
                                   shuffle=True,
                                   drop_last=False)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    r2_test = []
    try: 
        for epoch in range(n_epochs):

                model.train()
                total_loss = 0
                for batch_inputs, batch_targets in train_loader:
                    optimizer.zero_grad()
                    outputs = model(batch_inputs)
                    loss = criterion(outputs, batch_targets)
                    loss.backward()
                    optimizer.step()
                    total_loss += loss.item()

                if epoch % 10 == 0:
                    print(f"Epoch {epoch+1}/{n_epochs}, Loss: {total_loss/len(train_loader)}")
                    model.eval()
                    pred, true = model(X_val.flatten(1)).flatten().detach().cpu().numpy(), y_val.flatten().detach().cpu().numpy()
                    print(pearsonr(pred, true)[0]**2)
                    if pearsonr(pred, true)[0]**2 == "nan":
                        break
                    r2_test.append(pearsonr(pred, true)[0]**2)
                    
    except: print("training failed")
    
    criterion = np.array(r2_test)[-1]
    if criterion > criterion_best:
        print("Found better hyperparameter, update model")
        criterion_best = criterion
        model_best = model
    
    return np.array(r2_test)[-1]

In [42]:
n_trials = 100
for num_layers in [1, 2, 3]:

    model_name = "TF_" + str(num_layers)

    criterion_best = 0.
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=100)

    # Print the best hyperparameters
    best_trial = study.best_trial
    print("Best Trial:")
    print(f"  Criterion: {best_trial.value:.4f}")
    print("  Params: ")
    for key, value in best_trial.params.items():
        print(f"    {key}: {value}")  

    best_hyper_parameters = {}
    for key, value in best_trial.params.items():
        best_hyper_parameters[key] = value

    model_best.eval()
    pred, true = model_best(X_val.flatten(1)).flatten().detach().cpu().numpy(), y_val.flatten().detach().cpu().numpy()

    r2_test = pearsonr(pred, true)[0]**2
    print(f"{model_name} achieved R2 = {r2_test}")

    # save test R2 score
    import csv
    with open(os.path.join(results_path, "R2s.csv"), mode='a', newline='') as file:
        writer = csv.writer(file)
        writer.writerows([[model_name, r2_test]])

    # save predictions
    pd.DataFrame({"prediction": pred, "true": true}).to_csv(os.path.join(results_path, model_name + "_predictions.csv"), index=False)

    # save best model
    torch.save(model_best, os.path.join(results_path, model_name + "_BestModel"))        

[I 2024-01-22 23:00:08,236] A new study created in memory with name: no-name-06f4ec3e-4f88-4b6b-821f-6d6a6e03ea30


Build model with 1 layers of attention
Epoch 1/54, Loss: 0.27300238893145606
0.5659377368682414
Epoch 11/54, Loss: 0.17443125588553293
0.6352508089495565
Epoch 21/54, Loss: 0.15758839391526722
0.6331747105164571
Epoch 31/54, Loss: 0.1434752153498786
0.6441724728190299
Epoch 41/54, Loss: 0.13047365915207637
0.6426852112211843
Epoch 51/54, Loss: 0.11981855652162007
0.645188020136586


[I 2024-01-22 23:00:25,633] Trial 0 finished with value: 0.645188020136586 and parameters: {'hidden_dim_h': 49, 'dropout': 0.17583944046619926, 'batch_size': 1151, 'n_epochs': 54}. Best is trial 0 with value: 0.645188020136586.


Found better hyperparameter, update model
Build model with 1 layers of attention
Epoch 1/49, Loss: 0.3058879319578409
0.5700129578114762
Epoch 11/49, Loss: 0.17630322696641088
0.6298366440556901
Epoch 21/49, Loss: 0.16005395911633968
0.6433917009428549
Epoch 31/49, Loss: 0.14421286806464195
0.6399882555905548
Epoch 41/49, Loss: 0.1301981434226036
0.6432709627991137


[I 2024-01-22 23:00:39,737] Trial 1 finished with value: 0.6432709627991137 and parameters: {'hidden_dim_h': 37, 'dropout': 0.2765663087566271, 'batch_size': 758, 'n_epochs': 49}. Best is trial 0 with value: 0.645188020136586.


Build model with 1 layers of attention
Epoch 1/60, Loss: 0.8813921124846847
0.5374497603185929
Epoch 11/60, Loss: 0.4095723871831541
0.6387062040514015
Epoch 21/60, Loss: 0.2495868730324286
0.6435031114835666
Epoch 31/60, Loss: 0.20508618542441615
0.6462329385903013
Epoch 41/60, Loss: 0.1897567096683714
0.6477189573056769
Epoch 51/60, Loss: 0.17796243892775643
0.6449919346645457


[I 2024-01-22 23:00:54,436] Trial 2 finished with value: 0.6449919346645457 and parameters: {'hidden_dim_h': 19, 'dropout': 0.07920396843593208, 'batch_size': 896, 'n_epochs': 60}. Best is trial 0 with value: 0.645188020136586.


Build model with 1 layers of attention
Epoch 1/85, Loss: 0.2447005153605432
0.5739739504983876
Epoch 11/85, Loss: 0.1670232591303912
0.6289751381280846
Epoch 21/85, Loss: 0.13981454932328427
0.6396029074605076
Epoch 31/85, Loss: 0.11913285133513538
0.6376031919562841
Epoch 41/85, Loss: 0.10629923461061536
0.6389427354416705
Epoch 51/85, Loss: 0.09862461686134338
0.643565652516241
Epoch 61/85, Loss: 0.09429478735634775
0.6414712908335996
Epoch 71/85, Loss: 0.09220975185885574
0.6465145292618247
Epoch 81/85, Loss: 0.09125263514843854
0.6502522047795809


[I 2024-01-22 23:01:20,802] Trial 3 finished with value: 0.6502522047795809 and parameters: {'hidden_dim_h': 45, 'dropout': 0.14170673882767865, 'batch_size': 725, 'n_epochs': 85}. Best is trial 3 with value: 0.6502522047795809.


Found better hyperparameter, update model
Build model with 1 layers of attention
Epoch 1/266, Loss: 0.33893223336109746
0.5835830648211799
Epoch 11/266, Loss: 0.1532283622276533
0.6203081572189818
Epoch 21/266, Loss: 0.10677652238132236
0.6286143834906623
Epoch 31/266, Loss: 0.09605111608972082
0.6361277715550954
Epoch 41/266, Loss: 0.09495084018974037
0.6392895175237359
Epoch 51/266, Loss: 0.09401800113541263
0.6415532525931259
Epoch 61/266, Loss: 0.09386591337152295
0.6436861553279282
Epoch 71/266, Loss: 0.09436426025170547
0.6452977006765057
Epoch 81/266, Loss: 0.09318047339891221
0.6424680266813413
Epoch 91/266, Loss: 0.093604994976854
0.6427622232292031
Epoch 101/266, Loss: 0.09353324234277219
0.6442947953575008
Epoch 111/266, Loss: 0.09301266907811999
0.6495816113542419
Epoch 121/266, Loss: 0.09275946729666704
0.6424260648264987
Epoch 131/266, Loss: 0.0925677596897512
0.6473285216366029
Epoch 141/266, Loss: 0.09240571763757226
0.6424705014654742
Epoch 151/266, Loss: 0.09234498136

[I 2024-01-22 23:03:25,098] Trial 4 finished with value: 0.6456566574904876 and parameters: {'hidden_dim_h': 43, 'dropout': 0.2498742301667783, 'batch_size': 167, 'n_epochs': 266}. Best is trial 3 with value: 0.6502522047795809.


Build model with 1 layers of attention
Epoch 1/241, Loss: 0.26017296739986967
0.5720614531214059
Epoch 11/241, Loss: 0.21810830224837577
0.6290268309124041
Epoch 21/241, Loss: 0.1848521259214197
0.6311476508011952
Epoch 31/241, Loss: 0.15772554810558045
0.6341165341650618
Epoch 41/241, Loss: 0.137937352593456
0.6427219919666468
Epoch 51/241, Loss: 0.1233106894152505
0.6358488568138032
Epoch 61/241, Loss: 0.11143552591758114
0.644903487231729
Epoch 71/241, Loss: 0.10512087866663933
0.6440647975522369
Epoch 81/241, Loss: 0.0994992381227868
0.6403061440128986
Epoch 91/241, Loss: 0.09703306002276284
0.6445901923947427
Epoch 101/241, Loss: 0.09427792525717191
0.6451051512187106
Epoch 111/241, Loss: 0.09332794642874173
0.646009036800421
Epoch 121/241, Loss: 0.09315777170870986
0.6469250494341909
Epoch 131/241, Loss: 0.09261087141931057
0.6469349196473972
Epoch 141/241, Loss: 0.09295744448900223
0.6457784309825644
Epoch 151/241, Loss: 0.0923604132341487
0.6440170245394645
Epoch 161/241, Loss:

[I 2024-01-22 23:04:38,037] Trial 5 finished with value: 0.6474941655953302 and parameters: {'hidden_dim_h': 41, 'dropout': 0.3459649894133719, 'batch_size': 880, 'n_epochs': 241}. Best is trial 3 with value: 0.6502522047795809.


Epoch 241/241, Loss: 0.09140264429152012
0.6474941655953302
Build model with 1 layers of attention
Epoch 1/127, Loss: 0.9363441021887811
0.553922225473831
Epoch 11/127, Loss: 0.21515045830836663
0.6304769192366482
Epoch 21/127, Loss: 0.17379184787745003
0.6370782128519029
Epoch 31/127, Loss: 0.13569001583280144
0.6386003259727807
Epoch 41/127, Loss: 0.10941544142398206
0.6450629562665304
Epoch 51/127, Loss: 0.09762929150691399
0.6450595273065044
Epoch 61/127, Loss: 0.09207860077475453
0.6477712333038423
Epoch 71/127, Loss: 0.09255599369714548
0.652395003234582
Epoch 81/127, Loss: 0.09189322361579308
0.6524844390011681
Epoch 91/127, Loss: 0.09078328057632341
0.6486176159564969
Epoch 101/127, Loss: 0.09090936208491797
0.6513331769281725
Epoch 111/127, Loss: 0.09104003405177986
0.6525300250590073
Epoch 121/127, Loss: 0.09154217305419209
0.6499786491179876


[I 2024-01-22 23:05:23,068] Trial 6 finished with value: 0.6499786491179876 and parameters: {'hidden_dim_h': 20, 'dropout': 0.10229403213827723, 'batch_size': 265, 'n_epochs': 127}. Best is trial 3 with value: 0.6502522047795809.


Build model with 1 layers of attention
Epoch 1/184, Loss: 2.4955502282018247
0.5294091644597276
Epoch 11/184, Loss: 1.610435610232146
0.5783393205591326
Epoch 21/184, Loss: 1.0385508926018425
0.5993038299455087
Epoch 31/184, Loss: 0.6808821273886639
0.6149571607553835
Epoch 41/184, Loss: 0.47429749887922534
0.6260117084221251
Epoch 51/184, Loss: 0.3647423321786134
0.632670469169583
Epoch 61/184, Loss: 0.31115483071493066
0.6361964162659355
Epoch 71/184, Loss: 0.2847265342007513
0.6380582570290894
Epoch 81/184, Loss: 0.26940198048301367
0.6393898112253683
Epoch 91/184, Loss: 0.25743964120097784
0.6395551613677464
Epoch 101/184, Loss: 0.24603502387585846
0.6393155196297396
Epoch 111/184, Loss: 0.23434899229070413
0.6447189133549661
Epoch 121/184, Loss: 0.2222278947415559
0.6395398831058599
Epoch 131/184, Loss: 0.2100582258856815
0.6383067170385051
Epoch 141/184, Loss: 0.19753625017145407
0.6402963072033636
Epoch 151/184, Loss: 0.18529816619727923
0.643194893263997
Epoch 161/184, Loss: 0.

[I 2024-01-22 23:06:11,737] Trial 7 finished with value: 0.6458495338449028 and parameters: {'hidden_dim_h': 30, 'dropout': 0.12403408973851605, 'batch_size': 1043, 'n_epochs': 184}. Best is trial 3 with value: 0.6502522047795809.


Build model with 1 layers of attention
Epoch 1/215, Loss: 0.48268353063908837
0.5782602962220674
Epoch 11/215, Loss: 0.1628045311995915
0.6280731014455082
Epoch 21/215, Loss: 0.11909871643024778
0.6327948242016727
Epoch 31/215, Loss: 0.09799402065220333
0.6410418334756589
Epoch 41/215, Loss: 0.09332235772458333
0.6422912843979056
Epoch 51/215, Loss: 0.0921873174134701
0.6375271876290854
Epoch 61/215, Loss: 0.09216276745474528
0.6431507837448559
Epoch 71/215, Loss: 0.09190301886863178
0.6452110229974471
Epoch 81/215, Loss: 0.0919986367225647
0.6496977990217879
Epoch 91/215, Loss: 0.0915788925356335
0.6484195844599931
Epoch 101/215, Loss: 0.09103531295817996
0.6440426369842865
Epoch 111/215, Loss: 0.09156600433209586
0.6447606917314184
Epoch 121/215, Loss: 0.09179984225285431
0.6492351345096256
Epoch 131/215, Loss: 0.09172091733605142
0.6509106474618349
Epoch 141/215, Loss: 0.09106484487179727
0.6442767899731064
Epoch 151/215, Loss: 0.09161971142840764
0.6543712892562038
Epoch 161/215, L

[I 2024-01-22 23:07:45,925] Trial 8 finished with value: 0.6517526575241229 and parameters: {'hidden_dim_h': 44, 'dropout': 0.14419592339351964, 'batch_size': 190, 'n_epochs': 215}. Best is trial 8 with value: 0.6517526575241229.


Found better hyperparameter, update model
Build model with 1 layers of attention
Epoch 1/160, Loss: 0.2935559689998627
0.5476217164230358
Epoch 11/160, Loss: 0.18284221664071082
0.6349085036280757
Epoch 21/160, Loss: 0.16404929906129836
0.6434410592266141
Epoch 31/160, Loss: 0.15008802488446235
0.6435241307814556
Epoch 41/160, Loss: 0.13763569220900534
0.6447452651104463
Epoch 51/160, Loss: 0.12633805088698863
0.6400245916083372
Epoch 61/160, Loss: 0.11669879816472531
0.6477122221693483
Epoch 71/160, Loss: 0.1091160386800766
0.6442360488533052
Epoch 81/160, Loss: 0.10307793244719506
0.6466210358038796
Epoch 91/160, Loss: 0.0987310517579317
0.6486621183260405
Epoch 101/160, Loss: 0.09505140408873558
0.6521182223786547
Epoch 111/160, Loss: 0.09262299835681916
0.6492534469532766
Epoch 121/160, Loss: 0.09125526398420333
0.648618237191125
Epoch 131/160, Loss: 0.09004753716289997
0.6532440723131898
Epoch 141/160, Loss: 0.08970733620226383
0.6525945911141317
Epoch 151/160, Loss: 0.08890662565

[I 2024-01-22 23:08:26,346] Trial 9 finished with value: 0.6524624058273769 and parameters: {'hidden_dim_h': 22, 'dropout': 0.07965807876447999, 'batch_size': 1196, 'n_epochs': 160}. Best is trial 9 with value: 0.6524624058273769.


Found better hyperparameter, update model
Build model with 1 layers of attention
Epoch 1/144, Loss: 0.3582600516080856
0.5382718646630361
Epoch 11/144, Loss: 0.18309567213058472
0.6299662448071794
Epoch 21/144, Loss: 0.15043264970183373
0.6335263355132397
Epoch 31/144, Loss: 0.12421544745564461
0.6454998652264162
Epoch 41/144, Loss: 0.10653147831559182
0.646786494284245
Epoch 51/144, Loss: 0.09695278912782669
0.6487109145247324
Epoch 61/144, Loss: 0.09165038168430328
0.6525396460173802
Epoch 71/144, Loss: 0.09039269521832466
0.6536469607042176
Epoch 81/144, Loss: 0.09018689304590226
0.6437415357826087
Epoch 91/144, Loss: 0.08967123106122017
0.654437951062273
Epoch 101/144, Loss: 0.08921947538852691
0.6509981312756933
Epoch 111/144, Loss: 0.08949220836162568
0.6524237060432866
Epoch 121/144, Loss: 0.08914756283164024
0.6503599067473409
Epoch 131/144, Loss: 0.08897985264658928
0.6526940042956817
Epoch 141/144, Loss: 0.08889000356197357
0.6533269260395026


[I 2024-01-22 23:09:06,326] Trial 10 finished with value: 0.6533269260395026 and parameters: {'hidden_dim_h': 11, 'dropout': 0.05827418670945858, 'batch_size': 481, 'n_epochs': 144}. Best is trial 10 with value: 0.6533269260395026.


Found better hyperparameter, update model
Build model with 1 layers of attention
Epoch 1/139, Loss: 0.6580563325148362
0.5414871678723705
Epoch 11/139, Loss: 0.15627172460349706
0.6309448633210448
Epoch 21/139, Loss: 0.12303459329100755
0.64819604248699
Epoch 31/139, Loss: 0.11544595314906193
0.6492733870392674
Epoch 41/139, Loss: 0.10870961458064042
0.6452471923376826
Epoch 51/139, Loss: 0.10186614697942367
0.6492002883182112
Epoch 61/139, Loss: 0.096875039430765
0.6483382879495042
Epoch 71/139, Loss: 0.09238454713844337
0.6498030278787302
Epoch 81/139, Loss: 0.09093733332478084
0.6530815350425714
Epoch 91/139, Loss: 0.08969580253156331
0.6528848720583751
Epoch 101/139, Loss: 0.08911935512263042
0.6517362022574851
Epoch 111/139, Loss: 0.08938711867309533
0.6530295438160927
Epoch 121/139, Loss: 0.08913126946068727
0.6533344997670136
Epoch 131/139, Loss: 0.0892911352790319
0.6536218776333772


[I 2024-01-22 23:09:45,570] Trial 11 finished with value: 0.6536218776333772 and parameters: {'hidden_dim_h': 10, 'dropout': 0.05305718614424193, 'batch_size': 466, 'n_epochs': 139}. Best is trial 11 with value: 0.6536218776333772.


Found better hyperparameter, update model
Build model with 1 layers of attention
Epoch 1/133, Loss: 1.094547321983412
0.5519032742525177
Epoch 11/133, Loss: 0.2755550806428872
0.6229202871026978
Epoch 21/133, Loss: 0.14511866721452452
0.6404397454620637
Epoch 31/133, Loss: 0.13172623558956034
0.647148826133835
Epoch 41/133, Loss: 0.12357825289169948
0.6466940059102705
Epoch 51/133, Loss: 0.11590087384569879
0.6478568898730438
Epoch 61/133, Loss: 0.10808531005008548
0.6480147030204667
Epoch 71/133, Loss: 0.10130717444653604
0.6507557140795855
Epoch 81/133, Loss: 0.09645085737985723
0.6485777939798176
Epoch 91/133, Loss: 0.09265381623716916
0.6498289409781616
Epoch 101/133, Loss: 0.09037295334479388
0.6510255705024922
Epoch 111/133, Loss: 0.08997691904797274
0.6524008238181861
Epoch 121/133, Loss: 0.08968049594584633
0.6512471110121567
Epoch 131/133, Loss: 0.08891663495816436
0.6498316265406465


[I 2024-01-22 23:10:22,880] Trial 12 finished with value: 0.6498316265406465 and parameters: {'hidden_dim_h': 11, 'dropout': 0.06012842311726593, 'batch_size': 472, 'n_epochs': 133}. Best is trial 11 with value: 0.6536218776333772.


Build model with 1 layers of attention
Epoch 1/107, Loss: 1.2719992746909459
0.5468200959498442
Epoch 11/107, Loss: 0.3563105712334315
0.62634655908406
Epoch 21/107, Loss: 0.1562459133565426
0.6354001562074376
Epoch 31/107, Loss: 0.13170753559097648
0.6460179641595565
Epoch 41/107, Loss: 0.12461855821311474
0.6431380848175281
Epoch 51/107, Loss: 0.11795569645861785
0.6517448109269609
Epoch 61/107, Loss: 0.11132461205124855
0.6428885837346691
Epoch 71/107, Loss: 0.10517537640407681
0.6413136978508134
Epoch 81/107, Loss: 0.09957747890924414
0.6452608535636171
Epoch 91/107, Loss: 0.09501159703359008
0.6479141328683896
Epoch 101/107, Loss: 0.09228450510029991
0.6479537422911704


[I 2024-01-22 23:10:52,113] Trial 13 finished with value: 0.6479537422911704 and parameters: {'hidden_dim_h': 10, 'dropout': 0.058845039262257974, 'batch_size': 502, 'n_epochs': 107}. Best is trial 11 with value: 0.6536218776333772.


Build model with 1 layers of attention
Epoch 1/181, Loss: 0.21294801858338444
0.5622692057474774
Epoch 11/181, Loss: 0.1378530591726303
0.635281561228831
Epoch 21/181, Loss: 0.1082189764488827
0.639331764220436
Epoch 31/181, Loss: 0.09696554067459974
0.6447493546988128
Epoch 41/181, Loss: 0.09413910630074414
0.6456431185159462
Epoch 51/181, Loss: 0.09264132651415738
0.6438984812974005
Epoch 61/181, Loss: 0.09214086640964855
0.6444608695119173
Epoch 71/181, Loss: 0.0922144971110604
0.6453748500120051
Epoch 81/181, Loss: 0.09156983576037667
0.6488830843034599
Epoch 91/181, Loss: 0.09192882153120908
0.6512706902377747
Epoch 101/181, Loss: 0.09153651513836601
0.6517975371797676
Epoch 111/181, Loss: 0.09149218377741901
0.6495083739908822
Epoch 121/181, Loss: 0.09137679677117955
0.649712337813165
Epoch 131/181, Loss: 0.09112606387246738
0.6501395405426145
Epoch 141/181, Loss: 0.09130015738985756
0.6523850075037658
Epoch 151/181, Loss: 0.09117032573981719
0.6499942332266174
Epoch 161/181, Los

[I 2024-01-22 23:11:42,303] Trial 14 finished with value: 0.6508948097327797 and parameters: {'hidden_dim_h': 15, 'dropout': 0.20399459358534705, 'batch_size': 438, 'n_epochs': 181}. Best is trial 11 with value: 0.6536218776333772.


Epoch 181/181, Loss: 0.09060822413726287
0.6508948097327797
Build model with 1 layers of attention
Epoch 1/141, Loss: 0.21573779154831255
0.5747800097505738
Epoch 11/141, Loss: 0.12964582380274653
0.6366646893974572
Epoch 21/141, Loss: 0.10259569981988047
0.6375638080892008
Epoch 31/141, Loss: 0.0948368273150753
0.6476082365612765
Epoch 41/141, Loss: 0.09264872013263299
0.6500477824194624
Epoch 51/141, Loss: 0.09229550552620015
0.6420727151371581
Epoch 61/141, Loss: 0.09194411599720029
0.6448810618175707
Epoch 71/141, Loss: 0.09155114005569001
0.6492131810046865
Epoch 81/141, Loss: 0.09197436598405032
0.6518273628256772
Epoch 91/141, Loss: 0.0913593514074742
0.6449521848727541
Epoch 101/141, Loss: 0.09125670874622506
0.6450597496775675
Epoch 111/141, Loss: 0.09089117476218184
0.6513656284451853
Epoch 121/141, Loss: 0.0914499937018878
0.6508481464639776
Epoch 131/141, Loss: 0.0909833333022158
0.6522095468497262


[I 2024-01-22 23:12:27,392] Trial 15 finished with value: 0.6493913501700834 and parameters: {'hidden_dim_h': 26, 'dropout': 0.19770098491662058, 'batch_size': 340, 'n_epochs': 141}. Best is trial 11 with value: 0.6536218776333772.


Epoch 141/141, Loss: 0.09119648276500299
0.6493913501700834
Build model with 1 layers of attention
Epoch 1/296, Loss: 0.6980577157093928
0.5592864201959923
Epoch 11/296, Loss: 0.32161611853501737
0.6249014225397326
Epoch 21/296, Loss: 0.2632000144475546
0.631001981431895
Epoch 31/296, Loss: 0.2307135317570124
0.6414961379429934
Epoch 41/296, Loss: 0.1985866706340741
0.6376475361763645
Epoch 51/296, Loss: 0.16901978697532263
0.6402760218947211
Epoch 61/296, Loss: 0.14431767414013544
0.649270652028923
Epoch 71/296, Loss: 0.12480831451905079
0.6428212467772939
Epoch 81/296, Loss: 0.1109432883751698
0.6485746118001056
Epoch 91/296, Loss: 0.10093031709010784
0.6493746704383088
Epoch 101/296, Loss: 0.09534486039326741
0.6501569674734449
Epoch 111/296, Loss: 0.09138394510134673
0.6529879811859289
Epoch 121/296, Loss: 0.09018240123987198
0.6514030913248839
Epoch 131/296, Loss: 0.08935072311224082
0.6543371920795753
Epoch 141/296, Loss: 0.08865879437862298
0.6556945865851663
Epoch 151/296, Loss

[I 2024-01-22 23:13:41,981] Trial 16 finished with value: 0.6521585323952361 and parameters: {'hidden_dim_h': 14, 'dropout': 0.050565450515198666, 'batch_size': 616, 'n_epochs': 296}. Best is trial 11 with value: 0.6536218776333772.


Build model with 1 layers of attention
Epoch 1/94, Loss: 0.24857311103588495
0.5710681117271538
Epoch 11/94, Loss: 0.17345931361883116
0.641278080955202
Epoch 21/94, Loss: 0.13303575339989784
0.6403753466864841
Epoch 31/94, Loss: 0.11079200834800036
0.6504217713849051
Epoch 41/94, Loss: 0.0992573918058322
0.6390419626796313
Epoch 51/94, Loss: 0.09401467748177357
0.6452884766232265
Epoch 61/94, Loss: 0.09095504650702843
0.6512386501318861
Epoch 71/94, Loss: 0.0905665608170705
0.6519634007186099
Epoch 81/94, Loss: 0.09025852573223603
0.6494836264771978
Epoch 91/94, Loss: 0.08991302301486333
0.6457101967569432


[I 2024-01-22 23:14:08,988] Trial 17 finished with value: 0.6457101967569432 and parameters: {'hidden_dim_h': 34, 'dropout': 0.11064977468181503, 'batch_size': 615, 'n_epochs': 94}. Best is trial 11 with value: 0.6536218776333772.


Build model with 1 layers of attention
Epoch 1/211, Loss: 0.3566034258343279
0.5660962757306228
Epoch 11/211, Loss: 0.1793002024292946
0.6322921919106005
Epoch 21/211, Loss: 0.12975044902414085
0.6400661881602635
Epoch 31/211, Loss: 0.10674232169985771
0.638359074692058
Epoch 41/211, Loss: 0.09552903855219483
0.6478135379673078
Epoch 51/211, Loss: 0.09261723724193871
0.6497242508576575
Epoch 61/211, Loss: 0.09527945443987847
0.648655047777308
Epoch 71/211, Loss: 0.09603522028774023
0.6505143389762202
Epoch 81/211, Loss: 0.0935826406814158
0.6510072816050285
Epoch 91/211, Loss: 0.09131504092365503
0.6513116960290515
Epoch 101/211, Loss: 0.09096849039196968
0.6508590474753608
Epoch 111/211, Loss: 0.09276519408449531
0.6500107308514468
Epoch 121/211, Loss: 0.09136496596038342
0.6499820238771533
Epoch 131/211, Loss: 0.09075067527592182
0.6508162021140589
Epoch 141/211, Loss: 0.09197453884407877
0.6539355315897769
Epoch 151/211, Loss: 0.09182857871055602
0.6499118874683825
Epoch 161/211, Lo

[I 2024-01-22 23:15:19,495] Trial 18 finished with value: 0.6523203857788655 and parameters: {'hidden_dim_h': 15, 'dropout': 0.16684674700833782, 'batch_size': 302, 'n_epochs': 211}. Best is trial 11 with value: 0.6536218776333772.


Epoch 211/211, Loss: 0.09202708825469016
0.6523203857788655
Build model with 1 layers of attention
Epoch 1/174, Loss: 1.9164019136105555
0.541087948639955
Epoch 11/174, Loss: 0.6157360915410317
0.6070547789355352
Epoch 21/174, Loss: 0.2303861435187065
0.6034318810977842
Epoch 31/174, Loss: 0.18020060229099402
0.617025970437312
Epoch 41/174, Loss: 0.16384807986728214
0.635979257413508
Epoch 51/174, Loss: 0.14783279168403754
0.6346249925496369
Epoch 61/174, Loss: 0.13166846663264906
0.6415186258063492
Epoch 71/174, Loss: 0.1173691969301741
0.6398501775142625
Epoch 81/174, Loss: 0.10640639294002016
0.6429719227641898
Epoch 91/174, Loss: 0.09875351443129071
0.6450582964905149
Epoch 101/174, Loss: 0.09506843276953293
0.6490862304306564
Epoch 111/174, Loss: 0.09309446799047923
0.6448053982721662
Epoch 121/174, Loss: 0.09240500982535088
0.645075927072249
Epoch 131/174, Loss: 0.09199514328423193
0.6514842851895025
Epoch 141/174, Loss: 0.09167358948517654
0.6477961875530517
Epoch 151/174, Loss:

[I 2024-01-22 23:16:10,797] Trial 19 finished with value: 0.6492517726112866 and parameters: {'hidden_dim_h': 26, 'dropout': 0.25861924742025655, 'batch_size': 405, 'n_epochs': 174}. Best is trial 11 with value: 0.6536218776333772.


Build model with 1 layers of attention
Epoch 1/113, Loss: 0.3320518942041831
0.5562094663010497
Epoch 11/113, Loss: 0.22336072813380847
0.6286654180205463
Epoch 21/113, Loss: 0.16851058466867966
0.6378473979685023
Epoch 31/113, Loss: 0.13182682188397105
0.6442756169581788
Epoch 41/113, Loss: 0.110288135538047
0.6503968352370363
Epoch 51/113, Loss: 0.09835739603096788
0.6450808012018495
Epoch 61/113, Loss: 0.09334142641587691
0.6515611141507349
Epoch 71/113, Loss: 0.0906884798949415
0.6486951476429857
Epoch 81/113, Loss: 0.08989812416786497
0.6514405200578687
Epoch 91/113, Loss: 0.08950535631315275
0.6518450641530074
Epoch 101/113, Loss: 0.08964532664553686
0.6514988678586032
Epoch 111/113, Loss: 0.08889331245286898
0.6529469484252609


[I 2024-01-22 23:16:40,882] Trial 20 finished with value: 0.6529469484252609 and parameters: {'hidden_dim_h': 17, 'dropout': 0.0923347976079672, 'batch_size': 547, 'n_epochs': 113}. Best is trial 11 with value: 0.6536218776333772.


Build model with 1 layers of attention
Epoch 1/118, Loss: 0.9017283680827118
0.5531653626197489
Epoch 11/118, Loss: 0.2437548318574595
0.6269216398699677
Epoch 21/118, Loss: 0.12414016765217449
0.6375015072669902
Epoch 31/118, Loss: 0.11255227947650953
0.6431937437752038
Epoch 41/118, Loss: 0.10823493156322213
0.6492155269703844
Epoch 51/118, Loss: 0.10443633465572845
0.6502584417092464
Epoch 61/118, Loss: 0.10102864753368289
0.6462455500219192
Epoch 71/118, Loss: 0.09735071797703587
0.6477989452306427
Epoch 81/118, Loss: 0.09468156844377518
0.648239910564566
Epoch 91/118, Loss: 0.09266288197317789
0.6519017160187304
Epoch 101/118, Loss: 0.09130106952994368
0.6538062910760095
Epoch 111/118, Loss: 0.09007683881493501
0.6523604932489694


[I 2024-01-22 23:17:12,024] Trial 21 finished with value: 0.6523604932489694 and parameters: {'hidden_dim_h': 17, 'dropout': 0.09337587158511379, 'batch_size': 562, 'n_epochs': 118}. Best is trial 11 with value: 0.6536218776333772.


Build model with 1 layers of attention
Epoch 1/152, Loss: 0.24800928821787238
0.5459836331945763
Epoch 11/152, Loss: 0.1989357778802514
0.6282764617736949
Epoch 21/152, Loss: 0.16374035459011793
0.6400611565958052
Epoch 31/152, Loss: 0.1377272834070027
0.6418230271541641
Epoch 41/152, Loss: 0.11978141381405294
0.6485841983518283
Epoch 51/152, Loss: 0.10741913830861449
0.6464491079492007
Epoch 61/152, Loss: 0.0993414493277669
0.6437086706530121
Epoch 71/152, Loss: 0.09436389175243676
0.6489547164797804
Epoch 81/152, Loss: 0.09246816975064576
0.6491907849809075
Epoch 91/152, Loss: 0.09024335630238056
0.6504593415382105
Epoch 101/152, Loss: 0.08979978458955884
0.653277663514885
Epoch 111/152, Loss: 0.08950203587301075
0.6529758250849874
Epoch 121/152, Loss: 0.0893586443271488
0.65206044874523
Epoch 131/152, Loss: 0.08906146371737123
0.6489619917309012
Epoch 141/152, Loss: 0.08900760510005057
0.653211051483583
Epoch 151/152, Loss: 0.08892902010120451
0.6538841229133152


[I 2024-01-22 23:17:49,065] Trial 22 finished with value: 0.6538841229133152 and parameters: {'hidden_dim_h': 10, 'dropout': 0.07920274407977348, 'batch_size': 758, 'n_epochs': 152}. Best is trial 22 with value: 0.6538841229133152.


Found better hyperparameter, update model
Build model with 1 layers of attention
Epoch 1/151, Loss: 1.4014415591955185
0.5239515178777396
Epoch 11/151, Loss: 0.6186591554433107
0.6096614224298736
Epoch 21/151, Loss: 0.28205867018550634
0.6264008852426506
Epoch 31/151, Loss: 0.17212901404127479
0.6382691442693069
Epoch 41/151, Loss: 0.14343223394826055
0.6427948988692165
Epoch 51/151, Loss: 0.13549032667651772
0.6430031317949585
Epoch 61/151, Loss: 0.13115139724686742
0.6459439830249994
Epoch 71/151, Loss: 0.1260856215376407
0.6428087553400363
Epoch 81/151, Loss: 0.12148350151255727
0.6471206739255225
Epoch 91/151, Loss: 0.1165244069416076
0.645005512743094
Epoch 101/151, Loss: 0.11118725431151688
0.6469433407793068
Epoch 111/151, Loss: 0.10727174254134297
0.6458298618493333
Epoch 121/151, Loss: 0.1026967738289386
0.6481960423699471
Epoch 131/151, Loss: 0.09870016318745911
0.645817646735932
Epoch 141/151, Loss: 0.09623931674286723
0.6477858413090303


[I 2024-01-22 23:18:25,654] Trial 23 finished with value: 0.6480908961933204 and parameters: {'hidden_dim_h': 10, 'dropout': 0.12495170160706078, 'batch_size': 765, 'n_epochs': 151}. Best is trial 22 with value: 0.6538841229133152.


Epoch 151/151, Loss: 0.09421584312804043
0.6480908961933204
Build model with 1 layers of attention
Epoch 1/197, Loss: 0.5636782263006482
0.5396669528290166
Epoch 11/197, Loss: 0.2086665140730994
0.6312411545326341
Epoch 21/197, Loss: 0.12965868333620684
0.6387958593809309
Epoch 31/197, Loss: 0.11763442148055349
0.6418183503104794
Epoch 41/197, Loss: 0.11354350458298411
0.6461138719211015
Epoch 51/197, Loss: 0.10971946721630436
0.6482329529657344
Epoch 61/197, Loss: 0.10609362088143826
0.6497455628483381
Epoch 71/197, Loss: 0.10296292070831571
0.6506557202694727
Epoch 81/197, Loss: 0.10001988283225469
0.6514049899996049
Epoch 91/197, Loss: 0.09746718007539
0.6520354867926892
Epoch 101/197, Loss: 0.09514695326132434
0.6501127526532576
Epoch 111/197, Loss: 0.09281844166772706
0.6501504911460273
Epoch 121/197, Loss: 0.09152837496783052
0.6522331139547676
Epoch 131/197, Loss: 0.09075325754071985
0.6530498097352694
Epoch 141/197, Loss: 0.08963860145636968
0.6526829707128212
Epoch 151/197, Lo

[I 2024-01-22 23:19:12,255] Trial 24 finished with value: 0.6525150873378543 and parameters: {'hidden_dim_h': 13, 'dropout': 0.07471326643415789, 'batch_size': 871, 'n_epochs': 197}. Best is trial 22 with value: 0.6538841229133152.


Build model with 1 layers of attention
Epoch 1/74, Loss: 0.36282940871185726
0.5685600493060511
Epoch 11/74, Loss: 0.1236227879093753
0.6327261018284289
Epoch 21/74, Loss: 0.1097207638538546
0.6464026291014429
Epoch 31/74, Loss: 0.10524197088347541
0.6494790255895843
Epoch 41/74, Loss: 0.10092116913033856
0.64764274150805
Epoch 51/74, Loss: 0.09772674449616009
0.6483086331333349
Epoch 61/74, Loss: 0.09452706492609447
0.6489965622653926
Epoch 71/74, Loss: 0.0932981562283304
0.6494703063785116


[I 2024-01-22 23:19:31,202] Trial 25 finished with value: 0.6494703063785116 and parameters: {'hidden_dim_h': 23, 'dropout': 0.05204078012700568, 'batch_size': 679, 'n_epochs': 74}. Best is trial 22 with value: 0.6538841229133152.


Build model with 1 layers of attention
Epoch 1/158, Loss: 0.23171899361269815
0.567955418170625
Epoch 11/158, Loss: 0.15402830262032766
0.6369000464003008
Epoch 21/158, Loss: 0.11567691725397868
0.6457062846101173
Epoch 31/158, Loss: 0.09959922387959465
0.6477266308341777
Epoch 41/158, Loss: 0.09311751548259978
0.6475038934237868
Epoch 51/158, Loss: 0.09177881431958032
0.6478161214199514
Epoch 61/158, Loss: 0.09140142230760484
0.6500657557528569
Epoch 71/158, Loss: 0.09080204651469276
0.651737934776544
Epoch 81/158, Loss: 0.09063309797691921
0.6512079622637816
Epoch 91/158, Loss: 0.08986974365654446
0.6513793160561573
Epoch 101/158, Loss: 0.09025007320774926
0.6485158886841669
Epoch 111/158, Loss: 0.09054432407258049
0.6515386443548338
Epoch 121/158, Loss: 0.09013955744486006
0.6534880696369249
Epoch 131/158, Loss: 0.09022474371724659
0.6493692406925291
Epoch 141/158, Loss: 0.09002521953412465
0.6504350397542707
Epoch 151/158, Loss: 0.09017612158306061
0.6532137036560054


[I 2024-01-22 23:20:18,136] Trial 26 finished with value: 0.6532137036560054 and parameters: {'hidden_dim_h': 12, 'dropout': 0.11680795749091366, 'batch_size': 381, 'n_epochs': 158}. Best is trial 22 with value: 0.6538841229133152.


Build model with 1 layers of attention
Epoch 1/227, Loss: 0.6232887768745422
0.5434123445898317
Epoch 11/227, Loss: 0.313655264377594
0.6230045859859121
Epoch 21/227, Loss: 0.23144200921058655
0.6391916686832375
Epoch 31/227, Loss: 0.20796113967895508
0.6335482660117223
Epoch 41/227, Loss: 0.193180051445961
0.6442589277513032
Epoch 51/227, Loss: 0.17804122030735015
0.6424120979133845
Epoch 61/227, Loss: 0.1633668714761734
0.6430694191752814
Epoch 71/227, Loss: 0.14991866409778595
0.6439866726186957
Epoch 81/227, Loss: 0.13724933862686156
0.642713707204795
Epoch 91/227, Loss: 0.12625576704740524
0.6467001555432358
Epoch 101/227, Loss: 0.11657844483852386
0.6505159885233944
Epoch 111/227, Loss: 0.1086903029680252
0.6467825943390764
Epoch 121/227, Loss: 0.103322451710701
0.6482861391994803
Epoch 131/227, Loss: 0.0982380598783493
0.6487215485350983
Epoch 141/227, Loss: 0.09502696573734283
0.6510595851923797
Epoch 151/227, Loss: 0.09277865409851074
0.6507677787050178
Epoch 161/227, Loss: 0.

[I 2024-01-22 23:21:14,882] Trial 27 finished with value: 0.65511507524099 and parameters: {'hidden_dim_h': 17, 'dropout': 0.07449487642576756, 'batch_size': 976, 'n_epochs': 227}. Best is trial 27 with value: 0.65511507524099.


Found better hyperparameter, update model
Build model with 1 layers of attention
Epoch 1/235, Loss: 0.271860271692276
0.544444719806922
Epoch 11/235, Loss: 0.2138107462475697
0.6323888296108386
Epoch 21/235, Loss: 0.17473067653675875
0.6395940693976063
Epoch 31/235, Loss: 0.14639213805397353
0.6438019455673025
Epoch 41/235, Loss: 0.12705967326958975
0.6415186244291435
Epoch 51/235, Loss: 0.11328040342777967
0.6484170150527375
Epoch 61/235, Loss: 0.10419889104863007
0.6482268844228727
Epoch 71/235, Loss: 0.09751867596060038
0.6511883269938226
Epoch 81/235, Loss: 0.09433320537209511
0.6480496734796266
Epoch 91/235, Loss: 0.09203826201458772
0.6493929843133496
Epoch 101/235, Loss: 0.0906005206828316
0.653812301192719
Epoch 111/235, Loss: 0.0900896539290746
0.6522450657228673
Epoch 121/235, Loss: 0.08967992632339399
0.6510386583267401
Epoch 131/235, Loss: 0.08932999956111114
0.6503542407292324
Epoch 141/235, Loss: 0.08959587911764781
0.6539963909970303
Epoch 151/235, Loss: 0.08954834130903

[I 2024-01-22 23:22:12,592] Trial 28 finished with value: 0.6535367550785561 and parameters: {'hidden_dim_h': 17, 'dropout': 0.13824161090423237, 'batch_size': 1004, 'n_epochs': 235}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/256, Loss: 1.2434289703766506
0.5432821343381649
Epoch 11/256, Loss: 0.6781026323636373
0.4005511377464663
Epoch 21/256, Loss: 0.35758475214242935
0.5825696111048336
Epoch 31/256, Loss: 0.22406329525013766
0.6120296904021428
Epoch 41/256, Loss: 0.17818304027120271
0.6190863582948702
Epoch 51/256, Loss: 0.16315043220917383
0.6334119972123546
Epoch 61/256, Loss: 0.15648255621393523
0.6318102701246329
Epoch 71/256, Loss: 0.15023962035775185
0.6387136747200239
Epoch 81/256, Loss: 0.14536787755787373
0.6400935445770872
Epoch 91/256, Loss: 0.13960668196280798
0.6390314367956793
Epoch 101/256, Loss: 0.13382633154590926
0.6442874121205036
Epoch 111/256, Loss: 0.12888124957680702
0.6416685075122277
Epoch 121/256, Loss: 0.12270995726188023
0.6441331705076364
Epoch 131/256, Loss: 0.1181618943810463
0.6480977719439701
Epoch 141/256, Loss: 0.113251693546772
0.6449145817237957
Epoch 151/256, Loss: 0.108547518029809
0.644978729830413
Epoch 161/256, Loss:

[I 2024-01-22 23:23:16,962] Trial 29 finished with value: 0.6473077408111995 and parameters: {'hidden_dim_h': 22, 'dropout': 0.16881737244640968, 'batch_size': 1026, 'n_epochs': 256}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/35, Loss: 0.2407138483090834
0.5578543409464322
Epoch 11/35, Loss: 0.20140099796381863
0.6326631753618334
Epoch 21/35, Loss: 0.17416526512666183
0.6336870898346597
Epoch 31/35, Loss: 0.15233598784966904
0.6402568515716788


[I 2024-01-22 23:23:28,091] Trial 30 finished with value: 0.6402568515716788 and parameters: {'hidden_dim_h': 49, 'dropout': 0.3424166121551969, 'batch_size': 1099, 'n_epochs': 35}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/230, Loss: 0.5140165340900421
0.5470208760281835
Epoch 11/230, Loss: 0.2427031457424164
0.6269544640669766
Epoch 21/230, Loss: 0.1783778750896454
0.639323582180305
Epoch 31/230, Loss: 0.1639760535955429
0.6432503079495382
Epoch 41/230, Loss: 0.15591907143592834
0.6481642601005436
Epoch 51/230, Loss: 0.14699321687221528
0.6432824954047838
Epoch 61/230, Loss: 0.13821904122829437
0.6486208315986591
Epoch 71/230, Loss: 0.1306866317987442
0.6465267292685555
Epoch 81/230, Loss: 0.12260084420442581
0.6486839577988186
Epoch 91/230, Loss: 0.11535710424184799
0.6466055595259089
Epoch 101/230, Loss: 0.10943627387285232
0.6456114993627489
Epoch 111/230, Loss: 0.10412565886974334
0.6502306902178379
Epoch 121/230, Loss: 0.09992638617753982
0.6479971630763969
Epoch 131/230, Loss: 0.09661449015140533
0.6513624150370038
Epoch 141/230, Loss: 0.09392145663499832
0.651080329416365
Epoch 151/230, Loss: 0.09258627355098724
0.6526627648704647
Epoch 161/230, Loss

[I 2024-01-22 23:24:25,619] Trial 31 finished with value: 0.652145260000897 and parameters: {'hidden_dim_h': 17, 'dropout': 0.14377845412447715, 'batch_size': 979, 'n_epochs': 230}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/281, Loss: 0.34122342864672345
0.5461022017091879
Epoch 11/281, Loss: 0.16350371887286505
0.6341723608580413
Epoch 21/281, Loss: 0.14562076578537622
0.6416176789487158
Epoch 31/281, Loss: 0.13540029674768447
0.6429157250715282
Epoch 41/281, Loss: 0.1257719541589419
0.6458731805458225
Epoch 51/281, Loss: 0.11641740699609121
0.6486662312091045
Epoch 61/281, Loss: 0.10859163179993629
0.6463075535797095
Epoch 71/281, Loss: 0.1023028905193011
0.6493503459476191
Epoch 81/281, Loss: 0.0976454089085261
0.6526397301134993
Epoch 91/281, Loss: 0.09420036002993584
0.6494751376942758
Epoch 101/281, Loss: 0.09197331219911575
0.6481279807973367
Epoch 111/281, Loss: 0.09093259001771609
0.6507277462282169
Epoch 121/281, Loss: 0.08978346263368925
0.65403814787927
Epoch 131/281, Loss: 0.08952472284436226
0.6499657856658274
Epoch 141/281, Loss: 0.08952780291438103
0.6491976896832935
Epoch 151/281, Loss: 0.08916258042057355
0.6556511815767554
Epoch 161/281, Lo

[I 2024-01-22 23:25:33,263] Trial 32 finished with value: 0.6537947521400642 and parameters: {'hidden_dim_h': 14, 'dropout': 0.08403921607079479, 'batch_size': 800, 'n_epochs': 281}. Best is trial 27 with value: 0.65511507524099.


Epoch 281/281, Loss: 0.08884105185667673
0.6537947521400642
Build model with 1 layers of attention
Epoch 1/298, Loss: 2.1519402384757997
0.5351917490034174
Epoch 11/298, Loss: 1.1771193941434224
0.5905270775879631
Epoch 21/298, Loss: 0.661259514093399
0.6102821750058344
Epoch 31/298, Loss: 0.417841308315595
0.6267603542751896
Epoch 41/298, Loss: 0.3197000245253245
0.6302169595009887
Epoch 51/298, Loss: 0.2832225074370702
0.6322252273884432
Epoch 61/298, Loss: 0.26401017755270006
0.6351339263037895
Epoch 71/298, Loss: 0.2484890396396319
0.6377042683755486
Epoch 81/298, Loss: 0.23210272739330928
0.6375161922541741
Epoch 91/298, Loss: 0.2156293178598086
0.6401349704701784
Epoch 101/298, Loss: 0.19875717908143997
0.6418823620823082
Epoch 111/298, Loss: 0.18250725865364076
0.6417331714612831
Epoch 121/298, Loss: 0.16673569083213807
0.642033949370238
Epoch 131/298, Loss: 0.15185431440671285
0.6434415283591586
Epoch 141/298, Loss: 0.13819506367047626
0.6493826706213969
Epoch 151/298, Loss: 0.

[I 2024-01-22 23:26:44,801] Trial 33 finished with value: 0.6549314831854035 and parameters: {'hidden_dim_h': 14, 'dropout': 0.07757215913333983, 'batch_size': 806, 'n_epochs': 298}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/299, Loss: 0.6385570437677445
0.5485334911626276
Epoch 11/299, Loss: 0.22276476410127455
0.6288899978206087
Epoch 21/299, Loss: 0.12883858406736004
0.6399437913802688
Epoch 31/299, Loss: 0.11583889636301226
0.6473689185900717
Epoch 41/299, Loss: 0.11229015406100981
0.650262807209904
Epoch 51/299, Loss: 0.10828091684849031
0.6508827646405833
Epoch 61/299, Loss: 0.10501343949187186
0.6463974038326598
Epoch 71/299, Loss: 0.1012549078272235
0.6516007301063619
Epoch 81/299, Loss: 0.09911939453694128
0.6491585879921011
Epoch 91/299, Loss: 0.09596584520993694
0.6531546034839194
Epoch 101/299, Loss: 0.094043469957767
0.6480016670133758
Epoch 111/299, Loss: 0.09199632872496882
0.652000407250961
Epoch 121/299, Loss: 0.09105596306823915
0.6544776937394542
Epoch 131/299, Loss: 0.09052182445603033
0.651737922668617
Epoch 141/299, Loss: 0.08967389863344931
0.650945346253967
Epoch 151/299, Loss: 0.08920241027109084
0.6506445033750164
Epoch 161/299, Loss:

[I 2024-01-22 23:28:00,081] Trial 34 finished with value: 0.653694173659196 and parameters: {'hidden_dim_h': 20, 'dropout': 0.08122695118250267, 'batch_size': 784, 'n_epochs': 299}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/257, Loss: 1.0166828591248085
0.5475224343253193
Epoch 11/257, Loss: 0.5044933958300228
0.6135668456625961
Epoch 21/257, Loss: 0.329267851237593
0.6320824961313305
Epoch 31/257, Loss: 0.2775396601907138
0.634873074715108
Epoch 41/257, Loss: 0.25550801127121364
0.6386191996600672
Epoch 51/257, Loss: 0.23576919015111594
0.6398296263418776
Epoch 61/257, Loss: 0.21616855315093336
0.6407673523551418
Epoch 71/257, Loss: 0.19607380700522456
0.6415482554417682
Epoch 81/257, Loss: 0.17661450546363305
0.6385998199390308
Epoch 91/257, Loss: 0.15944071627896408
0.6401268152457459
Epoch 101/257, Loss: 0.14400151320572557
0.642893200156899
Epoch 111/257, Loss: 0.1303107024266802
0.647065301156258
Epoch 121/257, Loss: 0.1187618996562629
0.6473506731559355
Epoch 131/257, Loss: 0.11000891595051207
0.6424917784430911
Epoch 141/257, Loss: 0.10310639055638478
0.6459267055282163
Epoch 151/257, Loss: 0.09849992702747214
0.6507381699539425
Epoch 161/257, Loss: 0

[I 2024-01-22 23:29:01,490] Trial 35 finished with value: 0.6483283207316096 and parameters: {'hidden_dim_h': 13, 'dropout': 0.10022452099713532, 'batch_size': 842, 'n_epochs': 257}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/280, Loss: 0.22479850113391875
0.5624738296853997
Epoch 11/280, Loss: 0.17205994248390197
0.6418795723514662
Epoch 21/280, Loss: 0.14149077594280243
0.6449436280597496
Epoch 31/280, Loss: 0.12139314204454422
0.6435453570005454
Epoch 41/280, Loss: 0.10824900388717651
0.6488633365345206
Epoch 51/280, Loss: 0.100417700111866
0.6516668267235053
Epoch 61/280, Loss: 0.09513116419315339
0.6480285102774035
Epoch 71/280, Loss: 0.0918431207537651
0.6506821538211911
Epoch 81/280, Loss: 0.09070115000009536
0.64911490656319
Epoch 91/280, Loss: 0.08975633800029754
0.6491882017418141
Epoch 101/280, Loss: 0.08923401057720184
0.6525364619172043
Epoch 111/280, Loss: 0.08902478009462357
0.6517743069826888
Epoch 121/280, Loss: 0.0890871262550354
0.6522236361441505
Epoch 131/280, Loss: 0.08886827260255814
0.654117803072856
Epoch 141/280, Loss: 0.08871569216251374
0.6533393903399851
Epoch 151/280, Loss: 0.08854530990123749
0.6518765504096676
Epoch 161/280, Loss

[I 2024-01-22 23:30:15,285] Trial 36 finished with value: 0.6502380691728561 and parameters: {'hidden_dim_h': 26, 'dropout': 0.07789668603091783, 'batch_size': 966, 'n_epochs': 280}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/265, Loss: 0.18621985556987616
0.5432205592691426
Epoch 11/265, Loss: 0.14556408788149172
0.6233925462216751
Epoch 21/265, Loss: 0.12579244547165358
0.6313078339395863
Epoch 31/265, Loss: 0.11286599962757184
0.6359798594070988
Epoch 41/265, Loss: 0.10399274270121868
0.6435658817821597
Epoch 51/265, Loss: 0.09951052413536952
0.6442471639062697
Epoch 61/265, Loss: 0.09653865030178657
0.6469288368769802
Epoch 71/265, Loss: 0.09426643470158944
0.6455149810155191
Epoch 81/265, Loss: 0.09402231413584489
0.6497458089407749
Epoch 91/265, Loss: 0.09290047419758943
0.6513173766164894
Epoch 101/265, Loss: 0.092196164222864
0.6493721482343422
Epoch 111/265, Loss: 0.09295563571728192
0.6490467811737336
Epoch 121/265, Loss: 0.09274936094880104
0.6469187773184085
Epoch 131/265, Loss: 0.09272362291812897
0.6470583875365514
Epoch 141/265, Loss: 0.0922799795292891
0.6503812363579757
Epoch 151/265, Loss: 0.09291110302393253
0.650308075839658
Epoch 161/265, L

[I 2024-01-22 23:31:19,584] Trial 37 finished with value: 0.6504832442499231 and parameters: {'hidden_dim_h': 15, 'dropout': 0.3136754107402634, 'batch_size': 928, 'n_epochs': 265}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/283, Loss: 1.0947142601013184
0.55533677242119
Epoch 11/283, Loss: 0.44425103267033894
0.6102638194713237
Epoch 21/283, Loss: 0.1943237413962682
0.6319264867161184
Epoch 31/283, Loss: 0.11831831733385721
0.6429625524597986
Epoch 41/283, Loss: 0.10266141469279925
0.6460469600653643
Epoch 51/283, Loss: 0.0993201362589995
0.6460857488816163
Epoch 61/283, Loss: 0.09809423113862674
0.6481495523791725
Epoch 71/283, Loss: 0.09714522113402685
0.650335350295386
Epoch 81/283, Loss: 0.09557441448171934
0.6510976757229751
Epoch 91/283, Loss: 0.09518978546063105
0.6498582747324576
Epoch 101/283, Loss: 0.09349129150311151
0.6504986320221595
Epoch 111/283, Loss: 0.09273698305090268
0.6505988598574998
Epoch 121/283, Loss: 0.09157577852408091
0.6500432245538098
Epoch 131/283, Loss: 0.09057146658500036
0.6501728329867216
Epoch 141/283, Loss: 0.0901337131857872
0.6507697199433694
Epoch 151/283, Loss: 0.09016956066091855
0.6527760687256986
Epoch 161/283, Loss

[I 2024-01-22 23:32:29,918] Trial 38 finished with value: 0.6527709238852017 and parameters: {'hidden_dim_h': 19, 'dropout': 0.09183598434787485, 'batch_size': 812, 'n_epochs': 283}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/282, Loss: 0.8624950226615457
0.5497717845925097
Epoch 11/282, Loss: 0.3983113090781605
0.617228363826449
Epoch 21/282, Loss: 0.2895776042166878
0.6245227104831024
Epoch 31/282, Loss: 0.2592403034076971
0.633040202025866
Epoch 41/282, Loss: 0.23378604738151326
0.6361842045691278
Epoch 51/282, Loss: 0.20803426556727467
0.6395705636757116
Epoch 61/282, Loss: 0.18384901565663955
0.6405769740778535
Epoch 71/282, Loss: 0.16134146776269465
0.6455089022396197
Epoch 81/282, Loss: 0.1419602856040001
0.6434298949441175
Epoch 91/282, Loss: 0.12649883439435677
0.6429103339300369
Epoch 101/282, Loss: 0.11436421029707965
0.6461162888011834
Epoch 111/282, Loss: 0.10501464543973699
0.6506304259375901
Epoch 121/282, Loss: 0.09868838199797798
0.6473272436087304
Epoch 131/282, Loss: 0.09468979629523613
0.6476489906906748
Epoch 141/282, Loss: 0.09206217395908692
0.6497106083950163
Epoch 151/282, Loss: 0.09091865862993632
0.6517905415037899
Epoch 161/282, Loss

[I 2024-01-22 23:33:40,572] Trial 39 finished with value: 0.6531162801658293 and parameters: {'hidden_dim_h': 13, 'dropout': 0.12970027087359529, 'batch_size': 709, 'n_epochs': 282}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/247, Loss: 0.3138738815431242
0.5631022261882278
Epoch 11/247, Loss: 0.16383269374017362
0.6377917911222117
Epoch 21/247, Loss: 0.14704258960706215
0.6403666983010978
Epoch 31/247, Loss: 0.134722708551972
0.6432646464460269
Epoch 41/247, Loss: 0.12361002713441849
0.6428895744780457
Epoch 51/247, Loss: 0.11402609833964596
0.6455719046854915
Epoch 61/247, Loss: 0.10679435150490867
0.6459347894858661
Epoch 71/247, Loss: 0.10084168033467399
0.644700862102561
Epoch 81/247, Loss: 0.09664509610997306
0.646970603456384
Epoch 91/247, Loss: 0.0942846961595394
0.6495135826355978
Epoch 101/247, Loss: 0.09246455353719217
0.6475591746314431
Epoch 111/247, Loss: 0.09158061996654228
0.6498012947695211
Epoch 121/247, Loss: 0.09101196461253697
0.650768352265219
Epoch 131/247, Loss: 0.09038880384630626
0.647215675280472
Epoch 141/247, Loss: 0.09003432757324642
0.6506530324001721
Epoch 151/247, Loss: 0.09012329854347088
0.6490478212448735
Epoch 161/247, Loss:

[I 2024-01-22 23:34:49,704] Trial 40 finished with value: 0.6525913997573638 and parameters: {'hidden_dim_h': 35, 'dropout': 0.18886701034362288, 'batch_size': 904, 'n_epochs': 247}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/294, Loss: 2.8121867256779827
0.5220630362580783
Epoch 11/294, Loss: 1.6273011007616598
0.5852715878814478
Epoch 21/294, Loss: 0.937127128724129
0.6084593833410298
Epoch 31/294, Loss: 0.5731045257660651
0.0027907626891410823
Epoch 41/294, Loss: 0.31530728263239705
0.592950196834011
Epoch 51/294, Loss: 0.21755112659546635
0.6201470918765059
Epoch 61/294, Loss: 0.18500754910130654
0.6355109831235928
Epoch 71/294, Loss: 0.17304938791259641
0.6387868940493486
Epoch 81/294, Loss: 0.16508734947250736
0.6450636012320339
Epoch 91/294, Loss: 0.15809604910112196
0.6464598740090393
Epoch 101/294, Loss: 0.150605863621158
0.643498326932211
Epoch 111/294, Loss: 0.14280382183290297
0.6413761267659471
Epoch 121/294, Loss: 0.13555897868448688
0.645716185788654
Epoch 131/294, Loss: 0.127614269333501
0.6457149132302311
Epoch 141/294, Loss: 0.12033502132661882
0.6451697135007682
Epoch 151/294, Loss: 0.11372895322499736
0.6511733179406394
Epoch 161/294, Loss: 

[I 2024-01-22 23:36:03,556] Trial 41 finished with value: 0.6532426292698836 and parameters: {'hidden_dim_h': 20, 'dropout': 0.0713151406122681, 'batch_size': 783, 'n_epochs': 294}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/299, Loss: 0.2795826676301658
0.5513362447463906
Epoch 11/299, Loss: 0.16743861977010965
0.6292563754478342
Epoch 21/299, Loss: 0.14581379340961576
0.640570045675039
Epoch 31/299, Loss: 0.12757011968642473
0.6458913516462249
Epoch 41/299, Loss: 0.11305062705650926
0.6415900744154873
Epoch 51/299, Loss: 0.10319742211140692
0.6493674868336532
Epoch 61/299, Loss: 0.09667777945287526
0.6467322493637265
Epoch 71/299, Loss: 0.09341479488648474
0.6523291591353936
Epoch 81/299, Loss: 0.09127277019433677
0.6513981892508699
Epoch 91/299, Loss: 0.09049651841633022
0.6495308669150757
Epoch 101/299, Loss: 0.08953034551814198
0.6500476948616212
Epoch 111/299, Loss: 0.08954029669985175
0.6519208301574031
Epoch 121/299, Loss: 0.08975772373378277
0.6513756947366253
Epoch 131/299, Loss: 0.08914663922041655
0.6537826429031613
Epoch 141/299, Loss: 0.08952018339186907
0.6506241206396064
Epoch 151/299, Loss: 0.08899892750196159
0.6505574360554189
Epoch 161/299,

[I 2024-01-22 23:37:18,751] Trial 42 finished with value: 0.6488452868959583 and parameters: {'hidden_dim_h': 19, 'dropout': 0.10769027535099668, 'batch_size': 751, 'n_epochs': 299}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/270, Loss: 0.7719238698482513
0.5398445204044235
Epoch 11/270, Loss: 0.37436106530102814
0.6241132742171048
Epoch 21/270, Loss: 0.20618834685195575
0.6373199093139912
Epoch 31/270, Loss: 0.14807871864600616
0.6372325301544856
Epoch 41/270, Loss: 0.13165920160033487
0.6483692255293955
Epoch 51/270, Loss: 0.1263182976029136
0.6468964546716881
Epoch 61/270, Loss: 0.12305967543612827
0.6499081336215001
Epoch 71/270, Loss: 0.11954645778645169
0.6451967504978016
Epoch 81/270, Loss: 0.11614118550311435
0.6466573075317903
Epoch 91/270, Loss: 0.1129960675131191
0.651449356122243
Epoch 101/270, Loss: 0.10965833846818317
0.6484439255107193
Epoch 111/270, Loss: 0.1060832870954817
0.6518709904956985
Epoch 121/270, Loss: 0.1035002324391495
0.652330121811348
Epoch 131/270, Loss: 0.1006341627375646
0.6521255596315423
Epoch 141/270, Loss: 0.0987057892436331
0.6506785114907282
Epoch 151/270, Loss: 0.095904563638297
0.6525021689616606
Epoch 161/270, Loss: 0.

[I 2024-01-22 23:38:26,376] Trial 43 finished with value: 0.6498377330746802 and parameters: {'hidden_dim_h': 24, 'dropout': 0.07979191243583017, 'batch_size': 1103, 'n_epochs': 270}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/212, Loss: 0.8375724494457245
0.5436319117271179
Epoch 11/212, Loss: 0.3246507008870443
0.6118817200556164
Epoch 21/212, Loss: 0.1646169349551201
0.6315782029356801
Epoch 31/212, Loss: 0.1284508836766084
0.6380218081752752
Epoch 41/212, Loss: 0.12192025159796079
0.6455977615516073
Epoch 51/212, Loss: 0.11802295769254366
0.6414735552232054
Epoch 61/212, Loss: 0.11362071533997854
0.6437039029796381
Epoch 71/212, Loss: 0.11086528822779655
0.6461560589938696
Epoch 81/212, Loss: 0.10710538576046626
0.6476250310890098
Epoch 91/212, Loss: 0.10449249818921089
0.646271976878743
Epoch 101/212, Loss: 0.10121109162767729
0.6485091534119924
Epoch 111/212, Loss: 0.0983376217385133
0.6501563666765747
Epoch 121/212, Loss: 0.09540621240933736
0.6485918899201412
Epoch 131/212, Loss: 0.09459260875980059
0.6471546148509982
Epoch 141/212, Loss: 0.09430390844742458
0.648868439965063
Epoch 151/212, Loss: 0.09284234295288722
0.6512507916189588
Epoch 161/212, Loss

[I 2024-01-22 23:39:19,312] Trial 44 finished with value: 0.6503091302901768 and parameters: {'hidden_dim_h': 16, 'dropout': 0.22112095108288898, 'batch_size': 821, 'n_epochs': 212}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/277, Loss: 1.62354177236557
0.556542909849055
Epoch 11/277, Loss: 0.7696011281675763
0.6094690686123776
Epoch 21/277, Loss: 0.34888335565725964
0.5922690542131039
Epoch 31/277, Loss: 0.2083566938009527
0.6218917485230262
Epoch 41/277, Loss: 0.17750759878092343
0.6347118561320657
Epoch 51/277, Loss: 0.1657638479438093
0.6407902820821533
Epoch 61/277, Loss: 0.15683621706234085
0.6432543751400034
Epoch 71/277, Loss: 0.14747214855419266
0.641853156648454
Epoch 81/277, Loss: 0.1380800637933943
0.6408447194601774
Epoch 91/277, Loss: 0.128660769512256
0.6481582713495522
Epoch 101/277, Loss: 0.11921887161831062
0.6445907297185299
Epoch 111/277, Loss: 0.11151838737229507
0.6430523578434951
Epoch 121/277, Loss: 0.10506991669535637
0.6421037062477336
Epoch 131/277, Loss: 0.1004481195575661
0.6417194637453274
Epoch 141/277, Loss: 0.09652566537261009
0.6456218084414976
Epoch 151/277, Loss: 0.09374080701834625
0.6490143852330892
Epoch 161/277, Loss: 0.0

[I 2024-01-22 23:40:33,590] Trial 45 finished with value: 0.652685690171682 and parameters: {'hidden_dim_h': 30, 'dropout': 0.15831041695962256, 'batch_size': 676, 'n_epochs': 277}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/194, Loss: 2.172598517858065
0.5446242247674719
Epoch 11/194, Loss: 1.2585025429725647
0.5449515844875357
Epoch 21/194, Loss: 0.6864516528753134
0.5873163351826706
Epoch 31/194, Loss: 0.3781411831195538
0.6064286354255831
Epoch 41/194, Loss: 0.22797340040023512
0.6228585085799879
Epoch 51/194, Loss: 0.16717040137602732
0.6330268007398315
Epoch 61/194, Loss: 0.14286867815714616
0.6408271170399025
Epoch 71/194, Loss: 0.1344006213431175
0.6424543182367468
Epoch 81/194, Loss: 0.13096460671379015
0.6461547553593097
Epoch 91/194, Loss: 0.12731623620941088
0.6459839032726558
Epoch 101/194, Loss: 0.12459916277573659
0.6448752536716149
Epoch 111/194, Loss: 0.12058906916242379
0.6497689188935111
Epoch 121/194, Loss: 0.1177250688465742
0.6483043497690468
Epoch 131/194, Loss: 0.11352706700563431
0.6479931644208855
Epoch 141/194, Loss: 0.1103389526789005
0.6512039459952271
Epoch 151/194, Loss: 0.1070808619260788
0.651570247540378
Epoch 161/194, Loss: 0

[I 2024-01-22 23:41:21,666] Trial 46 finished with value: 0.6530550528643205 and parameters: {'hidden_dim_h': 20, 'dropout': 0.06854041968547595, 'batch_size': 947, 'n_epochs': 194}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/225, Loss: 0.38526670954057146
0.5476549129483643
Epoch 11/225, Loss: 0.24941548971193178
0.6169369146908794
Epoch 21/225, Loss: 0.2172151937016419
0.6309965970735734
Epoch 31/225, Loss: 0.18756907965455735
0.634978441572988
Epoch 41/225, Loss: 0.16226390270250185
0.6430733467487442
Epoch 51/225, Loss: 0.14115673676133156
0.6454094800615834
Epoch 61/225, Loss: 0.12512884422072343
0.645975369671529
Epoch 71/225, Loss: 0.11232627928256989
0.6428727708093674
Epoch 81/225, Loss: 0.10302557556756906
0.6468765388377519
Epoch 91/225, Loss: 0.09865381754934788
0.6487277797938895
Epoch 101/225, Loss: 0.09447956697217055
0.6526798197893843
Epoch 111/225, Loss: 0.09255796218557018
0.6505868037815012
Epoch 121/225, Loss: 0.09053903046463217
0.6499928108737931
Epoch 131/225, Loss: 0.08940452177609716
0.6516422406563037
Epoch 141/225, Loss: 0.09023683651217393
0.6498559980874778
Epoch 151/225, Loss: 0.08987771267337459
0.6527156231065359
Epoch 161/225, 

[I 2024-01-22 23:42:16,613] Trial 47 finished with value: 0.6541477526828792 and parameters: {'hidden_dim_h': 12, 'dropout': 0.08603843833312479, 'batch_size': 878, 'n_epochs': 225}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/218, Loss: 0.9165638199558964
0.5337180222246302
Epoch 11/218, Loss: 0.39144358811555086
0.6182653603379135
Epoch 21/218, Loss: 0.19159793191485935
0.630694638749091
Epoch 31/218, Loss: 0.1327233482842092
0.6305202183479502
Epoch 41/218, Loss: 0.11954911632670297
0.6436172662464915
Epoch 51/218, Loss: 0.1154798945343053
0.6448182709960921
Epoch 61/218, Loss: 0.11283739076720344
0.6465015493774654
Epoch 71/218, Loss: 0.10970495282499879
0.6457894184933713
Epoch 81/218, Loss: 0.10724692046642303
0.6464595014180679
Epoch 91/218, Loss: 0.10397159528953058
0.6475408237291016
Epoch 101/218, Loss: 0.10136525608875134
0.6474495817083185
Epoch 111/218, Loss: 0.09936881092963396
0.652553653611646
Epoch 121/218, Loss: 0.09682009120782216
0.6471177636978515
Epoch 131/218, Loss: 0.0948648425164046
0.6492973544081099
Epoch 141/218, Loss: 0.0934994822299039
0.6483661440868301
Epoch 151/218, Loss: 0.09196528461244372
0.6487120775399929
Epoch 161/218, Loss

[I 2024-01-22 23:43:07,838] Trial 48 finished with value: 0.650950507722739 and parameters: {'hidden_dim_h': 11, 'dropout': 0.08855573373086764, 'batch_size': 901, 'n_epochs': 218}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/247, Loss: 1.380691694361823
0.530951379833746
Epoch 11/247, Loss: 0.6586033531597683
0.6056172976493231
Epoch 21/247, Loss: 0.31412734410592486
0.6260696958325991
Epoch 31/247, Loss: 0.17664067606840814
0.6305608916028999
Epoch 41/247, Loss: 0.13314010176275456
0.6409669742995341
Epoch 51/247, Loss: 0.12133532523044518
0.6476624001756656
Epoch 61/247, Loss: 0.11757930421403476
0.6480213277989558
Epoch 71/247, Loss: 0.11515030930084842
0.6441054170903041
Epoch 81/247, Loss: 0.11227947579962867
0.6489651141570076
Epoch 91/247, Loss: 0.10943576001695224
0.651719817549855
Epoch 101/247, Loss: 0.1067125608346292
0.651466761099085
Epoch 111/247, Loss: 0.1037217660674027
0.6503207982976966
Epoch 121/247, Loss: 0.10133798500256878
0.6461340513555166
Epoch 131/247, Loss: 0.09877587296068668
0.6493959038709315
Epoch 141/247, Loss: 0.09670509264937469
0.6492440856205984
Epoch 151/247, Loss: 0.09461412552211966
0.6505401027956532
Epoch 161/247, Loss:

[I 2024-01-22 23:44:08,266] Trial 49 finished with value: 0.6535003557642449 and parameters: {'hidden_dim_h': 12, 'dropout': 0.10558742269708876, 'batch_size': 858, 'n_epochs': 247}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/225, Loss: 0.8160185200326583
0.5503025027297908
Epoch 11/225, Loss: 0.34197438552099113
0.6344325480151812
Epoch 21/225, Loss: 0.23245367013356266
0.6438467468647112
Epoch 31/225, Loss: 0.207995845552753
0.6452506798868738
Epoch 41/225, Loss: 0.1899157267282991
0.6452761465664408
Epoch 51/225, Loss: 0.17178678381092408
0.6489773541594542
Epoch 61/225, Loss: 0.15451954392825856
0.6465086621084886
Epoch 71/225, Loss: 0.13809916157932842
0.6478991502271811
Epoch 81/225, Loss: 0.12488611743730657
0.6500124800629271
Epoch 91/225, Loss: 0.11379632726311684
0.6484081635204285
Epoch 101/225, Loss: 0.10501726388054736
0.651075157132863
Epoch 111/225, Loss: 0.0993351916618207
0.6480017775814746
Epoch 121/225, Loss: 0.09452484679572723
0.6498863987751133
Epoch 131/225, Loss: 0.09132552738575374
0.6514435616350517
Epoch 141/225, Loss: 0.09030700518804438
0.6539042314946973
Epoch 151/225, Loss: 0.08943938442012843
0.6525067593857475
Epoch 161/225, Los

[I 2024-01-22 23:45:03,821] Trial 50 finished with value: 0.6533573278241505 and parameters: {'hidden_dim_h': 14, 'dropout': 0.06312737454113802, 'batch_size': 718, 'n_epochs': 225}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/293, Loss: 2.1588088194529216
0.5339203072447292
Epoch 11/293, Loss: 1.2219135642051697
0.5917733784134165
Epoch 21/293, Loss: 0.606382938226064
0.5732086061762208
Epoch 31/293, Loss: 0.314397602279981
0.612910503875561
Epoch 41/293, Loss: 0.1999445175131162
0.6312769270913039
Epoch 51/293, Loss: 0.1619737058877945
0.642640258755848
Epoch 61/293, Loss: 0.1496094192067782
0.6391604529448823
Epoch 71/293, Loss: 0.1436887895067533
0.640354993906197
Epoch 81/293, Loss: 0.1387039472659429
0.6458322671750936
Epoch 91/293, Loss: 0.13331952020525933
0.6448932573663039
Epoch 101/293, Loss: 0.1277848186592261
0.640110637884009
Epoch 111/293, Loss: 0.12261979778607686
0.6494009765084942
Epoch 121/293, Loss: 0.11724962294101715
0.6472539165357432
Epoch 131/293, Loss: 0.11197245195508003
0.6488059279908637
Epoch 141/293, Loss: 0.10731787259380023
0.6485842312688651
Epoch 151/293, Loss: 0.10339235489567121
0.6480578545952659
Epoch 161/293, Loss: 0.09942

[I 2024-01-22 23:46:16,313] Trial 51 finished with value: 0.6526108874061888 and parameters: {'hidden_dim_h': 18, 'dropout': 0.08280927042727759, 'batch_size': 804, 'n_epochs': 293}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/261, Loss: 0.4162944086960384
0.5806589603468227
Epoch 11/261, Loss: 0.13723635339531406
0.633021881501762
Epoch 21/261, Loss: 0.09700873802448141
0.6410072748244344
Epoch 31/261, Loss: 0.09317351050050975
0.6481025233439974
Epoch 41/261, Loss: 0.09301966590247131
0.6485015339932229
Epoch 51/261, Loss: 0.09246967923758652
0.6483545248914382
Epoch 61/261, Loss: 0.09249574263474625
0.6461714269628412
Epoch 71/261, Loss: 0.09223632545673789
0.64895517621824
Epoch 81/261, Loss: 0.09231250599173489
0.6479865486200476
Epoch 91/261, Loss: 0.0919613376480018
0.6492321950522922
Epoch 101/261, Loss: 0.09171469450877805
0.6495127427260582
Epoch 111/261, Loss: 0.0917186379212464
0.6511720786641951
Epoch 121/261, Loss: 0.09109004996166441
0.6537292901989389
Epoch 131/261, Loss: 0.09189178236746436
0.6521282145425806
Epoch 141/261, Loss: 0.09169581657266382
0.6486149209272462
Epoch 151/261, Loss: 0.09158269304947313
0.6512334737091733
Epoch 161/261, Los

[I 2024-01-22 23:48:49,503] Trial 52 finished with value: 0.652012606642278 and parameters: {'hidden_dim_h': 15, 'dropout': 0.1167588491011672, 'batch_size': 118, 'n_epochs': 261}. Best is trial 27 with value: 0.65511507524099.


Epoch 261/261, Loss: 0.09100169086544385
0.652012606642278
Build model with 1 layers of attention
Epoch 1/167, Loss: 0.3606979250907898
0.5436524320029823
Epoch 11/167, Loss: 0.2224194938555742
0.6226482311026699
Epoch 21/167, Loss: 0.18291823336711296
0.6424818406094632
Epoch 31/167, Loss: 0.14981691118998405
0.6336842710011347
Epoch 41/167, Loss: 0.12527821098382658
0.642694396245681
Epoch 51/167, Loss: 0.10929284359400089
0.6480131163628365
Epoch 61/167, Loss: 0.09913820429490162
0.6484208847859494
Epoch 71/167, Loss: 0.09368048589199017
0.6443203686847645
Epoch 81/167, Loss: 0.09150475836717166
0.6489930103896508
Epoch 91/167, Loss: 0.09015834388824609
0.6499533017610339
Epoch 101/167, Loss: 0.08949028662382028
0.651584082515709
Epoch 111/167, Loss: 0.08930723197185077
0.6537741094407775
Epoch 121/167, Loss: 0.08932768152310298
0.6533460275511606
Epoch 131/167, Loss: 0.08911403058431087
0.6525819595076012
Epoch 141/167, Loss: 0.08923638936800835
0.6511168920420866
Epoch 151/167, Lo

[I 2024-01-22 23:49:32,453] Trial 53 finished with value: 0.6498342022917959 and parameters: {'hidden_dim_h': 10, 'dropout': 0.06635298475551352, 'batch_size': 623, 'n_epochs': 167}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/201, Loss: 1.8480998426675797
0.5424827685132494
Epoch 11/201, Loss: 0.8726683985441923
0.6012778408616707
Epoch 21/201, Loss: 0.39350805152207613
0.6198514599391176
Epoch 31/201, Loss: 0.1954351393505931
0.6347973680396543
Epoch 41/201, Loss: 0.13088185735978186
0.6404141041364705
Epoch 51/201, Loss: 0.1137498973403126
0.6428614795887538
Epoch 61/201, Loss: 0.10964473546482623
0.6453284575160304
Epoch 71/201, Loss: 0.10728807607665658
0.6499193786661788
Epoch 81/201, Loss: 0.10525779891759157
0.648890040982397
Epoch 91/201, Loss: 0.1032806618604809
0.6508228908615523
Epoch 101/201, Loss: 0.1013639725279063
0.6483194580949434
Epoch 111/201, Loss: 0.09923891606740654
0.6504441587810834
Epoch 121/201, Loss: 0.09722377499565482
0.6480372021155467
Epoch 131/201, Loss: 0.0956833609379828
0.6493202101677826
Epoch 141/201, Loss: 0.09375671739690006
0.6511203050605231
Epoch 151/201, Loss: 0.0926756919361651
0.6502856766093611
Epoch 161/201, Loss: 

[I 2024-01-22 23:50:21,272] Trial 54 finished with value: 0.6500160554954296 and parameters: {'hidden_dim_h': 13, 'dropout': 0.10421672216614238, 'batch_size': 746, 'n_epochs': 201}. Best is trial 27 with value: 0.65511507524099.


Epoch 201/201, Loss: 0.08957972261123359
0.6500160554954296
Build model with 1 layers of attention
Epoch 1/271, Loss: 1.1644228904143623
0.5384268220591719
Epoch 11/271, Loss: 0.6265753766764766
0.6109787455323598
Epoch 21/271, Loss: 0.3468096269213635
0.6254148198634751
Epoch 31/271, Loss: 0.2225031949903654
0.6396550157035616
Epoch 41/271, Loss: 0.1753543751395267
0.6419180318416822
Epoch 51/271, Loss: 0.15831617881422458
0.6439123096145286
Epoch 61/271, Loss: 0.152258249728576
0.6460744402019092
Epoch 71/271, Loss: 0.14725852595723193
0.6512419349436436
Epoch 81/271, Loss: 0.14182186321071957
0.6478137407535082
Epoch 91/271, Loss: 0.1372390434793804
0.6488575020684679
Epoch 101/271, Loss: 0.13222464076850726
0.6495887899126838
Epoch 111/271, Loss: 0.12677896476310233
0.6486020347268422
Epoch 121/271, Loss: 0.12222076369368512
0.6490380880487879
Epoch 131/271, Loss: 0.11758960848269255
0.649728003404704
Epoch 141/271, Loss: 0.11254929394825645
0.6515507059696528
Epoch 151/271, Loss: 

[I 2024-01-22 23:51:25,607] Trial 55 finished with value: 0.6528730300022795 and parameters: {'hidden_dim_h': 12, 'dropout': 0.09636661709995682, 'batch_size': 1070, 'n_epochs': 271}. Best is trial 27 with value: 0.65511507524099.


Epoch 271/271, Loss: 0.08876338795475337
0.6528730300022795
Build model with 1 layers of attention
Epoch 1/287, Loss: 0.5381694045560114
0.5500746118847175
Epoch 11/287, Loss: 0.271631036339135
0.613518197156843
Epoch 21/287, Loss: 0.22018332728024187
0.6284390009899014
Epoch 31/287, Loss: 0.19784999458954253
0.628907912098389
Epoch 41/287, Loss: 0.18075308820296979
0.6394430308856396
Epoch 51/287, Loss: 0.16857787761194953
0.6402471733791018
Epoch 61/287, Loss: 0.14602666476677204
0.6406283941344035
Epoch 71/287, Loss: 0.13604186552351918
0.6435927637855509
Epoch 81/287, Loss: 0.1192905502072696
0.6419212888978167
Epoch 91/287, Loss: 0.1129754591604759
0.643620695250315
Epoch 101/287, Loss: 0.10492429522604778
0.6477509081267889
Epoch 111/287, Loss: 0.09969259618685164
0.6474213765945894
Epoch 121/287, Loss: 0.09837112935452626
0.6432615687180674
Epoch 131/287, Loss: 0.09500406605416331
0.6462855709525885
Epoch 141/287, Loss: 0.09390471670134314
0.6449852530573713
Epoch 151/287, Loss:

[I 2024-01-22 23:52:36,532] Trial 56 finished with value: 0.6503784133466838 and parameters: {'hidden_dim_h': 16, 'dropout': 0.06506280104122501, 'batch_size': 852, 'n_epochs': 287}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/300, Loss: 0.8707588072176333
0.5542743912561012
Epoch 11/300, Loss: 0.40095217691527474
0.6480519876838556
Epoch 21/300, Loss: 0.23787484069665274
0.6489639966481414
Epoch 31/300, Loss: 0.1935248816454852
0.6468787061869219
Epoch 41/300, Loss: 0.17908952578350348
0.6444456306951681
Epoch 51/300, Loss: 0.16811090818157903
0.6438784927753647
Epoch 61/300, Loss: 0.1575943330923716
0.6454451094396468
Epoch 71/300, Loss: 0.14748433508254863
0.6439576977226048
Epoch 81/300, Loss: 0.13705367650146838
0.644785762293508
Epoch 91/300, Loss: 0.12862489814007724
0.6464679882491718
Epoch 101/300, Loss: 0.11979582519442947
0.6464681636931503
Epoch 111/300, Loss: 0.1123003606443052
0.6499484606659082
Epoch 121/300, Loss: 0.10625069229691117
0.6493170983328913
Epoch 131/300, Loss: 0.10102036954076202
0.6471530821636154
Epoch 141/300, Loss: 0.0976963523361418
0.6487702998622071
Epoch 151/300, Loss: 0.09386788612162625
0.6500896953146439
Epoch 161/300, Los

[I 2024-01-22 23:53:51,197] Trial 57 finished with value: 0.6546438727119476 and parameters: {'hidden_dim_h': 21, 'dropout': 0.08342575840968187, 'batch_size': 910, 'n_epochs': 300}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/245, Loss: 0.41210320821175206
0.5520744120764538
Epoch 11/245, Loss: 0.24527901525680834
0.6370585783684093
Epoch 21/245, Loss: 0.2142961323261261
0.6384083644104998
Epoch 31/245, Loss: 0.19093434111430094
0.6416848507502483
Epoch 41/245, Loss: 0.16903367237402842
0.6419331590729266
Epoch 51/245, Loss: 0.1496137804709948
0.6455914249005777
Epoch 61/245, Loss: 0.13357354929813972
0.6459968776041618
Epoch 71/245, Loss: 0.12048941277540647
0.6427664559362652
Epoch 81/245, Loss: 0.11038608161302713
0.6454983862721315
Epoch 91/245, Loss: 0.10322123765945435
0.6493115566976099
Epoch 101/245, Loss: 0.09783927723765373
0.6502141541281788
Epoch 111/245, Loss: 0.09425290444722542
0.6526016076142236
Epoch 121/245, Loss: 0.09204198162143047
0.6474063963225233
Epoch 131/245, Loss: 0.09066544272578679
0.6495492416830826
Epoch 141/245, Loss: 0.09056360532458012
0.6510621662359827
Epoch 151/245, Loss: 0.08972613828686568
0.6532709227964473
Epoch 161/245,

[I 2024-01-22 23:54:51,131] Trial 58 finished with value: 0.6537146010237124 and parameters: {'hidden_dim_h': 22, 'dropout': 0.12397784031874012, 'batch_size': 921, 'n_epochs': 245}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/128, Loss: 0.624977388381958
0.5642227136880705
Epoch 11/128, Loss: 0.2683918625116348
0.6304503472324572
Epoch 21/128, Loss: 0.1592641615867615
0.6300080745272804
Epoch 31/128, Loss: 0.13617079943418503
0.6409155031027328
Epoch 41/128, Loss: 0.12911461889743805
0.6491692309517146
Epoch 51/128, Loss: 0.12429143786430359
0.6491619041924166
Epoch 61/128, Loss: 0.12031958192586899
0.6475463399731376
Epoch 71/128, Loss: 0.11612472176551819
0.6507256651374531
Epoch 81/128, Loss: 0.11070012509822845
0.646473948655084
Epoch 91/128, Loss: 0.10714382380247116
0.6460020956408123
Epoch 101/128, Loss: 0.10354496031999588
0.6480478263132844
Epoch 111/128, Loss: 0.1000780126452446
0.6508876976683786
Epoch 121/128, Loss: 0.09646308660507202
0.6472926317029287


[I 2024-01-22 23:55:29,753] Trial 59 finished with value: 0.6472926317029287 and parameters: {'hidden_dim_h': 39, 'dropout': 0.050009247571781976, 'batch_size': 987, 'n_epochs': 128}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/183, Loss: 1.5805496999195643
0.5217486770645168
Epoch 11/183, Loss: 0.9531245231628418
0.5892366202000273
Epoch 21/183, Loss: 0.5936723408244905
0.618860966589318
Epoch 31/183, Loss: 0.39204764649981544
0.6281155386842702
Epoch 41/183, Loss: 0.2925063031060355
0.6373474310134708
Epoch 51/183, Loss: 0.24670859958444322
0.6411064357653915
Epoch 61/183, Loss: 0.2261980353366761
0.6415247220514949
Epoch 71/183, Loss: 0.21497286572342827
0.6439566880697745
Epoch 81/183, Loss: 0.2063314417997996
0.646034265919735
Epoch 91/183, Loss: 0.19758708491211846
0.6462106714640364
Epoch 101/183, Loss: 0.18907322557199568
0.6461899863921684
Epoch 111/183, Loss: 0.18029430437655675
0.6440281677794003
Epoch 121/183, Loss: 0.17103906401566096
0.6456916003175516
Epoch 131/183, Loss: 0.16204388865402766
0.6462004803185498
Epoch 141/183, Loss: 0.15350670332000368
0.6496214117361927
Epoch 151/183, Loss: 0.14476268206323897
0.6482857112046843
Epoch 161/183, Loss:

[I 2024-01-22 23:56:12,432] Trial 60 finished with value: 0.647476572681522 and parameters: {'hidden_dim_h': 14, 'dropout': 0.155479407257475, 'batch_size': 1159, 'n_epochs': 183}. Best is trial 27 with value: 0.65511507524099.


Build model with 1 layers of attention
Epoch 1/247, Loss: 0.23538414085352863
0.5608460321192829
Epoch 11/247, Loss: 0.1756246448666961
0.6362304427552117
Epoch 21/247, Loss: 0.15550897629172714
0.6437602176438133
Epoch 31/247, Loss: 0.13751266179261384
0.641323176489692
Epoch 41/247, Loss: 0.12273724680697476
0.6464445923558141
Epoch 51/247, Loss: 0.11154148976008098
0.6487879587196232
Epoch 61/247, Loss: 0.10360013748760577
0.6481721144773327
Epoch 71/247, Loss: 0.09786410629749298
0.6482169065393726
Epoch 81/247, Loss: 0.09452836601822465
0.6500058090832921
Epoch 91/247, Loss: 0.09213991766726529
0.6502657755949193
Epoch 101/247, Loss: 0.0911609889180572
0.6527830982041515
Epoch 111/247, Loss: 0.09020249214437273
0.6513659369404102
Epoch 121/247, Loss: 0.08989228611743008
0.6509665720768789
Epoch 131/247, Loss: 0.08989398291817417
0.6500950980459146
Epoch 141/247, Loss: 0.08940063113415683
0.6550080392392611
Epoch 151/247, Loss: 0.0893945128277496
0.6545555011433438
Epoch 161/247, L

[I 2024-01-22 23:57:17,811] Trial 61 finished with value: 0.6555382706706964 and parameters: {'hidden_dim_h': 28, 'dropout': 0.11557890065705236, 'batch_size': 884, 'n_epochs': 247}. Best is trial 61 with value: 0.6555382706706964.


Found better hyperparameter, update model
Build model with 1 layers of attention
Epoch 1/240, Loss: 0.5269122035415085
0.5518850812666842
Epoch 11/240, Loss: 0.27598226732677883
0.6400159143912714
Epoch 21/240, Loss: 0.2259405591973552
0.6397997706747777
Epoch 31/240, Loss: 0.20521042578750187
0.6451346113969595
Epoch 41/240, Loss: 0.18549804389476776
0.641622710846394
Epoch 51/240, Loss: 0.16669643918673197
0.6404056301232285
Epoch 61/240, Loss: 0.14953812846431025
0.6467387610849511
Epoch 71/240, Loss: 0.13475375236184509
0.6439754269513923
Epoch 81/240, Loss: 0.1222486956803887
0.641978755820011
Epoch 91/240, Loss: 0.11236508466579297
0.6496122939887516
Epoch 101/240, Loss: 0.10469479693306817
0.6447222405925473
Epoch 111/240, Loss: 0.0993023125661744
0.6541248250447065
Epoch 121/240, Loss: 0.09542030675543679
0.6460795829399978
Epoch 131/240, Loss: 0.09283228660071338
0.6437643007286942
Epoch 141/240, Loss: 0.09081179096742913
0.6526690338471175
Epoch 151/240, Loss: 0.0901175254472

[I 2024-01-22 23:58:21,283] Trial 62 finished with value: 0.6519887080839941 and parameters: {'hidden_dim_h': 28, 'dropout': 0.08632701455571407, 'batch_size': 887, 'n_epochs': 240}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/273, Loss: 0.5410758294165134
0.5542655402387946
Epoch 11/273, Loss: 0.21726059168577194
0.6274026253483524
Epoch 21/273, Loss: 0.1264379552255074
0.6380914323330374
Epoch 31/273, Loss: 0.10742357031752665
0.6418835697254494
Epoch 41/273, Loss: 0.10404435669382413
0.6439644762174391
Epoch 51/273, Loss: 0.10221025409797828
0.6425687663905695
Epoch 61/273, Loss: 0.10045831153790157
0.6471306709081182
Epoch 71/273, Loss: 0.09832169612248738
0.6470262481434281
Epoch 81/273, Loss: 0.09693131688982248
0.6505956765332646
Epoch 91/273, Loss: 0.09591118308405082
0.6476628821893484
Epoch 101/273, Loss: 0.09439189173281193
0.6498645125266705
Epoch 111/273, Loss: 0.0934754724924763
0.6450900630025439
Epoch 121/273, Loss: 0.0920339726532499
0.6498439224348821
Epoch 131/273, Loss: 0.09111969359219074
0.654671504506141
Epoch 141/273, Loss: 0.09107229455063741
0.650471453757442
Epoch 151/273, Loss: 0.09018927843620379
0.6514725612936596
Epoch 161/273, Los

[I 2024-01-22 23:59:38,176] Trial 63 finished with value: 0.6515054832430928 and parameters: {'hidden_dim_h': 32, 'dropout': 0.11180323536388649, 'batch_size': 1031, 'n_epochs': 273}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/289, Loss: 1.3294210762813174
0.5344933166887089
Epoch 11/289, Loss: 0.6689353790776483
0.6113786587108374
Epoch 21/289, Loss: 0.3907076408123148
0.6256269630843064
Epoch 31/289, Loss: 0.29377173863608264
0.6341589967702486
Epoch 41/289, Loss: 0.26134462860123864
0.6378212650409758
Epoch 51/289, Loss: 0.2435346600310556
0.6370467942611214
Epoch 61/289, Loss: 0.22702196343191738
0.6412934799333273
Epoch 71/289, Loss: 0.20990240779416314
0.6402131193135822
Epoch 81/289, Loss: 0.19288208515479646
0.6396733530982964
Epoch 91/289, Loss: 0.17643555896035557
0.6419130358077708
Epoch 101/289, Loss: 0.1603827548438105
0.6246246179339418
Epoch 111/289, Loss: 0.1462388436855941
0.646905947373669
Epoch 121/289, Loss: 0.13303458947559882
0.6451524139975485
Epoch 131/289, Loss: 0.12232836978188877
0.6448474192932148
Epoch 141/289, Loss: 0.11303902854179514
0.6382671868639104
Epoch 151/289, Loss: 0.1057235246588444
0.6338837381626977
Epoch 161/289, Loss:

[I 2024-01-23 00:00:47,204] Trial 64 finished with value: 0.6478193634977377 and parameters: {'hidden_dim_h': 11, 'dropout': 0.07499887862340757, 'batch_size': 834, 'n_epochs': 289}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/224, Loss: 1.620527487534743
0.5155058826760633
Epoch 11/224, Loss: 0.8985156187644372
0.6028322066130387
Epoch 21/224, Loss: 0.5224699022678229
0.6211235309476715
Epoch 31/224, Loss: 0.3487741442827078
0.6318710303436942
Epoch 41/224, Loss: 0.27870467419807726
0.6394500054211463
Epoch 51/224, Loss: 0.25186922630438435
0.6396673048241674
Epoch 61/224, Loss: 0.23764214320824698
0.6424213331830159
Epoch 71/224, Loss: 0.22397954418109015
0.6434045138626135
Epoch 81/224, Loss: 0.21125872318561262
0.6421455328365427
Epoch 91/224, Loss: 0.19806169202694526
0.6405589339090157
Epoch 101/224, Loss: 0.18468761959901223
0.638572354294397
Epoch 111/224, Loss: 0.17200034914108422
0.6441399659564148
Epoch 121/224, Loss: 0.1582993996831087
0.6397607193826974
Epoch 131/224, Loss: 0.14681162914404502
0.6443464503863562
Epoch 141/224, Loss: 0.13576237800029609
0.6478200733880264
Epoch 151/224, Loss: 0.12607010081410408
0.6461106539846186
Epoch 161/224, Loss

[I 2024-01-23 00:01:45,187] Trial 65 finished with value: 0.6507581655990743 and parameters: {'hidden_dim_h': 24, 'dropout': 0.0984140667824384, 'batch_size': 937, 'n_epochs': 224}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/262, Loss: 0.7454352165971484
0.5431205695284954
Epoch 11/262, Loss: 0.3034202360681125
0.6244173235472958
Epoch 21/262, Loss: 0.1677812018564769
0.63891721333191
Epoch 31/262, Loss: 0.1376285558300359
0.6482315194891282
Epoch 41/262, Loss: 0.13038754968770913
0.6458619492042142
Epoch 51/262, Loss: 0.12609551713934966
0.647365580058489
Epoch 61/262, Loss: 0.12101005842643124
0.6477898303520732
Epoch 71/262, Loss: 0.11584960376577717
0.6493229498383623
Epoch 81/262, Loss: 0.11103894854230541
0.6474882095670025
Epoch 91/262, Loss: 0.10687360273940223
0.6473964901593665
Epoch 101/262, Loss: 0.10255268907972745
0.648594049181618
Epoch 111/262, Loss: 0.09913180741880621
0.6504270511877454
Epoch 121/262, Loss: 0.09620321941162858
0.6505348437095648
Epoch 131/262, Loss: 0.09400298153715474
0.6521235376761517
Epoch 141/262, Loss: 0.09206294135323592
0.6518679611225418
Epoch 151/262, Loss: 0.0909465357129063
0.6516050459328931
Epoch 161/262, Loss: 

[I 2024-01-23 00:02:47,900] Trial 66 finished with value: 0.6504872810609965 and parameters: {'hidden_dim_h': 16, 'dropout': 0.06175413770027978, 'batch_size': 868, 'n_epochs': 262}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/254, Loss: 0.2919830921533945
0.5610692116781801
Epoch 11/254, Loss: 0.2043757797093005
0.6300052964387072
Epoch 21/254, Loss: 0.1755093094464895
0.6435842059515853
Epoch 31/254, Loss: 0.14877053692534165
0.6419218432100691
Epoch 41/254, Loss: 0.1282185604040687
0.6436194750254017
Epoch 51/254, Loss: 0.11322551844893275
0.6427558453059857
Epoch 61/254, Loss: 0.10292524200033497
0.6455826531888145
Epoch 71/254, Loss: 0.09724890561522664
0.6456355290716597
Epoch 81/254, Loss: 0.09390719174533277
0.6484323450705515
Epoch 91/254, Loss: 0.09097354838976988
0.6502506602398814
Epoch 101/254, Loss: 0.09105210268014186
0.6502673994390891
Epoch 111/254, Loss: 0.08999986161251326
0.6528844491368427
Epoch 121/254, Loss: 0.0896764598988198
0.6528792616352547
Epoch 131/254, Loss: 0.09007720246508315
0.648064159283645
Epoch 141/254, Loss: 0.08932659332011197
0.6513357480585368
Epoch 151/254, Loss: 0.09001416793546162
0.6515131637544104
Epoch 161/254, Los

[I 2024-01-23 00:03:55,889] Trial 67 finished with value: 0.6542213134242674 and parameters: {'hidden_dim_h': 28, 'dropout': 0.13156820020889726, 'batch_size': 659, 'n_epochs': 254}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/207, Loss: 0.653152888597444
0.5570637863860195
Epoch 11/207, Loss: 0.1910627983337225
0.6351971569967922
Epoch 21/207, Loss: 0.1383186991131583
0.6415150725562354
Epoch 31/207, Loss: 0.12914540116177048
0.6439754672507624
Epoch 41/207, Loss: 0.12136316576669383
0.6466373315678569
Epoch 51/207, Loss: 0.11400610485742259
0.6488528581929599
Epoch 61/207, Loss: 0.10603380601766497
0.64519716874511
Epoch 71/207, Loss: 0.10045285661553227
0.6495403097267345
Epoch 81/207, Loss: 0.09631063113378924
0.6507529345498737
Epoch 91/207, Loss: 0.09335029073232828
0.6435611802811851
Epoch 101/207, Loss: 0.0919830540931502
0.6493232533129096
Epoch 111/207, Loss: 0.09099696178075879
0.6513188672247447
Epoch 121/207, Loss: 0.09039865607439085
0.6507628912660919
Epoch 131/207, Loss: 0.08999477465485417
0.6498607961617661
Epoch 141/207, Loss: 0.08956458922042403
0.6526307060320125
Epoch 151/207, Loss: 0.08969582842532979
0.6499785780847234
Epoch 161/207, Loss

[I 2024-01-23 00:04:53,366] Trial 68 finished with value: 0.6524973143153106 and parameters: {'hidden_dim_h': 29, 'dropout': 0.13318170298464677, 'batch_size': 565, 'n_epochs': 207}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/192, Loss: 0.3539094103349222
0.5579705707469425
Epoch 11/192, Loss: 0.14185985358985695
0.6412995356038741
Epoch 21/192, Loss: 0.12749821592021632
0.6400092731756218
Epoch 31/192, Loss: 0.11924042210385606
0.6395891695620506
Epoch 41/192, Loss: 0.11153041047824395
0.6476355737109166
Epoch 51/192, Loss: 0.10441612673772348
0.6452093615012247
Epoch 61/192, Loss: 0.09896008509236413
0.6441056228257185
Epoch 71/192, Loss: 0.09512697240790806
0.648209105796969
Epoch 81/192, Loss: 0.09221206746391349
0.6477617615928312
Epoch 91/192, Loss: 0.09091262821410154
0.6511133953791701
Epoch 101/192, Loss: 0.09086679626961013
0.6507521201449618
Epoch 111/192, Loss: 0.09034323027810535
0.6513585131480336
Epoch 121/192, Loss: 0.08943616659254641
0.6509379806055104
Epoch 131/192, Loss: 0.08954121877212783
0.6485224733692482
Epoch 141/192, Loss: 0.08951775991433375
0.651881123043006
Epoch 151/192, Loss: 0.08930767307410369
0.6499874170296112
Epoch 161/192, 

[I 2024-01-23 00:05:45,557] Trial 69 finished with value: 0.6526868371053259 and parameters: {'hidden_dim_h': 32, 'dropout': 0.121961993995743, 'batch_size': 657, 'n_epochs': 192}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/236, Loss: 0.2204892219919147
0.5625554651762034
Epoch 11/236, Loss: 0.17094246049722037
0.6330152763894752
Epoch 21/236, Loss: 0.1441610220706824
0.6352786912612519
Epoch 31/236, Loss: 0.12364968702648625
0.6476536413856432
Epoch 41/236, Loss: 0.10914896970445459
0.6472758113850547
Epoch 51/236, Loss: 0.1006194402774175
0.6492451560291905
Epoch 61/236, Loss: 0.09535740067561467
0.6469913872528873
Epoch 71/236, Loss: 0.09268374583034804
0.6506059078168493
Epoch 81/236, Loss: 0.09164331221219266
0.6470914943014666
Epoch 91/236, Loss: 0.09071734683080153
0.649381758510199
Epoch 101/236, Loss: 0.090215623830304
0.6505750575397433
Epoch 111/236, Loss: 0.09018087274197376
0.6517043204111851
Epoch 121/236, Loss: 0.08997776381897205
0.6497499938161081
Epoch 131/236, Loss: 0.08926043885223793
0.6513301433217793
Epoch 141/236, Loss: 0.08988355213042462
0.6530138199656594
Epoch 151/236, Loss: 0.08904676952145317
0.6550650041695694
Epoch 161/236, Los

[I 2024-01-23 00:06:48,358] Trial 70 finished with value: 0.6501549889794397 and parameters: {'hidden_dim_h': 27, 'dropout': 0.09312198554351624, 'batch_size': 743, 'n_epochs': 236}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/253, Loss: 0.3163821235779793
0.5575247139715426
Epoch 11/253, Loss: 0.1566566171184663
0.6349698726390604
Epoch 21/253, Loss: 0.14108598136132763
0.6464584973187856
Epoch 31/253, Loss: 0.13020224436636893
0.6453096916883196
Epoch 41/253, Loss: 0.1200517435227671
0.6459020072073661
Epoch 51/253, Loss: 0.11206688203157918
0.6509130919847866
Epoch 61/253, Loss: 0.10537288169707021
0.6471560430864317
Epoch 71/253, Loss: 0.09896154821880403
0.6489873540291496
Epoch 81/253, Loss: 0.09500787815740032
0.6477134699058259
Epoch 91/253, Loss: 0.09301010663470914
0.648959542714619
Epoch 101/253, Loss: 0.09051483533074779
0.650483988487353
Epoch 111/253, Loss: 0.09033821955803902
0.6515619839137716
Epoch 121/253, Loss: 0.08931171653732177
0.6487084824541777
Epoch 131/253, Loss: 0.08952756129926251
0.6509509172083996
Epoch 141/253, Loss: 0.08938139797218385
0.6512621939131743
Epoch 151/253, Loss: 0.08971817214642802
0.6489947225366549
Epoch 161/253, Lo

[I 2024-01-23 00:07:53,132] Trial 71 finished with value: 0.6528569854222338 and parameters: {'hidden_dim_h': 18, 'dropout': 0.08500409385311174, 'batch_size': 788, 'n_epochs': 253}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/151, Loss: 1.189759513911079
0.5517905396047225
Epoch 11/151, Loss: 0.5031447068733328
0.5770124535400382
Epoch 21/151, Loss: 0.23941173755070744
0.6218067764115344
Epoch 31/151, Loss: 0.17808293156764088
0.6339309665931642
Epoch 41/151, Loss: 0.1637804438086117
0.6407150775185625
Epoch 51/151, Loss: 0.15495421036201365
0.6409865790963254
Epoch 61/151, Loss: 0.14606370119487538
0.6428423451922038
Epoch 71/151, Loss: 0.13710468328174422
0.6395797563869755
Epoch 81/151, Loss: 0.12812579992939443
0.6465267848104893
Epoch 91/151, Loss: 0.11949692535049775
0.6476172250839818
Epoch 101/151, Loss: 0.11228322960874614
0.6474789505626722
Epoch 111/151, Loss: 0.10597046639989405
0.6476549603138297
Epoch 121/151, Loss: 0.1004988546757137
0.6463307252480194
Epoch 131/151, Loss: 0.09654726329095223
0.6463561427668814
Epoch 141/151, Loss: 0.0940628682865816
0.6471803193224367


[I 2024-01-23 00:08:34,659] Trial 72 finished with value: 0.6511477121847296 and parameters: {'hidden_dim_h': 31, 'dropout': 0.07388557771217888, 'batch_size': 703, 'n_epochs': 151}. Best is trial 61 with value: 0.6555382706706964.


Epoch 151/151, Loss: 0.09170890051652403
0.6511477121847296
Build model with 1 layers of attention
Epoch 1/277, Loss: 0.40637147188186645
0.5371211561718927
Epoch 11/277, Loss: 0.251787930727005
0.6232677619203102
Epoch 21/277, Loss: 0.22113336324691774
0.6362567769565569
Epoch 31/277, Loss: 0.1965010553598404
0.6415750616496945
Epoch 41/277, Loss: 0.17360899806022645
0.641939503300396
Epoch 51/277, Loss: 0.15334488153457643
0.643889011685403
Epoch 61/277, Loss: 0.13616201221942903
0.6481962360793436
Epoch 71/277, Loss: 0.12271629214286804
0.6464524715421215
Epoch 81/277, Loss: 0.11263691216707229
0.6459765548315949
Epoch 91/277, Loss: 0.10525178045034408
0.6489705559510357
Epoch 101/277, Loss: 0.09894984632730484
0.6474045607294262
Epoch 111/277, Loss: 0.09510247886180878
0.6495050127730837
Epoch 121/277, Loss: 0.09281272917985917
0.6500641613097206
Epoch 131/277, Loss: 0.09082318902015686
0.6525224644113459
Epoch 141/277, Loss: 0.09015357255935669
0.6522533343852893
Epoch 151/277, Lo

[I 2024-01-23 00:09:41,506] Trial 73 finished with value: 0.6532049883595001 and parameters: {'hidden_dim_h': 14, 'dropout': 0.11432587429141461, 'batch_size': 962, 'n_epochs': 277}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/254, Loss: 0.17588500237023388
0.5579770044950035
Epoch 11/254, Loss: 0.12754526789541598
0.6418235615166592
Epoch 21/254, Loss: 0.10938845381692604
0.6444973614767721
Epoch 31/254, Loss: 0.09923819011008297
0.645179230490247
Epoch 41/254, Loss: 0.09378268542113127
0.6507454882454999
Epoch 51/254, Loss: 0.09145107258249212
0.6495522481433502
Epoch 61/254, Loss: 0.09001426509133091
0.6530470068836388
Epoch 71/254, Loss: 0.08962415821022457
0.6523271250794753
Epoch 81/254, Loss: 0.08875033066228584
0.6491506968680015
Epoch 91/254, Loss: 0.0891013236509429
0.6546175098278908
Epoch 101/254, Loss: 0.08887470817124402
0.6545065696462605
Epoch 111/254, Loss: 0.08877898421552446
0.64991436056074
Epoch 121/254, Loss: 0.08857604474933059
0.6500415262240272
Epoch 131/254, Loss: 0.08865339750493015
0.6543518734439
Epoch 141/254, Loss: 0.088918161061075
0.6526877140298133
Epoch 151/254, Loss: 0.08851881490813361
0.6509563242323707
Epoch 161/254, Loss: 

[I 2024-01-23 00:10:46,768] Trial 74 finished with value: 0.6519303065429913 and parameters: {'hidden_dim_h': 25, 'dropout': 0.055747200248915066, 'batch_size': 896, 'n_epochs': 254}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/290, Loss: 1.411061976014114
0.5460422537365425
Epoch 11/290, Loss: 0.5494616089797602
0.6146686956339746
Epoch 21/290, Loss: 0.2969311083235392
0.6349026560896189
Epoch 31/290, Loss: 0.24156995372074405
0.6402196539686341
Epoch 41/290, Loss: 0.2207793338996608
0.6401323996028505
Epoch 51/290, Loss: 0.1999979200886517
0.6396250906461168
Epoch 61/290, Loss: 0.17986994581978497
0.6367663107764175
Epoch 71/290, Loss: 0.15984899060028354
0.6458541625802365
Epoch 81/290, Loss: 0.14179504862645778
0.6406814530029941
Epoch 91/290, Loss: 0.12632629838658543
0.6438750452383507
Epoch 101/290, Loss: 0.11338085481306402
0.6469644788730852
Epoch 111/290, Loss: 0.10423352969128911
0.648522233560401
Epoch 121/290, Loss: 0.09749091671007436
0.6474649461203955
Epoch 131/290, Loss: 0.09357715943237631
0.6493867534012229
Epoch 141/290, Loss: 0.0909284531343274
0.6488535874714656
Epoch 151/290, Loss: 0.08965191045185415
0.6498033904574254
Epoch 161/290, Loss:

[I 2024-01-23 00:12:02,575] Trial 75 finished with value: 0.6529914995264898 and parameters: {'hidden_dim_h': 21, 'dropout': 0.10088017961998165, 'batch_size': 591, 'n_epochs': 290}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/232, Loss: 0.15827519744634627
0.564534801733315
Epoch 11/232, Loss: 0.12092397585511208
0.6314711687694331
Epoch 21/232, Loss: 0.10501781304677328
0.6369756331000439
Epoch 31/232, Loss: 0.09822790225346884
0.6420765853151624
Epoch 41/232, Loss: 0.09397449865937232
0.6496195378446984
Epoch 51/232, Loss: 0.09159743338823319
0.649420762265806
Epoch 61/232, Loss: 0.09298778598507246
0.6454966906749043
Epoch 71/232, Loss: 0.09014816756049791
0.6500453345173509
Epoch 81/232, Loss: 0.09039446264505387
0.6500345478525525
Epoch 91/232, Loss: 0.09213739261031151
0.647891915345514
Epoch 101/232, Loss: 0.0902709553639094
0.6479235556531737
Epoch 111/232, Loss: 0.09033878048261007
0.649304824089148
Epoch 121/232, Loss: 0.09051758870482444
0.6514297960497929
Epoch 131/232, Loss: 0.09005530004700026
0.6500544894033143
Epoch 141/232, Loss: 0.09030598650376002
0.6504312615811292
Epoch 151/232, Loss: 0.08925776009758314
0.6493770402058247
Epoch 161/232, Lo

[I 2024-01-23 00:13:09,947] Trial 76 finished with value: 0.6502006119740156 and parameters: {'hidden_dim_h': 34, 'dropout': 0.13737483192325575, 'batch_size': 821, 'n_epochs': 232}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/268, Loss: 0.2977341403884272
0.5477936395030713
Epoch 11/268, Loss: 0.2041921038781443
0.6263507124122678
Epoch 21/268, Loss: 0.17045520918984566
0.6322630162531858
Epoch 31/268, Loss: 0.14280411216520494
0.6467133475662371
Epoch 41/268, Loss: 0.12281507974670779
0.6469460396972027
Epoch 51/268, Loss: 0.10892568096037834
0.6445691755738605
Epoch 61/268, Loss: 0.10016862086711391
0.6501376347596859
Epoch 71/268, Loss: 0.0951617831184018
0.6502158335904191
Epoch 81/268, Loss: 0.09222041911655857
0.6507059770445647
Epoch 91/268, Loss: 0.09027635161915133
0.6519451969201493
Epoch 101/268, Loss: 0.08954368940284176
0.6510646584467042
Epoch 111/268, Loss: 0.09024998233202965
0.6527945935886419
Epoch 121/268, Loss: 0.08961670196825458
0.654118478593689
Epoch 131/268, Loss: 0.08947120366557952
0.6525166283784958
Epoch 141/268, Loss: 0.08910213338751946
0.655360671882269
Epoch 151/268, Loss: 0.08887081232763105
0.6505882133881111
Epoch 161/268, Lo

[I 2024-01-23 00:14:15,234] Trial 77 finished with value: 0.6533881042027843 and parameters: {'hidden_dim_h': 12, 'dropout': 0.08712755871526691, 'batch_size': 781, 'n_epochs': 268}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/171, Loss: 3.268611619869868
0.5193840396936595
Epoch 11/171, Loss: 2.173253079255422
0.545594279880068
Epoch 21/171, Loss: 1.4378003031015396
0.5749856615153056
Epoch 31/171, Loss: 0.9498179877797762
0.5942139391229977
Epoch 41/171, Loss: 0.644562932352225
0.6095062575260991
Epoch 51/171, Loss: 0.46680649866660434
0.6179221101302103
Epoch 61/171, Loss: 0.3621565488477548
0.09566737181066523
Epoch 71/171, Loss: 0.2662869691848755
0.5563752787466572
Epoch 81/171, Loss: 0.22421755331257978
0.6017453666296846
Epoch 91/171, Loss: 0.2058358540137609
0.6164142060429916
Epoch 101/171, Loss: 0.19579407386481762
0.6379986200355322
Epoch 111/171, Loss: 0.18795412654678026
0.6404492774332532
Epoch 121/171, Loss: 0.18108328680197397
0.6417186783426925
Epoch 131/171, Loss: 0.17353035571674505
0.6418138890960913
Epoch 141/171, Loss: 0.1664054530362288
0.642420385019615
Epoch 151/171, Loss: 0.15856537595391273
0.6487129526552039
Epoch 161/171, Loss: 0.15

[I 2024-01-23 00:14:55,948] Trial 78 finished with value: 0.6491038713935006 and parameters: {'hidden_dim_h': 10, 'dropout': 0.07173171708422416, 'batch_size': 1006, 'n_epochs': 171}. Best is trial 61 with value: 0.6555382706706964.


Epoch 171/171, Loss: 0.14351236447691917
0.6491038713935006
Build model with 1 layers of attention
Epoch 1/284, Loss: 0.335938104561397
0.5762171355384216
Epoch 11/284, Loss: 0.14934619196823665
0.6269104238256318
Epoch 21/284, Loss: 0.13522193389279502
0.6392347785740121
Epoch 31/284, Loss: 0.12581679246255328
0.6436579163340351
Epoch 41/284, Loss: 0.11662554889917373
0.6448792454147001
Epoch 51/284, Loss: 0.10931099887405123
0.6455088882363543
Epoch 61/284, Loss: 0.10282006348882403
0.642902281551951
Epoch 71/284, Loss: 0.09851670478071485
0.6413747476183499
Epoch 81/284, Loss: 0.09628146801676069
0.6480946606068061
Epoch 91/284, Loss: 0.0935075649193355
0.6500760653004853
Epoch 101/284, Loss: 0.09240712012563433
0.6469737237657633
Epoch 111/284, Loss: 0.09207718606506075
0.6489792913978699
Epoch 121/284, Loss: 0.0923439519745963
0.6476866051105552
Epoch 131/284, Loss: 0.09246846288442612
0.647644218706814
Epoch 141/284, Loss: 0.09154892542532513
0.6480720457360137
Epoch 151/284, Los

[I 2024-01-23 00:16:10,985] Trial 79 finished with value: 0.6487872983061614 and parameters: {'hidden_dim_h': 28, 'dropout': 0.2844555410408278, 'batch_size': 693, 'n_epochs': 284}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/218, Loss: 0.9342415676168774
0.5547629487417094
Epoch 11/218, Loss: 0.22852548892083374
0.6294883226390057
Epoch 21/218, Loss: 0.11441707206161125
0.6408585300354881
Epoch 31/218, Loss: 0.10049636062720547
0.6451253113603346
Epoch 41/218, Loss: 0.10007460561135541
0.6458468444960438
Epoch 51/218, Loss: 0.09765278661380643
0.6485952554094261
Epoch 61/218, Loss: 0.09695139673093091
0.6484000190082274
Epoch 71/218, Loss: 0.09535247460007668
0.6493002290607114
Epoch 81/218, Loss: 0.0920734248407509
0.651556941424246
Epoch 91/218, Loss: 0.09170457720756531
0.6505855675379351
Epoch 101/218, Loss: 0.08963643670406031
0.648620836341976
Epoch 111/218, Loss: 0.09150019109896991
0.6499195800837617
Epoch 121/218, Loss: 0.08926635024988133
0.648611810046684
Epoch 131/218, Loss: 0.08953850619170976
0.6499027790399385
Epoch 141/218, Loss: 0.08832776287327641
0.652491677506063
Epoch 151/218, Loss: 0.09038231116922005
0.649893781484637
Epoch 161/218, Loss

[I 2024-01-23 00:17:09,985] Trial 80 finished with value: 0.6515495627608994 and parameters: {'hidden_dim_h': 18, 'dropout': 0.05857923559507333, 'batch_size': 530, 'n_epochs': 218}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/224, Loss: 0.2931933171219296
0.5561396873765017
Epoch 11/224, Loss: 0.17701881792810228
0.6356179006703496
Epoch 21/224, Loss: 0.1570683949523502
0.6394315001945065
Epoch 31/224, Loss: 0.14034797306414004
0.6432253284276659
Epoch 41/224, Loss: 0.1253578911225001
0.6435707929904118
Epoch 51/224, Loss: 0.11412175165282355
0.6476096138736531
Epoch 61/224, Loss: 0.10487549503644307
0.649700312853718
Epoch 71/224, Loss: 0.09889029545916452
0.6480197690800422
Epoch 81/224, Loss: 0.09521252634348693
0.6488340105637793
Epoch 91/224, Loss: 0.0924665845102734
0.6506321179192324
Epoch 101/224, Loss: 0.09071226197260397
0.6486214742424896
Epoch 111/224, Loss: 0.09051911284526189
0.6543749106341655
Epoch 121/224, Loss: 0.09035096924614024
0.6537367385600877
Epoch 131/224, Loss: 0.08964409899932367
0.6508085219087125
Epoch 141/224, Loss: 0.08927565785469832
0.6513516298618515
Epoch 151/224, Loss: 0.08984338381776104
0.6501993954247371
Epoch 161/224, Lo

[I 2024-01-23 00:18:06,235] Trial 81 finished with value: 0.651468057185463 and parameters: {'hidden_dim_h': 24, 'dropout': 0.1251038956660741, 'batch_size': 908, 'n_epochs': 224}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/248, Loss: 1.5087886452674866
0.4404495936964073
Epoch 11/248, Loss: 0.7474752515554428
0.5800954378503843
Epoch 21/248, Loss: 0.3710892094033105
0.6169895436338301
Epoch 31/248, Loss: 0.21610862176333154
0.6342430055610294
Epoch 41/248, Loss: 0.16425992282373564
0.63744783414696
Epoch 51/248, Loss: 0.1483057721384934
0.644955243025433
Epoch 61/248, Loss: 0.1423415978040014
0.6486371296485645
Epoch 71/248, Loss: 0.13726589030453137
0.6460440220295566
Epoch 81/248, Loss: 0.13267488884074347
0.6422422101232382
Epoch 91/248, Loss: 0.12739817851356097
0.6500895605945262
Epoch 101/248, Loss: 0.12265302666596004
0.6476433323680932
Epoch 111/248, Loss: 0.1172992613698755
0.6482161142707259
Epoch 121/248, Loss: 0.11267317592033319
0.6480014081053691
Epoch 131/248, Loss: 0.10821571360741343
0.6505802049281083
Epoch 141/248, Loss: 0.10407236112015587
0.6493354645751921
Epoch 151/248, Loss: 0.10076405134584222
0.6504066110296217
Epoch 161/248, Loss: 

[I 2024-01-23 00:19:08,224] Trial 82 finished with value: 0.6520101895403505 and parameters: {'hidden_dim_h': 22, 'dropout': 0.10655501988525759, 'batch_size': 868, 'n_epochs': 248}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/242, Loss: 0.21861508202094299
0.5566807315503816
Epoch 11/242, Loss: 0.1528994349332956
0.632054508635417
Epoch 21/242, Loss: 0.13802119344472885
0.640239273260862
Epoch 31/242, Loss: 0.12505812685077006
0.640547000982136
Epoch 41/242, Loss: 0.11376017016860154
0.6447288545054299
Epoch 51/242, Loss: 0.10614293097303464
0.648785599815821
Epoch 61/242, Loss: 0.1003806613958799
0.6451283112873125
Epoch 71/242, Loss: 0.09664102357167464
0.6491606795784111
Epoch 81/242, Loss: 0.09399062824937013
0.6500195225612152
Epoch 91/242, Loss: 0.09242780105425762
0.6468894172882759
Epoch 101/242, Loss: 0.0919808172262632
0.6517531675863952
Epoch 111/242, Loss: 0.09120680277164166
0.6486867891028874
Epoch 121/242, Loss: 0.09161734065184227
0.6513083655262263
Epoch 131/242, Loss: 0.09078782682235424
0.649653526310389
Epoch 141/242, Loss: 0.09060627508621949
0.6554407540884962
Epoch 151/242, Loss: 0.09051608924682324
0.6512363390871129
Epoch 161/242, Loss:

[I 2024-01-23 00:20:09,425] Trial 83 finished with value: 0.652085706963169 and parameters: {'hidden_dim_h': 21, 'dropout': 0.22927586294947566, 'batch_size': 932, 'n_epochs': 242}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/298, Loss: 1.206824136816937
0.5461849762736656
Epoch 11/298, Loss: 0.6178074271782584
0.6018534637513848
Epoch 21/298, Loss: 0.3146688808565554
0.6252239539101007
Epoch 31/298, Loss: 0.1732491960991984
0.6339043975792553
Epoch 41/298, Loss: 0.11861510639605315
0.6406400755021847
Epoch 51/298, Loss: 0.10110330290120581
0.6455231411060107
Epoch 61/298, Loss: 0.09623008188994034
0.649179417319118
Epoch 71/298, Loss: 0.09581941722527794
0.650011486457122
Epoch 81/298, Loss: 0.09493282080992409
0.6509593792859898
Epoch 91/298, Loss: 0.09393612327783005
0.6496194002765283
Epoch 101/298, Loss: 0.09372637615255687
0.6489940454694293
Epoch 111/298, Loss: 0.09295678333095882
0.6539992705928868
Epoch 121/298, Loss: 0.09270722548598828
0.6507289965928266
Epoch 131/298, Loss: 0.09237107342999915
0.6529059256728007
Epoch 141/298, Loss: 0.09184387909329456
0.6530503955760164
Epoch 151/298, Loss: 0.09158681559821834
0.6524531683346888
Epoch 161/298, Loss

[I 2024-01-23 00:21:25,235] Trial 84 finished with value: 0.6524303362633004 and parameters: {'hidden_dim_h': 23, 'dropout': 0.1571698582721869, 'batch_size': 1060, 'n_epochs': 298}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/263, Loss: 0.17918064583230903
0.5531841099975489
Epoch 11/263, Loss: 0.14056329042823226
0.6349208257115493
Epoch 21/263, Loss: 0.12020132138773247
0.6378888014288604
Epoch 31/263, Loss: 0.1098588169724853
0.6444280695807466
Epoch 41/263, Loss: 0.09997687406010097
0.6477449665032641
Epoch 51/263, Loss: 0.0967675632348767
0.6486523513915553
Epoch 61/263, Loss: 0.09292068304838957
0.6502915167632471
Epoch 71/263, Loss: 0.09140033826783851
0.6512746310381684
Epoch 81/263, Loss: 0.09073062875756512
0.6495370239130438
Epoch 91/263, Loss: 0.0893473611385734
0.6528839892197595
Epoch 101/263, Loss: 0.09081184450123045
0.6514501670208831
Epoch 111/263, Loss: 0.08942999304444701
0.650808862661541
Epoch 121/263, Loss: 0.09046213190864634
0.6503598100687705
Epoch 131/263, Loss: 0.08869019481870863
0.6515271717560308
Epoch 141/263, Loss: 0.0903816239701377
0.650146510318278
Epoch 151/263, Loss: 0.089270136974476
0.652613648392636
Epoch 161/263, Loss: 

[I 2024-01-23 00:22:27,282] Trial 85 finished with value: 0.653742050918841 and parameters: {'hidden_dim_h': 15, 'dropout': 0.09641835108838209, 'batch_size': 916, 'n_epochs': 263}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/260, Loss: 2.8921285181334526
0.5434598931994858
Epoch 11/260, Loss: 1.6163237275499287
0.5766838871425984
Epoch 21/260, Loss: 0.9039208419395216
0.6067461203921809
Epoch 31/260, Loss: 0.5429599799893119
0.6128437307109441
Epoch 41/260, Loss: 0.31033707116589404
0.561823315075515
Epoch 51/260, Loss: 0.21852991978327432
0.6062077272069671
Epoch 61/260, Loss: 0.1895275301102436
0.6332280530994583
Epoch 71/260, Loss: 0.17824229552890314
0.638248641812583
Epoch 81/260, Loss: 0.1696554133386323
0.6376110701618162
Epoch 91/260, Loss: 0.16136747101942697
0.641383038216792
Epoch 101/260, Loss: 0.1521700376814062
0.64406041784547
Epoch 111/260, Loss: 0.14331284075072317
0.6479181981102308
Epoch 121/260, Loss: 0.134435492031502
0.6429989374363373
Epoch 131/260, Loss: 0.12576883218505167
0.64624174730393
Epoch 141/260, Loss: 0.11825564684289874
0.646767793948081
Epoch 151/260, Loss: 0.11090450101729596
0.6504401738475651
Epoch 161/260, Loss: 0.104990

[I 2024-01-23 00:23:32,010] Trial 86 finished with value: 0.6530912421712228 and parameters: {'hidden_dim_h': 15, 'dropout': 0.09355852139652826, 'batch_size': 731, 'n_epochs': 260}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/275, Loss: 0.3111043741305669
0.5477054188534776
Epoch 11/275, Loss: 0.2200698624054591
0.6353057817464187
Epoch 21/275, Loss: 0.19539019713799158
0.6399004943928328
Epoch 31/275, Loss: 0.1706194927295049
0.6399136486329232
Epoch 41/275, Loss: 0.14975136915842693
0.6443063860448641
Epoch 51/275, Loss: 0.13159170895814895
0.6442408584161254
Epoch 61/275, Loss: 0.11797801032662392
0.6513254347400591
Epoch 71/275, Loss: 0.10762963568170865
0.6493836844501562
Epoch 81/275, Loss: 0.10046438425779343
0.654812680737879
Epoch 91/275, Loss: 0.09570128048459689
0.6520735220166572
Epoch 101/275, Loss: 0.09257683803637823
0.64928569456576
Epoch 111/275, Loss: 0.09125138719876608
0.6553615006332885
Epoch 121/275, Loss: 0.08984348475933075
0.6550875478913881
Epoch 131/275, Loss: 0.08963614056507746
0.6551531418935692
Epoch 141/275, Loss: 0.0893366349240144
0.6545276271165276
Epoch 151/275, Loss: 0.08894641175866128
0.6543851861299615
Epoch 161/275, Loss

[I 2024-01-23 00:24:38,316] Trial 87 finished with value: 0.6539608284907454 and parameters: {'hidden_dim_h': 12, 'dropout': 0.07879613942333454, 'batch_size': 809, 'n_epochs': 275}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/277, Loss: 1.361258190125227
0.536161085967464
Epoch 11/277, Loss: 0.5802140384912491
0.6101742106242931
Epoch 21/277, Loss: 0.25478003174066544
0.6287208443164263
Epoch 31/277, Loss: 0.15013308450579643
0.6391385156482381
Epoch 41/277, Loss: 0.12437112419866025
0.641552715697428
Epoch 51/277, Loss: 0.11836456297896802
0.6489017491322362
Epoch 61/277, Loss: 0.11525188223458827
0.6496870491207788
Epoch 71/277, Loss: 0.1125619295053184
0.6502491855956188
Epoch 81/277, Loss: 0.10881094331853092
0.6467554965495036
Epoch 91/277, Loss: 0.1056382420938462
0.6487715036219417
Epoch 101/277, Loss: 0.10254500387236476
0.6499369637546749
Epoch 111/277, Loss: 0.09915820835158229
0.6486138779353776
Epoch 121/277, Loss: 0.0970747780520469
0.6495095727635231
Epoch 131/277, Loss: 0.09461820521391928
0.6489943706259086
Epoch 141/277, Loss: 0.09352410328574479
0.6516383770187201
Epoch 151/277, Loss: 0.0917745886836201
0.6491905852180757
Epoch 161/277, Loss: 

[I 2024-01-23 00:25:45,731] Trial 88 finished with value: 0.6512284228735465 and parameters: {'hidden_dim_h': 11, 'dropout': 0.07865143483480316, 'batch_size': 763, 'n_epochs': 277}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/285, Loss: 0.5450558503468831
0.5485229084595935
Epoch 11/285, Loss: 0.28864146371682486
0.6326186606999666
Epoch 21/285, Loss: 0.24170807351668674
0.6435173571575713
Epoch 31/285, Loss: 0.21614082604646684
0.6435706807545449
Epoch 41/285, Loss: 0.19128619680802028
0.6427039710482109
Epoch 51/285, Loss: 0.16801634728908538
0.638179060438257
Epoch 61/285, Loss: 0.1478392442067464
0.6416264779488116
Epoch 71/285, Loss: 0.1309598830838998
0.6477221356605316
Epoch 81/285, Loss: 0.11758450369040171
0.6462740012233837
Epoch 91/285, Loss: 0.1077084886531035
0.6491047963397224
Epoch 101/285, Loss: 0.10043269445498784
0.6503844816210583
Epoch 111/285, Loss: 0.09577771201729775
0.6481073807773314
Epoch 121/285, Loss: 0.09231678520639737
0.6511825905688209
Epoch 131/285, Loss: 0.09095804120103518
0.6481433367686863
Epoch 141/285, Loss: 0.08960715259114901
0.6515980274771414
Epoch 151/285, Loss: 0.08893510078390439
0.6549788415741905
Epoch 161/285, Lo

[I 2024-01-23 00:26:54,490] Trial 89 finished with value: 0.6531626282321759 and parameters: {'hidden_dim_h': 13, 'dropout': 0.0678318878743299, 'batch_size': 806, 'n_epochs': 285}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/273, Loss: 0.2996148539000544
0.544821937824595
Epoch 11/273, Loss: 0.20534160681839647
0.6297498849139888
Epoch 21/273, Loss: 0.17435514258927312
0.6391929497608476
Epoch 31/273, Loss: 0.14760906686042918
0.6388907504432378
Epoch 41/273, Loss: 0.1276397687093965
0.6413335993394651
Epoch 51/273, Loss: 0.11264275065783796
0.6456365541295702
Epoch 61/273, Loss: 0.10282062507908919
0.6449073522300564
Epoch 71/273, Loss: 0.09669953276371133
0.6481066058138765
Epoch 81/273, Loss: 0.09352671483467365
0.6488699963238429
Epoch 91/273, Loss: 0.09062381962250018
0.651372175879704
Epoch 101/273, Loss: 0.09009089346589713
0.6503968785378588
Epoch 111/273, Loss: 0.08973451446870277
0.650170618809315
Epoch 121/273, Loss: 0.0892063145493639
0.6507603495384762
Epoch 131/273, Loss: 0.08922943806853788
0.6527253490389532
Epoch 141/273, Loss: 0.0890748097464956
0.6490735539520484
Epoch 151/273, Loss: 0.08908435008649168
0.6509254494320812
Epoch 161/273, Loss

[I 2024-01-23 00:27:59,760] Trial 90 finished with value: 0.6526636575311319 and parameters: {'hidden_dim_h': 12, 'dropout': 0.0800835257834012, 'batch_size': 836, 'n_epochs': 273}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/268, Loss: 0.9450589513778687
0.5383567958344604
Epoch 11/268, Loss: 0.4463847458362579
0.6217282092945833
Epoch 21/268, Loss: 0.23299895226955414
0.6262700747897512
Epoch 31/268, Loss: 0.1594688218832016
0.6363606892124
Epoch 41/268, Loss: 0.13939284682273864
0.6424422405382008
Epoch 51/268, Loss: 0.13307799965143205
0.6444594141435568
Epoch 61/268, Loss: 0.12887516111135483
0.6487351238773428
Epoch 71/268, Loss: 0.1248856657743454
0.6445992508749095
Epoch 81/268, Loss: 0.12104882508516311
0.6466821859017832
Epoch 91/268, Loss: 0.11656169176101684
0.6471725061993953
Epoch 101/268, Loss: 0.11232290893793107
0.6507705349308004
Epoch 111/268, Loss: 0.1088172373175621
0.6524097583370917
Epoch 121/268, Loss: 0.10533274918794631
0.6475120682797956
Epoch 131/268, Loss: 0.10146105408668518
0.6512765020146797
Epoch 141/268, Loss: 0.09908061504364013
0.6500985454515877
Epoch 151/268, Loss: 0.09605309754610061
0.649725844336053
Epoch 161/268, Loss: 

[I 2024-01-23 00:29:04,434] Trial 91 finished with value: 0.6542508919663783 and parameters: {'hidden_dim_h': 14, 'dropout': 0.0992939378280852, 'batch_size': 962, 'n_epochs': 268}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/250, Loss: 0.2292873740196228
0.5412322639935764
Epoch 11/250, Loss: 0.1884344005584717
0.6289337506033565
Epoch 21/250, Loss: 0.16067663252353667
0.6423642757814556
Epoch 31/250, Loss: 0.13973255276679994
0.6459510884813442
Epoch 41/250, Loss: 0.12365410268306733
0.6412281518514688
Epoch 51/250, Loss: 0.11173800796270371
0.6463364063399661
Epoch 61/250, Loss: 0.10390306502580643
0.6427245789418179
Epoch 71/250, Loss: 0.09829673916101456
0.6493495009798966
Epoch 81/250, Loss: 0.09429325342178345
0.6499846659006724
Epoch 91/250, Loss: 0.09206095218658447
0.650876191843578
Epoch 101/250, Loss: 0.09081462025642395
0.6503564538212454
Epoch 111/250, Loss: 0.09015129923820496
0.6496403118617389
Epoch 121/250, Loss: 0.0897312206029892
0.6523971189616548
Epoch 131/250, Loss: 0.08916231662034989
0.6492943614584894
Epoch 141/250, Loss: 0.08951079130172729
0.6512651095333861
Epoch 151/250, Loss: 0.08894558966159821
0.6504420085592133
Epoch 161/250, L

[I 2024-01-23 00:30:04,694] Trial 92 finished with value: 0.6542682980785538 and parameters: {'hidden_dim_h': 13, 'dropout': 0.08868352423339793, 'batch_size': 964, 'n_epochs': 250}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/251, Loss: 0.9739951252937317
0.5337322876140556
Epoch 11/251, Loss: 0.44301112174987795
0.5948682699674622
Epoch 21/251, Loss: 0.2167542588710785
0.6175740390366151
Epoch 31/251, Loss: 0.13624873995780945
0.6321360866646869
Epoch 41/251, Loss: 0.11727776706218719
0.640233980842759
Epoch 51/251, Loss: 0.11010376065969467
0.6444737791551854
Epoch 61/251, Loss: 0.10807454437017441
0.6448281744982532
Epoch 71/251, Loss: 0.10763550907373429
0.6466501326343884
Epoch 81/251, Loss: 0.10344080060720444
0.6486929507544509
Epoch 91/251, Loss: 0.10217061966657638
0.6486925851920551
Epoch 101/251, Loss: 0.09953330606222152
0.6505464317962518
Epoch 111/251, Loss: 0.10071887850761413
0.6473837267147488
Epoch 121/251, Loss: 0.09606526523828507
0.6515658244494352
Epoch 131/251, Loss: 0.09521200120449066
0.6482032967572223
Epoch 141/251, Loss: 0.09550944715738297
0.6522581372373307
Epoch 151/251, Loss: 0.09626085072755813
0.6500088443365335
Epoch 161/251, 

[I 2024-01-23 00:31:08,298] Trial 93 finished with value: 0.6515777548709556 and parameters: {'hidden_dim_h': 10, 'dropout': 0.10954307149863214, 'batch_size': 993, 'n_epochs': 251}. Best is trial 61 with value: 0.6555382706706964.


Epoch 251/251, Loss: 0.09061837196350098
0.6515777548709556
Build model with 1 layers of attention
Epoch 1/229, Loss: 0.1749574589729309
0.5466865997750104
Epoch 11/229, Loss: 0.1264403474330902
0.6259506908591194
Epoch 21/229, Loss: 0.10969170778989792
0.638463263032679
Epoch 31/229, Loss: 0.09999296605587006
0.6440159473171193
Epoch 41/229, Loss: 0.09455140441656112
0.6500509467130505
Epoch 51/229, Loss: 0.0920261225104332
0.6478760707904001
Epoch 61/229, Loss: 0.09087418496608735
0.6459328747854414
Epoch 71/229, Loss: 0.09014416307210922
0.6500658551286448
Epoch 81/229, Loss: 0.09015815317630768
0.6512545515231147
Epoch 91/229, Loss: 0.08958060920238495
0.6522404053502765
Epoch 101/229, Loss: 0.08948983430862427
0.6514546165070754
Epoch 111/229, Loss: 0.08913519084453583
0.6514031649853789
Epoch 121/229, Loss: 0.08871140092611313
0.6505493215524553
Epoch 131/229, Loss: 0.08904615551233291
0.6514479967543498
Epoch 141/229, Loss: 0.08926126927137375
0.6520722136786663
Epoch 151/229, L

[I 2024-01-23 00:32:03,927] Trial 94 finished with value: 0.6517768658141934 and parameters: {'hidden_dim_h': 16, 'dropout': 0.10144826176547482, 'batch_size': 959, 'n_epochs': 229}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/240, Loss: 0.38854341705640155
0.5499763440840589
Epoch 11/240, Loss: 0.2083673191567262
0.6231324461909672
Epoch 21/240, Loss: 0.1775202782203754
0.6373941301629508
Epoch 31/240, Loss: 0.16604129349191984
0.643909238626545
Epoch 41/240, Loss: 0.1558223987619082
0.6431967664006232
Epoch 51/240, Loss: 0.1449937354773283
0.6448336765347165
Epoch 61/240, Loss: 0.13400795559088388
0.6489477287983256
Epoch 71/240, Loss: 0.1250214840595921
0.6434069935383852
Epoch 81/240, Loss: 0.11708453204482794
0.6483674473527447
Epoch 91/240, Loss: 0.11043368776639302
0.6488138958617514
Epoch 101/240, Loss: 0.10484648775309324
0.6459852173220438
Epoch 111/240, Loss: 0.10051350419720013
0.6537168558957488
Epoch 121/240, Loss: 0.09663254891832669
0.6515880390230637
Epoch 131/240, Loss: 0.09429872905214627
0.6529890167463209
Epoch 141/240, Loss: 0.09324503348519404
0.6508777704883606
Epoch 151/240, Loss: 0.09180960649003585
0.6491156965753239
Epoch 161/240, Los

[I 2024-01-23 00:33:01,373] Trial 95 finished with value: 0.654050515690034 and parameters: {'hidden_dim_h': 13, 'dropout': 0.14955872561064365, 'batch_size': 1024, 'n_epochs': 240}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/235, Loss: 0.19850995826224485
0.5449220365623626
Epoch 11/235, Loss: 0.1475016629944245
0.635053729520757
Epoch 21/235, Loss: 0.12521068596591553
0.6427804822023533
Epoch 31/235, Loss: 0.11153296753764153
0.6452778370020271
Epoch 41/235, Loss: 0.10209777982284625
0.6458325198283514
Epoch 51/235, Loss: 0.09687421346704166
0.6495082168406731
Epoch 61/235, Loss: 0.09388869379957517
0.6491962965129026
Epoch 71/235, Loss: 0.09199502132833004
0.6509797046135511
Epoch 81/235, Loss: 0.0914634196087718
0.649329650533537
Epoch 91/235, Loss: 0.09111785795539618
0.6491014134106942
Epoch 101/235, Loss: 0.09064876722792785
0.6514057185228687
Epoch 111/235, Loss: 0.09027465308705966
0.6502989851861203
Epoch 121/235, Loss: 0.08979710408796866
0.6506372084871285
Epoch 131/235, Loss: 0.09002064106365044
0.650938379887641
Epoch 141/235, Loss: 0.08996424544602633
0.6500024890728504
Epoch 151/235, Loss: 0.08999783359467983
0.651760578662393
Epoch 161/235, Los

[I 2024-01-23 00:33:57,666] Trial 96 finished with value: 0.6507155470089321 and parameters: {'hidden_dim_h': 13, 'dropout': 0.1518497716101696, 'batch_size': 1017, 'n_epochs': 235}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/241, Loss: 0.6023674218550973
0.5437927492544256
Epoch 11/241, Loss: 0.35832565763722296
0.6032197713142006
Epoch 21/241, Loss: 0.28855222722758417
0.0009196568381014382
Epoch 31/241, Loss: 0.22905797802883646
0.5855194118029757
Epoch 41/241, Loss: 0.2094627100488414
0.6178546077959379
Epoch 51/241, Loss: 0.1940091530913892
0.6308514343200419
Epoch 61/241, Loss: 0.17853163247523102
0.6283058899016261
Epoch 71/241, Loss: 0.16397528026414954
0.6418562189550575
Epoch 81/241, Loss: 0.1499427291362182
0.6390268360028992
Epoch 91/241, Loss: 0.13801625435766968
0.6428483431680585
Epoch 101/241, Loss: 0.12715777884358945
0.6432968539823245
Epoch 111/241, Loss: 0.11829546547454337
0.6453957504629421
Epoch 121/241, Loss: 0.11080465595359387
0.645468195438523
Epoch 131/241, Loss: 0.10508322553790134
0.6478931941365615
Epoch 141/241, Loss: 0.10018809398879176
0.6475451353285009
Epoch 151/241, Loss: 0.09661929730487906
0.6477675399372155
Epoch 161/241,

[I 2024-01-23 00:34:57,036] Trial 97 finished with value: 0.6492755639996836 and parameters: {'hidden_dim_h': 17, 'dropout': 0.14567599288078287, 'batch_size': 1054, 'n_epochs': 241}. Best is trial 61 with value: 0.6555382706706964.


Epoch 241/241, Loss: 0.09005059237065523
0.6492755639996836
Build model with 1 layers of attention
Epoch 1/265, Loss: 0.461052190173756
0.5501553733390534
Epoch 11/265, Loss: 0.19567093049938028
0.6249604967791038
Epoch 21/265, Loss: 0.12288762053305452
0.6425937744492094
Epoch 31/265, Loss: 0.10824732956561176
0.6437793787792477
Epoch 41/265, Loss: 0.10532195662910288
0.6450777401183078
Epoch 51/265, Loss: 0.10370832546190782
0.6472168255630107
Epoch 61/265, Loss: 0.10139390893957832
0.6465224828766156
Epoch 71/265, Loss: 0.0999103926799514
0.6502180986672148
Epoch 81/265, Loss: 0.09838549589568918
0.6483146899338902
Epoch 91/265, Loss: 0.09671169045296582
0.6534989135678001
Epoch 101/265, Loss: 0.09498524090105837
0.6499314322856775
Epoch 111/265, Loss: 0.09416489336978305
0.6501750011569233
Epoch 121/265, Loss: 0.09277494962919842
0.650900084248364
Epoch 131/265, Loss: 0.09185813807628372
0.6522280777086074
Epoch 141/265, Loss: 0.09122482551769777
0.6517189928694634
Epoch 151/265, L

[I 2024-01-23 00:35:58,727] Trial 98 finished with value: 0.6530765818054047 and parameters: {'hidden_dim_h': 19, 'dropout': 0.18015341321424932, 'batch_size': 1092, 'n_epochs': 265}. Best is trial 61 with value: 0.6555382706706964.


Build model with 1 layers of attention
Epoch 1/300, Loss: 1.4620852057750409
0.5371298922488973
Epoch 11/300, Loss: 0.7968951601248521
0.5988678634543991
Epoch 21/300, Loss: 0.46730778882136714
0.6194897449073978
Epoch 31/300, Loss: 0.3251579633125892
0.6313664917966048
Epoch 41/300, Loss: 0.2751364971582706
0.6387733989505743
Epoch 51/300, Loss: 0.25119618899547136
0.6387061015042053
Epoch 61/300, Loss: 0.23862613279085892
0.6413662944518752
Epoch 71/300, Loss: 0.22409971746114585
0.6374230281491446
Epoch 81/300, Loss: 0.20958854601933405
0.6379875825343709
Epoch 91/300, Loss: 0.1954636172606395
0.646328247551603
Epoch 101/300, Loss: 0.18174579166449034
0.6435389023534298
Epoch 111/300, Loss: 0.16747571470645758
0.6435238121088317
Epoch 121/300, Loss: 0.15473296493291855
0.645461580038438
Epoch 131/300, Loss: 0.14259898719879296
0.6459765793750333
Epoch 141/300, Loss: 0.13167208060622215
0.6434554592554478
Epoch 151/300, Loss: 0.12198599800467491
0.6494145296197608
Epoch 161/300, Loss

[I 2024-01-23 00:37:10,324] Trial 99 finished with value: 0.6520817561877679 and parameters: {'hidden_dim_h': 14, 'dropout': 0.11797004618845439, 'batch_size': 947, 'n_epochs': 300}. Best is trial 61 with value: 0.6555382706706964.
[I 2024-01-23 00:37:10,401] A new study created in memory with name: no-name-b89b042d-7d63-4a00-9d72-a03d7f5e05b5


Best Trial:
  Criterion: 0.6555
  Params: 
    hidden_dim_h: 28
    dropout: 0.11557890065705236
    batch_size: 884
    n_epochs: 247
TF_1 achieved R2 = 0.6527893204455587
Build model with 2 layers of attention
Epoch 1/129, Loss: 1.1284335792064666
0.548230906671903
Epoch 11/129, Loss: 0.11483230097662835
0.646965536627242
Epoch 21/129, Loss: 0.09155050281967436
0.6786836018680222
Epoch 31/129, Loss: 0.07959651162936573
0.7025166788719794
Epoch 41/129, Loss: 0.07212912782671906
0.7129555399850162
Epoch 51/129, Loss: 0.06860069440943854
0.7214293324122525
Epoch 61/129, Loss: 0.06373332875470321
0.7308502317111405
Epoch 71/129, Loss: 0.061461889726065456
0.7267854154197431
Epoch 81/129, Loss: 0.05817965872230984
0.7288741114286095
Epoch 91/129, Loss: 0.05721161179244518
0.7311848240175645
Epoch 101/129, Loss: 0.05426234196694124
0.7300526367639765
Epoch 111/129, Loss: 0.053270535383905686
0.7361230501692366
Epoch 121/129, Loss: 0.05120389543118931
0.7368660240012932


[I 2024-01-23 00:38:56,609] Trial 0 finished with value: 0.7368660240012932 and parameters: {'hidden_dim_h': 28, 'dropout': 0.20882068709656448, 'batch_size': 114, 'n_epochs': 129}. Best is trial 0 with value: 0.7368660240012932.


Found better hyperparameter, update model
Build model with 2 layers of attention
Epoch 1/30, Loss: 0.249738703481853
0.5714280326601775
Epoch 11/30, Loss: 0.14832213365783295
0.635294233217764
Epoch 21/30, Loss: 0.11922873463481665
0.6595343645468784


[I 2024-01-23 00:39:10,553] Trial 1 finished with value: 0.6595343645468784 and parameters: {'hidden_dim_h': 47, 'dropout': 0.058739074521315486, 'batch_size': 506, 'n_epochs': 30}. Best is trial 0 with value: 0.7368660240012932.


Build model with 2 layers of attention
Epoch 1/291, Loss: 1.1942608328360431
0.5091788334925842
Epoch 11/291, Loss: 0.12511501412065523
0.640594757917459
Epoch 21/291, Loss: 0.10287573069052876
0.6645363726695188
Epoch 31/291, Loss: 0.09237246046651085
0.673700077248594
Epoch 41/291, Loss: 0.08111515549837418
0.6953206686057567
Epoch 51/291, Loss: 0.07478559073412193
0.7048028771203176
Epoch 61/291, Loss: 0.06916684759253601
0.7160081309496708
Epoch 71/291, Loss: 0.06596084030450515
0.7151277622334
Epoch 81/291, Loss: 0.06487518482191383
0.7242140642653385
Epoch 91/291, Loss: 0.06163703709981352
0.7300662653097091
Epoch 101/291, Loss: 0.05842501448713384
0.7204987014068743
Epoch 111/291, Loss: 0.05691540459135793
0.7260287979548831
Epoch 121/291, Loss: 0.05499577357099866
0.7299688515812185
Epoch 131/291, Loss: 0.0536632387424415
0.7262343954300897
Epoch 141/291, Loss: 0.05199273717853258
0.7283641220938879
Epoch 151/291, Loss: 0.05123570543836872
0.7272543079244334
Epoch 161/291, Loss

[I 2024-01-23 00:41:49,246] Trial 2 finished with value: 0.7382189303326421 and parameters: {'hidden_dim_h': 36, 'dropout': 0.27956341820897956, 'batch_size': 226, 'n_epochs': 291}. Best is trial 2 with value: 0.7382189303326421.


Epoch 291/291, Loss: 0.04074150109487885
0.7382189303326421
Found better hyperparameter, update model
Build model with 2 layers of attention
Epoch 1/242, Loss: 1.0661415238129466
0.5382522098219555
Epoch 11/242, Loss: 0.35338101496821955
0.6249409727319878
Epoch 21/242, Loss: 0.1532018988540298
0.6745167004132901
Epoch 31/242, Loss: 0.11558742429080762
0.6760318006694558
Epoch 41/242, Loss: 0.10554009087775883
0.7019132941209557
Epoch 51/242, Loss: 0.09795501926227619
0.7229367587730102
Epoch 61/242, Loss: 0.08930483029076927
0.7283579167908253
Epoch 71/242, Loss: 0.08130331886442084
0.7351055459275112
Epoch 81/242, Loss: 0.07228229410554234
0.7390677892662368
Epoch 91/242, Loss: 0.06516810849701103
0.7380330852537867
Epoch 101/242, Loss: 0.05720655061304569
0.7484370990666942
Epoch 111/242, Loss: 0.0512821264564991
0.7459818076119494
Epoch 121/242, Loss: 0.04690562570957761
0.7401151398943346
Epoch 131/242, Loss: 0.04234495121789606
0.7504594358941927
Epoch 141/242, Loss: 0.0396087376

[I 2024-01-23 00:43:29,919] Trial 3 finished with value: 0.7436351121368797 and parameters: {'hidden_dim_h': 40, 'dropout': 0.09514513286140537, 'batch_size': 637, 'n_epochs': 242}. Best is trial 3 with value: 0.7436351121368797.


Found better hyperparameter, update model
Build model with 2 layers of attention
Epoch 1/125, Loss: 0.2338366698400647
0.5476440837318473
Epoch 11/125, Loss: 0.16811533038522683
0.6123297924110721
Epoch 21/125, Loss: 0.12684084372777565
0.6478177257699009
Epoch 31/125, Loss: 0.10184801150770749
0.6744731254334453
Epoch 41/125, Loss: 0.08676751220927519
0.6814813662174302
Epoch 51/125, Loss: 0.07794730365276337
0.7017431346435279
Epoch 61/125, Loss: 0.07290646046692249
0.7118157134785535
Epoch 71/125, Loss: 0.06896758904936266
0.7115959485927221
Epoch 81/125, Loss: 0.0669354002411459
0.7214259998745678
Epoch 91/125, Loss: 0.06331771707125738
0.7265210137253576
Epoch 101/125, Loss: 0.06157646686130879
0.7299407795445689
Epoch 111/125, Loss: 0.060013887344622145
0.7309900544728782
Epoch 121/125, Loss: 0.057558291098650766
0.7382216000461983


[I 2024-01-23 00:44:16,199] Trial 4 finished with value: 0.7382216000461983 and parameters: {'hidden_dim_h': 19, 'dropout': 0.2129603736232572, 'batch_size': 474, 'n_epochs': 125}. Best is trial 3 with value: 0.7436351121368797.


Build model with 2 layers of attention
Epoch 1/53, Loss: 1.1271927470252627
0.5402961011188425
Epoch 11/53, Loss: 0.22617294698480575
0.6024703039071645
Epoch 21/53, Loss: 0.1393828552866739
0.6328161933727797
Epoch 31/53, Loss: 0.12680560858949783
0.650734499547588
Epoch 41/53, Loss: 0.11408562851803643
0.6643709951058148
Epoch 51/53, Loss: 0.10247996130159923
0.6758680624841187


[I 2024-01-23 00:44:36,247] Trial 5 finished with value: 0.6758680624841187 and parameters: {'hidden_dim_h': 16, 'dropout': 0.33814569298757113, 'batch_size': 384, 'n_epochs': 53}. Best is trial 3 with value: 0.7436351121368797.


Build model with 2 layers of attention
Epoch 1/90, Loss: 0.39240064720312756
0.5491875258703224
Epoch 11/90, Loss: 0.1937549989670515
0.6311547290955809
Epoch 21/90, Loss: 0.1609511934220791
0.648936132208001
Epoch 31/90, Loss: 0.14714080095291138
0.6513808859325358
Epoch 41/90, Loss: 0.1328005027025938
0.6730005874687641
Epoch 51/90, Loss: 0.1197842371960481
0.6961683199493648
Epoch 61/90, Loss: 0.1065843782077233
0.7074173230898445
Epoch 71/90, Loss: 0.09440204439063866
0.7104075296930036
Epoch 81/90, Loss: 0.08344396017491817
0.7277406688738415


[I 2024-01-23 00:45:18,470] Trial 6 finished with value: 0.7277406688738415 and parameters: {'hidden_dim_h': 49, 'dropout': 0.18348187206006206, 'batch_size': 1015, 'n_epochs': 90}. Best is trial 3 with value: 0.7436351121368797.


Build model with 2 layers of attention
Epoch 1/114, Loss: 0.16867261793878344
0.5389975054500076
Epoch 11/114, Loss: 0.10430739199121793
0.6244336529650859
Epoch 21/114, Loss: 0.09342854704569888
0.6396725105466499
Epoch 31/114, Loss: 0.0882919705990288
0.664188763757538
Epoch 41/114, Loss: 0.08419423902200328
0.6820456691755413
Epoch 51/114, Loss: 0.08033519986740968
0.6945692209646852
Epoch 61/114, Loss: 0.07776007532245582
0.700184142780524
Epoch 71/114, Loss: 0.07581908007462819
0.7089453071144404
Epoch 81/114, Loss: 0.07382327945971931
0.7116878733720854
Epoch 91/114, Loss: 0.07239932366819293
0.7114944568508982
Epoch 101/114, Loss: 0.07139819656947145
0.7144302561843644
Epoch 111/114, Loss: 0.06946497634743098
0.7217063643952828


[I 2024-01-23 00:46:13,379] Trial 7 finished with value: 0.7217063643952828 and parameters: {'hidden_dim_h': 10, 'dropout': 0.2494290272843066, 'batch_size': 222, 'n_epochs': 114}. Best is trial 3 with value: 0.7436351121368797.


Build model with 2 layers of attention
Epoch 1/111, Loss: 0.17747837472707034
0.5343427653536627
Epoch 11/111, Loss: 0.11206661788746715
0.6114468907657694
Epoch 21/111, Loss: 0.09610790237784386
0.636687759883952
Epoch 31/111, Loss: 0.08962516812607646
0.6555782685995752
Epoch 41/111, Loss: 0.08677361430600286
0.6703783926764098
Epoch 51/111, Loss: 0.08328081406652928
0.6783799162283238
Epoch 61/111, Loss: 0.08112059850245715
0.6890289924127007
Epoch 71/111, Loss: 0.07870906898751855
0.6920456348097473
Epoch 81/111, Loss: 0.07745820893906057
0.7003827025413152
Epoch 91/111, Loss: 0.0755813850555569
0.7037505984253203
Epoch 101/111, Loss: 0.07470258097164333
0.7062895673130711


[I 2024-01-23 00:46:59,858] Trial 8 finished with value: 0.7097451084623799 and parameters: {'hidden_dim_h': 11, 'dropout': 0.349516953926457, 'batch_size': 300, 'n_epochs': 111}. Best is trial 3 with value: 0.7436351121368797.


Epoch 111/111, Loss: 0.07291247812099755
0.7097451084623799
Build model with 2 layers of attention
Epoch 1/62, Loss: 0.661277847290039
0.5483382115798887
Epoch 11/62, Loss: 0.20584268152713775
0.6161572840809754
Epoch 21/62, Loss: 0.16490245481332144
0.6443986996358491
Epoch 31/62, Loss: 0.12616074879964193
0.6575453635732319
Epoch 41/62, Loss: 0.09726742784182231
0.6987040910123148
Epoch 51/62, Loss: 0.07974296738704045
0.7020680301178367
Epoch 61/62, Loss: 0.07059095432360966
0.7139045871063047


[I 2024-01-23 00:47:24,977] Trial 9 finished with value: 0.7139045871063047 and parameters: {'hidden_dim_h': 13, 'dropout': 0.13581081210824256, 'batch_size': 321, 'n_epochs': 62}. Best is trial 3 with value: 0.7436351121368797.


Build model with 2 layers of attention
Epoch 1/235, Loss: 1.2569586942935813
0.5406335667349486
Epoch 11/235, Loss: 0.6443908995595472
0.5945693568716971
Epoch 21/235, Loss: 0.39202430124940546
0.6252733471279559
Epoch 31/235, Loss: 0.2950374371018903
0.04865684101358177
Epoch 41/235, Loss: 0.2477925430084097
0.28845623025980827
Epoch 51/235, Loss: 0.2208847516569598
0.5494540767206039
Epoch 61/235, Loss: 0.19873046926383314
0.6368053412205542
Epoch 71/235, Loss: 0.1807618988999005
0.6698737027588542
Epoch 81/235, Loss: 0.16392765219869285
0.6844510917525514
Epoch 91/235, Loss: 0.14881227550835446
0.6869840721840167
Epoch 101/235, Loss: 0.13325361162424088
0.7007764792253548
Epoch 111/235, Loss: 0.11957250098729956
0.6989268241117431
Epoch 121/235, Loss: 0.10489250411247385
0.719068166895897
Epoch 131/235, Loss: 0.09307413522539468
0.721865614136572
Epoch 141/235, Loss: 0.08382108088197379
0.7173699793036107
Epoch 151/235, Loss: 0.07568703514748606
0.72426851367383
Epoch 161/235, Loss:

[I 2024-01-23 00:49:02,333] Trial 10 finished with value: 0.7345665540680005 and parameters: {'hidden_dim_h': 39, 'dropout': 0.07007384521386273, 'batch_size': 846, 'n_epochs': 235}. Best is trial 3 with value: 0.7436351121368797.


Build model with 2 layers of attention
Epoch 1/201, Loss: 0.2558923452287107
0.5395639995490528
Epoch 11/201, Loss: 0.19527481214420214
0.6291554584434743
Epoch 21/201, Loss: 0.157794396619539
0.6498665606715193
Epoch 31/201, Loss: 0.1276790552042626
0.6750526204793226
Epoch 41/201, Loss: 0.10372634030677177
0.69921801862684
Epoch 51/201, Loss: 0.08608412641931225
0.721063124368514
Epoch 61/201, Loss: 0.07399865360678853
0.7278633906761819
Epoch 71/201, Loss: 0.064975026491526
0.7385031778393396
Epoch 81/201, Loss: 0.058947970838965594
0.7393905491940052
Epoch 91/201, Loss: 0.05478522644655125
0.7456984261397281
Epoch 101/201, Loss: 0.05187095993676701
0.7460512178444056
Epoch 111/201, Loss: 0.04968070550947576
0.7498459186649364
Epoch 121/201, Loss: 0.04772154926448255
0.7515608225589688
Epoch 131/201, Loss: 0.04630887276820234
0.7503097296142909
Epoch 141/201, Loss: 0.04482192727359566
0.7488959939727282
Epoch 151/201, Loss: 0.04307738701636727
0.7530842201478223
Epoch 161/201, Loss:

[I 2024-01-23 00:50:12,089] Trial 11 finished with value: 0.7545831181863579 and parameters: {'hidden_dim_h': 24, 'dropout': 0.13450020208328284, 'batch_size': 646, 'n_epochs': 201}. Best is trial 11 with value: 0.7545831181863579.


Epoch 201/201, Loss: 0.03861890934609078
0.7545831181863579
Found better hyperparameter, update model
Build model with 2 layers of attention
Epoch 1/204, Loss: 2.534587684799643
0.5201834607352239
Epoch 11/204, Loss: 1.3629853690371794
0.5624823536456677
Epoch 21/204, Loss: 0.7443957363857943
0.5242750451843177
Epoch 31/204, Loss: 0.35412877798080444
0.6362305995770383
Epoch 41/204, Loss: 0.21364150459275527
0.6545279142170661
Epoch 51/204, Loss: 0.17017044258468292
0.6633263403092521
Epoch 61/204, Loss: 0.15511958344894297
0.6764163269142758
Epoch 71/204, Loss: 0.1456694730064448
0.6927087359331145
Epoch 81/204, Loss: 0.13662874348023357
0.7023498713881597
Epoch 91/204, Loss: 0.12688222242628827
0.7092502465507232
Epoch 101/204, Loss: 0.1169022472904009
0.7133317823259498
Epoch 111/204, Loss: 0.10616733265273712
0.7224260980616222
Epoch 121/204, Loss: 0.09597086753038798
0.7215553211678845
Epoch 131/204, Loss: 0.08608910967321957
0.7266900465601703
Epoch 141/204, Loss: 0.0768276428913

[I 2024-01-23 00:51:26,751] Trial 12 finished with value: 0.7460174163927114 and parameters: {'hidden_dim_h': 26, 'dropout': 0.11616274579845375, 'batch_size': 707, 'n_epochs': 204}. Best is trial 11 with value: 0.7545831181863579.


Build model with 2 layers of attention
Epoch 1/175, Loss: 0.3879484800355775
0.5576919851919495
Epoch 11/175, Loss: 0.2223885187080928
0.6255418138180174
Epoch 21/175, Loss: 0.1926384328731469
0.6458781990914245
Epoch 31/175, Loss: 0.16767870102609908
0.654509061288272
Epoch 41/175, Loss: 0.14376653571213996
0.6725226060043156
Epoch 51/175, Loss: 0.12200718399669443
0.7096634058040131
Epoch 61/175, Loss: 0.10330456255802087
0.7148160164508259
Epoch 71/175, Loss: 0.08701076331947531
0.7321335712645203
Epoch 81/175, Loss: 0.0753329758133207
0.738972381064302
Epoch 91/175, Loss: 0.06564904137381486
0.7419228858054834
Epoch 101/175, Loss: 0.05903400494051831
0.7500929255648952
Epoch 111/175, Loss: 0.0534784131284271
0.7462983445423041
Epoch 121/175, Loss: 0.04997594215508018
0.7469961820087486
Epoch 131/175, Loss: 0.047624235706669946
0.7448671514192071
Epoch 141/175, Loss: 0.0451382864266634
0.7469394722391673
Epoch 151/175, Loss: 0.04327522723802498
0.745311437509
Epoch 161/175, Loss: 0.

[I 2024-01-23 00:52:28,941] Trial 13 finished with value: 0.7464580760957237 and parameters: {'hidden_dim_h': 25, 'dropout': 0.13370074459826942, 'batch_size': 869, 'n_epochs': 175}. Best is trial 11 with value: 0.7545831181863579.


Build model with 2 layers of attention
Epoch 1/169, Loss: 0.729044687180292
0.5402050804645636
Epoch 11/169, Loss: 0.41918901744342985
0.008345691593892491
Epoch 21/169, Loss: 0.29697748876753305
0.02900478096269739
Epoch 31/169, Loss: 0.2629402918475015
0.006625972525863998
Epoch 41/169, Loss: 0.2597185295252573
0.004714123316967936
Epoch 51/169, Loss: 0.2552282902456465
0.0042493165610991155
Epoch 61/169, Loss: 0.2574352884576434
0.011865012792934202
Epoch 71/169, Loss: 0.26122215957868666
0.0025786523431230142
Epoch 81/169, Loss: 0.2571698817468825
0.012221131494357065
Epoch 91/169, Loss: 0.2595116865067255
0.02035079652591754
Epoch 101/169, Loss: 0.257989397361165
0.011338559786068238
Epoch 111/169, Loss: 0.2624204002675556
0.01737643964383017
Epoch 121/169, Loss: 0.2572065648578462
0.02361807709919106
Epoch 131/169, Loss: 0.2578877203521274
0.040624636200743434
Epoch 141/169, Loss: 0.2587375406708036
0.034666308906471
Epoch 151/169, Loss: 0.25609532992045086
0.04976256143294931
Ep

[I 2024-01-23 00:53:26,415] Trial 14 finished with value: 0.06181721905452112 and parameters: {'hidden_dim_h': 22, 'dropout': 0.16085446653969526, 'batch_size': 1191, 'n_epochs': 169}. Best is trial 11 with value: 0.7545831181863579.


Build model with 2 layers of attention
Epoch 1/170, Loss: 0.6201596260070801
0.5592727757981077
Epoch 11/170, Loss: 0.2525580951145717
0.6217274738556968
Epoch 21/170, Loss: 0.15606182866862842
0.6623200137980617
Epoch 31/170, Loss: 0.13582854797797544
0.6854084898939103
Epoch 41/170, Loss: 0.12660792549805983
0.7072037051829665
Epoch 51/170, Loss: 0.11627360805869102
0.721818970335526
Epoch 61/170, Loss: 0.1059305455003466
0.7248721975741285
Epoch 71/170, Loss: 0.09622113965451717
0.7359560351118646
Epoch 81/170, Loss: 0.08614175181303706
0.7354206728284716
Epoch 91/170, Loss: 0.07802423435662474
0.745441714190178
Epoch 101/170, Loss: 0.06774618543152298
0.7436853083625274
Epoch 111/170, Loss: 0.061233339964279106
0.7485776563625736
Epoch 121/170, Loss: 0.05444473121315241
0.7494992985110797
Epoch 131/170, Loss: 0.0511466059833765
0.7532350274992922
Epoch 141/170, Loss: 0.04638943076133728
0.7496992729523662
Epoch 151/170, Loss: 0.043420534314853806
0.7515276697193475
Epoch 161/170, L

[I 2024-01-23 00:54:33,958] Trial 15 finished with value: 0.7527985298646079 and parameters: {'hidden_dim_h': 33, 'dropout': 0.14851986774846587, 'batch_size': 879, 'n_epochs': 170}. Best is trial 11 with value: 0.7545831181863579.


Build model with 2 layers of attention
Epoch 1/209, Loss: 1.4820766551154
0.5173682545794325
Epoch 11/209, Loss: 0.599277104650225
0.5792451250532221
Epoch 21/209, Loss: 0.23958848331655774
0.6269047223368307
Epoch 31/209, Loss: 0.12921469126428878
0.6632729625885312
Epoch 41/209, Loss: 0.10266823428017753
0.6781587509754538
Epoch 51/209, Loss: 0.09494370307241168
0.6893513709594689
Epoch 61/209, Loss: 0.09041732634816851
0.7027244896797605
Epoch 71/209, Loss: 0.08534691418920244
0.7122427519051538
Epoch 81/209, Loss: 0.08169102945498058
0.7230904772578978
Epoch 91/209, Loss: 0.07591774037906102
0.7271365118238589
Epoch 101/209, Loss: 0.07057625842945917
0.7323312397277578
Epoch 111/209, Loss: 0.06587406492659025
0.7400042007062939
Epoch 121/209, Loss: 0.06332117574555533
0.7360553655016021
Epoch 131/209, Loss: 0.05839658815945898
0.7417061718812146
Epoch 141/209, Loss: 0.0563142155962331
0.7435179052889059
Epoch 151/209, Loss: 0.05281373815877097
0.7444533336236463
Epoch 161/209, Loss

[I 2024-01-23 00:55:53,397] Trial 16 finished with value: 0.7516519265258871 and parameters: {'hidden_dim_h': 32, 'dropout': 0.16450564398565456, 'batch_size': 701, 'n_epochs': 209}. Best is trial 11 with value: 0.7545831181863579.


Build model with 2 layers of attention
Epoch 1/283, Loss: 0.8016969037055969
0.5417939777521165
Epoch 11/283, Loss: 0.3450625967979431
0.6335021793402544
Epoch 21/283, Loss: 0.16996126115322113
0.6607870615604667
Epoch 31/283, Loss: 0.11524874359369278
0.6843451235095459
Epoch 41/283, Loss: 0.09841149598360062
0.7069779237940726
Epoch 51/283, Loss: 0.09026305764913559
0.7156873935222445
Epoch 61/283, Loss: 0.08518980592489242
0.7278609271867279
Epoch 71/283, Loss: 0.07933024376630783
0.7268743773606737
Epoch 81/283, Loss: 0.07354670763015747
0.7373055135503054
Epoch 91/283, Loss: 0.0678459057211876
0.7383952494425414
Epoch 101/283, Loss: 0.06275521546602249
0.7433449520498575
Epoch 111/283, Loss: 0.05786539137363434
0.7451448732541304
Epoch 121/283, Loss: 0.05310676544904709
0.7488848187205518
Epoch 131/283, Loss: 0.0494444265961647
0.7515889236207719
Epoch 141/283, Loss: 0.04586195424199104
0.7456353728063743
Epoch 151/283, Loss: 0.042353363186120985
0.7525163349873527
Epoch 161/283, 

[I 2024-01-23 00:57:38,970] Trial 17 finished with value: 0.7498145553048592 and parameters: {'hidden_dim_h': 32, 'dropout': 0.09532513683407465, 'batch_size': 983, 'n_epochs': 283}. Best is trial 11 with value: 0.7545831181863579.


Build model with 2 layers of attention
Epoch 1/151, Loss: 0.5744670040723754
0.5378582611555067
Epoch 11/151, Loss: 0.28299283908634654
0.6209074031640688
Epoch 21/151, Loss: 0.23760208160412022
0.633692903903152
Epoch 31/151, Loss: 0.20243756473064423
0.6189680306885642
Epoch 41/151, Loss: 0.16574217560814647
0.6457084678774704
Epoch 51/151, Loss: 0.13974027590053836
0.5485192951353062
Epoch 61/151, Loss: 0.20350708449032248
0.00028548979065041405
Epoch 71/151, Loss: 0.2596656832753158
0.013649788903574206
Epoch 81/151, Loss: 0.26011591313815696
0.009943177645003205
Epoch 91/151, Loss: 0.2555277391177852
0.015196492675297922
Epoch 101/151, Loss: 0.258017782031036
0.012160528130366227
Epoch 111/151, Loss: 0.2566732949599987
0.013303583476937145
Epoch 121/151, Loss: 0.25481021476954946
0.023967801160763445
Epoch 131/151, Loss: 0.2563886904134983
0.031062698271306817
Epoch 141/151, Loss: 0.2547633211787154
0.02277746999441248


[I 2024-01-23 00:58:46,145] Trial 18 finished with value: 0.031622269304193755 and parameters: {'hidden_dim_h': 43, 'dropout': 0.2440751677415835, 'batch_size': 585, 'n_epochs': 151}. Best is trial 11 with value: 0.7545831181863579.


Epoch 151/151, Loss: 0.25439383271263866
0.031622269304193755
Build model with 2 layers of attention
Epoch 1/194, Loss: 0.2262841344907366
0.5525854564525616
Epoch 11/194, Loss: 0.1809807080647041
0.6273781110374179
Epoch 21/194, Loss: 0.1497840059214625
0.6514135584650081
Epoch 31/194, Loss: 0.1256780899290381
0.6643053970849041
Epoch 41/194, Loss: 0.10603290153988476
0.6875628402738851
Epoch 51/194, Loss: 0.09160343506212892
0.699561275123918
Epoch 61/194, Loss: 0.0799927195084506
0.7216234351238295
Epoch 71/194, Loss: 0.07172980801812534
0.7308269045506733
Epoch 81/194, Loss: 0.06570014999858265
0.7344470710613132
Epoch 91/194, Loss: 0.06106763397311342
0.7432536074171625
Epoch 101/194, Loss: 0.05771120483505315
0.745747185450078
Epoch 111/194, Loss: 0.05581739840322528
0.7464046176321435
Epoch 121/194, Loss: 0.05314572605079618
0.7474847122872482
Epoch 131/194, Loss: 0.050536470295026385
0.7484290628069378
Epoch 141/194, Loss: 0.05008610286589327
0.7499966162467887
Epoch 151/194, L

[I 2024-01-23 00:59:52,867] Trial 19 finished with value: 0.7526716901040156 and parameters: {'hidden_dim_h': 22, 'dropout': 0.1483960365708282, 'batch_size': 845, 'n_epochs': 194}. Best is trial 11 with value: 0.7545831181863579.


Build model with 2 layers of attention
Epoch 1/232, Loss: 1.5322145104408265
0.5397136854469823
Epoch 11/232, Loss: 0.9708339512348175
0.5780872574656392
Epoch 21/232, Loss: 0.6359328240156173
0.6208786122567393
Epoch 31/232, Loss: 0.4473692148923874
0.6401093936230944
Epoch 41/232, Loss: 0.35125434696674346
0.6546556328673246
Epoch 51/232, Loss: 0.30540926158428194
0.6623032271806929
Epoch 61/232, Loss: 0.2820808842778206
0.6645148490768337
Epoch 71/232, Loss: 0.2666643597185612
0.6672549443932463
Epoch 81/232, Loss: 0.25313377380371094
0.6728916850198992
Epoch 91/232, Loss: 0.23954075425863267
0.6715640299627013
Epoch 101/232, Loss: 0.2252313271164894
0.6920670871623583
Epoch 111/232, Loss: 0.2098231054842472
0.7085032556876661
Epoch 121/232, Loss: 0.194314918667078
0.7086565401706885
Epoch 131/232, Loss: 0.17825894355773925
0.7226734859287319
Epoch 141/232, Loss: 0.16238181293010712
0.7186650934267153
Epoch 151/232, Loss: 0.14713845774531364
0.7334135152834913
Epoch 161/232, Loss: 0

[I 2024-01-23 01:01:25,019] Trial 20 finished with value: 0.7518134243218864 and parameters: {'hidden_dim_h': 35, 'dropout': 0.10304546668054658, 'batch_size': 1197, 'n_epochs': 232}. Best is trial 11 with value: 0.7545831181863579.


Build model with 2 layers of attention
Epoch 1/196, Loss: 0.6705915212631226
0.546267160573649
Epoch 11/196, Loss: 0.26970532685518267
0.6202474714831901
Epoch 21/196, Loss: 0.16989877422650654
0.6462342526525258
Epoch 31/196, Loss: 0.14936615973711015
0.6661028365030198
Epoch 41/196, Loss: 0.13737814724445344
0.6941090373399948
Epoch 51/196, Loss: 0.12654297774036724
0.6986865957961039
Epoch 61/196, Loss: 0.11443578898906707
0.7144666157742977
Epoch 71/196, Loss: 0.10294126744071642
0.7249485243496178
Epoch 81/196, Loss: 0.09283727755149206
0.7277354687528974
Epoch 91/196, Loss: 0.08312641804416974
0.7288019380701689
Epoch 101/196, Loss: 0.07481347198287645
0.7380914534821251
Epoch 111/196, Loss: 0.0674535721540451
0.7418003066310865
Epoch 121/196, Loss: 0.061752357458074884
0.7369687900884913
Epoch 131/196, Loss: 0.056996179992953935
0.7445619599496601
Epoch 141/196, Loss: 0.05285149315992991
0.7480895055350207
Epoch 151/196, Loss: 0.05022017459074656
0.7507853044434422
Epoch 161/196

[I 2024-01-23 01:02:30,717] Trial 21 finished with value: 0.7520844473632504 and parameters: {'hidden_dim_h': 21, 'dropout': 0.15155588615694265, 'batch_size': 813, 'n_epochs': 196}. Best is trial 11 with value: 0.7545831181863579.


Build model with 2 layers of attention
Epoch 1/261, Loss: 0.2795616537332535
0.536295085071313
Epoch 11/261, Loss: 0.2043672299385071
0.6260406102259634
Epoch 21/261, Loss: 0.18200326979160308
0.642555145870753
Epoch 31/261, Loss: 0.16018771409988403
0.6714906569026604
Epoch 41/261, Loss: 0.1396082431077957
0.6824473680833658
Epoch 51/261, Loss: 0.12083728700876235
0.6883833916814397
Epoch 61/261, Loss: 0.10507515281438827
0.710268762347061
Epoch 71/261, Loss: 0.09024958163499833
0.7185741765659908
Epoch 81/261, Loss: 0.07768365949392318
0.7295356125694936
Epoch 91/261, Loss: 0.06834076136350632
0.7403013641019066
Epoch 101/261, Loss: 0.060687006264925
0.7454063894430443
Epoch 111/261, Loss: 0.0546799473464489
0.7522902153401541
Epoch 121/261, Loss: 0.05115413039922714
0.7472635198113653
Epoch 131/261, Loss: 0.047863775193691255
0.7528777428149088
Epoch 141/261, Loss: 0.045264070630073545
0.7504504766330089
Epoch 151/261, Loss: 0.042495506554841994
0.7491834076458083
Epoch 161/261, Los

[I 2024-01-23 01:04:06,570] Trial 22 finished with value: 0.7546840023585993 and parameters: {'hidden_dim_h': 29, 'dropout': 0.1306133434810734, 'batch_size': 967, 'n_epochs': 261}. Best is trial 22 with value: 0.7546840023585993.


Epoch 261/261, Loss: 0.032386942729353906
0.7546840023585993
Found better hyperparameter, update model
Build model with 2 layers of attention
Epoch 1/267, Loss: 0.5802194674809774
0.5318372934559313
Epoch 11/267, Loss: 0.27135930210351944
0.6146329722700784
Epoch 21/267, Loss: 0.18697594292461872
0.6360040726636366
Epoch 31/267, Loss: 0.16504351049661636
0.6589687642934214
Epoch 41/267, Loss: 0.15248311931888262
0.6822599328587192
Epoch 51/267, Loss: 0.14002380271752676
0.6937756199893923
Epoch 61/267, Loss: 0.1266228687018156
0.7091600338151867
Epoch 71/267, Loss: 0.1134944623336196
0.7231798795411276
Epoch 81/267, Loss: 0.10190237158288558
0.7303281563630926
Epoch 91/267, Loss: 0.0906698585798343
0.73352337107129
Epoch 101/267, Loss: 0.08075216909249623
0.7451444035663864
Epoch 111/267, Loss: 0.07210942171514034
0.7447369239367784
Epoch 121/267, Loss: 0.06506756165375312
0.7406719305763433
Epoch 131/267, Loss: 0.05878805431226889
0.7408318072477708
Epoch 141/267, Loss: 0.053619839095

[I 2024-01-23 01:05:44,235] Trial 23 finished with value: 0.7477995057511679 and parameters: {'hidden_dim_h': 29, 'dropout': 0.18132326344850455, 'batch_size': 1019, 'n_epochs': 267}. Best is trial 22 with value: 0.7546840023585993.


Build model with 2 layers of attention
Epoch 1/255, Loss: 3.0220290514139028
0.5064675152892677
Epoch 11/255, Loss: 1.943398654460907
0.563702799091655
Epoch 21/255, Loss: 1.247189984871791
0.5888526714216183
Epoch 31/255, Loss: 0.7732777824768653
0.0881810095868017
Epoch 41/255, Loss: 0.4427390774855247
0.612208784858953
Epoch 51/255, Loss: 0.27399898492372954
0.645871288219766
Epoch 61/255, Loss: 0.19638622953341558
0.6620608426903678
Epoch 71/255, Loss: 0.16395745483728555
0.68608750789252
Epoch 81/255, Loss: 0.1484691580900779
0.6872188578684582
Epoch 91/255, Loss: 0.1415098452797303
0.6923947455055068
Epoch 101/255, Loss: 0.13478112736573586
0.6978673753409049
Epoch 111/255, Loss: 0.12783106187215218
0.7119002123937259
Epoch 121/255, Loss: 0.12038730056240009
0.713880067778334
Epoch 131/255, Loss: 0.11412903322623326
0.7221873102493906
Epoch 141/255, Loss: 0.1064652161529431
0.7217364632860385
Epoch 151/255, Loss: 0.09889753019580474
0.7255071457017045
Epoch 161/255, Loss: 0.09575

[I 2024-01-23 01:07:24,268] Trial 24 finished with value: 0.7492225644873134 and parameters: {'hidden_dim_h': 33, 'dropout': 0.11685919044164889, 'batch_size': 948, 'n_epochs': 255}. Best is trial 22 with value: 0.7546840023585993.


Build model with 2 layers of attention
Epoch 1/151, Loss: 0.7215583947571841
0.5484808805085158
Epoch 11/151, Loss: 0.3270281065594066
0.6013142444843182
Epoch 21/151, Loss: 0.1682736115022139
0.6535684414345809
Epoch 31/151, Loss: 0.11251969296823848
0.6792010893999086
Epoch 41/151, Loss: 0.09354220906441862
0.7033692708971419
Epoch 51/151, Loss: 0.08571504530581561
0.7257087890023944
Epoch 61/151, Loss: 0.07963166216557677
0.7333672287875059
Epoch 71/151, Loss: 0.07528890872543509
0.7374220458849651
Epoch 81/151, Loss: 0.07083346965638074
0.7377131007498793
Epoch 91/151, Loss: 0.06652727502990853
0.747660916327128
Epoch 101/151, Loss: 0.062129455703226005
0.7461978459448274
Epoch 111/151, Loss: 0.057927201756022194
0.7490649644019604
Epoch 121/151, Loss: 0.05422742986543612
0.7548657012353888
Epoch 131/151, Loss: 0.05083644119175998
0.7518113347933572
Epoch 141/151, Loss: 0.04792016490616582
0.7536199644947332


[I 2024-01-23 01:08:15,610] Trial 25 finished with value: 0.7555315489772368 and parameters: {'hidden_dim_h': 26, 'dropout': 0.08004697370188486, 'batch_size': 1090, 'n_epochs': 151}. Best is trial 25 with value: 0.7555315489772368.


Epoch 151/151, Loss: 0.04483643648299304
0.7555315489772368
Found better hyperparameter, update model
Build model with 2 layers of attention
Epoch 1/145, Loss: 0.4260404706001282
0.5480727466411411
Epoch 11/145, Loss: 0.24699429354884408
0.6267849765726372
Epoch 21/145, Loss: 0.2072629677978429
0.640538644172276
Epoch 31/145, Loss: 0.19246334108439359
0.6516467284201763
Epoch 41/145, Loss: 0.17855055765672165
0.664006034555794
Epoch 51/145, Loss: 0.16419530524448914
0.6790174116521669
Epoch 61/145, Loss: 0.14927088740197095
0.6902748140646484
Epoch 71/145, Loss: 0.13446716219186783
0.698546405258522
Epoch 81/145, Loss: 0.1203628619286147
0.7002853181847273
Epoch 91/145, Loss: 0.10797682777047157
0.7156426180948134
Epoch 101/145, Loss: 0.09642051600597122
0.724690077426091
Epoch 111/145, Loss: 0.08496657318689606
0.7349646287897685
Epoch 121/145, Loss: 0.07673844762823799
0.7285660862621938
Epoch 131/145, Loss: 0.06859670951962471
0.7378598578455569
Epoch 141/145, Loss: 0.06187252632596

[I 2024-01-23 01:09:06,220] Trial 26 finished with value: 0.7467454279364377 and parameters: {'hidden_dim_h': 25, 'dropout': 0.07849171390755381, 'batch_size': 1103, 'n_epochs': 145}. Best is trial 25 with value: 0.7555315489772368.


Build model with 2 layers of attention
Epoch 1/226, Loss: 0.6520318307659843
0.5447259388137073
Epoch 11/226, Loss: 0.39399540559812024
0.6053130172195029
Epoch 21/226, Loss: 0.2540419562296434
0.5966934760177823
Epoch 31/226, Loss: 0.20546822317621924
0.6394075449220036
Epoch 41/226, Loss: 0.18857050416144458
0.646654238238234
Epoch 51/226, Loss: 0.17473248189145868
0.6749603535373019
Epoch 61/226, Loss: 0.16067389398813248
0.6869851431289532
Epoch 71/226, Loss: 0.1469794362783432
0.6937708624126477
Epoch 81/226, Loss: 0.1330026618459008
0.7055972557744778
Epoch 91/226, Loss: 0.12020131539214741
0.7039151694936256
Epoch 101/226, Loss: 0.108231628482992
0.7202494881032582
Epoch 111/226, Loss: 0.09612117233601483
0.7242849721430081
Epoch 121/226, Loss: 0.08566400510343639
0.7339242534678766
Epoch 131/226, Loss: 0.07764439420266585
0.7335226662656033
Epoch 141/226, Loss: 0.06938875297253783
0.7368013845728572
Epoch 151/226, Loss: 0.0630931185389107
0.7405727993181122
Epoch 161/226, Loss:

[I 2024-01-23 01:10:26,711] Trial 27 finished with value: 0.7555762768422705 and parameters: {'hidden_dim_h': 28, 'dropout': 0.07657881349873598, 'batch_size': 1096, 'n_epochs': 226}. Best is trial 27 with value: 0.7555762768422705.


Found better hyperparameter, update model
Build model with 2 layers of attention
Epoch 1/263, Loss: 0.35656427524306555
0.5357294172100615
Epoch 11/263, Loss: 0.17979037152095276
0.6076620724038234
Epoch 21/263, Loss: 0.1488402309742841
0.6385400821544219
Epoch 31/263, Loss: 0.13772264935753561
0.6588264148161886
Epoch 41/263, Loss: 0.12898469919508154
0.6650586025232501
Epoch 51/263, Loss: 0.1193487938832153
0.6796473005470782
Epoch 61/263, Loss: 0.11138160967014053
0.6821190033443307
Epoch 71/263, Loss: 0.10063927688381889
0.7012762297249349
Epoch 81/263, Loss: 0.09190269830552014
0.7118758285667819
Epoch 91/263, Loss: 0.08423389731483026
0.7121311843281851
Epoch 101/263, Loss: 0.07661922987211835
0.7187202068330283
Epoch 111/263, Loss: 0.07366529174826363
0.7244498667285496
Epoch 121/263, Loss: 0.06818545592779462
0.7198754060996123
Epoch 131/263, Loss: 0.06530540487305685
0.7238578618753081
Epoch 141/263, Loss: 0.060454481704668564
0.7291351739626185
Epoch 151/263, Loss: 0.05668978

[I 2024-01-23 01:11:52,658] Trial 28 finished with value: 0.7447006707268798 and parameters: {'hidden_dim_h': 18, 'dropout': 0.05462339423839503, 'batch_size': 1135, 'n_epochs': 263}. Best is trial 27 with value: 0.7555762768422705.


Build model with 2 layers of attention
Epoch 1/300, Loss: 2.7007271103236987
0.4678231722480961
Epoch 11/300, Loss: 1.8124681866687278
0.5434868630734621
Epoch 21/300, Loss: 1.1553980993187947
0.1518287781067141
Epoch 31/300, Loss: 0.6807148016017416
0.5688051277139695
Epoch 41/300, Loss: 0.4120202116344286
0.6103228863307139
Epoch 51/300, Loss: 0.2570312191610751
0.6518596347277738
Epoch 61/300, Loss: 0.18338201875272003
0.6515912110206395
Epoch 71/300, Loss: 0.1502558718556943
0.6718129288324299
Epoch 81/300, Loss: 0.13291901997897937
0.6840455555071419
Epoch 91/300, Loss: 0.12463700253030528
0.6940153981051581
Epoch 101/300, Loss: 0.12006964081007501
0.7011725800214432
Epoch 111/300, Loss: 0.11495284995307094
0.7058111158094563
Epoch 121/300, Loss: 0.1105676144361496
0.7108462333476036
Epoch 131/300, Loss: 0.1056507809654526
0.7243385047221513
Epoch 141/300, Loss: 0.10176347034132999
0.7200442582791156
Epoch 151/300, Loss: 0.09738630315531856
0.7216937715411742
Epoch 161/300, Loss: 

[I 2024-01-23 01:13:43,787] Trial 29 finished with value: 0.746058592689192 and parameters: {'hidden_dim_h': 28, 'dropout': 0.08778478887232304, 'batch_size': 1080, 'n_epochs': 300}. Best is trial 27 with value: 0.7555762768422705.


Build model with 2 layers of attention
Epoch 1/224, Loss: 2.9333997781460104
0.47228774091081266
Epoch 11/224, Loss: 1.8493370643028846
0.5456959670822717
Epoch 21/224, Loss: 1.154894260259775
0.6044035787136249
Epoch 31/224, Loss: 0.7301322451004615
0.6396483763316542
Epoch 41/224, Loss: 0.4918314596781364
0.6661975747619092
Epoch 51/224, Loss: 0.37054796860768247
0.68235942070254
Epoch 61/224, Loss: 0.31363764290626234
0.6913156086079273
Epoch 71/224, Loss: 0.2623595606822234
0.6487913881618199
Epoch 81/224, Loss: 0.22737190127372742
0.6614667620075736
Epoch 91/224, Loss: 0.21156610491184089
0.6765668507946654
Epoch 101/224, Loss: 0.19927259649221712
0.6809908835133223
Epoch 111/224, Loss: 0.1876727445767476
0.6874916722821187
Epoch 121/224, Loss: 0.1755806402518199
0.7001988445816417
Epoch 131/224, Loss: 0.16258682711766317
0.7059196697865973
Epoch 141/224, Loss: 0.14886411566000718
0.713732140110577
Epoch 151/224, Loss: 0.13526680492437804
0.721560398493722
Epoch 161/224, Loss: 0.1

[I 2024-01-23 01:15:06,627] Trial 30 finished with value: 0.7378183728871626 and parameters: {'hidden_dim_h': 30, 'dropout': 0.0728430919303978, 'batch_size': 927, 'n_epochs': 224}. Best is trial 27 with value: 0.7555762768422705.


Build model with 2 layers of attention
Epoch 1/217, Loss: 0.16791319134442703
0.5503766862350662
Epoch 11/217, Loss: 0.1267440493987954
0.6245139204748055
Epoch 21/217, Loss: 0.10795263265785963
0.657299842677155
Epoch 31/217, Loss: 0.09686441395593726
0.6728873437046321
Epoch 41/217, Loss: 0.09150168105312016
0.6722415069229846
Epoch 51/217, Loss: 0.08084105829829755
0.6955050938184881
Epoch 61/217, Loss: 0.07513841112022815
0.7093703834036633
Epoch 71/217, Loss: 0.06900475530520729
0.7185705228833401
Epoch 81/217, Loss: 0.06481233222976975
0.7303784432816086
Epoch 91/217, Loss: 0.06146332103273143
0.7381417662629263
Epoch 101/217, Loss: 0.05893195355715959
0.7432547291555571
Epoch 111/217, Loss: 0.056382732384878655
0.7472484534935995
Epoch 121/217, Loss: 0.053359118330737816
0.7472362148455173
Epoch 131/217, Loss: 0.05050847177272257
0.7492689364774009
Epoch 141/217, Loss: 0.05021053293476934
0.7474158467141607
Epoch 151/217, Loss: 0.04767423317484234
0.7491760547432936
Epoch 161/21

[I 2024-01-23 01:16:26,765] Trial 31 finished with value: 0.754704846063209 and parameters: {'hidden_dim_h': 27, 'dropout': 0.12149936815036433, 'batch_size': 1076, 'n_epochs': 217}. Best is trial 27 with value: 0.7555762768422705.


Build model with 2 layers of attention
Epoch 1/220, Loss: 0.6529126530108245
0.5422393035333731
Epoch 11/220, Loss: 0.27858127847961756
0.6160172794811369
Epoch 21/220, Loss: 0.14045132858597714
0.6458267986302481
Epoch 31/220, Loss: 0.09877203987992328
0.6919735244318337
Epoch 41/220, Loss: 0.08615983180377794
0.7145810678208528
Epoch 51/220, Loss: 0.08003607457098753
0.7213754248875344
Epoch 61/220, Loss: 0.07500493137732796
0.7273003398278505
Epoch 71/220, Loss: 0.06994246173164119
0.7407952608818539
Epoch 81/220, Loss: 0.06614017599950665
0.7412581855434912
Epoch 91/220, Loss: 0.06213878240922223
0.746327570588072
Epoch 101/220, Loss: 0.0593834580934566
0.7483419360703893
Epoch 111/220, Loss: 0.05553027533966562
0.7475340590785962
Epoch 121/220, Loss: 0.05197569306777871
0.7526238199775321
Epoch 131/220, Loss: 0.04876134577004806
0.753378832903484
Epoch 141/220, Loss: 0.04665044857108075
0.755516128763596
Epoch 151/220, Loss: 0.044699987477582435
0.7550188884817047
Epoch 161/220, L

[I 2024-01-23 01:17:46,001] Trial 32 finished with value: 0.7604109329548373 and parameters: {'hidden_dim_h': 28, 'dropout': 0.11391580511521779, 'batch_size': 1066, 'n_epochs': 220}. Best is trial 32 with value: 0.7604109329548373.


Found better hyperparameter, update model
Build model with 2 layers of attention
Epoch 1/186, Loss: 0.7583553687385891
0.533389589643662
Epoch 11/186, Loss: 0.4296344194723212
0.6152268798882381
Epoch 21/186, Loss: 0.3147129183230193
0.6354478435822966
Epoch 31/186, Loss: 0.2765093640140865
0.6499060831909954
Epoch 41/186, Loss: 0.25518431935621344
0.6503789586645565
Epoch 51/186, Loss: 0.23422886560792508
0.6635891242033694
Epoch 61/186, Loss: 0.21235324442386627
0.6828614351654322
Epoch 71/186, Loss: 0.18903669131838757
0.6974583504069812
Epoch 81/186, Loss: 0.167504225736079
0.7064873723644909
Epoch 91/186, Loss: 0.14589040175728177
0.7170277626153742
Epoch 101/186, Loss: 0.1269285196195478
0.7211154671175927
Epoch 111/186, Loss: 0.10901690598415292
0.726631664288588
Epoch 121/186, Loss: 0.09314624576464943
0.7376012350725404
Epoch 131/186, Loss: 0.07993629315625066
0.7345241159002707
Epoch 141/186, Loss: 0.06872273252710052
0.7412242150108266
Epoch 151/186, Loss: 0.0591916362552539

[I 2024-01-23 01:18:53,108] Trial 33 finished with value: 0.7440128714315206 and parameters: {'hidden_dim_h': 27, 'dropout': 0.05964315156713247, 'batch_size': 1062, 'n_epochs': 186}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/151, Loss: 2.669234210794622
0.3114829236167094
Epoch 11/151, Loss: 1.7866262847726995
0.024890890654720284
Epoch 21/151, Loss: 1.1531296968460083
0.01586055521370346
Epoch 31/151, Loss: 0.7532508942213926
0.04422947789196072
Epoch 41/151, Loss: 0.5109302293170582
0.02326594694179617
Epoch 51/151, Loss: 0.3769530789418654
0.022058667800394925
Epoch 61/151, Loss: 0.3056108829650012
0.02388000398110954
Epoch 71/151, Loss: 0.28011348979039624
0.007337681976081922
Epoch 81/151, Loss: 0.26657546108419244
0.00958280552589905
Epoch 91/151, Loss: 0.2600455162200061
0.007550263712977321
Epoch 101/151, Loss: 0.2588681030002507
0.01843892600837965
Epoch 111/151, Loss: 0.26359574632211163
0.007085576186880958
Epoch 121/151, Loss: 0.257944944229993
0.031460907367841254
Epoch 131/151, Loss: 0.2600190016356381
0.021466165694836732
Epoch 141/151, Loss: 0.25727480582215567
0.024318453813755976


[I 2024-01-23 01:19:55,047] Trial 34 finished with value: 0.03916333527404268 and parameters: {'hidden_dim_h': 37, 'dropout': 0.10190243379571357, 'batch_size': 1130, 'n_epochs': 151}. Best is trial 32 with value: 0.7604109329548373.


Epoch 151/151, Loss: 0.2579965218901634
0.03916333527404268
Build model with 2 layers of attention
Epoch 1/219, Loss: 0.7268468814511453
0.5570491797909704
Epoch 11/219, Loss: 0.3698841583344244
0.6103531918593614
Epoch 21/219, Loss: 0.2396654087689615
0.6154615408799404
Epoch 31/219, Loss: 0.20211314289800583
0.6395006926900099
Epoch 41/219, Loss: 0.18242974723539046
0.6683872422194308
Epoch 51/219, Loss: 0.1625255005013558
0.6801913429721929
Epoch 61/219, Loss: 0.14374778539903701
0.6973599371330713
Epoch 71/219, Loss: 0.12476841696808415
0.7137453302253005
Epoch 81/219, Loss: 0.10737217890639458
0.7201269217291467
Epoch 91/219, Loss: 0.09257482472927339
0.7240731089321948
Epoch 101/219, Loss: 0.08068168091197167
0.7357084271163956
Epoch 111/219, Loss: 0.07041577225731264
0.7454101769504844
Epoch 121/219, Loss: 0.06337914736040177
0.7474840449425963
Epoch 131/219, Loss: 0.05663404806006339
0.7530373598434488
Epoch 141/219, Loss: 0.0529181779392304
0.7621803889014803
Epoch 151/219, Lo

[I 2024-01-23 01:21:11,454] Trial 35 finished with value: 0.7544781895089205 and parameters: {'hidden_dim_h': 24, 'dropout': 0.11281276383325871, 'batch_size': 779, 'n_epochs': 219}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/246, Loss: 0.25954663170420605
0.5418745539832827
Epoch 11/246, Loss: 0.20760688185691833
0.6179055363052579
Epoch 21/246, Loss: 0.17146004736423492
0.6426888057724227
Epoch 31/246, Loss: 0.14428974104964215
0.6601560710530817
Epoch 41/246, Loss: 0.12081402086693308
0.6900323805864382
Epoch 51/246, Loss: 0.1021795995209528
0.7146951526983679
Epoch 61/246, Loss: 0.08713714098152907
0.7263623004730948
Epoch 71/246, Loss: 0.07530176833919856
0.7295317902897552
Epoch 81/246, Loss: 0.06632423157925191
0.742755288397411
Epoch 91/246, Loss: 0.060077655736518944
0.7458579812133437
Epoch 101/246, Loss: 0.05472736562723699
0.7467910303091304
Epoch 111/246, Loss: 0.05124394336472387
0.7525766256714946
Epoch 121/246, Loss: 0.048153947228970734
0.7504749589075704
Epoch 131/246, Loss: 0.046386955386918526
0.7521558545169925
Epoch 141/246, Loss: 0.04488765011015146
0.7513490310435235
Epoch 151/246, Loss: 0.04360114313338114
0.7536931296864008
Epoch 161/2

[I 2024-01-23 01:22:33,401] Trial 36 finished with value: 0.7567536969999608 and parameters: {'hidden_dim_h': 20, 'dropout': 0.07987989341360495, 'batch_size': 1057, 'n_epochs': 246}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/281, Loss: 0.8886441096015598
0.5182895238854592
Epoch 11/281, Loss: 0.4614590095437091
0.6207196647532713
Epoch 21/281, Loss: 0.28882202765216
0.6430685318399946
Epoch 31/281, Loss: 0.22744091567785843
0.6600743252057644
Epoch 41/281, Loss: 0.20486981972404147
0.6808752969499482
Epoch 51/281, Loss: 0.190409942165665
0.6907476533690057
Epoch 61/281, Loss: 0.17635156149449555
0.7018649381622534
Epoch 71/281, Loss: 0.16172515762888867
0.7090885509067932
Epoch 81/281, Loss: 0.1475818150717279
0.7141982999194587
Epoch 91/281, Loss: 0.13353659147801605
0.7178999673659653
Epoch 101/281, Loss: 0.11919967765393465
0.7230091517698176
Epoch 111/281, Loss: 0.10713773382746655
0.7317230106145839
Epoch 121/281, Loss: 0.09513536516738974
0.736851410211476
Epoch 131/281, Loss: 0.08458677906057109
0.7344211810743951
Epoch 141/281, Loss: 0.07570184054582016
0.7430172007501882
Epoch 151/281, Loss: 0.06746701507464699
0.7434251161110441
Epoch 161/281, Loss: 

[I 2024-01-23 01:24:05,964] Trial 37 finished with value: 0.7474451562318463 and parameters: {'hidden_dim_h': 16, 'dropout': 0.05268965742506931, 'batch_size': 1041, 'n_epochs': 281}. Best is trial 32 with value: 0.7604109329548373.


Epoch 281/281, Loss: 0.037140230445758156
0.7474451562318463
Build model with 2 layers of attention
Epoch 1/247, Loss: 1.5082663411185855
0.525220514058416
Epoch 11/247, Loss: 0.9293409131822132
0.5558633816705374
Epoch 21/247, Loss: 0.5316635725044069
0.6178174957889961
Epoch 31/247, Loss: 0.3213460303488232
0.6355366380499973
Epoch 41/247, Loss: 0.2167749816463107
0.657280678122681
Epoch 51/247, Loss: 0.17146892774672734
0.6809011701701798
Epoch 61/247, Loss: 0.15236198547340574
0.6892365027565427
Epoch 71/247, Loss: 0.14309051065217882
0.6981454397448301
Epoch 81/247, Loss: 0.1361120655423119
0.701009314115188
Epoch 91/247, Loss: 0.12977602723098935
0.7171868961953717
Epoch 101/247, Loss: 0.12504831914390838
0.7130438954639232
Epoch 111/247, Loss: 0.11757972623620715
0.7248528386128534
Epoch 121/247, Loss: 0.11116845018806912
0.7265443886008468
Epoch 131/247, Loss: 0.10411402121895835
0.732586460068718
Epoch 141/247, Loss: 0.09788167476654053
0.7373276815341447
Epoch 151/247, Loss: 

[I 2024-01-23 01:25:26,543] Trial 38 finished with value: 0.7530579056134836 and parameters: {'hidden_dim_h': 19, 'dropout': 0.0910135221822881, 'batch_size': 1148, 'n_epochs': 247}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/184, Loss: 0.19139916990317551
0.558284006419174
Epoch 11/184, Loss: 0.09561049792112089
0.6499266181112245
Epoch 21/184, Loss: 0.08470703091691523
0.6789057145367978
Epoch 31/184, Loss: 0.07952424332353414
0.6892996917801768
Epoch 41/184, Loss: 0.0741680180836542
0.7064577499256404
Epoch 51/184, Loss: 0.0698004314018523
0.7129924117790588
Epoch 61/184, Loss: 0.06553354906831302
0.7261681198232286
Epoch 71/184, Loss: 0.06267692685565528
0.7258680962640076
Epoch 81/184, Loss: 0.05982242595842656
0.7310512023940493
Epoch 91/184, Loss: 0.056485239252009815
0.7330871505025301
Epoch 101/184, Loss: 0.05522608767975779
0.737985810304878
Epoch 111/184, Loss: 0.05348106849865586
0.7409626482256111
Epoch 121/184, Loss: 0.05119754923690183
0.7426641022403413
Epoch 131/184, Loss: 0.049557784079190564
0.7496812146552229
Epoch 141/184, Loss: 0.048478503611084876
0.7432640278059118
Epoch 151/184, Loss: 0.04757633804361902
0.7398668966199273
Epoch 161/184

[I 2024-01-23 01:27:55,527] Trial 39 finished with value: 0.7467627917262607 and parameters: {'hidden_dim_h': 16, 'dropout': 0.07733349551294683, 'batch_size': 117, 'n_epochs': 184}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/244, Loss: 1.7350985904534657
0.5363334183504491
Epoch 11/244, Loss: 1.0027887175480525
0.5712366410689581
Epoch 21/244, Loss: 0.5924289673566818
0.6110420606669394
Epoch 31/244, Loss: 0.3778910810748736
0.6300268781841318
Epoch 41/244, Loss: 0.2788510049382846
0.6540097994586578
Epoch 51/244, Loss: 0.23798413636783758
0.6630715161871578
Epoch 61/244, Loss: 0.21933309423426786
0.6804329363788189
Epoch 71/244, Loss: 0.20664993363122144
0.6595364111786644
Epoch 81/244, Loss: 0.19521864814062914
0.6552806648871028
Epoch 91/244, Loss: 0.18325998447835445
0.6957503337315397
Epoch 101/244, Loss: 0.17065837979316711
0.6937238373049077
Epoch 111/244, Loss: 0.15805461754401526
0.7031615472333937
Epoch 121/244, Loss: 0.14540527823070684
0.7157932145263243
Epoch 131/244, Loss: 0.13230406492948532
0.7197143822413201
Epoch 141/244, Loss: 0.12057138948390882
0.711634000542122
Epoch 151/244, Loss: 0.10846427641808987
0.7289815935805709
Epoch 161/244, Los

[I 2024-01-23 01:29:18,008] Trial 40 finished with value: 0.7435059353679713 and parameters: {'hidden_dim_h': 22, 'dropout': 0.2263018567038168, 'batch_size': 1004, 'n_epochs': 244}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/215, Loss: 0.1937399130800496
0.5563359658765755
Epoch 11/215, Loss: 0.14544051043365314
0.6275902361981768
Epoch 21/215, Loss: 0.12336109902547754
0.6576208662371291
Epoch 31/215, Loss: 0.10401739050512729
0.6705106262681314
Epoch 41/215, Loss: 0.09479238740775896
0.6827045752461703
Epoch 51/215, Loss: 0.08308764255565146
0.6989289084961682
Epoch 61/215, Loss: 0.07536638787259227
0.7124625566856916
Epoch 71/215, Loss: 0.07118973978187727
0.7026911874478483
Epoch 81/215, Loss: 0.06688466441372166
0.720848701081194
Epoch 91/215, Loss: 0.061634193457987
0.7251819029086656
Epoch 101/215, Loss: 0.05866278837556425
0.7283392299044735
Epoch 111/215, Loss: 0.055561095962057945
0.7397584369853305
Epoch 121/215, Loss: 0.054830638934736664
0.7416515347399654
Epoch 131/215, Loss: 0.05079900165614874
0.7437037683138823
Epoch 141/215, Loss: 0.04902063520706218
0.7439464541773729
Epoch 151/215, Loss: 0.04733007887135381
0.747495480976725
Epoch 161/215, 

[I 2024-01-23 01:30:37,618] Trial 41 finished with value: 0.7522597942397694 and parameters: {'hidden_dim_h': 27, 'dropout': 0.0836195860597996, 'batch_size': 1081, 'n_epochs': 215}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/134, Loss: 0.2761343029829172
0.5366560005279969
Epoch 11/134, Loss: 0.1990795794587869
0.6275350862072913
Epoch 21/134, Loss: 0.16496839832801086
0.646580308205156
Epoch 31/134, Loss: 0.13573573185847357
0.6659653845638158
Epoch 41/134, Loss: 0.111968700415813
0.6948799542252885
Epoch 51/134, Loss: 0.09252646278876525
0.7086784092430635
Epoch 61/134, Loss: 0.07627933730299656
0.723826126388057
Epoch 71/134, Loss: 0.06511154512946422
0.7337627833582027
Epoch 81/134, Loss: 0.056092592099538215
0.7441443329758671
Epoch 91/134, Loss: 0.05054073881071348
0.742065367508913
Epoch 101/134, Loss: 0.04567162907467438
0.7470955163300933
Epoch 111/134, Loss: 0.04223153826135855
0.740950069989449
Epoch 121/134, Loss: 0.03988712238004574
0.7482345768071592
Epoch 131/134, Loss: 0.03674992618079369
0.7562232762453254


[I 2024-01-23 01:31:27,436] Trial 42 finished with value: 0.7562232762453254 and parameters: {'hidden_dim_h': 31, 'dropout': 0.06612061889406166, 'batch_size': 936, 'n_epochs': 134}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/96, Loss: 0.3738802792849364
0.5557196898291914
Epoch 11/96, Loss: 0.22112693885962167
0.6296553338214288
Epoch 21/96, Loss: 0.1931599814582754
0.6496714460080024
Epoch 31/96, Loss: 0.16757692010314376
0.6624857376758131
Epoch 41/96, Loss: 0.1438520296856209
0.6864450660546619
Epoch 51/96, Loss: 0.12177155691164511
0.7077032510663989
Epoch 61/96, Loss: 0.10598865096215848
0.7015782389741343
Epoch 71/96, Loss: 0.08990515740933241
0.7177829131697055
Epoch 81/96, Loss: 0.07641897322954955
0.7270676639616652
Epoch 91/96, Loss: 0.06605798102639339
0.7375395969276124


[I 2024-01-23 01:32:02,580] Trial 43 finished with value: 0.7375395969276124 and parameters: {'hidden_dim_h': 31, 'dropout': 0.05078813325959107, 'batch_size': 905, 'n_epochs': 96}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/134, Loss: 0.2341171900431315
0.5616657398042065
Epoch 11/134, Loss: 0.19523408370358603
0.6108803337521236
Epoch 21/134, Loss: 0.1678411052340553
0.6470832538187585
Epoch 31/134, Loss: 0.14496196664514996
0.6608794963728847
Epoch 41/134, Loss: 0.12673232101258777
0.6810809232646506
Epoch 51/134, Loss: 0.11055684550887063
0.6894074130676044
Epoch 61/134, Loss: 0.09730123373724166
0.7028862362833905
Epoch 71/134, Loss: 0.08673327025913057
0.7116061652191791
Epoch 81/134, Loss: 0.07810095733120329
0.7254991599424705
Epoch 91/134, Loss: 0.07074680143878573
0.7276305812437777
Epoch 101/134, Loss: 0.06514981087474596
0.732121377250042
Epoch 111/134, Loss: 0.06143360159226826
0.7314789364061047
Epoch 121/134, Loss: 0.058022581395648774
0.7349990251504716
Epoch 131/134, Loss: 0.05512559520346778
0.7358324412668162


[I 2024-01-23 01:32:55,076] Trial 44 finished with value: 0.7358324412668162 and parameters: {'hidden_dim_h': 35, 'dropout': 0.2965217739731757, 'batch_size': 1154, 'n_epochs': 134}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/126, Loss: 0.29603637382388115
0.5436892337076444
Epoch 11/126, Loss: 0.22395853574077287
0.6163379691555753
Epoch 21/126, Loss: 0.18660375910500684
0.6395495198480516
Epoch 31/126, Loss: 0.15302328330775103
0.6780475055577655
Epoch 41/126, Loss: 0.12719230881581703
0.6976969118828843
Epoch 51/126, Loss: 0.10472503987451394
0.7089380484142365
Epoch 61/126, Loss: 0.08754003730913003
0.7286601713846247
Epoch 71/126, Loss: 0.07413145744552214
0.7329603822962056
Epoch 81/126, Loss: 0.06517113822822769
0.7423993135412259
Epoch 91/126, Loss: 0.05873119365423918
0.7422260051801812
Epoch 101/126, Loss: 0.053663557240118585
0.7446407536237084
Epoch 111/126, Loss: 0.05063078940535585
0.7481873423147095
Epoch 121/126, Loss: 0.04804668854922056
0.7519756484254984


[I 2024-01-23 01:33:38,516] Trial 45 finished with value: 0.7519756484254984 and parameters: {'hidden_dim_h': 20, 'dropout': 0.06704065856314746, 'batch_size': 1029, 'n_epochs': 126}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/140, Loss: 0.24110736542691788
0.557340985211774
Epoch 11/140, Loss: 0.13821310782805085
0.6325755061665739
Epoch 21/140, Loss: 0.11646402316788833
0.6576362888199768
Epoch 31/140, Loss: 0.09791271698971589
0.6766008093409592
Epoch 41/140, Loss: 0.08373068242023389
0.7034594826417095
Epoch 51/140, Loss: 0.07372851972468197
0.7170715900118032
Epoch 61/140, Loss: 0.06809846979255478
0.7226458310627276
Epoch 71/140, Loss: 0.06231924061042567
0.725556599953133
Epoch 81/140, Loss: 0.05823492289831241
0.7182325159166855
Epoch 91/140, Loss: 0.055032886719952025
0.7387239042713123
Epoch 101/140, Loss: 0.05244524908872942
0.7435593807115319
Epoch 111/140, Loss: 0.050141648932670556
0.7384835693866613
Epoch 121/140, Loss: 0.04741445082860688
0.7445700686128173
Epoch 131/140, Loss: 0.04634879754545788
0.7376417959268414


[I 2024-01-23 01:34:31,042] Trial 46 finished with value: 0.7376417959268414 and parameters: {'hidden_dim_h': 24, 'dropout': 0.10797905157130361, 'batch_size': 506, 'n_epochs': 140}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/107, Loss: 0.3543247973918915
0.5280800444601298
Epoch 11/107, Loss: 0.25083050310611726
0.6054702888532585
Epoch 21/107, Loss: 0.21511951506137847
0.6365942709422764
Epoch 31/107, Loss: 0.18207593262195587
0.6604104137809217
Epoch 41/107, Loss: 0.15262195885181426
0.6838309383594532
Epoch 51/107, Loss: 0.12718481302261353
0.6984504365053784
Epoch 61/107, Loss: 0.10639601439237595
0.7100732444983321
Epoch 71/107, Loss: 0.09075992047786713
0.7217002408895113
Epoch 81/107, Loss: 0.07897465169429779
0.7283397813704472
Epoch 91/107, Loss: 0.06993775516748428
0.7315414994555479
Epoch 101/107, Loss: 0.06399296954274178
0.7330024388659235


[I 2024-01-23 01:35:04,655] Trial 47 finished with value: 0.7330024388659235 and parameters: {'hidden_dim_h': 13, 'dropout': 0.06904301303226892, 'batch_size': 968, 'n_epochs': 107}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/160, Loss: 0.28480370535004523
0.5633739273509871
Epoch 11/160, Loss: 0.1767958657395455
0.6399861858118965
Epoch 21/160, Loss: 0.16125965454886038
0.6347774709295663
Epoch 31/160, Loss: 0.13881055049357877
0.6556570406033936
Epoch 41/160, Loss: 0.12228727821380861
0.6646133574820419
Epoch 51/160, Loss: 0.10663482114191979
0.6745535116194575
Epoch 61/160, Loss: 0.09118748792717533
0.7111661413474072
Epoch 71/160, Loss: 0.0797032723503728
0.7125308601189224
Epoch 81/160, Loss: 0.07421278472869627
0.7211877408475984
Epoch 91/160, Loss: 0.06661479763926999
0.7286120147907649
Epoch 101/160, Loss: 0.05943719165459756
0.7367766698558245
Epoch 111/160, Loss: 0.05673445316572343
0.7450731676931145
Epoch 121/160, Loss: 0.0529357228548296
0.7433233606390328
Epoch 131/160, Loss: 0.04993423507098229
0.7427686178070261
Epoch 141/160, Loss: 0.04821358068335441
0.7401429833058498
Epoch 151/160, Loss: 0.04638157869058271
0.7381904012434213


[I 2024-01-23 01:36:16,388] Trial 48 finished with value: 0.7381904012434213 and parameters: {'hidden_dim_h': 43, 'dropout': 0.09128053614191291, 'batch_size': 791, 'n_epochs': 160}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/70, Loss: 2.5745693274906705
0.5163445474455756
Epoch 11/70, Loss: 1.7822664294924055
0.5308197593072516
Epoch 21/70, Loss: 1.1687650510243006
0.5555261612567081
Epoch 31/70, Loss: 0.8348851118768964
0.05977209289960636
Epoch 41/70, Loss: 0.5509459504059383
0.07972757123479748
Epoch 51/70, Loss: 0.3423471096016112
0.5469863228866547
Epoch 61/70, Loss: 0.25349033446539015
0.5858741884093688


[I 2024-01-23 01:36:42,287] Trial 49 finished with value: 0.5858741884093688 and parameters: {'hidden_dim_h': 30, 'dropout': 0.06454139097616854, 'batch_size': 1193, 'n_epochs': 70}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/36, Loss: 0.5316709224824552
0.1559778096147184
Epoch 11/36, Loss: 0.2948015144577733
0.03186646257197305
Epoch 21/36, Loss: 0.2607952599172239
0.02599616857486262
Epoch 31/36, Loss: 0.2575795479394771
0.025390225635725995


[I 2024-01-23 01:36:56,976] Trial 50 finished with value: 0.025390225635725995 and parameters: {'hidden_dim_h': 37, 'dropout': 0.2978688051236658, 'batch_size': 911, 'n_epochs': 36}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/231, Loss: 0.9023631648583845
0.5113761811984421
Epoch 11/231, Loss: 0.4763611704111099
0.6120718634266902
Epoch 21/231, Loss: 0.2947706092487682
0.6415949979810557
Epoch 31/231, Loss: 0.2254932001233101
0.6559801975380047
Epoch 41/231, Loss: 0.20078721574761652
0.678578619900978
Epoch 51/231, Loss: 0.18743126636201685
0.6885534734230253
Epoch 61/231, Loss: 0.17379196055910803
0.697636796330853
Epoch 71/231, Loss: 0.16086372394453397
0.7195555869255921
Epoch 81/231, Loss: 0.1470428698442199
0.7252637730420709
Epoch 91/231, Loss: 0.1333747770298611
0.7221287291872908
Epoch 101/231, Loss: 0.12052516469901259
0.726769855818247
Epoch 111/231, Loss: 0.10771007598801093
0.7324945380895892
Epoch 121/231, Loss: 0.09568748521533879
0.7416434883913012
Epoch 131/231, Loss: 0.085242705588991
0.7333875797125282
Epoch 141/231, Loss: 0.07510802149772644
0.7360058473223592
Epoch 151/231, Loss: 0.06687937033447353
0.7406169814007518
Epoch 161/231, Loss: 0.

[I 2024-01-23 01:38:19,224] Trial 51 finished with value: 0.7395794334880447 and parameters: {'hidden_dim_h': 27, 'dropout': 0.08369599392689472, 'batch_size': 1105, 'n_epochs': 231}. Best is trial 32 with value: 0.7604109329548373.


Epoch 231/231, Loss: 0.0340551363134926
0.7395794334880447
Build model with 2 layers of attention
Epoch 1/161, Loss: 0.3179013923458431
0.5416200563191917
Epoch 11/161, Loss: 0.21856083364590354
0.6217278960749617
Epoch 21/161, Loss: 0.18911581454069717
0.644848018096628
Epoch 31/161, Loss: 0.16318315721076468
0.6712272533734346
Epoch 41/161, Loss: 0.13878827574460403
0.6829030440335396
Epoch 51/161, Loss: 0.11865545906450438
0.6843927251043291
Epoch 61/161, Loss: 0.09970223158597946
0.7203271863471953
Epoch 71/161, Loss: 0.08591246831676234
0.7291710980660886
Epoch 81/161, Loss: 0.07458773449711177
0.7375086608097762
Epoch 91/161, Loss: 0.0667829085951266
0.7380577637914694
Epoch 101/161, Loss: 0.060090975107058235
0.7448551833016706
Epoch 111/161, Loss: 0.055442514140968735
0.7475482638586628
Epoch 121/161, Loss: 0.05141771924884423
0.7512475839835473
Epoch 131/161, Loss: 0.048859145330346146
0.7488557753611897
Epoch 141/161, Loss: 0.04665878577076871
0.7536221789881531
Epoch 151/161

[I 2024-01-23 01:39:13,924] Trial 52 finished with value: 0.7491334662844429 and parameters: {'hidden_dim_h': 23, 'dropout': 0.11906863572743263, 'batch_size': 1064, 'n_epochs': 161}. Best is trial 32 with value: 0.7604109329548373.


Epoch 161/161, Loss: 0.04345656720840413
0.7491334662844429
Build model with 2 layers of attention
Epoch 1/210, Loss: 0.16322064585983753
0.5535946620211118
Epoch 11/210, Loss: 0.11679977644234896
0.6449989571173321
Epoch 21/210, Loss: 0.10005366771171491
0.6687629967840664
Epoch 31/210, Loss: 0.08879724144935608
0.6809465636442847
Epoch 41/210, Loss: 0.0798016581684351
0.7070163710966089
Epoch 51/210, Loss: 0.07023570810755093
0.7161865410512076
Epoch 61/210, Loss: 0.06541433138772845
0.7238779594533901
Epoch 71/210, Loss: 0.060262137868752085
0.734656474162718
Epoch 81/210, Loss: 0.05700290839498242
0.7370284678746108
Epoch 91/210, Loss: 0.05340546788647771
0.7406217906590666
Epoch 101/210, Loss: 0.050915385130792856
0.743877152138719
Epoch 111/210, Loss: 0.049497634482880436
0.7434528731974194
Epoch 121/210, Loss: 0.04728574181596438
0.7494876283806968
Epoch 131/210, Loss: 0.045325128361582756
0.7475887863864704
Epoch 141/210, Loss: 0.04402459102372328
0.7501866962133257
Epoch 151/2

[I 2024-01-23 01:40:28,581] Trial 53 finished with value: 0.7512585952361749 and parameters: {'hidden_dim_h': 26, 'dropout': 0.12724166748126825, 'batch_size': 1001, 'n_epochs': 210}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/184, Loss: 0.35671396482558476
0.5473858919212857
Epoch 11/184, Loss: 0.23908027935595738
0.6285656293609871
Epoch 21/184, Loss: 0.2105721646831149
0.6500840018405447
Epoch 31/184, Loss: 0.18549029954842158
0.671045952464673
Epoch 41/184, Loss: 0.16045450951371873
0.6794696047397749
Epoch 51/184, Loss: 0.13890402089981807
0.705762866199175
Epoch 61/184, Loss: 0.1170932228366534
0.7128841867115496
Epoch 71/184, Loss: 0.10030745040802728
0.7192356648834534
Epoch 81/184, Loss: 0.08520289865278062
0.7291472694975282
Epoch 91/184, Loss: 0.07322271842332113
0.7367533522278019
Epoch 101/184, Loss: 0.06422628178482964
0.7344269499678292
Epoch 111/184, Loss: 0.056401398919877554
0.743175888531623
Epoch 121/184, Loss: 0.050629141075270515
0.7427451820778587
Epoch 131/184, Loss: 0.046433521523362116
0.7433170230896913
Epoch 141/184, Loss: 0.0428322899554457
0.7477133339975401
Epoch 151/184, Loss: 0.039409207978418896
0.7489796008230754
Epoch 161/184,

[I 2024-01-23 01:41:34,757] Trial 54 finished with value: 0.7415890716170266 and parameters: {'hidden_dim_h': 29, 'dropout': 0.09588527701875137, 'batch_size': 1155, 'n_epochs': 184}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/238, Loss: 0.23009788277356522
0.5420637895141925
Epoch 11/238, Loss: 0.17433423840481302
0.6330211664314781
Epoch 21/238, Loss: 0.14376133291617685
0.6531500313134622
Epoch 31/238, Loss: 0.12088575052178424
0.6639509960660492
Epoch 41/238, Loss: 0.10416600108146667
0.6703971550982301
Epoch 51/238, Loss: 0.0915906565344852
0.6927528488818545
Epoch 61/238, Loss: 0.08027520160312238
0.7057734077519432
Epoch 71/238, Loss: 0.07185212069231531
0.7164980934629092
Epoch 81/238, Loss: 0.06552153096898743
0.7305303539710531
Epoch 91/238, Loss: 0.05999508969809698
0.7357653212905685
Epoch 101/238, Loss: 0.05610506738657537
0.7415468351976325
Epoch 111/238, Loss: 0.05439360083445259
0.7396725118357158
Epoch 121/238, Loss: 0.05126803870434347
0.7457458840548982
Epoch 131/238, Loss: 0.04983502010936323
0.7375614586067653
Epoch 141/238, Loss: 0.04747869122935378
0.7500408204695852
Epoch 151/238, Loss: 0.04588407731574515
0.7456871367924268
Epoch 161/238

[I 2024-01-23 01:43:11,051] Trial 55 finished with value: 0.749180997544585 and parameters: {'hidden_dim_h': 33, 'dropout': 0.18996632412922765, 'batch_size': 1041, 'n_epochs': 238}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/273, Loss: 0.2539359358224002
0.5487147558147761
Epoch 11/273, Loss: 0.12585597654635255
0.6284412361266402
Epoch 21/273, Loss: 0.11061048304492777
0.6522008404671409
Epoch 31/273, Loss: 0.10366282476620241
0.6762713063340855
Epoch 41/273, Loss: 0.09577093713662842
0.6931708544623406
Epoch 51/273, Loss: 0.08639542453668335
0.7088155057279011
Epoch 61/273, Loss: 0.07832801071080295
0.7266918502312459
Epoch 71/273, Loss: 0.07134699313478036
0.7363864665709885
Epoch 81/273, Loss: 0.06409008360721848
0.7464749998295377
Epoch 91/273, Loss: 0.058791952546347274
0.741236080644912
Epoch 101/273, Loss: 0.05420866388488899
0.7521751102763561
Epoch 111/273, Loss: 0.04945791377262636
0.7553706813605056
Epoch 121/273, Loss: 0.04725228588689457
0.754223472871902
Epoch 131/273, Loss: 0.04440335285934535
0.7539400192438886
Epoch 141/273, Loss: 0.04179803468286991
0.7622232124480804
Epoch 151/273, Loss: 0.040682816369967026
0.7581784847934883
Epoch 161/273

[I 2024-01-23 01:44:50,609] Trial 56 finished with value: 0.7476453777584022 and parameters: {'hidden_dim_h': 31, 'dropout': 0.10797928934773243, 'batch_size': 1104, 'n_epochs': 273}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/224, Loss: 0.2299227909399913
0.549252318899982
Epoch 11/224, Loss: 0.17366120849664396
0.6130526885816938
Epoch 21/224, Loss: 0.14057917061906594
0.6452119253186617
Epoch 31/224, Loss: 0.11685290818031017
0.6691152590091102
Epoch 41/224, Loss: 0.09985046575848873
0.68934908675835
Epoch 51/224, Loss: 0.08892290036265667
0.6990904998433621
Epoch 61/224, Loss: 0.07678987744909066
0.7160236122783536
Epoch 71/224, Loss: 0.07010679348157002
0.7225264638824248
Epoch 81/224, Loss: 0.06432040365269551
0.7318069798220105
Epoch 91/224, Loss: 0.05914538568602158
0.7362648825639663
Epoch 101/224, Loss: 0.056159645032424196
0.736876648598726
Epoch 111/224, Loss: 0.053773317486047745
0.7468082121594212
Epoch 121/224, Loss: 0.05234003238953077
0.7430860242605493
Epoch 131/224, Loss: 0.05002647122511497
0.7454589815857338
Epoch 141/224, Loss: 0.04788391406719501
0.7444151923512418
Epoch 151/224, Loss: 0.04635134124411987
0.7486204562995429
Epoch 161/224, 

[I 2024-01-23 01:46:10,278] Trial 57 finished with value: 0.7443029602042819 and parameters: {'hidden_dim_h': 25, 'dropout': 0.14330815230881513, 'batch_size': 950, 'n_epochs': 224}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/118, Loss: 0.16831046223640442
0.5541580462098508
Epoch 11/118, Loss: 0.12205620586872101
0.6339197167537048
Epoch 21/118, Loss: 0.10431171745061875
0.6591037670770912
Epoch 31/118, Loss: 0.0921984589099884
0.6799282484419209
Epoch 41/118, Loss: 0.08172227278351783
0.6943074764111065
Epoch 51/118, Loss: 0.07613800197839737
0.7021094180014564
Epoch 61/118, Loss: 0.07038328319787979
0.7180248631246295
Epoch 71/118, Loss: 0.06591599091887473
0.72848323213344
Epoch 81/118, Loss: 0.06120778068900108
0.7331615596938201
Epoch 91/118, Loss: 0.058136672377586365
0.7425799169992994
Epoch 101/118, Loss: 0.055613164007663723
0.7414148449115516
Epoch 111/118, Loss: 0.05194980904459953
0.7420652638193358


[I 2024-01-23 01:46:54,032] Trial 58 finished with value: 0.7420652638193358 and parameters: {'hidden_dim_h': 28, 'dropout': 0.12490906607894112, 'batch_size': 990, 'n_epochs': 118}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/248, Loss: 0.6329545024782419
0.5396593167685204
Epoch 11/248, Loss: 0.2505061710253358
0.6322671840774405
Epoch 21/248, Loss: 0.1798120434395969
0.6561069504955199
Epoch 31/248, Loss: 0.1591376792639494
0.67822263073648
Epoch 41/248, Loss: 0.14295367384329438
0.6921320114145222
Epoch 51/248, Loss: 0.12607542192563415
0.7121358219578577
Epoch 61/248, Loss: 0.10917498101480305
0.7224201151017264
Epoch 71/248, Loss: 0.0935407904908061
0.7212569202328436
Epoch 81/248, Loss: 0.07966901129111648
0.7375781801306074
Epoch 91/248, Loss: 0.06873347889631987
0.7399235850264937
Epoch 101/248, Loss: 0.05867626261897385
0.7379722287114485
Epoch 111/248, Loss: 0.05158847139682621
0.7412517858792826
Epoch 121/248, Loss: 0.04594400676432997
0.7417894428228129
Epoch 131/248, Loss: 0.04222322814166546
0.7425263028547413
Epoch 141/248, Loss: 0.0386921635363251
0.7487490898221835
Epoch 151/248, Loss: 0.03672223194735125
0.7499178920706725
Epoch 161/248, Loss:

[I 2024-01-23 01:48:32,950] Trial 59 finished with value: 0.7491530852141516 and parameters: {'hidden_dim_h': 34, 'dropout': 0.09816365761995503, 'batch_size': 747, 'n_epochs': 248}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/198, Loss: 0.8495090135506221
0.5554826165307551
Epoch 11/198, Loss: 0.4486952828509467
0.01554796537632141
Epoch 21/198, Loss: 0.2881425033722605
0.027374184350604437
Epoch 31/198, Loss: 0.26190614700317383
0.0073196177259360765
Epoch 41/198, Loss: 0.2598477910671915
0.009789641287882856
Epoch 51/198, Loss: 0.2582993448844978
0.03595507218956254
Epoch 61/198, Loss: 0.2588298214333398
0.024666704671527023
Epoch 71/198, Loss: 0.2561614816742284
0.058093809394900164
Epoch 81/198, Loss: 0.2563883789948055
0.04951089571743983
Epoch 91/198, Loss: 0.25427717555846485
0.0862894118000572
Epoch 101/198, Loss: 0.25482916831970215
0.09187233750092158
Epoch 111/198, Loss: 0.25507803154843195
0.05947061832532725
Epoch 121/198, Loss: 0.2564723454415798
0.05489613027875846
Epoch 131/198, Loss: 0.2553561750267233
0.07139151974158638
Epoch 141/198, Loss: 0.25878523343375753
0.027733908343188767
Epoch 151/198, Loss: 0.25595630758575033
0.039151898464943734


[I 2024-01-23 01:49:46,107] Trial 60 finished with value: 0.034735593689076756 and parameters: {'hidden_dim_h': 31, 'dropout': 0.16551341486165358, 'batch_size': 870, 'n_epochs': 198}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/254, Loss: 0.4746595096588135
0.5407205558432386
Epoch 11/254, Loss: 0.2902617907524109
0.6179794504965548
Epoch 21/254, Loss: 0.25017285645008086
0.6281708872730788
Epoch 31/254, Loss: 0.22147699058055878
0.6475496684874573
Epoch 41/254, Loss: 0.1939543092250824
0.6574585516016236
Epoch 51/254, Loss: 0.16833807706832885
0.654849195872659
Epoch 61/254, Loss: 0.144273122549057
0.6932693504510904
Epoch 71/254, Loss: 0.12405912131071091
0.689970394350755
Epoch 81/254, Loss: 0.1068873631954193
0.7157444559209328
Epoch 91/254, Loss: 0.09138772904872894
0.7297224215810878
Epoch 101/254, Loss: 0.07893377929925918
0.7300697412863457
Epoch 111/254, Loss: 0.06904020860791206
0.7376188446054073
Epoch 121/254, Loss: 0.06175481736660004
0.742566215549629
Epoch 131/254, Loss: 0.055843540281057355
0.7485152887372925
Epoch 141/254, Loss: 0.050874262154102325
0.7471200654456636
Epoch 151/254, Loss: 0.04765703305602074
0.7507710614876666
Epoch 161/254, Loss

[I 2024-01-23 01:51:19,488] Trial 61 finished with value: 0.7537198325045905 and parameters: {'hidden_dim_h': 29, 'dropout': 0.1366736006229373, 'batch_size': 976, 'n_epochs': 254}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/257, Loss: 1.450414838998214
0.5377849790147432
Epoch 11/257, Loss: 0.8604275765626327
0.592687853755403
Epoch 21/257, Loss: 0.5428275828776152
0.6270508648552514
Epoch 31/257, Loss: 0.3873950787212538
0.017901762816133777
Epoch 41/257, Loss: 0.2844315106454103
0.26473833146762343
Epoch 51/257, Loss: 0.21983454538428265
0.6271474483997128
Epoch 61/257, Loss: 0.19880591008974158
0.6449806607148721
Epoch 71/257, Loss: 0.18741116640360458
0.6652724748280036
Epoch 81/257, Loss: 0.17709687222605167
0.6753265889307206
Epoch 91/257, Loss: 0.16661971094815628
0.6874858455622519
Epoch 101/257, Loss: 0.15552211973978125
0.692416404081537
Epoch 111/257, Loss: 0.14407990484134012
0.7057045876060519
Epoch 121/257, Loss: 0.1316334802819335
0.7100421485656785
Epoch 131/257, Loss: 0.11983990604462831
0.7188987320164242
Epoch 141/257, Loss: 0.10863751788502155
0.7187132690269958
Epoch 151/257, Loss: 0.09704678764809734
0.7287452057602392
Epoch 161/257, Los

[I 2024-01-23 01:52:51,412] Trial 62 finished with value: 0.7535106745261724 and parameters: {'hidden_dim_h': 26, 'dropout': 0.07102374666155259, 'batch_size': 1054, 'n_epochs': 257}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/229, Loss: 3.326090996915644
0.5002597907479914
Epoch 11/229, Loss: 2.3210090615532617
0.5390580051807339
Epoch 21/229, Loss: 1.6095650683749805
0.567865478662357
Epoch 31/229, Loss: 1.119376453486356
0.6023178277433392
Epoch 41/229, Loss: 0.7193570137023926
0.6030505670178482
Epoch 51/229, Loss: 0.45436526428569446
0.629946350860807
Epoch 61/229, Loss: 0.30085833777080884
0.6507047225305814
Epoch 71/229, Loss: 0.21830049563537945
0.6695644202082333
Epoch 81/229, Loss: 0.17670153216882187
0.6822905262390673
Epoch 91/229, Loss: 0.15562503852627493
0.6953633780512855
Epoch 101/229, Loss: 0.14499714564193378
0.7063826537192912
Epoch 111/229, Loss: 0.13747512583028187
0.7164812538090845
Epoch 121/229, Loss: 0.13224003531716086
0.7193689192424275
Epoch 131/229, Loss: 0.12572360546751457
0.7286941073825829
Epoch 141/229, Loss: 0.12056479467587038
0.725458154011062
Epoch 151/229, Loss: 0.11521377482197502
0.7343085044801348
Epoch 161/229, Loss: 0

[I 2024-01-23 01:54:13,443] Trial 63 finished with value: 0.7447012456676088 and parameters: {'hidden_dim_h': 28, 'dropout': 0.12730775584131868, 'batch_size': 1111, 'n_epochs': 229}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/275, Loss: 0.7936635590516604
0.5408939092936369
Epoch 11/275, Loss: 0.3616434198159438
0.634831332318616
Epoch 21/275, Loss: 0.205104533296365
0.6329981311723683
Epoch 31/275, Loss: 0.16040445291079009
0.684437944851024
Epoch 41/275, Loss: 0.14539472529521355
0.6995443478733782
Epoch 51/275, Loss: 0.13530622881192428
0.7213481289065116
Epoch 61/275, Loss: 0.12456806720449375
0.7316497105955992
Epoch 71/275, Loss: 0.11509111265723522
0.7280430470216908
Epoch 81/275, Loss: 0.10455688662253894
0.7379716884887821
Epoch 91/275, Loss: 0.09438960512097065
0.7400701979330288
Epoch 101/275, Loss: 0.08463003744299595
0.7408845806752575
Epoch 111/275, Loss: 0.0757510275221788
0.7418147672197757
Epoch 121/275, Loss: 0.0672259355107179
0.7438735672316128
Epoch 131/275, Loss: 0.0597739527718379
0.7492007880972115
Epoch 141/275, Loss: 0.0541510061862377
0.7484774810931341
Epoch 151/275, Loss: 0.04956747519855316
0.7448281933409957
Epoch 161/275, Loss: 0

[I 2024-01-23 01:55:47,855] Trial 64 finished with value: 0.743087908193273 and parameters: {'hidden_dim_h': 23, 'dropout': 0.07966714075089851, 'batch_size': 935, 'n_epochs': 275}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/291, Loss: 0.23348452079863774
0.5364346584003012
Epoch 11/291, Loss: 0.1654106094723656
0.6341476551295415
Epoch 21/291, Loss: 0.14153034062612624
0.6551806416299844
Epoch 31/291, Loss: 0.1218637391215279
0.6702695956146204
Epoch 41/291, Loss: 0.10556877581846147
0.6841901594165364
Epoch 51/291, Loss: 0.09329711965152196
0.7022342971932748
Epoch 61/291, Loss: 0.08238502627327329
0.7118140759016031
Epoch 71/291, Loss: 0.07341764547995158
0.7195978741001335
Epoch 81/291, Loss: 0.06523499886194865
0.7239203016953415
Epoch 91/291, Loss: 0.05807564336629141
0.7380949572716551
Epoch 101/291, Loss: 0.054220342210360935
0.7405128563211609
Epoch 111/291, Loss: 0.04846580699086189
0.7375903705192938
Epoch 121/291, Loss: 0.0459406611820062
0.7491819614367156
Epoch 131/291, Loss: 0.04232475569560414
0.7412423156422854
Epoch 141/291, Loss: 0.041047402790614536
0.7461969907643816
Epoch 151/291, Loss: 0.0388336657058625
0.7443665081630391
Epoch 161/291,

[I 2024-01-23 01:57:35,205] Trial 65 finished with value: 0.7441498703171243 and parameters: {'hidden_dim_h': 30, 'dropout': 0.0600576287800437, 'batch_size': 1162, 'n_epochs': 291}. Best is trial 32 with value: 0.7604109329548373.


Epoch 291/291, Loss: 0.026174977953944887
0.7441498703171243
Build model with 2 layers of attention
Epoch 1/175, Loss: 0.4781518890099092
0.5082562361942919
Epoch 11/175, Loss: 0.23290874063968658
0.62046798079074
Epoch 21/175, Loss: 0.16577397896484894
0.6457521942667122
Epoch 31/175, Loss: 0.1493261761286042
0.6683575889405592
Epoch 41/175, Loss: 0.1429306207732721
0.6656900361977205
Epoch 51/175, Loss: 0.13720056143673984
0.6619495036032034
Epoch 61/175, Loss: 0.12121681327169592
0.7119676934463899
Epoch 71/175, Loss: 0.11156622083349661
0.7156591853784363
Epoch 81/175, Loss: 0.10110401700843465
0.7223891757589713
Epoch 91/175, Loss: 0.0921836867928505
0.7374696150889941
Epoch 101/175, Loss: 0.08307181840593164
0.7417550329173196
Epoch 111/175, Loss: 0.0739137360995466
0.745398359463732
Epoch 121/175, Loss: 0.06747754104435444
0.7474640004400347
Epoch 131/175, Loss: 0.06015281684019349
0.7501232418979462
Epoch 141/175, Loss: 0.054681559685956345
0.7525079170820537
Epoch 151/175, Los

[I 2024-01-23 01:58:41,495] Trial 66 finished with value: 0.7487753784984207 and parameters: {'hidden_dim_h': 32, 'dropout': 0.1020929599546858, 'batch_size': 1085, 'n_epochs': 175}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/207, Loss: 0.23677888450523218
0.5500116993779844
Epoch 11/207, Loss: 0.19492356044550738
0.6194795035145247
Epoch 21/207, Loss: 0.16458267035583654
0.6453661201566893
Epoch 31/207, Loss: 0.14033863320946693
0.6620008334754954
Epoch 41/207, Loss: 0.12049793762465318
0.6823171024434762
Epoch 51/207, Loss: 0.10391454305499792
0.6953983174721607
Epoch 61/207, Loss: 0.09035419238110383
0.7147284902599592
Epoch 71/207, Loss: 0.07915806428839763
0.7295889967742045
Epoch 81/207, Loss: 0.0709724926079313
0.7255758386103713
Epoch 91/207, Loss: 0.06419279193505645
0.7341593640988383
Epoch 101/207, Loss: 0.059396617424984775
0.7408492639683667
Epoch 111/207, Loss: 0.055923289308945336
0.7448002600431355
Epoch 121/207, Loss: 0.053360863123089075
0.7434289000223966
Epoch 131/207, Loss: 0.050042783531049885
0.7481566980861956
Epoch 141/207, Loss: 0.04977149718130628
0.746544807525346
Epoch 151/207, Loss: 0.047153535299003124
0.7479008573597283
Epoch 161

[I 2024-01-23 01:59:54,734] Trial 67 finished with value: 0.7476872036811315 and parameters: {'hidden_dim_h': 26, 'dropout': 0.17191392545626882, 'batch_size': 1011, 'n_epochs': 207}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/238, Loss: 0.3261803245102918
0.5426250012403069
Epoch 11/238, Loss: 0.18852387423868533
0.6119123356686952
Epoch 21/238, Loss: 0.16531035635206434
0.6450720065959817
Epoch 31/238, Loss: 0.1426329204329738
0.6669446371046988
Epoch 41/238, Loss: 0.1214533519965631
0.6787360179348443
Epoch 51/238, Loss: 0.10288292279949894
0.7013607521337873
Epoch 61/238, Loss: 0.08866736430812765
0.7150147323510267
Epoch 71/238, Loss: 0.07811032704733036
0.7259526685611488
Epoch 81/238, Loss: 0.06860481368170844
0.7145095717947187
Epoch 91/238, Loss: 0.06220667547097913
0.7383263123719241
Epoch 101/238, Loss: 0.05771450077493986
0.7315964960874666
Epoch 111/238, Loss: 0.05478283887108167
0.7402732736362582
Epoch 121/238, Loss: 0.051970617638693914
0.7456321304222974
Epoch 131/238, Loss: 0.05078405790306904
0.7328907083825524
Epoch 141/238, Loss: 0.04902167093974573
0.7404993547580299
Epoch 151/238, Loss: 0.04765111621883181
0.7467380539922881
Epoch 161/238,

[I 2024-01-23 02:01:13,328] Trial 68 finished with value: 0.7495260885064284 and parameters: {'hidden_dim_h': 18, 'dropout': 0.12073700303801906, 'batch_size': 888, 'n_epochs': 238}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/265, Loss: 0.9210060865451128
0.5056717391066544
Epoch 11/265, Loss: 0.3346756230562161
0.6102035679332386
Epoch 21/265, Loss: 0.21328810583322477
0.6361577130338592
Epoch 31/265, Loss: 0.1879048901490676
0.6528271419154922
Epoch 41/265, Loss: 0.1685273269048104
0.6727503785893657
Epoch 51/265, Loss: 0.14839488955644461
0.6859824122198012
Epoch 61/265, Loss: 0.12818850557773542
0.7020039006914809
Epoch 71/265, Loss: 0.11004780385738765
0.7138536349877558
Epoch 81/265, Loss: 0.09362308337138249
0.7191524213398788
Epoch 91/265, Loss: 0.08037600780908878
0.7296856903510672
Epoch 101/265, Loss: 0.06969136410416701
0.7367189859903491
Epoch 111/265, Loss: 0.06252184615303309
0.7400747145037763
Epoch 121/265, Loss: 0.0574612513375588
0.736734597579424
Epoch 131/265, Loss: 0.05339499801779405
0.743680653203827
Epoch 141/265, Loss: 0.05084560754207464
0.7387069439824581
Epoch 151/265, Loss: 0.048889700132302746
0.7501000240883444
Epoch 161/265, Los

[I 2024-01-23 02:02:45,904] Trial 69 finished with value: 0.7526649551052919 and parameters: {'hidden_dim_h': 20, 'dropout': 0.15364414419068406, 'batch_size': 613, 'n_epochs': 265}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/213, Loss: 1.0235647439956665
0.5022996531638682
Epoch 11/213, Loss: 0.4775096809864044
0.597654425403365
Epoch 21/213, Loss: 0.23244400978088378
0.6375053681545731
Epoch 31/213, Loss: 0.13960537195205688
0.6698204403040426
Epoch 41/213, Loss: 0.10990412950515747
0.6897752614276698
Epoch 51/213, Loss: 0.09840743511915206
0.7012371168789496
Epoch 61/213, Loss: 0.09238734722137451
0.7143136244578224
Epoch 71/213, Loss: 0.08710359156131744
0.7233055336332609
Epoch 81/213, Loss: 0.08213483422994614
0.7307446159866984
Epoch 91/213, Loss: 0.07697439432144165
0.738492781013592
Epoch 101/213, Loss: 0.07234297156333923
0.7371490340461158
Epoch 111/213, Loss: 0.06822957307100296
0.7429334503064448
Epoch 121/213, Loss: 0.06362569540739059
0.7423989037129873
Epoch 131/213, Loss: 0.05869117945432663
0.7429571436702753
Epoch 141/213, Loss: 0.05581661030650139
0.7467714552933276
Epoch 151/213, Loss: 0.052530849874019625
0.7467278642248356
Epoch 161/213, 

[I 2024-01-23 02:03:59,044] Trial 70 finished with value: 0.7499080392121692 and parameters: {'hidden_dim_h': 23, 'dropout': 0.11076222069007642, 'batch_size': 959, 'n_epochs': 213}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/192, Loss: 0.4981158145449378
0.5698212779582914
Epoch 11/192, Loss: 0.1475188254632733
0.6459959487601071
Epoch 21/192, Loss: 0.12007874792272394
0.6702300148474959
Epoch 31/192, Loss: 0.11024981804869392
0.6904304321433267
Epoch 41/192, Loss: 0.09882416643879631
0.6959707049271602
Epoch 51/192, Loss: 0.08796924843706867
0.709357700676001
Epoch 61/192, Loss: 0.08039895106445659
0.7157685343016805
Epoch 71/192, Loss: 0.0722972834482789
0.713914287927919
Epoch 81/192, Loss: 0.06718788998709484
0.7300404674851864
Epoch 91/192, Loss: 0.06086537436666814
0.7335299493795739
Epoch 101/192, Loss: 0.05773391312157566
0.7286815560282153
Epoch 111/192, Loss: 0.05737144686281681
0.7285836148819915
Epoch 121/192, Loss: 0.05336393051865426
0.7424819842387835
Epoch 131/192, Loss: 0.05074587142602964
0.7335589270908418
Epoch 141/192, Loss: 0.048791624351658604
0.7430327012190866
Epoch 151/192, Loss: 0.046049868603321636
0.7476615846398103
Epoch 161/192, 

[I 2024-01-23 02:05:33,213] Trial 71 finished with value: 0.7509567709690977 and parameters: {'hidden_dim_h': 50, 'dropout': 0.13963172486506753, 'batch_size': 549, 'n_epochs': 192}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/219, Loss: 0.33144234674317496
0.4940098446885514
Epoch 11/219, Loss: 0.18649037735802787
0.6143533809685942
Epoch 21/219, Loss: 0.16030465534755162
0.6535901371327342
Epoch 31/219, Loss: 0.13444838438715254
0.6719804129596096
Epoch 41/219, Loss: 0.11054318142788751
0.6874103126917841
Epoch 51/219, Loss: 0.09244598767587117
0.703068565958803
Epoch 61/219, Loss: 0.08017793319055012
0.7193511918419355
Epoch 71/219, Loss: 0.0691133802490575
0.7249387858455527
Epoch 81/219, Loss: 0.06159839353391102
0.7347019990055514
Epoch 91/219, Loss: 0.056790640418018616
0.7341243013036177
Epoch 101/219, Loss: 0.05209395427789007
0.7338006900361311
Epoch 111/219, Loss: 0.04980521031788417
0.7473813419389691
Epoch 121/219, Loss: 0.047557450724499566
0.7416114179581418
Epoch 131/219, Loss: 0.04498766181724412
0.7479503199151795
Epoch 141/219, Loss: 0.04426484863672938
0.7462069747875506
Epoch 151/219, Loss: 0.04284581041761807
0.7401586742117584
Epoch 161/21

[I 2024-01-23 02:06:50,836] Trial 72 finished with value: 0.7457649077486455 and parameters: {'hidden_dim_h': 24, 'dropout': 0.08873216794835664, 'batch_size': 700, 'n_epochs': 219}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/201, Loss: 0.2602170805136363
0.5584050553617242
Epoch 11/201, Loss: 0.17820987988401343
0.6294079237343589
Epoch 21/201, Loss: 0.13596072034151466
0.6602459674046239
Epoch 31/201, Loss: 0.10783723051901217
0.6646487687175715
Epoch 41/201, Loss: 0.08667777644263373
0.7019409658110946
Epoch 51/201, Loss: 0.0746874247022249
0.7113902258291291
Epoch 61/201, Loss: 0.06658679315889324
0.7193129914471621
Epoch 71/201, Loss: 0.06199188344180584
0.7332592769026457
Epoch 81/201, Loss: 0.057556360546085567
0.7310700963159917
Epoch 91/201, Loss: 0.05452115964834337
0.7414171434635207
Epoch 101/201, Loss: 0.051998817810305846
0.7387864294609204
Epoch 111/201, Loss: 0.050585149338951814
0.7416473500198661
Epoch 121/201, Loss: 0.048771122125563796
0.7407451141955959
Epoch 131/201, Loss: 0.046414096667258826
0.7416316999668043
Epoch 141/201, Loss: 0.04615586478677061
0.7391299819424509
Epoch 151/201, Loss: 0.0435476131323311
0.7410057514635302
Epoch 161/

[I 2024-01-23 02:08:10,306] Trial 73 finished with value: 0.7460114838623507 and parameters: {'hidden_dim_h': 28, 'dropout': 0.13314098853668466, 'batch_size': 446, 'n_epochs': 201}. Best is trial 32 with value: 0.7604109329548373.


Epoch 201/201, Loss: 0.038236809963429416
0.7460114838623507
Build model with 2 layers of attention
Epoch 1/154, Loss: 0.9183864647691901
0.5365120945287211
Epoch 11/154, Loss: 0.4779527580196207
0.587661094276106
Epoch 21/154, Loss: 0.2740893174301494
0.6363440492323116
Epoch 31/154, Loss: 0.1911056156862866
0.6617196859769662
Epoch 41/154, Loss: 0.15963194316083734
0.6781259695703711
Epoch 51/154, Loss: 0.15077729319984262
0.6893790632476254
Epoch 61/154, Loss: 0.14230670509013263
0.704402164908068
Epoch 71/154, Loss: 0.13460668955336919
0.7131289894843527
Epoch 81/154, Loss: 0.12757364457303827
0.7170219356826883
Epoch 91/154, Loss: 0.11922054195945914
0.7234666119122841
Epoch 101/154, Loss: 0.11050875620408492
0.7288767454868037
Epoch 111/154, Loss: 0.10495205629955638
0.7333760223107254
Epoch 121/154, Loss: 0.09702947972850366
0.7357297169796839
Epoch 131/154, Loss: 0.0888909250497818
0.7389392141243347
Epoch 141/154, Loss: 0.08213963495059447
0.7417312704110098
Epoch 151/154, Los

[I 2024-01-23 02:09:01,092] Trial 74 finished with value: 0.7459246764791014 and parameters: {'hidden_dim_h': 21, 'dropout': 0.20610827100964096, 'batch_size': 1131, 'n_epochs': 154}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/137, Loss: 0.2603137385514047
0.5512622198173966
Epoch 11/137, Loss: 0.1866414157880677
0.6294093294268502
Epoch 21/137, Loss: 0.1416917819943693
0.6597870664316087
Epoch 31/137, Loss: 0.11367265838715765
0.6743810051923993
Epoch 41/137, Loss: 0.09269807880951299
0.6967130489896859
Epoch 51/137, Loss: 0.07842174710498916
0.7061829757771251
Epoch 61/137, Loss: 0.06896669914325078
0.7225564245726328
Epoch 71/137, Loss: 0.062338399390379585
0.7317647445457517
Epoch 81/137, Loss: 0.05745365149858925
0.7313142347533862
Epoch 91/137, Loss: 0.05336630685875813
0.7400192611989954
Epoch 101/137, Loss: 0.050142918816871114
0.7407381326081404
Epoch 111/137, Loss: 0.04704402635494868
0.7429840216700647
Epoch 121/137, Loss: 0.0444437259187301
0.7431049117656034
Epoch 131/137, Loss: 0.04369930881592962
0.7474664556135578


[I 2024-01-23 02:09:50,224] Trial 75 finished with value: 0.7474664556135578 and parameters: {'hidden_dim_h': 25, 'dropout': 0.0799286294882051, 'batch_size': 670, 'n_epochs': 137}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/225, Loss: 2.676973425108811
0.5082082025475904
Epoch 11/225, Loss: 1.6147161927716485
0.5646276179295402
Epoch 21/225, Loss: 0.9122251251648212
0.003044521612816829
Epoch 31/225, Loss: 0.5326851154195851
0.011688960774010673
Epoch 41/225, Loss: 0.3523972784650737
0.022082501993974636
Epoch 51/225, Loss: 0.2826698062748745
0.022875229571099387
Epoch 61/225, Loss: 0.2687844603226103
0.005684524345978572
Epoch 71/225, Loss: 0.26021808796915513
0.013383945091298087
Epoch 81/225, Loss: 0.25822532793571207
0.010511274630996438
Epoch 91/225, Loss: 0.2551517008707441
0.02735146811611616
Epoch 101/225, Loss: 0.2564668922588743
0.03379870663243401
Epoch 111/225, Loss: 0.2574695543996219
0.023896872855162617
Epoch 121/225, Loss: 0.25506499872125427
0.02772899316798195
Epoch 131/225, Loss: 0.25602160908024885
0.022633278062900497
Epoch 141/225, Loss: 0.25260459040773325
0.040889744924001534
Epoch 151/225, Loss: 0.2520891849336953
0.0467461733224892
E

[I 2024-01-23 02:11:11,542] Trial 76 finished with value: 0.01606760459449823 and parameters: {'hidden_dim_h': 27, 'dropout': 0.11406397726789771, 'batch_size': 840, 'n_epochs': 225}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/249, Loss: 0.16918531177859558
0.5538464718352681
Epoch 11/249, Loss: 0.12137979308241292
0.6413277455606509
Epoch 21/249, Loss: 0.10009675394547612
0.6739353862605577
Epoch 31/249, Loss: 0.08564946035805501
0.6882462370359507
Epoch 41/249, Loss: 0.07806412463909701
0.7038102596326803
Epoch 51/249, Loss: 0.06839963332994987
0.7213906035655865
Epoch 61/249, Loss: 0.061067338150582816
0.737714398186863
Epoch 71/249, Loss: 0.058262391408023084
0.7422299391000163
Epoch 81/249, Loss: 0.052083327287906094
0.7442907566439151
Epoch 91/249, Loss: 0.048779963662749844
0.7375102739942252
Epoch 101/249, Loss: 0.04594382486845318
0.7389586783615455
Epoch 111/249, Loss: 0.04378347392929228
0.742100020923618
Epoch 121/249, Loss: 0.041265195333643964
0.734065040586961
Epoch 131/249, Loss: 0.038480362903914954
0.7419243795285886
Epoch 141/249, Loss: 0.037927345127651564
0.7321274683336015
Epoch 151/249, Loss: 0.03674471554787535
0.7415546066520123
Epoch 16

[I 2024-01-23 02:12:45,159] Trial 77 finished with value: 0.7466739211096924 and parameters: {'hidden_dim_h': 30, 'dropout': 0.07397358250707556, 'batch_size': 641, 'n_epochs': 249}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/241, Loss: 0.8207170640428861
0.5466624756078703
Epoch 11/241, Loss: 0.4103748674194018
0.6210174161296428
Epoch 21/241, Loss: 0.25893567440410453
0.6400657768483885
Epoch 31/241, Loss: 0.2072531438122193
0.659354355353393
Epoch 41/241, Loss: 0.1910060861458381
0.6769602314814398
Epoch 51/241, Loss: 0.17723402318855128
0.6789418622881386
Epoch 61/241, Loss: 0.16323113876084486
0.6877767149520668
Epoch 71/241, Loss: 0.1496469626824061
0.702526917504867
Epoch 81/241, Loss: 0.1354560429851214
0.710735325726547
Epoch 91/241, Loss: 0.12165751215070486
0.7230643797519903
Epoch 101/241, Loss: 0.10760536914070447
0.7313752343250335
Epoch 111/241, Loss: 0.09839909616857767
0.7259725640223968
Epoch 121/241, Loss: 0.08396871636311214
0.7392322898928233
Epoch 131/241, Loss: 0.07469745011379321
0.7427940517986504
Epoch 141/241, Loss: 0.06589312897995114
0.7403917326327901
Epoch 151/241, Loss: 0.05992212394873301
0.7456171726735338
Epoch 161/241, Loss: 

[I 2024-01-23 02:14:17,454] Trial 78 finished with value: 0.7501739167506171 and parameters: {'hidden_dim_h': 32, 'dropout': 0.09855806366505837, 'batch_size': 1033, 'n_epochs': 241}. Best is trial 32 with value: 0.7604109329548373.


Epoch 241/241, Loss: 0.03205657253662745
0.7501739167506171
Build model with 2 layers of attention
Epoch 1/179, Loss: 0.2420782738405725
0.544580004681571
Epoch 11/179, Loss: 0.19189022999742758
0.6244510508457313
Epoch 21/179, Loss: 0.1673718677914661
0.6422848562662429
Epoch 31/179, Loss: 0.1440960045741952
0.6781113498125428
Epoch 41/179, Loss: 0.12427063286304474
0.6990626557433037
Epoch 51/179, Loss: 0.10586526277272598
0.7091580151420015
Epoch 61/179, Loss: 0.09104436797940213
0.7162501560745341
Epoch 71/179, Loss: 0.07923799396856972
0.7225162421863376
Epoch 81/179, Loss: 0.06891241507685703
0.7350546030752145
Epoch 91/179, Loss: 0.0620509068603101
0.7354811744273868
Epoch 101/179, Loss: 0.055609584502551865
0.7431013391804522
Epoch 111/179, Loss: 0.051892042322003326
0.7492431851413707
Epoch 121/179, Loss: 0.04765774099075276
0.7505914388952483
Epoch 131/179, Loss: 0.04692153098142665
0.7528907767465455
Epoch 141/179, Loss: 0.042902899015208946
0.7485093725977178
Epoch 151/179,

[I 2024-01-23 02:15:19,386] Trial 79 finished with value: 0.7545238160138744 and parameters: {'hidden_dim_h': 22, 'dropout': 0.06050524738532235, 'batch_size': 1079, 'n_epochs': 179}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/167, Loss: 0.2247278896116075
0.5380590592662039
Epoch 11/167, Loss: 0.17797701841308958
0.6218712921550221
Epoch 21/167, Loss: 0.1504415443965367
0.6412713357022181
Epoch 31/167, Loss: 0.13201235270216352
0.652321564710701
Epoch 41/167, Loss: 0.11649467547734578
0.6639181883142471
Epoch 51/167, Loss: 0.10275916719720476
0.6732400512244392
Epoch 61/167, Loss: 0.09297853530872435
0.6872164586882038
Epoch 71/167, Loss: 0.08386831482251485
0.7092904873396219
Epoch 81/167, Loss: 0.07656367052169073
0.7190569585389132
Epoch 91/167, Loss: 0.06895378764186587
0.7284814942469093
Epoch 101/167, Loss: 0.06509389515433993
0.7363759463078932
Epoch 111/167, Loss: 0.061041519932803656
0.7359214427691171
Epoch 121/167, Loss: 0.05873581660645349
0.7367233319496628
Epoch 131/167, Loss: 0.05810854034054847
0.7403210376599338
Epoch 141/167, Loss: 0.055402393851961405
0.7402053598068739
Epoch 151/167, Loss: 0.05430508422709647
0.7440963630635687
Epoch 161/167

[I 2024-01-23 02:16:19,042] Trial 80 finished with value: 0.74556599588219 and parameters: {'hidden_dim_h': 25, 'dropout': 0.15552294506821815, 'batch_size': 1189, 'n_epochs': 167}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/177, Loss: 0.2892904660918496
0.5463621548545464
Epoch 11/177, Loss: 0.22693911465731534
0.6221765162057216
Epoch 21/177, Loss: 0.18789389729499817
0.6457665211113037
Epoch 31/177, Loss: 0.15502769906412472
0.6699999692126292
Epoch 41/177, Loss: 0.1289060078561306
0.6952835212459147
Epoch 51/177, Loss: 0.1069132194600322
0.7112702801346228
Epoch 61/177, Loss: 0.09009359472177246
0.7147897099929988
Epoch 71/177, Loss: 0.07611910355362025
0.7274983027446896
Epoch 81/177, Loss: 0.0655883780934594
0.7352744277426179
Epoch 91/177, Loss: 0.05793999660421501
0.7304839863977739
Epoch 101/177, Loss: 0.053595745089379226
0.7340662008946554
Epoch 111/177, Loss: 0.0485599467700178
0.7455727878995053
Epoch 121/177, Loss: 0.0455933597616174
0.744986252285947
Epoch 131/177, Loss: 0.042834633283994415
0.7477162003697633
Epoch 141/177, Loss: 0.040898404168811714
0.7462340462747594
Epoch 151/177, Loss: 0.038977655497464264
0.746732699126838
Epoch 161/177, L

[I 2024-01-23 02:17:17,533] Trial 81 finished with value: 0.7466254443410335 and parameters: {'hidden_dim_h': 21, 'dropout': 0.052044194716749675, 'batch_size': 1120, 'n_epochs': 177}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/194, Loss: 1.3324077647665273
0.5253608893791962
Epoch 11/194, Loss: 0.7026762133059294
0.5933805815130544
Epoch 21/194, Loss: 0.3635805676812711
0.6327138174919561
Epoch 31/194, Loss: 0.19535578722539154
0.6721323463750148
Epoch 41/194, Loss: 0.1224441171988197
0.6936865396546605
Epoch 51/194, Loss: 0.09526527057523312
0.7018326455181825
Epoch 61/194, Loss: 0.08385228495235028
0.7193449708660474
Epoch 71/194, Loss: 0.07876173730777658
0.7136206671497165
Epoch 81/194, Loss: 0.07489930287651393
0.7295916254603266
Epoch 91/194, Loss: 0.07208857037450957
0.7278889034947932
Epoch 101/194, Loss: 0.06844730740008147
0.6870446951624959
Epoch 111/194, Loss: 0.06600773707032204
0.7304681038221656
Epoch 121/194, Loss: 0.06270334999198499
0.730409709668281
Epoch 131/194, Loss: 0.060003556797037956
0.7302697053392565
Epoch 141/194, Loss: 0.057310958757348686
0.7355914307954716
Epoch 151/194, Loss: 0.05435212280439294
0.7362579344250643
Epoch 161/194, 

[I 2024-01-23 02:18:21,543] Trial 82 finished with value: 0.7377076956451671 and parameters: {'hidden_dim_h': 17, 'dropout': 0.06075892631397169, 'batch_size': 1061, 'n_epochs': 194}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/145, Loss: 0.22659995374472244
0.545908211859593
Epoch 11/145, Loss: 0.16388883020566858
0.6281422906572052
Epoch 21/145, Loss: 0.138376881894858
0.6455001167815504
Epoch 31/145, Loss: 0.11618027317783107
0.6755823510021303
Epoch 41/145, Loss: 0.09960624316464299
0.6904029655004771
Epoch 51/145, Loss: 0.0848076563814412
0.7097857304439829
Epoch 61/145, Loss: 0.07420820840027022
0.727033483865169
Epoch 71/145, Loss: 0.06615372973939647
0.7251763120197444
Epoch 81/145, Loss: 0.06022988616124443
0.7351483641046574
Epoch 91/145, Loss: 0.05495535436531772
0.7417288370453397
Epoch 101/145, Loss: 0.053168275110099625
0.7437211228573747
Epoch 111/145, Loss: 0.04993250706921453
0.7437808680519584
Epoch 121/145, Loss: 0.048126003502503685
0.7475147711379859
Epoch 131/145, Loss: 0.04626761295873186
0.7531183072619975
Epoch 141/145, Loss: 0.044067400467136635
0.7474284753729441


[I 2024-01-23 02:19:10,216] Trial 83 finished with value: 0.7474284753729441 and parameters: {'hidden_dim_h': 22, 'dropout': 0.08667594018793681, 'batch_size': 1074, 'n_epochs': 145}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/119, Loss: 0.40459991494814557
0.5323441007045312
Epoch 11/119, Loss: 0.21161608460048834
0.619097637332334
Epoch 21/119, Loss: 0.17581240522364774
0.6479876925831906
Epoch 31/119, Loss: 0.16217644264300665
0.6693512032649972
Epoch 41/119, Loss: 0.1494897057612737
0.6754715208187114
Epoch 51/119, Loss: 0.1359136210133632
0.6925878103773278
Epoch 61/119, Loss: 0.12000087990115087
0.7094072288656789
Epoch 71/119, Loss: 0.10674416273832321
0.7276055548184569
Epoch 81/119, Loss: 0.0939753179748853
0.7370082238495087
Epoch 91/119, Loss: 0.08221797955532868
0.7351618852591648
Epoch 101/119, Loss: 0.07181979560603698
0.7409949722716014
Epoch 111/119, Loss: 0.06276941703011592
0.7419196904924351


[I 2024-01-23 02:19:53,558] Trial 84 finished with value: 0.7419196904924351 and parameters: {'hidden_dim_h': 29, 'dropout': 0.06679146119066151, 'batch_size': 1024, 'n_epochs': 119}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/204, Loss: 0.3931266438393366
0.536680680885264
Epoch 11/204, Loss: 0.1869534814641589
0.6236123454234821
Epoch 21/204, Loss: 0.16381276966560454
0.6511309838835307
Epoch 31/204, Loss: 0.13835572273958296
0.6742567638628655
Epoch 41/204, Loss: 0.11645811707490966
0.696135594839448
Epoch 51/204, Loss: 0.09706132557420503
0.7116578881607035
Epoch 61/204, Loss: 0.08112722759445508
0.7230292554952988
Epoch 71/204, Loss: 0.067905457424266
0.7346768018806825
Epoch 81/204, Loss: 0.0601157568217743
0.7400427258284454
Epoch 91/204, Loss: 0.05586650124972775
0.7406568094009333
Epoch 101/204, Loss: 0.05152474414734613
0.7469790689925494
Epoch 111/204, Loss: 0.04743173461230028
0.7447634915811423
Epoch 121/204, Loss: 0.04580661698821045
0.7500688592171879
Epoch 131/204, Loss: 0.04340359329112938
0.7518244781391373
Epoch 141/204, Loss: 0.0423069495175566
0.7482106896627895
Epoch 151/204, Loss: 0.04043001105033216
0.7469509818958951
Epoch 161/204, Loss:

[I 2024-01-23 02:21:07,672] Trial 85 finished with value: 0.7456411839217255 and parameters: {'hidden_dim_h': 24, 'dropout': 0.10555985463387382, 'batch_size': 580, 'n_epochs': 204}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/189, Loss: 0.2205257344813574
0.5496403211375539
Epoch 11/189, Loss: 0.18138485934053147
0.6289044756839223
Epoch 21/189, Loss: 0.156016828758376
0.6530045644665519
Epoch 31/189, Loss: 0.13497077283405123
0.6647971754133326
Epoch 41/189, Loss: 0.11791538801931199
0.6892532951659333
Epoch 51/189, Loss: 0.10178823059513456
0.7002676629495246
Epoch 61/189, Loss: 0.08775619843176433
0.7112527190307205
Epoch 71/189, Loss: 0.07649003998154685
0.7215971162014013
Epoch 81/189, Loss: 0.06743211565273148
0.7338298931874756
Epoch 91/189, Loss: 0.060480473119588124
0.7353478257637911
Epoch 101/189, Loss: 0.059098511934280396
0.728604572504381
Epoch 111/189, Loss: 0.050529999569767996
0.7365209224667739
Epoch 121/189, Loss: 0.04674630878227098
0.7393083728413649
Epoch 131/189, Loss: 0.04373878009972118
0.7386435885164211
Epoch 141/189, Loss: 0.04162276287873586
0.7382592976215108
Epoch 151/189, Loss: 0.03975334373258409
0.7411226456891524
Epoch 161/189

[I 2024-01-23 02:22:15,924] Trial 86 finished with value: 0.7474084987608363 and parameters: {'hidden_dim_h': 27, 'dropout': 0.07522247072101279, 'batch_size': 1177, 'n_epochs': 189}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/219, Loss: 0.9251282486048612
0.5228174970239792
Epoch 11/219, Loss: 0.5373726013031873
0.04730540536543526
Epoch 21/219, Loss: 0.29681750454685907
0.5840467324601035
Epoch 31/219, Loss: 0.20792351798577743
0.6468021344848894
Epoch 41/219, Loss: 0.17936346138065512
0.663819050362434
Epoch 51/219, Loss: 0.16644851931116797
0.6836301578668597
Epoch 61/219, Loss: 0.15611771020022305
0.6954712634223842
Epoch 71/219, Loss: 0.1460218754681674
0.7012026624004797
Epoch 81/219, Loss: 0.13561551679264416
0.7212271994044932
Epoch 91/219, Loss: 0.12581844323060729
0.725395606741012
Epoch 101/219, Loss: 0.11426360329443758
0.732175427406749
Epoch 111/219, Loss: 0.10352362895553763
0.7403212567444871
Epoch 121/219, Loss: 0.09389358894391493
0.7378721020503992
Epoch 131/219, Loss: 0.08428811010989276
0.7450196833269829
Epoch 141/219, Loss: 0.0760849690572782
0.7439407486472828
Epoch 151/219, Loss: 0.06784443252465942
0.7440821661351815
Epoch 161/219, Los

[I 2024-01-23 02:23:32,579] Trial 87 finished with value: 0.7485690744921552 and parameters: {'hidden_dim_h': 26, 'dropout': 0.05650442594668864, 'batch_size': 1097, 'n_epochs': 219}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/180, Loss: 2.240541877746582
0.5140321994558046
Epoch 11/180, Loss: 1.3968180513381958
0.00016155194744221974
Epoch 21/180, Loss: 0.8146019721031189
0.027301335527852064
Epoch 31/180, Loss: 0.4953006172180176
0.05243690933846876
Epoch 41/180, Loss: 0.3466167604923248
0.05875377210145721
Epoch 51/180, Loss: 0.2765808928012848
0.08488547354333716
Epoch 61/180, Loss: 0.2541654688119888
0.07103704175713424
Epoch 71/180, Loss: 0.2403144109249115
0.13101312722934705
Epoch 81/180, Loss: 0.2468489360809326
0.09602927706067624
Epoch 91/180, Loss: 0.24007136046886443
0.10465953008777758
Epoch 101/180, Loss: 0.24466251730918884
0.09011264104250488
Epoch 111/180, Loss: 0.2404714220762253
0.10469395804202239
Epoch 121/180, Loss: 0.2377079290151596
0.1235239094451736
Epoch 131/180, Loss: 0.23543141543865204
0.1274936678401685
Epoch 141/180, Loss: 0.23403540074825288
0.1403054704970274
Epoch 151/180, Loss: 0.23517834186553954
0.1478090378436713
Epoch 161

[I 2024-01-23 02:24:34,882] Trial 88 finished with value: 0.12938384750462434 and parameters: {'hidden_dim_h': 19, 'dropout': 0.0944792908622368, 'batch_size': 991, 'n_epochs': 180}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/131, Loss: 0.5320343871911367
0.5076520364384861
Epoch 11/131, Loss: 0.28749285141626996
0.5978145149709594
Epoch 21/131, Loss: 0.20758022509870075
0.6388251890878838
Epoch 31/131, Loss: 0.18584404460021428
0.6567914503294839
Epoch 41/131, Loss: 0.17416719879422868
0.6677471817930909
Epoch 51/131, Loss: 0.16391725980100177
0.681436123109376
Epoch 61/131, Loss: 0.1527830305553618
0.6905779656503271
Epoch 71/131, Loss: 0.14140047203926814
0.699569622646462
Epoch 81/131, Loss: 0.1308465702902703
0.7041732896774687
Epoch 91/131, Loss: 0.1198904560435386
0.7124239614439262
Epoch 101/131, Loss: 0.11028263434058144
0.7059281893708524
Epoch 111/131, Loss: 0.101163254962081
0.7173063582455051
Epoch 121/131, Loss: 0.0925288771589597
0.7289001342634045


[I 2024-01-23 02:25:14,758] Trial 89 finished with value: 0.724852347542402 and parameters: {'hidden_dim_h': 13, 'dropout': 0.14345179490428758, 'batch_size': 1140, 'n_epochs': 131}. Best is trial 32 with value: 0.7604109329548373.


Epoch 131/131, Loss: 0.08526238586221423
0.724852347542402
Build model with 2 layers of attention
Epoch 1/233, Loss: 1.2068655811823332
0.5385678360073777
Epoch 11/233, Loss: 0.5867514381041894
0.5900489294255563
Epoch 21/233, Loss: 0.28942805987138015
0.6300584911180943
Epoch 31/233, Loss: 0.16850423755554053
0.6713916427915203
Epoch 41/233, Loss: 0.12682424628963837
0.6940024985837724
Epoch 51/233, Loss: 0.11294542539578217
0.7071924377007736
Epoch 61/233, Loss: 0.10525312074101888
0.7196805329528801
Epoch 71/233, Loss: 0.0990818076981948
0.7298815571042041
Epoch 81/233, Loss: 0.09301597223832057
0.7346376941845483
Epoch 91/233, Loss: 0.08679683535144879
0.7340396163876741
Epoch 101/233, Loss: 0.08097394985648301
0.7423943724044488
Epoch 111/233, Loss: 0.0746850033219044
0.7486109309212426
Epoch 121/233, Loss: 0.06832605886917847
0.7509000914905465
Epoch 131/233, Loss: 0.06289013016682404
0.7526439142262408
Epoch 141/233, Loss: 0.058113597763272434
0.748869478698873
Epoch 151/233, Lo

[I 2024-01-23 02:26:32,410] Trial 90 finished with value: 0.7539403229259887 and parameters: {'hidden_dim_h': 23, 'dropout': 0.08433772614854822, 'batch_size': 925, 'n_epochs': 233}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/216, Loss: 1.7055988004130702
0.43220575492783003
Epoch 11/216, Loss: 0.8144584278906545
0.6011445301548255
Epoch 21/216, Loss: 0.37505862982042376
0.6472984774182589
Epoch 31/216, Loss: 0.19261383337359275
0.6879133525013293
Epoch 41/216, Loss: 0.1309381708983452
0.6997451043550283
Epoch 51/216, Loss: 0.11309670368509908
0.7150245315104674
Epoch 61/216, Loss: 0.10432460279233995
0.7333306016870255
Epoch 71/216, Loss: 0.09803069719383793
0.7313123799915947
Epoch 81/216, Loss: 0.09220244711445223
0.7367577051675934
Epoch 91/216, Loss: 0.08619585489073107
0.7397388032811005
Epoch 101/216, Loss: 0.07981554755280094
0.740944910722836
Epoch 111/216, Loss: 0.07318709814740766
0.7420127224175517
Epoch 121/216, Loss: 0.06650628245645954
0.7457277616578544
Epoch 131/216, Loss: 0.06115628021859353
0.7440422818132577
Epoch 141/216, Loss: 0.05584903433918953
0.7432796967531137
Epoch 151/216, Loss: 0.05168722894403242
0.7492118267986485
Epoch 161/216, 

[I 2024-01-23 02:27:48,069] Trial 91 finished with value: 0.7515760929295409 and parameters: {'hidden_dim_h': 24, 'dropout': 0.11422914502894237, 'batch_size': 785, 'n_epochs': 216}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/258, Loss: 0.2373217728101846
0.5478837516246104
Epoch 11/258, Loss: 0.16801153123378754
0.6256464959166627
Epoch 21/258, Loss: 0.12952309350172678
0.6606100332793632
Epoch 31/258, Loss: 0.1021898966847044
0.6928253313919618
Epoch 41/258, Loss: 0.08324606039307335
0.7190963556351624
Epoch 51/258, Loss: 0.06899701352372314
0.7253446239575922
Epoch 61/258, Loss: 0.06059677368312171
0.7348683234150192
Epoch 71/258, Loss: 0.05521365650223963
0.7405504481144665
Epoch 81/258, Loss: 0.05067561611984715
0.7511200152385423
Epoch 91/258, Loss: 0.04811334880915555
0.7460595762620724
Epoch 101/258, Loss: 0.045765404787027474
0.7529800077382808
Epoch 111/258, Loss: 0.04389931137363116
0.7572198378498529
Epoch 121/258, Loss: 0.04141947694800117
0.7522541922015019
Epoch 131/258, Loss: 0.04096935113722628
0.7498825626576857
Epoch 141/258, Loss: 0.0385748605159196
0.7507918745322132
Epoch 151/258, Loss: 0.037888743886441895
0.7516573373673009
Epoch 161/258

[I 2024-01-23 02:29:16,687] Trial 92 finished with value: 0.7442381099236265 and parameters: {'hidden_dim_h': 21, 'dropout': 0.0636430090998055, 'batch_size': 728, 'n_epochs': 258}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/161, Loss: 0.25055878253086755
0.5488433801389806
Epoch 11/161, Loss: 0.17638050149316373
0.6336864113659955
Epoch 21/161, Loss: 0.15166326255902
0.6554010503850433
Epoch 31/161, Loss: 0.12879072745209155
0.6763553080963784
Epoch 41/161, Loss: 0.11164855989425079
0.6907912566704206
Epoch 51/161, Loss: 0.09409558449102484
0.7065881673071353
Epoch 61/161, Loss: 0.08191080002681068
0.7237108974233678
Epoch 71/161, Loss: 0.07114038551631181
0.7192266659349458
Epoch 81/161, Loss: 0.06312740804708522
0.7409738441096877
Epoch 91/161, Loss: 0.058216611166363175
0.739143881638358
Epoch 101/161, Loss: 0.052790145511212555
0.7492793573389247
Epoch 111/161, Loss: 0.049298917145832725
0.7477401518659477
Epoch 121/161, Loss: 0.046620104299939194
0.7471746202041909
Epoch 131/161, Loss: 0.04486346973673157
0.748225695643691
Epoch 141/161, Loss: 0.04279663834882819
0.7483012106492438
Epoch 151/161, Loss: 0.04094529427264048
0.7441749330633588


[I 2024-01-23 02:30:15,014] Trial 93 finished with value: 0.7463580409106456 and parameters: {'hidden_dim_h': 29, 'dropout': 0.1234599827568773, 'batch_size': 1046, 'n_epochs': 161}. Best is trial 32 with value: 0.7604109329548373.


Epoch 161/161, Loss: 0.0395188477376233
0.7463580409106456
Build model with 2 layers of attention
Epoch 1/228, Loss: 0.23246774729341269
0.5522759064249615
Epoch 11/228, Loss: 0.1634896956384182
0.6315246824407296
Epoch 21/228, Loss: 0.13014255044981837
0.657617913798313
Epoch 31/228, Loss: 0.10796844842843711
0.6693855272645229
Epoch 41/228, Loss: 0.09293199656531215
0.6928786891291155
Epoch 51/228, Loss: 0.08152262005023658
0.702778785909081
Epoch 61/228, Loss: 0.0747221380006522
0.7116837366996304
Epoch 71/228, Loss: 0.06900245312135667
0.722316103442356
Epoch 81/228, Loss: 0.06499099812936038
0.7279290387092524
Epoch 91/228, Loss: 0.059098186320625246
0.7397579585967953
Epoch 101/228, Loss: 0.055784684023819864
0.7445460664588033
Epoch 111/228, Loss: 0.05424811423290521
0.7469547851116367
Epoch 121/228, Loss: 0.050920534413307905
0.7461538775234794
Epoch 131/228, Loss: 0.05076175543945283
0.7486185520767004
Epoch 141/228, Loss: 0.04822178091853857
0.7543802440307263
Epoch 151/228, 

[I 2024-01-23 02:31:38,555] Trial 94 finished with value: 0.7528054598760641 and parameters: {'hidden_dim_h': 28, 'dropout': 0.13347080476670536, 'batch_size': 767, 'n_epochs': 228}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/220, Loss: 0.7913770526647568
0.547193072787295
Epoch 11/220, Loss: 0.2739910019768609
0.620787644157469
Epoch 21/220, Loss: 0.15715576087435087
0.6546710429776066
Epoch 31/220, Loss: 0.13618840732508236
0.6947308439253318
Epoch 41/220, Loss: 0.12450264414979352
0.7105525118030216
Epoch 51/220, Loss: 0.11256157937977049
0.7147111044890215
Epoch 61/220, Loss: 0.1009884648438957
0.7266931546005079
Epoch 71/220, Loss: 0.08947030165129238
0.7288326615970598
Epoch 81/220, Loss: 0.0783607651376062
0.732821565041192
Epoch 91/220, Loss: 0.06890789760897557
0.7388390051580469
Epoch 101/220, Loss: 0.06106005743559864
0.7394456150528792
Epoch 111/220, Loss: 0.054782265279855996
0.7436498285164056
Epoch 121/220, Loss: 0.049472525922788516
0.7478261055871785
Epoch 131/220, Loss: 0.045536693496008716
0.7501952690700908
Epoch 141/220, Loss: 0.04312228690832853
0.7452663922028897
Epoch 151/220, Loss: 0.041171267421709165
0.7451536361427719
Epoch 161/220, 

[I 2024-01-23 02:32:55,578] Trial 95 finished with value: 0.7505566957481271 and parameters: {'hidden_dim_h': 24, 'dropout': 0.10798526563204047, 'batch_size': 668, 'n_epochs': 220}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/210, Loss: 0.6307558702385944
0.5360863089288681
Epoch 11/210, Loss: 0.3151785990466242
0.5934831299820754
Epoch 21/210, Loss: 0.2171270199443983
0.6209477466506128
Epoch 31/210, Loss: 0.19166231932847397
0.6361378667350973
Epoch 41/210, Loss: 0.17781274538973105
0.6568779375308618
Epoch 51/210, Loss: 0.16404682462630066
0.6680979740247495
Epoch 61/210, Loss: 0.1513013289026592
0.6808431019930301
Epoch 71/210, Loss: 0.13849621687246405
0.6862368210667075
Epoch 81/210, Loss: 0.1265346718871075
0.6957288642558342
Epoch 91/210, Loss: 0.11460162472465764
0.7019568962739147
Epoch 101/210, Loss: 0.10452344689680182
0.713409523475611
Epoch 111/210, Loss: 0.09554655590782994
0.7150230306244804
Epoch 121/210, Loss: 0.08769528600184814
0.7289572045225228
Epoch 131/210, Loss: 0.08074859076220056
0.7304470012582898
Epoch 141/210, Loss: 0.07564732399971588
0.7362682171572864
Epoch 151/210, Loss: 0.07115135665820993
0.732205487442931
Epoch 161/210, Loss

[I 2024-01-23 02:34:04,144] Trial 96 finished with value: 0.747636848495062 and parameters: {'hidden_dim_h': 15, 'dropout': 0.27242042700679164, 'batch_size': 1075, 'n_epochs': 210}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/203, Loss: 0.8004152137657692
0.5545380344970476
Epoch 11/203, Loss: 0.29676674255009355
0.6287169274146269
Epoch 21/203, Loss: 0.1345529787499329
0.651628953750274
Epoch 31/203, Loss: 0.09478320952119498
0.6928800582429842
Epoch 41/203, Loss: 0.08476724681155436
0.7034891545418162
Epoch 51/203, Loss: 0.07771959849472704
0.7113991835619375
Epoch 61/203, Loss: 0.07241343732537894
0.7249708250079367
Epoch 71/203, Loss: 0.06778332899356711
0.733817469740058
Epoch 81/203, Loss: 0.06281239732072272
0.7400998746401101
Epoch 91/203, Loss: 0.05876551812578892
0.7358028209721502
Epoch 101/203, Loss: 0.05500756024286665
0.7504815075443491
Epoch 111/203, Loss: 0.051628934283708704
0.7450284476175834
Epoch 121/203, Loss: 0.048681899519829916
0.7477496937894446
Epoch 131/203, Loss: 0.04611148055771302
0.7500843132183042
Epoch 141/203, Loss: 0.042989596082218764
0.7458122911809265
Epoch 151/203, Loss: 0.04158408181934521
0.7505728177257478
Epoch 161/203

[I 2024-01-23 02:35:16,688] Trial 97 finished with value: 0.7503008231275482 and parameters: {'hidden_dim_h': 26, 'dropout': 0.09331632581500467, 'batch_size': 828, 'n_epochs': 203}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/235, Loss: 0.20984166502952575
0.5535874247246428
Epoch 11/235, Loss: 0.16612960577011107
0.626420204986473
Epoch 21/235, Loss: 0.14258040606975556
0.6530160393903208
Epoch 31/235, Loss: 0.12227951586246491
0.662232104743077
Epoch 41/235, Loss: 0.10676712036132813
0.6883827573114495
Epoch 51/235, Loss: 0.09347634494304657
0.6942425816452438
Epoch 61/235, Loss: 0.08239200592041016
0.7138654308820046
Epoch 71/235, Loss: 0.07296906352043152
0.7242952823234777
Epoch 81/235, Loss: 0.0663909250497818
0.7310904698953933
Epoch 91/235, Loss: 0.05990796223282814
0.7359000082580699
Epoch 101/235, Loss: 0.05584185630083084
0.7362092384392299
Epoch 111/235, Loss: 0.0526282824575901
0.7473575656231877
Epoch 121/235, Loss: 0.04995945930480957
0.740409156665588
Epoch 131/235, Loss: 0.04655963122844696
0.7408182417271071
Epoch 141/235, Loss: 0.04494691029191017
0.7461470436184092
Epoch 151/235, Loss: 0.04325668290257454
0.7439427815467885
Epoch 161/235, Lo

[I 2024-01-23 02:36:44,348] Trial 98 finished with value: 0.7546480222607709 and parameters: {'hidden_dim_h': 31, 'dropout': 0.12862969835114052, 'batch_size': 977, 'n_epochs': 235}. Best is trial 32 with value: 0.7604109329548373.


Build model with 2 layers of attention
Epoch 1/245, Loss: 1.5513890981674194
0.5433744903229456
Epoch 11/245, Loss: 0.8291338729858398
0.6086542082088576
Epoch 21/245, Loss: 0.43034725546836855
0.6348896885521151
Epoch 31/245, Loss: 0.23228349208831786
0.672286971035759
Epoch 41/245, Loss: 0.1469348591566086
0.6991496143971186
Epoch 51/245, Loss: 0.11559522598981857
0.7093527986943632
Epoch 61/245, Loss: 0.10316232889890671
0.7223007274511242
Epoch 71/245, Loss: 0.09678043723106385
0.7331668217492854
Epoch 81/245, Loss: 0.09116647958755493
0.7406983595770422
Epoch 91/245, Loss: 0.08588530838489533
0.7436189328608679
Epoch 101/245, Loss: 0.08043805956840515
0.7422858418394175
Epoch 111/245, Loss: 0.07553297400474548
0.7447884958096261
Epoch 121/245, Loss: 0.07095130354166031
0.7460915575133877
Epoch 131/245, Loss: 0.06540836423635482
0.7484135448271467
Epoch 141/245, Loss: 0.0607601822912693
0.7466468327733388
Epoch 151/245, Loss: 0.05663558691740036
0.7522998932698144
Epoch 161/245, Lo

[I 2024-01-23 02:38:21,467] Trial 99 finished with value: 0.7518808659981004 and parameters: {'hidden_dim_h': 34, 'dropout': 0.16057442154216664, 'batch_size': 964, 'n_epochs': 245}. Best is trial 32 with value: 0.7604109329548373.
[I 2024-01-23 02:38:21,507] A new study created in memory with name: no-name-27468bed-fc46-4030-9096-7c421294f5b0


Best Trial:
  Criterion: 0.7604
  Params: 
    hidden_dim_h: 28
    dropout: 0.11391580511521779
    batch_size: 1066
    n_epochs: 220
TF_2 achieved R2 = 0.7584174157250739
Build model with 3 layers of attention
Epoch 1/187, Loss: 0.9921368333426389
0.5425515992257479
Epoch 11/187, Loss: 0.5704217905347998
0.0007049317391253257
Epoch 21/187, Loss: 0.3715992569923401
0.006507412411204353
Epoch 31/187, Loss: 0.2937996116551486
0.0007388629812123749
Epoch 41/187, Loss: 0.2699171216650443
0.0002878937976387371
Epoch 51/187, Loss: 0.2635158598423004
0.0006685455039841501
Epoch 61/187, Loss: 0.262927963652394
5.623001221280992e-05
Epoch 71/187, Loss: 0.26187382164326584
0.0006111039532383215
Epoch 81/187, Loss: 0.26246694881807675
0.00011253631594762885
Epoch 91/187, Loss: 0.26237300580198114
0.00013845944454002598
Epoch 101/187, Loss: 0.26224844428625976
0.002926023931964106
Epoch 111/187, Loss: 0.26128107851201837
0.003511787981543394
Epoch 121/187, Loss: 0.26136788658120413
0.00456370000

[I 2024-01-23 02:39:47,555] Trial 0 finished with value: 0.005901299532831831 and parameters: {'hidden_dim_h': 29, 'dropout': 0.06382259811368754, 'batch_size': 1111, 'n_epochs': 187}. Best is trial 0 with value: 0.005901299532831831.


Found better hyperparameter, update model
Build model with 3 layers of attention
Epoch 1/289, Loss: 1.9834861755371094
0.5305686865715104
Epoch 11/289, Loss: 1.2546677589416504
0.5269549645830617
Epoch 21/289, Loss: 0.7711684476761591
0.5714365700597401
Epoch 31/289, Loss: 0.47025341221264433
0.5888910756916321
Epoch 41/289, Loss: 0.2966400001730238
0.6154236628244173
Epoch 51/289, Loss: 0.20605107787109556
0.6085371888058586
Epoch 61/289, Loss: 0.1624175856510798
0.6360172927363701
Epoch 71/289, Loss: 0.143105278412501
0.6435763837703705
Epoch 81/289, Loss: 0.13383759593679792
0.643126353308928
Epoch 91/289, Loss: 0.1293260334503083
0.6519594556837047
Epoch 101/289, Loss: 0.2642675042152405
0.0009593322456348196
Epoch 111/289, Loss: 0.26067021063395907
0.005957185074314001
Epoch 121/289, Loss: 0.25793682038784027
0.014108175638627051
Epoch 131/289, Loss: 0.2571704231557392
0.0214722343911411
Epoch 141/289, Loss: 0.2574233001186734
0.02223711084943931
Epoch 151/289, Loss: 0.25645058850

[I 2024-01-23 02:41:44,187] Trial 1 finished with value: 0.01861866382766132 and parameters: {'hidden_dim_h': 18, 'dropout': 0.3469772043088075, 'batch_size': 1145, 'n_epochs': 289}. Best is trial 1 with value: 0.01861866382766132.


Found better hyperparameter, update model
Build model with 3 layers of attention
Epoch 1/105, Loss: 2.500365816516641
0.0014435083485514043
Epoch 11/105, Loss: 0.2724211730154944
0.002287583031598504
Epoch 21/105, Loss: 0.26218451504354123
0.00027798187275131605
Epoch 31/105, Loss: 0.2622413916720284
0.00041052005057746436
Epoch 41/105, Loss: 0.26234790885154113
0.00019724253152937147
Epoch 51/105, Loss: 0.2608659123932874
2.0004178571750916e-05
Epoch 61/105, Loss: 0.26186463632333423
7.616688212377446e-05
Epoch 71/105, Loss: 0.26197582814428544
0.0004944723337710239
Epoch 81/105, Loss: 0.2619933713181519
0.0011978329786005723
Epoch 91/105, Loss: 0.2608999836224097
3.482668635474077e-05
Epoch 101/105, Loss: 0.26119721092191744
0.002062751156393067


[I 2024-01-23 02:43:21,755] Trial 2 finished with value: 0.002062751156393067 and parameters: {'hidden_dim_h': 49, 'dropout': 0.34613886218019957, 'batch_size': 148, 'n_epochs': 105}. Best is trial 1 with value: 0.01861866382766132.


Build model with 3 layers of attention
Epoch 1/250, Loss: 0.913767093961889
0.5429643591036216
Epoch 11/250, Loss: 0.2789975438605655
0.5943165426074875
Epoch 21/250, Loss: 0.25098558402422705
0.013492126774613864
Epoch 31/250, Loss: 0.25980411944064224
0.005013041276561009
Epoch 41/250, Loss: 0.26078174556746625
0.004556598774733065
Epoch 51/250, Loss: 0.2614110588575854
0.00047323542660728104
Epoch 61/250, Loss: 0.2629888267679648
0.0007898431057837414
Epoch 71/250, Loss: 0.26119990127556253
0.0008268820337392227
Epoch 81/250, Loss: 0.26440690683596063
0.001863723710699982
Epoch 91/250, Loss: 0.26248365572907706
0.0012299435897050633
Epoch 101/250, Loss: 0.2599564634940841
0.0032408009476227635
Epoch 111/250, Loss: 0.26065456505977747
0.002119129404643987
Epoch 121/250, Loss: 0.26167098861752136
0.0030496711167163587
Epoch 131/250, Loss: 0.26346179933258984
0.000928861939589864
Epoch 141/250, Loss: 0.2604209274956674
0.005257359772214155
Epoch 151/250, Loss: 0.2601922590172652
0.0018

[I 2024-01-23 02:45:32,811] Trial 3 finished with value: 0.006011236801604155 and parameters: {'hidden_dim_h': 25, 'dropout': 0.2917438908954073, 'batch_size': 367, 'n_epochs': 250}. Best is trial 1 with value: 0.01861866382766132.


Build model with 3 layers of attention
Epoch 1/198, Loss: 0.4478503513980556
0.48901940113665554
Epoch 11/198, Loss: 0.24673238799378677
0.5940645410422567
Epoch 21/198, Loss: 0.20963102498570005
0.6079496432911409
Epoch 31/198, Loss: 0.17605842811030312
0.6220164526506515
Epoch 41/198, Loss: 0.14875210459167892
0.6220963289751844
Epoch 51/198, Loss: 0.12581136661606865
0.6376888093452668
Epoch 61/198, Loss: 0.11031584038927748
0.6464176073091673
Epoch 71/198, Loss: 0.09888559539575835
0.6568556314179144
Epoch 81/198, Loss: 0.09097681697961446
0.6683355357244812
Epoch 91/198, Loss: 0.2631753054825035
0.007848482943250959
Epoch 101/198, Loss: 0.2632824242920489
0.005268810288288638
Epoch 111/198, Loss: 0.2642634603622797
0.0014467004415464671
Epoch 121/198, Loss: 0.26412195692191254
0.0001439179945403571
Epoch 131/198, Loss: 0.2648052553067336
0.00030435601181500144
Epoch 141/198, Loss: 0.2650205126485309
0.00017153300353498695
Epoch 151/198, Loss: 0.26538101644129364
2.8594694085328367

[I 2024-01-23 02:46:56,484] Trial 4 finished with value: 1.0517777956960729e-05 and parameters: {'hidden_dim_h': 17, 'dropout': 0.1526599452525645, 'batch_size': 650, 'n_epochs': 198}. Best is trial 1 with value: 0.01861866382766132.


Build model with 3 layers of attention
Epoch 1/171, Loss: 1.1317114502191543
0.47540770115151904
Epoch 11/171, Loss: 0.4698380343616009
0.6068976189487687
Epoch 21/171, Loss: 0.3009538114070892
0.6165181828409689
Epoch 31/171, Loss: 0.26008561849594114
0.6089632760132355
Epoch 41/171, Loss: 0.2333000276237726
0.6320122347783353
Epoch 51/171, Loss: 0.260525094717741
0.008125914106325318
Epoch 61/171, Loss: 0.26299768388271333
0.002700919878608655
Epoch 71/171, Loss: 0.2605912584811449
0.0031299235521565208
Epoch 81/171, Loss: 0.26551874466240405
2.2632083330293152e-05
Epoch 91/171, Loss: 0.2660620056092739
0.0028012082855270845
Epoch 101/171, Loss: 0.2600575279444456
0.0010095400806668638
Epoch 111/171, Loss: 0.26000196672976017
0.0012342273551684082
Epoch 121/171, Loss: 0.26037179827690127
0.004933408584362242
Epoch 131/171, Loss: 0.26182808093726634
0.00030164419252593714
Epoch 141/171, Loss: 0.26320715211331847
0.0006881122564691116
Epoch 151/171, Loss: 0.26347606256604195
0.00083787

[I 2024-01-23 02:48:26,296] Trial 5 finished with value: 0.0008912978168745178 and parameters: {'hidden_dim_h': 33, 'dropout': 0.16011038513275583, 'batch_size': 611, 'n_epochs': 171}. Best is trial 1 with value: 0.01861866382766132.


Epoch 171/171, Loss: 0.25966708175837994
0.0008912978168745178
Build model with 3 layers of attention
Epoch 1/214, Loss: 0.18887837827205659
0.5470473687870478
Epoch 11/214, Loss: 0.14656852398599898
0.6012431307463332
Epoch 21/214, Loss: 0.12217781245708466
0.6259525363092693
Epoch 31/214, Loss: 0.2641160671200071
0.0003218207002939199
Epoch 41/214, Loss: 0.2658125638961792
6.0980816662387094e-06
Epoch 51/214, Loss: 0.26432780878884454
0.000408862223847255
Epoch 61/214, Loss: 0.26430126300879886
0.0007440090926733892
Epoch 71/214, Loss: 0.2636108364377703
0.0008968900550935106
Epoch 81/214, Loss: 0.26295400559902193
0.0001636079278597551
Epoch 91/214, Loss: 0.2630929206098829
0.0004518693079803168
Epoch 101/214, Loss: 0.2631529233285359
0.0012898047083909226
Epoch 111/214, Loss: 0.26282451578548977
0.0011479175010943637
Epoch 121/214, Loss: 0.26285837037222726
0.0016933494590531703
Epoch 131/214, Loss: 0.26300175871167863
0.002365409873214658
Epoch 141/214, Loss: 0.2636010838406427
0.

[I 2024-01-23 02:50:18,506] Trial 6 finished with value: 6.397739736652078e-05 and parameters: {'hidden_dim_h': 37, 'dropout': 0.3322376579920349, 'batch_size': 685, 'n_epochs': 214}. Best is trial 1 with value: 0.01861866382766132.


Build model with 3 layers of attention
Epoch 1/100, Loss: 0.5839079194947293
0.5329649039759506
Epoch 11/100, Loss: 0.2608447004305689
0.5980916447259218
Epoch 21/100, Loss: 0.2156684112391974
0.6218339267005766
Epoch 31/100, Loss: 0.18863350976454585
0.6324059946334352
Epoch 41/100, Loss: 0.16268326695028104
0.6317031337173232
Epoch 51/100, Loss: 0.13929488353039088
0.644694050655174
Epoch 61/100, Loss: 0.12318267751681178
0.6348253330008414
Epoch 71/100, Loss: 0.12438638390679109
0.6164752190298901
Epoch 81/100, Loss: 0.09951267783579074
0.658287850886285
Epoch 91/100, Loss: 0.09066050519284449
0.6682326653221088


[I 2024-01-23 02:51:04,786] Trial 7 finished with value: 0.6682326653221088 and parameters: {'hidden_dim_h': 26, 'dropout': 0.0931893189966438, 'batch_size': 635, 'n_epochs': 100}. Best is trial 7 with value: 0.6682326653221088.


Found better hyperparameter, update model
Build model with 3 layers of attention
Epoch 1/189, Loss: 0.7487096736828486
0.5537098897697917
Epoch 11/189, Loss: 0.35209641481439274
0.6038539919352345
Epoch 21/189, Loss: 0.2969813918073972
0.009320559455802601
Epoch 31/189, Loss: 0.26850320026278496
1.499268932463926e-05
Epoch 41/189, Loss: 0.2637503656248252
0.0017406251729478052
Epoch 51/189, Loss: 0.26271150323251885
0.0029781370977252208
Epoch 61/189, Loss: 0.26254138350486755
0.00047352366440428024
Epoch 71/189, Loss: 0.26215248679121333
0.0016850928531995538
Epoch 81/189, Loss: 0.2622283740590016
3.614623531973129e-05
Epoch 91/189, Loss: 0.26191331694523495
9.634586952568734e-06
Epoch 101/189, Loss: 0.26180002962549526
0.0031968514464502168
Epoch 111/189, Loss: 0.2615663061539332
0.0006847253840490157
Epoch 121/189, Loss: 0.26178538364668685
0.004420468741809132
Epoch 131/189, Loss: 0.26124471984803677
0.0029499443019300165
Epoch 141/189, Loss: 0.2619995803882678
0.00222371228014708


[I 2024-01-23 02:53:02,554] Trial 8 finished with value: 0.0025795177373203037 and parameters: {'hidden_dim_h': 50, 'dropout': 0.29561346875054395, 'batch_size': 1010, 'n_epochs': 189}. Best is trial 7 with value: 0.6682326653221088.


Build model with 3 layers of attention
Epoch 1/45, Loss: 2.4269941993381665
0.4754556769380963
Epoch 11/45, Loss: 1.586060062698696
0.0016675341334292336
Epoch 21/45, Loss: 1.0162763336430425
2.7738788506183515e-05
Epoch 31/45, Loss: 0.6596208370250204
4.279357143207324e-05
Epoch 41/45, Loss: 0.45339578519696777
1.3560376740445415e-05


[I 2024-01-23 02:53:23,640] Trial 9 finished with value: 1.3560376740445415e-05 and parameters: {'hidden_dim_h': 27, 'dropout': 0.14220555533181856, 'batch_size': 1083, 'n_epochs': 45}. Best is trial 7 with value: 0.6682326653221088.


Build model with 3 layers of attention
Epoch 1/98, Loss: 0.18098552773396173
0.5235596151724783
Epoch 11/98, Loss: 0.12350732261935869
0.6139590630440639
Epoch 21/98, Loss: 0.10574189225832621
0.6352760165222411
Epoch 31/98, Loss: 0.09519345710674922
0.6530555871203172
Epoch 41/98, Loss: 0.09085607031981151
0.6562093295472688
Epoch 51/98, Loss: 0.087217428535223
0.6632024403236496
Epoch 61/98, Loss: 0.10262827177842458
0.6063032869354105
Epoch 71/98, Loss: 0.08763580297430357
0.6538715686790141
Epoch 81/98, Loss: 0.08568574239810307
0.6625780016139454
Epoch 91/98, Loss: 0.08438027525941531
0.6679245568848915


[I 2024-01-23 02:54:01,391] Trial 10 finished with value: 0.6679245568848915 and parameters: {'hidden_dim_h': 11, 'dropout': 0.06319620269385091, 'batch_size': 814, 'n_epochs': 98}. Best is trial 7 with value: 0.6682326653221088.


Build model with 3 layers of attention
Epoch 1/102, Loss: 0.2118153186707661
0.5135891601649726
Epoch 11/102, Loss: 0.1402064366587277
0.5793245190440832
Epoch 21/102, Loss: 0.11869136449591867
0.6205570410678717
Epoch 31/102, Loss: 0.10506052318318136
0.6380521850948764
Epoch 41/102, Loss: 0.0957678507628112
0.643656415044585
Epoch 51/102, Loss: 0.09064386348272192
0.6570395774118469
Epoch 61/102, Loss: 0.08868121021780474
0.6575238018120321
Epoch 71/102, Loss: 0.0881364661557921
0.6551265496789394
Epoch 81/102, Loss: 0.09053071151519644
0.6516500317389374
Epoch 91/102, Loss: 0.08613696206232598
0.6652488019293764
Epoch 101/102, Loss: 0.08489819230704472
0.669534527122123


[I 2024-01-23 02:54:39,550] Trial 11 finished with value: 0.669534527122123 and parameters: {'hidden_dim_h': 10, 'dropout': 0.05313543929265222, 'batch_size': 840, 'n_epochs': 102}. Best is trial 11 with value: 0.669534527122123.


Found better hyperparameter, update model
Build model with 3 layers of attention
Epoch 1/115, Loss: 0.5811401786548751
0.504268026800109
Epoch 11/115, Loss: 0.21040350518056325
0.5878643241122242
Epoch 21/115, Loss: 0.12243739675198283
0.612994879766017
Epoch 31/115, Loss: 0.10875867786152023
0.6338198811856262
Epoch 41/115, Loss: 0.10476636168147836
0.6373004333241168
Epoch 51/115, Loss: 0.10190422034689359
0.6407981446377325
Epoch 61/115, Loss: 0.09922753168003899
0.6505040963505756
Epoch 71/115, Loss: 0.09754396416246891
0.6518597087722526
Epoch 81/115, Loss: 0.09555147109287125
0.6499847719508095
Epoch 91/115, Loss: 0.09270931993212018
0.659404411077324
Epoch 101/115, Loss: 0.09069467788296086
0.6627297780483042
Epoch 111/115, Loss: 0.08889529907277652
0.6675643028361998


[I 2024-01-23 02:55:22,439] Trial 12 finished with value: 0.6675643028361998 and parameters: {'hidden_dim_h': 10, 'dropout': 0.10376784501866403, 'batch_size': 871, 'n_epochs': 115}. Best is trial 11 with value: 0.669534527122123.


Build model with 3 layers of attention
Epoch 1/61, Loss: 0.24013852634850671
0.5587447189835416
Epoch 11/61, Loss: 0.2559167552812427
0.01278267516912703
Epoch 21/61, Loss: 0.26053663591543835
0.010128890583169063
Epoch 31/61, Loss: 0.26374202908254135
0.00036470785644384697
Epoch 41/61, Loss: 0.26274138192335766
0.00282682070653851
Epoch 51/61, Loss: 0.2627599812021442
0.0013726872672630954


[I 2024-01-23 02:55:57,275] Trial 13 finished with value: 0.003054578552652065 and parameters: {'hidden_dim_h': 39, 'dropout': 0.21322998895160516, 'batch_size': 468, 'n_epochs': 61}. Best is trial 11 with value: 0.669534527122123.


Epoch 61/61, Loss: 0.26158395935507384
0.003054578552652065
Build model with 3 layers of attention
Epoch 1/137, Loss: 0.9305051990917751
0.5295167478802799
Epoch 11/137, Loss: 0.48773542791604996
0.5960997502978229
Epoch 21/137, Loss: 0.262357551072325
0.6233264978352597
Epoch 31/137, Loss: 0.26551818794437815
0.0030648831661990593
Epoch 41/137, Loss: 0.26283426955342293
0.0005472822726419359
Epoch 51/137, Loss: 0.2618363393204553
1.7982534578733858e-05
Epoch 61/137, Loss: 0.26204874579395565
0.0047211239896129305
Epoch 71/137, Loss: 0.26123904436826706
0.003340717370616078
Epoch 81/137, Loss: 0.2610241802675383
0.006324882183595491
Epoch 91/137, Loss: 0.2615258369062628
0.0008481352350242869
Epoch 101/137, Loss: 0.26238745770284105
0.0027408123412643146
Epoch 111/137, Loss: 0.26052853039332796
0.004072941258052646
Epoch 121/137, Loss: 0.26180688345006536
0.0017399839277583407
Epoch 131/137, Loss: 0.2622043751180172
0.0025195945769803402


[I 2024-01-23 02:56:54,132] Trial 14 finished with value: 0.0025195945769803402 and parameters: {'hidden_dim_h': 20, 'dropout': 0.10438277392844814, 'batch_size': 870, 'n_epochs': 137}. Best is trial 11 with value: 0.669534527122123.


Build model with 3 layers of attention
Epoch 1/74, Loss: 0.6391625036584571
0.547284634734158
Epoch 11/74, Loss: 0.17772535060314423
0.6146799188684013
Epoch 21/74, Loss: 0.13736571244736936
0.6307446932755161
Epoch 31/74, Loss: 0.2597292163904677
0.009300393026717776
Epoch 41/74, Loss: 0.26016933581930524
0.0030134529626082455
Epoch 51/74, Loss: 0.2639476571311342
0.0028252655366954905
Epoch 61/74, Loss: 0.2625175267457962
0.0028553690138362906
Epoch 71/74, Loss: 0.2625230775868639
0.0001668186944778471


[I 2024-01-23 02:57:28,510] Trial 15 finished with value: 0.0001668186944778471 and parameters: {'hidden_dim_h': 22, 'dropout': 0.21314084767859875, 'batch_size': 515, 'n_epochs': 74}. Best is trial 11 with value: 0.669534527122123.


Build model with 3 layers of attention
Epoch 1/30, Loss: 0.7297199785709381
0.5521396186577366
Epoch 11/30, Loss: 0.19429861325770617
0.5398785641670363
Epoch 21/30, Loss: 0.15762881189584732
0.6190921727613605


[I 2024-01-23 02:57:44,045] Trial 16 finished with value: 0.6190921727613605 and parameters: {'hidden_dim_h': 14, 'dropout': 0.051743109370070046, 'batch_size': 299, 'n_epochs': 30}. Best is trial 11 with value: 0.669534527122123.


Build model with 3 layers of attention
Epoch 1/146, Loss: 0.607839533776948
0.5431184598781362
Epoch 11/146, Loss: 0.21054728193716568
0.6248066803888149
Epoch 21/146, Loss: 0.13417445913408743
0.6382405139196048
Epoch 31/146, Loss: 0.12341876689231757
0.637220997792688
Epoch 41/146, Loss: 0.2501348658944621
0.03688629499705629
Epoch 51/146, Loss: 0.2510034347122366
0.01953115668860573
Epoch 61/146, Loss: 0.24761833702072952
0.02882789152684929
Epoch 71/146, Loss: 0.2657314015157295
0.00021277430215902707
Epoch 81/146, Loss: 0.2626764810446537
0.005505871939212635
Epoch 91/146, Loss: 0.26138646087863227
0.005672532690557917
Epoch 101/146, Loss: 0.2610767334699631
0.00207922272774193
Epoch 111/146, Loss: 0.2612131761782097
0.0030986494210743214
Epoch 121/146, Loss: 0.2620100523486282
0.005020491758442642
Epoch 131/146, Loss: 0.26114374579805316
0.004003454596212524
Epoch 141/146, Loss: 0.26112615249373694
0.0023395877355428755


[I 2024-01-23 02:59:06,285] Trial 17 finished with value: 0.0023395877355428755 and parameters: {'hidden_dim_h': 42, 'dropout': 0.09706850807369356, 'batch_size': 741, 'n_epochs': 146}. Best is trial 11 with value: 0.669534527122123.


Build model with 3 layers of attention
Epoch 1/81, Loss: 0.48934618949890135
0.40891945219937387
Epoch 11/81, Loss: 0.2402515107393265
0.6085115936048192
Epoch 21/81, Loss: 0.1879964154958725
0.6281850238605295
Epoch 31/81, Loss: 0.2649388086795807
8.349993951226457e-05
Epoch 41/81, Loss: 0.26065187096595765
0.001659051220017134
Epoch 51/81, Loss: 0.26050624132156375
0.003438984058385711
Epoch 61/81, Loss: 0.2579210156202316
0.009866743380909825
Epoch 71/81, Loss: 0.2591311872005463
0.00900228772350135


[I 2024-01-23 02:59:47,178] Trial 18 finished with value: 0.007759650597669485 and parameters: {'hidden_dim_h': 33, 'dropout': 0.126054197158751, 'batch_size': 969, 'n_epochs': 81}. Best is trial 11 with value: 0.669534527122123.


Epoch 81/81, Loss: 0.26033995151519773
0.007759650597669485
Build model with 3 layers of attention
Epoch 1/135, Loss: 0.5202073150060393
0.5595115746612064
Epoch 11/135, Loss: 0.2535729384557767
0.6069709653031259
Epoch 21/135, Loss: 0.21233119307593865
0.6160854520337301
Epoch 31/135, Loss: 0.17506926676089113
0.6347766984629256
Epoch 41/135, Loss: 0.21183901822025125
0.0045233428217963285
Epoch 51/135, Loss: 0.25589175522327423
0.018833522424805373
Epoch 61/135, Loss: 0.25378432869911194
0.025154124355975135
Epoch 71/135, Loss: 0.26296679404648865
0.00132086057635223
Epoch 81/135, Loss: 0.2633449702777646
0.0011895115266264676
Epoch 91/135, Loss: 0.26210030473091384
0.002973851356565688
Epoch 101/135, Loss: 0.260177072476257
0.0028692017317693687
Epoch 111/135, Loss: 0.2600297358903018
0.0017533581213112083
Epoch 121/135, Loss: 0.2592000947757201
0.003535988622807511
Epoch 131/135, Loss: 0.2610315355387601
0.001353382865392919


[I 2024-01-23 03:00:49,756] Trial 19 finished with value: 0.001353382865392919 and parameters: {'hidden_dim_h': 23, 'dropout': 0.19859137938854532, 'batch_size': 547, 'n_epochs': 135}. Best is trial 11 with value: 0.669534527122123.


Build model with 3 layers of attention
Epoch 1/115, Loss: 0.21927639986238173
0.5515912496727857
Epoch 11/115, Loss: 0.15792230781047575
0.6231206547431738
Epoch 21/115, Loss: 0.20201920982330077
0.013413241216046342
Epoch 31/115, Loss: 0.2641877661789617
0.00010298499982125621
Epoch 41/115, Loss: 0.263318968396033
9.756186627834583e-05
Epoch 51/115, Loss: 0.2636259323166263
0.0004370081679192933
Epoch 61/115, Loss: 0.26249368440720344
0.00020765808330235703
Epoch 71/115, Loss: 0.2625518748837133
0.0011261370587693092
Epoch 81/115, Loss: 0.2630227458092474
0.0008697282553467564
Epoch 91/115, Loss: 0.26285490730116445
0.001225473662046257
Epoch 101/115, Loss: 0.26282188344386315
0.003218586207413569
Epoch 111/115, Loss: 0.26238299089093364
0.0013169370409934695


[I 2024-01-23 03:01:56,525] Trial 20 finished with value: 0.0013169370409934695 and parameters: {'hidden_dim_h': 44, 'dropout': 0.08028145382048038, 'batch_size': 789, 'n_epochs': 115}. Best is trial 11 with value: 0.669534527122123.


Build model with 3 layers of attention
Epoch 1/91, Loss: 1.2078768968582154
0.48103915417672827
Epoch 11/91, Loss: 0.5655285586913427
0.565015438177546
Epoch 21/91, Loss: 0.3222408632437388
0.6051170874534569
Epoch 31/91, Loss: 0.24863399614890416
0.6160280383879397
Epoch 41/91, Loss: 0.2287519415219625
0.5850300663480918
Epoch 51/91, Loss: 0.21075876702864965
0.6093612377328145
Epoch 61/91, Loss: 0.19759505987167358
0.5880665061859637
Epoch 71/91, Loss: 0.17956979026397069
0.6122997932117648
Epoch 81/91, Loss: 0.16491547375917434
0.6233707691748335


[I 2024-01-23 03:02:31,041] Trial 21 finished with value: 0.6315970704419953 and parameters: {'hidden_dim_h': 10, 'dropout': 0.05619803654092542, 'batch_size': 814, 'n_epochs': 91}. Best is trial 11 with value: 0.669534527122123.


Epoch 91/91, Loss: 0.1500046561161677
0.6315970704419953
Build model with 3 layers of attention
Epoch 1/58, Loss: 1.3141603057201092
0.5296014887560851
Epoch 11/58, Loss: 0.6851425904494065
0.583578891059582
Epoch 21/58, Loss: 0.387519875398049
0.6201189289432761
Epoch 31/58, Loss: 0.2667862990727791
0.6388694994944414
Epoch 41/58, Loss: 0.22522034094883844
0.6450701900061324
Epoch 51/58, Loss: 0.2090712533547328
0.6322123028526717


[I 2024-01-23 03:02:53,542] Trial 22 finished with value: 0.6322123028526717 and parameters: {'hidden_dim_h': 13, 'dropout': 0.0801749126404453, 'batch_size': 947, 'n_epochs': 58}. Best is trial 11 with value: 0.669534527122123.


Build model with 3 layers of attention
Epoch 1/95, Loss: 0.268132770151803
0.5395518087630923
Epoch 11/95, Loss: 0.2619364523526394
0.00019795666101830335
Epoch 21/95, Loss: 0.2618198083205657
0.00600853241648131
Epoch 31/95, Loss: 0.26134551519697363
0.003343496122143112
Epoch 41/95, Loss: 0.2609285314877828
0.0054109916637054955
Epoch 51/95, Loss: 0.2612793883590987
0.006348186584273549
Epoch 61/95, Loss: 0.262057157628464
0.002612439957571523
Epoch 71/95, Loss: 0.26160339469259436
0.0009381565899127854
Epoch 81/95, Loss: 0.26136658543890173
0.006826528481807041
Epoch 91/95, Loss: 0.26126881350170483
0.007977826304873654


[I 2024-01-23 03:03:31,915] Trial 23 finished with value: 0.007977826304873654 and parameters: {'hidden_dim_h': 15, 'dropout': 0.18042543676913753, 'batch_size': 726, 'n_epochs': 95}. Best is trial 11 with value: 0.669534527122123.


Build model with 3 layers of attention
Epoch 1/160, Loss: 2.715953153371811
0.5223275849682056
Epoch 11/160, Loss: 1.324453654885292
0.5376611366031496
Epoch 21/160, Loss: 0.6698182165622711
4.2469985874063406e-06
Epoch 31/160, Loss: 0.359786294400692
0.006148271052830353
Epoch 41/160, Loss: 0.27508984357118604
0.013697036677400822
Epoch 51/160, Loss: 0.2599698223173618
0.014369205122050822
Epoch 61/160, Loss: 0.255496621504426
0.016257246872258067
Epoch 71/160, Loss: 0.2586610782891512
0.0054438692734837795
Epoch 81/160, Loss: 0.2591727260500193
0.0049930234082860335
Epoch 91/160, Loss: 0.2607768811285496
0.0015505202657905446
Epoch 101/160, Loss: 0.26084817536175253
0.0011033735676113433
Epoch 111/160, Loss: 0.2602459780871868
0.0014774004523789724
Epoch 121/160, Loss: 0.26187528148293493
0.0008026190743256242
Epoch 131/160, Loss: 0.26113827899098396
0.0012948957786723659
Epoch 141/160, Loss: 0.26107204891741276
0.001869601282783235
Epoch 151/160, Loss: 0.26111053451895716
0.00136325

[I 2024-01-23 03:04:35,098] Trial 24 finished with value: 0.0013632556908083127 and parameters: {'hidden_dim_h': 10, 'dropout': 0.12614479224904124, 'batch_size': 604, 'n_epochs': 160}. Best is trial 11 with value: 0.669534527122123.


Build model with 3 layers of attention
Epoch 1/119, Loss: 0.19474186162863458
0.5431280170563357
Epoch 11/119, Loss: 0.136459207162261
0.5990685499731917
Epoch 21/119, Loss: 0.11481509331081595
0.6455107475503274
Epoch 31/119, Loss: 0.100876887728061
0.6578541435270567
Epoch 41/119, Loss: 0.0994185536567654
0.6321057898379844
Epoch 51/119, Loss: 0.09151651231305939
0.6471382273414816
Epoch 61/119, Loss: 0.08956976979970932
0.6553592210829143
Epoch 71/119, Loss: 0.08746278152934142
0.6637053723981852
Epoch 81/119, Loss: 0.084913897727217
0.6693855801970129
Epoch 91/119, Loss: 0.09397680551878043
0.6416995020410002
Epoch 101/119, Loss: 0.08603317769510406
0.6640255174094194
Epoch 111/119, Loss: 0.08469005620905332
0.6470720814558413


[I 2024-01-23 03:05:24,835] Trial 25 finished with value: 0.6470720814558413 and parameters: {'hidden_dim_h': 19, 'dropout': 0.07946454020215597, 'batch_size': 877, 'n_epochs': 119}. Best is trial 11 with value: 0.669534527122123.


Build model with 3 layers of attention
Epoch 1/73, Loss: 0.18310743421316147
0.5124128513689896
Epoch 11/73, Loss: 0.1292163085192442
0.599591027168567
Epoch 21/73, Loss: 0.11600502766668797
0.6269880120902108
Epoch 31/73, Loss: 0.2619987688958645
0.000723240133036196
Epoch 41/73, Loss: 0.26108705401420595
0.0024581989925450687
Epoch 51/73, Loss: 0.26177523881196973
0.0008695297798197122
Epoch 61/73, Loss: 0.2610707513988018
0.0028481665112318286
Epoch 71/73, Loss: 0.2612568274140358
0.0038055258083720127


[I 2024-01-23 03:05:53,054] Trial 26 finished with value: 0.0038055258083720127 and parameters: {'hidden_dim_h': 14, 'dropout': 0.25401344670836634, 'batch_size': 1195, 'n_epochs': 73}. Best is trial 11 with value: 0.669534527122123.


Build model with 3 layers of attention
Epoch 1/131, Loss: 3.0667499745333635
0.4614423201777124
Epoch 11/131, Loss: 1.21986503733529
0.5595676708659424
Epoch 21/131, Loss: 0.4889208580608721
0.6127560115324283
Epoch 31/131, Loss: 0.21938333384416722
0.6203725949361885
Epoch 41/131, Loss: 0.25600610204316954
0.015227278907609578
Epoch 51/131, Loss: 0.260040484920696
0.004323048588715749
Epoch 61/131, Loss: 0.259685343614331
0.004958873436661661
Epoch 71/131, Loss: 0.2595205607789534
0.009191509105476661
Epoch 81/131, Loss: 0.2594603736091543
0.00788602245133818
Epoch 91/131, Loss: 0.2587718480715045
0.008994951131342573
Epoch 101/131, Loss: 0.25917935205830467
0.007170499619936454
Epoch 111/131, Loss: 0.25899376195889934
0.0028152190135645795
Epoch 121/131, Loss: 0.25927026873385467
0.0046259108834209904


[I 2024-01-23 03:07:00,863] Trial 27 finished with value: 0.006472730571550004 and parameters: {'hidden_dim_h': 32, 'dropout': 0.12588902132387486, 'batch_size': 444, 'n_epochs': 131}. Best is trial 11 with value: 0.669534527122123.


Epoch 131/131, Loss: 0.2590652185457724
0.006472730571550004
Build model with 3 layers of attention
Epoch 1/160, Loss: 1.3962618189473306
0.5509895127809293
Epoch 11/160, Loss: 0.6943279254821039
0.6103824522522153
Epoch 21/160, Loss: 0.40771342285217776
0.6077037655389675
Epoch 31/160, Loss: 0.2694371246522473
0.6131103471596041
Epoch 41/160, Loss: 0.2128311655213756
0.6173731049803183
Epoch 51/160, Loss: 0.26128617986556024
0.0098252599284595
Epoch 61/160, Loss: 0.2578366509368343
0.017659615539934077
Epoch 71/160, Loss: 0.2563948251547352
0.02181345837932175
Epoch 81/160, Loss: 0.25692025163481313
0.022887536030218033
Epoch 91/160, Loss: 0.2551091298941643
0.03114078995535341
Epoch 101/160, Loss: 0.2527100203498717
0.045896795326805524
Epoch 111/160, Loss: 0.2566933107952918
0.014848900320419323
Epoch 121/160, Loss: 0.26069261229807333
0.001216769426308333
Epoch 131/160, Loss: 0.259968297616128
0.0060703933466190844
Epoch 141/160, Loss: 0.25912739624900205
0.010752922662159147
Epoch

[I 2024-01-23 03:08:11,483] Trial 28 finished with value: 0.012175281763362432 and parameters: {'hidden_dim_h': 22, 'dropout': 0.09348595736283503, 'batch_size': 789, 'n_epochs': 160}. Best is trial 11 with value: 0.669534527122123.


Build model with 3 layers of attention
Epoch 1/95, Loss: 0.20046484649181365
0.5292783038787672
Epoch 11/95, Loss: 0.15092731714248658
0.5965768343597011
Epoch 21/95, Loss: 0.13270514369010925
0.5756474298373674
Epoch 31/95, Loss: 0.11444658488035202
0.6340120525253348
Epoch 41/95, Loss: 0.10415975630283356
0.6377095652498658
Epoch 51/95, Loss: 0.2699004673957825
0.0010605058684772495
Epoch 61/95, Loss: 0.2715963858366013
6.588263022218041e-05
Epoch 71/95, Loss: 0.2555348926782608
0.011583360877573081
Epoch 81/95, Loss: 0.2632039666175842
6.005461247591636e-05
Epoch 91/95, Loss: 0.26641252160072326
0.001486490018740068


[I 2024-01-23 03:08:56,529] Trial 29 finished with value: 0.001486490018740068 and parameters: {'hidden_dim_h': 28, 'dropout': 0.06884646887898961, 'batch_size': 994, 'n_epochs': 95}. Best is trial 11 with value: 0.669534527122123.


Build model with 3 layers of attention
Epoch 1/50, Loss: 0.1836080888043279
0.5280711519525227
Epoch 11/50, Loss: 0.13365926949874216
0.6188306265847829
Epoch 21/50, Loss: 0.11703333744536275
0.6277718804188914
Epoch 31/50, Loss: 0.2605796482252038
0.012006578441004598
Epoch 41/50, Loss: 0.2549272419317909
0.01593275415412962


[I 2024-01-23 03:09:17,298] Trial 30 finished with value: 0.01593275415412962 and parameters: {'hidden_dim_h': 17, 'dropout': 0.0634821025070961, 'batch_size': 1080, 'n_epochs': 50}. Best is trial 11 with value: 0.669534527122123.


Build model with 3 layers of attention
Epoch 1/115, Loss: 1.4804109334945679
0.5116858720073142
Epoch 11/115, Loss: 0.7319447504622596
0.5781609098367384
Epoch 21/115, Loss: 0.3625665583780834
0.6192601883921859
Epoch 31/115, Loss: 0.20649803802371025
0.6003605501685935
Epoch 41/115, Loss: 0.1539259089955262
0.6324011980602251
Epoch 51/115, Loss: 0.138731310410159
0.6410569558599262
Epoch 61/115, Loss: 0.13297917055232183
0.6413649702017405
Epoch 71/115, Loss: 0.1307355865303959
0.6283645249616371
Epoch 81/115, Loss: 0.12537786204900062
0.636076926015559
Epoch 91/115, Loss: 0.12029204624039787
0.6447730615620582
Epoch 101/115, Loss: 0.11624653876892158
0.6461432908769006
Epoch 111/115, Loss: 0.11096664039152009
0.6483572235031512


[I 2024-01-23 03:10:01,210] Trial 31 finished with value: 0.6483572235031512 and parameters: {'hidden_dim_h': 11, 'dropout': 0.1131365516433812, 'batch_size': 873, 'n_epochs': 115}. Best is trial 11 with value: 0.669534527122123.


Build model with 3 layers of attention
Epoch 1/108, Loss: 0.4001644452412923
0.5237231012876311
Epoch 11/108, Loss: 0.22814106223759828
0.5881460687900352
Epoch 21/108, Loss: 0.19894796830636483
0.6181130620389236
Epoch 31/108, Loss: 0.17537927241237075
0.6363856944468158
Epoch 41/108, Loss: 0.15577907308384223
0.6347468226937417
Epoch 51/108, Loss: 0.1361549918850263
0.6413064177326316
Epoch 61/108, Loss: 0.12072445397023801
0.6601120403027722
Epoch 71/108, Loss: 0.10846934936664722
0.6601566714087791
Epoch 81/108, Loss: 0.1104615107178688
0.6495343519980565
Epoch 91/108, Loss: 0.09419770180075257
0.6617031312853751
Epoch 101/108, Loss: 0.08915967632223058
0.6781286993983072


[I 2024-01-23 03:10:42,051] Trial 32 finished with value: 0.6781286993983072 and parameters: {'hidden_dim_h': 12, 'dropout': 0.08864922516694447, 'batch_size': 906, 'n_epochs': 108}. Best is trial 32 with value: 0.6781286993983072.


Found better hyperparameter, update model
Build model with 3 layers of attention
Epoch 1/83, Loss: 3.161350635921254
0.5093919611309469
Epoch 11/83, Loss: 1.7816093493910397
0.5358480829934436
Epoch 21/83, Loss: 1.001566438113942
0.5757778635144426
Epoch 31/83, Loss: 0.5526698021327748
0.0029907574850260997
Epoch 41/83, Loss: 0.34929121943081126
0.009881152795197655
Epoch 51/83, Loss: 0.2799431497559828
0.004001049603862373
Epoch 61/83, Loss: 0.2621839725795914
0.011448746110014904
Epoch 71/83, Loss: 0.25806435241418724
0.00847263013629096
Epoch 81/83, Loss: 0.2616004838662989
0.0021805522037696315


[I 2024-01-23 03:11:15,551] Trial 33 finished with value: 0.0021805522037696315 and parameters: {'hidden_dim_h': 13, 'dropout': 0.07870028722034424, 'batch_size': 707, 'n_epochs': 83}. Best is trial 32 with value: 0.6781286993983072.


Build model with 3 layers of attention
Epoch 1/101, Loss: 0.45922597911622787
0.5246827555223919
Epoch 11/101, Loss: 0.25024218802098874
0.5990368134054095
Epoch 21/101, Loss: 0.2124499198463228
0.6220341367508204
Epoch 31/101, Loss: 0.19169359240267012
0.6205623783167256
Epoch 41/101, Loss: 0.1672329775713108
0.6407914386142658
Epoch 51/101, Loss: 0.15039468418668817
0.6370868217197603
Epoch 61/101, Loss: 0.13075801730155945
0.6447253290155711
Epoch 71/101, Loss: 0.12514094070152001
0.6256634875681524
Epoch 81/101, Loss: 0.11140986504378142
0.6468926037770497
Epoch 91/101, Loss: 0.10385299860327332
0.6559890387226084


[I 2024-01-23 03:11:54,797] Trial 34 finished with value: 0.6596870932721972 and parameters: {'hidden_dim_h': 16, 'dropout': 0.059104859469426584, 'batch_size': 917, 'n_epochs': 101}. Best is trial 32 with value: 0.6781286993983072.


Epoch 101/101, Loss: 0.09762907773256302
0.6596870932721972
Build model with 3 layers of attention
Epoch 1/244, Loss: 0.4802657256523768
0.5395494451841811
Epoch 11/244, Loss: 0.20503394454717636
0.6228931860269291
Epoch 21/244, Loss: 0.16584120591481527
0.6311752072888275
Epoch 31/244, Loss: 0.1512160802880923
0.6436930331053146
Epoch 41/244, Loss: 0.13804430663585662
0.6491523568343394
Epoch 51/244, Loss: 0.12576385512948035
0.6513039743159494
Epoch 61/244, Loss: 0.11398449117938678
0.6558073907463676
Epoch 71/244, Loss: 0.10819026877482732
0.6578950408715916
Epoch 81/244, Loss: 0.09650153840581575
0.6685937099016873
Epoch 91/244, Loss: 0.09351351285974184
0.6650066893878256
Epoch 101/244, Loss: 0.08745476404825846
0.6733387552331729
Epoch 111/244, Loss: 0.09351998344063758
0.6569053384349727
Epoch 121/244, Loss: 0.08522197132309278
0.6692174002875231
Epoch 131/244, Loss: 0.08059653490781785
0.6857206037737777
Epoch 141/244, Loss: 0.07880709767341613
0.6899086872191794
Epoch 151/244,

[I 2024-01-23 03:13:36,665] Trial 35 finished with value: 0.040473278469885805 and parameters: {'hidden_dim_h': 20, 'dropout': 0.08870578654911615, 'batch_size': 802, 'n_epochs': 244}. Best is trial 32 with value: 0.6781286993983072.


Build model with 3 layers of attention
Epoch 1/65, Loss: 1.5289199870565664
0.509597606332498
Epoch 11/65, Loss: 0.872372391431228
0.5527996232837215
Epoch 21/65, Loss: 0.5208940091340438
0.5830815334634288
Epoch 31/65, Loss: 0.3413022173487622
0.5933873065684013
Epoch 41/65, Loss: 0.26171187732530676
0.6062788820912832
Epoch 51/65, Loss: 0.22954479702140973
0.6047260413385468
Epoch 61/65, Loss: 0.21863139647504556
0.5433860715882954


[I 2024-01-23 03:14:01,254] Trial 36 finished with value: 0.5433860715882954 and parameters: {'hidden_dim_h': 12, 'dropout': 0.17494731219798187, 'batch_size': 1046, 'n_epochs': 65}. Best is trial 32 with value: 0.6781286993983072.


Build model with 3 layers of attention
Epoch 1/299, Loss: 0.6482439935207367
0.5517370986156728
Epoch 11/299, Loss: 0.1859713111604963
0.6152987330717212
Epoch 21/299, Loss: 0.1309382672465983
0.6328209415505698
Epoch 31/299, Loss: 0.1219465345853851
0.6488137297331067
Epoch 41/299, Loss: 0.12090782874396869
0.6222896451148217
Epoch 51/299, Loss: 0.2624956262963159
0.0009313537261845479
Epoch 61/299, Loss: 0.2669710658845447
0.0038495540214976203
Epoch 71/299, Loss: 0.2614550675664629
0.003555369408492243
Epoch 81/299, Loss: 0.2608795389533043
0.0025482077159891003
Epoch 91/299, Loss: 0.2586700302504358
0.0026888198196462565
Epoch 101/299, Loss: 0.26304767351774944
0.0013807109107673813
Epoch 111/299, Loss: 0.262371464854195
0.0009493836801553495
Epoch 121/299, Loss: 0.26031158545187544
0.003995440360304054
Epoch 131/299, Loss: 0.2627801306190945
0.0017173964579857773
Epoch 141/299, Loss: 0.26436273221458706
0.00026986677545950353
Epoch 151/299, Loss: 0.2634628588954608
0.0013130073876

[I 2024-01-23 03:16:22,760] Trial 37 finished with value: 0.0013961086123271968 and parameters: {'hidden_dim_h': 25, 'dropout': 0.11442491311750198, 'batch_size': 570, 'n_epochs': 299}. Best is trial 32 with value: 0.6781286993983072.


Build model with 3 layers of attention
Epoch 1/147, Loss: 1.4954719382363397
0.5472849041528466
Epoch 11/147, Loss: 0.6537442400648787
0.5957949389874297
Epoch 21/147, Loss: 0.35681724870527115
0.6203782183040383
Epoch 31/147, Loss: 0.2770812430897275
0.634646170427003
Epoch 41/147, Loss: 0.25051956966116623
0.6320136725360651
Epoch 51/147, Loss: 0.22926994393000732
0.5978694718923526
Epoch 61/147, Loss: 0.2058595024250649
0.6394905536756706
Epoch 71/147, Loss: 0.1876013327289272
0.6063121832856625
Epoch 81/147, Loss: 0.1660249724581435
0.6373116575725101
Epoch 91/147, Loss: 0.14585141555683032
0.655064542029693
Epoch 101/147, Loss: 0.12838498925840533
0.6619006512066933
Epoch 111/147, Loss: 0.11399318096605507
0.6699171869429076
Epoch 121/147, Loss: 0.10285623371601105
0.6644624555722444
Epoch 131/147, Loss: 0.10004386688406403
0.6479931565409041
Epoch 141/147, Loss: 0.0918863772137745
0.6705641146681387


[I 2024-01-23 03:17:24,941] Trial 38 finished with value: 0.6705641146681387 and parameters: {'hidden_dim_h': 17, 'dropout': 0.14492549209077152, 'batch_size': 649, 'n_epochs': 147}. Best is trial 32 with value: 0.6781286993983072.


Build model with 3 layers of attention
Epoch 1/181, Loss: 0.4541101106525173
0.5436011619769548
Epoch 11/181, Loss: 0.25776288989027574
0.00579465147724206
Epoch 21/181, Loss: 0.2629213461861808
1.348810404522554e-06
Epoch 31/181, Loss: 0.2595091964718858
2.486916309826055e-06
Epoch 41/181, Loss: 0.2615925891864935
0.00032083971945401796
Epoch 51/181, Loss: 0.25607361569559783
0.011711377519622064
Epoch 61/181, Loss: 0.256136401076994
0.013672652730511018
Epoch 71/181, Loss: 0.2553834751160187
0.0204350464047723
Epoch 81/181, Loss: 0.26282223247917447
0.0003756587275069927
Epoch 91/181, Loss: 0.2614017299999147
0.0017648986386671628
Epoch 101/181, Loss: 0.2609879791560258
0.0008741780066979412
Epoch 111/181, Loss: 0.25949375816350856
0.0008043677999634613
Epoch 121/181, Loss: 0.25680348956020627
0.00039117242070284486
Epoch 131/181, Loss: 0.255065365699974
0.004060863490697021
Epoch 141/181, Loss: 0.2551366094301438
0.0028146783435038193
Epoch 151/181, Loss: 0.2554458319788148
0.004512

[I 2024-01-23 03:20:02,223] Trial 39 finished with value: 0.002513836478345326 and parameters: {'hidden_dim_h': 18, 'dropout': 0.14502742797966578, 'batch_size': 142, 'n_epochs': 181}. Best is trial 32 with value: 0.6781286993983072.


Epoch 181/181, Loss: 0.25445581364208425
0.002513836478345326
Build model with 3 layers of attention
Epoch 1/153, Loss: 0.33196965027862874
0.5330995263981693
Epoch 11/153, Loss: 0.17976017841990566
0.603935593702138
Epoch 21/153, Loss: 0.12905973867631296
0.6351288391284401
Epoch 31/153, Loss: 0.10401055814934448
0.6424793399310098
Epoch 41/153, Loss: 0.1644109092967611
0.016002219643596795
Epoch 51/153, Loss: 0.26788336563278253
0.0033158948845515403
Epoch 61/153, Loss: 0.26929942948717467
0.002963843368746103
Epoch 71/153, Loss: 0.2655480438974542
0.004294454934698478
Epoch 81/153, Loss: 0.2651965998008218
0.0005240824587362952
Epoch 91/153, Loss: 0.26520591449569647
0.000628254904218583
Epoch 101/153, Loss: 0.2628102292057494
0.002506501641188651
Epoch 111/153, Loss: 0.2606624360655395
0.003031181080077746
Epoch 121/153, Loss: 0.25921851102734955
0.005564194126012536
Epoch 131/153, Loss: 0.2588170846583138
0.0028024864756455866
Epoch 141/153, Loss: 0.25997416822003644
0.00199508367

[I 2024-01-23 03:21:27,165] Trial 40 finished with value: 0.002679409392741068 and parameters: {'hidden_dim_h': 30, 'dropout': 0.16138123772914653, 'batch_size': 340, 'n_epochs': 153}. Best is trial 32 with value: 0.6781286993983072.


Build model with 3 layers of attention
Epoch 1/130, Loss: 0.49640984712420283
0.5360141311462672
Epoch 11/130, Loss: 0.2635964171306507
0.6105716295778815
Epoch 21/130, Loss: 0.2233436518424266
0.6239032749875333
Epoch 31/130, Loss: 0.1895282171867989
0.6180560665277058
Epoch 41/130, Loss: 0.15762240902797595
0.6424573305707936
Epoch 51/130, Loss: 0.13218005404279037
0.6483981332658104
Epoch 61/130, Loss: 0.25515222025884166
0.012649604254335556
Epoch 71/130, Loss: 0.2552531542810234
0.01319075734102976
Epoch 81/130, Loss: 0.24967699477801453
0.023795055205569755
Epoch 91/130, Loss: 0.2601026127467284
0.005975966750690977
Epoch 101/130, Loss: 0.2608767759961051
0.004593119187915739
Epoch 111/130, Loss: 0.2606829948521949
0.0035610518556036955
Epoch 121/130, Loss: 0.26109317871364385
0.0050437948707013494


[I 2024-01-23 03:22:20,987] Trial 41 finished with value: 0.0050437948707013494 and parameters: {'hidden_dim_h': 16, 'dropout': 0.06917445044163341, 'batch_size': 654, 'n_epochs': 130}. Best is trial 32 with value: 0.6781286993983072.


Build model with 3 layers of attention
Epoch 1/108, Loss: 1.4054862352517934
0.5323424254928201
Epoch 11/108, Loss: 0.5217701265445123
0.5887981864999128
Epoch 21/108, Loss: 0.22188149163356194
0.6233592682629805
Epoch 31/108, Loss: 0.15291408965220818
0.6346941829771185
Epoch 41/108, Loss: 0.1394508184912877
0.6458928417757859
Epoch 51/108, Loss: 0.13201973969355607
0.650292228972866
Epoch 61/108, Loss: 0.12376410915301396
0.6576531508877597
Epoch 71/108, Loss: 0.11907353271276523
0.6550117211452186
Epoch 81/108, Loss: 0.11123016419319007
0.6628778397694665
Epoch 91/108, Loss: 0.10175883827301171
0.6781513792535716
Epoch 101/108, Loss: 0.09674390634665123
0.6774506829822893


[I 2024-01-23 03:23:04,125] Trial 42 finished with value: 0.6774506829822893 and parameters: {'hidden_dim_h': 12, 'dropout': 0.051191212289173296, 'batch_size': 618, 'n_epochs': 108}. Best is trial 32 with value: 0.6781286993983072.


Build model with 3 layers of attention
Epoch 1/211, Loss: 1.3465035988224878
0.52227257787368
Epoch 11/211, Loss: 0.511116187605593
0.5769953893621097
Epoch 21/211, Loss: 0.210114107777675
0.6195719912444231
Epoch 31/211, Loss: 0.26999635125199956
0.0018010569938747645
Epoch 41/211, Loss: 0.26295598306589657
0.0021356917944977596
Epoch 51/211, Loss: 0.2579078438381354
0.015257270864904792
Epoch 61/211, Loss: 0.25712855408589047
0.014907290567343313
Epoch 71/211, Loss: 0.25633490623699295
0.016378171320354594
Epoch 81/211, Loss: 0.2601291872560978
0.004907335053182598
Epoch 91/211, Loss: 0.2556876300109757
0.013179495750870919
Epoch 101/211, Loss: 0.25514910494287807
0.011404469832217594
Epoch 111/211, Loss: 0.25589000516467625
0.013565322283544122
Epoch 121/211, Loss: 0.25584089962972534
0.014936045739386696
Epoch 131/211, Loss: 0.25242061043779057
0.025711244288736813
Epoch 141/211, Loss: 0.25555407297280097
0.01694434865252313
Epoch 151/211, Loss: 0.2560042155285676
0.018455538829526

[I 2024-01-23 03:24:27,343] Trial 43 finished with value: 0.018890733647815613 and parameters: {'hidden_dim_h': 12, 'dropout': 0.13457027033513236, 'batch_size': 675, 'n_epochs': 211}. Best is trial 32 with value: 0.6781286993983072.


Epoch 211/211, Loss: 0.2530509779850642
0.018890733647815613
Build model with 3 layers of attention
Epoch 1/145, Loss: 1.1626221552873268
0.5274985855803651
Epoch 11/145, Loss: 0.4632936693154849
0.00029919485911731437
Epoch 21/145, Loss: 0.2856415349703569
0.004762125636128254
Epoch 31/145, Loss: 0.26164243695063466
0.005985754971488141
Epoch 41/145, Loss: 0.2584907313187917
0.008959720746225353
Epoch 51/145, Loss: 0.25756437121293485
0.01079342575580534
Epoch 61/145, Loss: 0.2625192900498708
0.00015313761786330012
Epoch 71/145, Loss: 0.2624843942049222
0.000589639576568991
Epoch 81/145, Loss: 0.26168739604644287
0.0009601725725018996
Epoch 91/145, Loss: 0.26173158906973326
0.0005561142191298378
Epoch 101/145, Loss: 0.26138612933647937
0.003771262365954817
Epoch 111/145, Loss: 0.26054990941133255
0.0013479376224384836
Epoch 121/145, Loss: 0.2620948320015883
0.0015953249633497363
Epoch 131/145, Loss: 0.2609171252220105
0.0018409309934694466
Epoch 141/145, Loss: 0.26048883222616637
0.00

[I 2024-01-23 03:25:35,607] Trial 44 finished with value: 0.0011536357305391906 and parameters: {'hidden_dim_h': 26, 'dropout': 0.050160674341717094, 'batch_size': 622, 'n_epochs': 145}. Best is trial 32 with value: 0.6781286993983072.


Build model with 3 layers of attention
Epoch 1/108, Loss: 0.2658615446752972
0.5448865259165824
Epoch 11/108, Loss: 0.17609306673208872
0.6048107108278202
Epoch 21/108, Loss: 0.13449263407124412
0.6223310521730364
Epoch 31/108, Loss: 0.10795817706320021
0.6399620828510626
Epoch 41/108, Loss: 0.09645901885297563
0.650403803707445
Epoch 51/108, Loss: 0.08965217255883746
0.6576576060312721
Epoch 61/108, Loss: 0.08723554660876592
0.6650583507773397
Epoch 71/108, Loss: 0.09121262364917332
0.6514992946942356
Epoch 81/108, Loss: 0.08798742724789513
0.661208707457324
Epoch 91/108, Loss: 0.08992068005932702
0.6561890714746474
Epoch 101/108, Loss: 0.08496749103069305
0.6685024414711414


[I 2024-01-23 03:26:21,376] Trial 45 finished with value: 0.6685024414711414 and parameters: {'hidden_dim_h': 15, 'dropout': 0.1129214756374107, 'batch_size': 537, 'n_epochs': 108}. Best is trial 32 with value: 0.6781286993983072.


Build model with 3 layers of attention
Epoch 1/177, Loss: 0.6088739431330136
0.533609727886463
Epoch 11/177, Loss: 0.2377015565122877
0.607233966965786
Epoch 21/177, Loss: 0.19644401807870185
0.6306907659138641
Epoch 31/177, Loss: 0.15841251292398997
0.648979369952126
Epoch 41/177, Loss: 0.1437611100929124
0.00038450935557025114
Epoch 51/177, Loss: 0.25381189132375376
0.021116035759863386
Epoch 61/177, Loss: 0.24991524485605104
0.022154394715918374
Epoch 71/177, Loss: 0.2469976818455117
0.02203682235283781
Epoch 81/177, Loss: 0.2566274292767048
0.008221040328679301
Epoch 91/177, Loss: 0.25604129768908024
0.0101357228580156
Epoch 101/177, Loss: 0.2562806361487934
0.010448115039923118
Epoch 111/177, Loss: 0.254568719172052
0.01279108700003569
Epoch 121/177, Loss: 0.2553046239273889
0.0074647127381806955
Epoch 131/177, Loss: 0.2575521543622017
0.005981265165650753
Epoch 141/177, Loss: 0.2541187147476843
0.009016010473551031
Epoch 151/177, Loss: 0.25502924913806574
0.015372248531004343
Epo

[I 2024-01-23 03:27:39,542] Trial 46 finished with value: 0.008100991379512156 and parameters: {'hidden_dim_h': 15, 'dropout': 0.11309490134108746, 'batch_size': 428, 'n_epochs': 177}. Best is trial 32 with value: 0.6781286993983072.


Build model with 3 layers of attention
Epoch 1/123, Loss: 0.43123121898282657
0.547633228057613
Epoch 11/123, Loss: 0.12781829403882677
0.6246664428415948
Epoch 21/123, Loss: 0.1109322350133549
0.6415828049530565
Epoch 31/123, Loss: 0.10462797636335547
0.6532973814757046
Epoch 41/123, Loss: 0.10140960545025089
0.6452488469465556
Epoch 51/123, Loss: 0.0961081747981635
0.6632241368769033
Epoch 61/123, Loss: 0.090696435760368
0.6707684069391352
Epoch 71/123, Loss: 0.08711425536735491
0.6733437325484901
Epoch 81/123, Loss: 0.08517569066448645
0.6780636211141714
Epoch 91/123, Loss: 0.08594468997960741
0.6738329501098073
Epoch 101/123, Loss: 0.08294030600650744
0.6788799364350142
Epoch 111/123, Loss: 0.2644238258627328
0.0020357045333334487
Epoch 121/123, Loss: 0.2580576078458266
0.015825617165901536


[I 2024-01-23 03:28:33,799] Trial 47 finished with value: 0.015825617165901536 and parameters: {'hidden_dim_h': 17, 'dropout': 0.15899035013703675, 'batch_size': 551, 'n_epochs': 123}. Best is trial 32 with value: 0.6781286993983072.


Build model with 3 layers of attention
Epoch 1/108, Loss: 1.4535759127869898
0.5133026298705937
Epoch 11/108, Loss: 0.4240367309171326
0.5903988001784646
Epoch 21/108, Loss: 0.1731626783706704
0.6321092807040337
Epoch 31/108, Loss: 0.1388894821308097
0.6383576744982213
Epoch 41/108, Loss: 0.1295085493095067
0.642281842527642
Epoch 51/108, Loss: 0.12068787125908599
0.66004948868474
Epoch 61/108, Loss: 0.11496566905050862
0.6517144861006486
Epoch 71/108, Loss: 0.10559245822381
0.6640887040126429
Epoch 81/108, Loss: 0.0993397976366841
0.6641838422963215
Epoch 91/108, Loss: 0.2630896069565598
0.003957124860239679
Epoch 101/108, Loss: 0.25844588845360034
0.011968683084891059


[I 2024-01-23 03:29:20,554] Trial 48 finished with value: 0.011968683084891059 and parameters: {'hidden_dim_h': 13, 'dropout': 0.10334956037294021, 'batch_size': 489, 'n_epochs': 108}. Best is trial 32 with value: 0.6781286993983072.


Build model with 3 layers of attention
Epoch 1/107, Loss: 0.2477191358804703
9.224791881686642e-05
Epoch 11/107, Loss: 0.2619984742254019
0.001718664916317098
Epoch 21/107, Loss: 0.2624397127578656
2.6959711810049238e-05
Epoch 31/107, Loss: 0.2619595387329658
0.001294792901808597
Epoch 41/107, Loss: 0.2617695531497399
0.002453969399811008
Epoch 51/107, Loss: 0.2624397243062655
0.00014591499560964185
Epoch 61/107, Loss: 0.26268804346521696
9.55560589526784e-05
Epoch 71/107, Loss: 0.2621325695266326
0.0004271415890661094
Epoch 81/107, Loss: 0.26166388193766277
4.660572473295734e-06
Epoch 91/107, Loss: 0.26182119076450666
1.3087820381926477e-05
Epoch 101/107, Loss: 0.26174430673321086
3.808431701129635e-05


[I 2024-01-23 03:30:30,154] Trial 49 finished with value: 3.808431701129635e-05 and parameters: {'hidden_dim_h': 19, 'dropout': 0.08920933244994567, 'batch_size': 200, 'n_epochs': 107}. Best is trial 32 with value: 0.6781286993983072.


Build model with 3 layers of attention
Epoch 1/143, Loss: 0.19040806662468684
0.5154653545128752
Epoch 11/143, Loss: 0.1432319778416838
0.573522052224856
Epoch 21/143, Loss: 0.13089310413315183
0.5756943044990538
Epoch 31/143, Loss: 0.11362841366125005
0.599218527761285
Epoch 41/143, Loss: 0.11581279763153621
0.6076489157568511
Epoch 51/143, Loss: 0.10019238426217011
0.6235636608030116
Epoch 61/143, Loss: 0.10927879491022655
0.6227623329780753
Epoch 71/143, Loss: 0.10075181616204125
0.6381943299958138
Epoch 81/143, Loss: 0.10125583694094703
0.6349777645600362
Epoch 91/143, Loss: 0.09374191363652547
0.6407594041649679
Epoch 101/143, Loss: 0.09378902915687788
0.6507562766595671
Epoch 111/143, Loss: 0.09246960743552163
0.6547503169936909
Epoch 121/143, Loss: 0.10763364338449069
0.6119220423750419
Epoch 131/143, Loss: 0.09638941855657668
0.6345761048060311
Epoch 141/143, Loss: 0.1136150425743489
0.6400039515567472


[I 2024-01-23 03:31:27,316] Trial 50 finished with value: 0.6400039515567472 and parameters: {'hidden_dim_h': 10, 'dropout': 0.23557502320857027, 'batch_size': 582, 'n_epochs': 143}. Best is trial 32 with value: 0.6781286993983072.


Build model with 3 layers of attention
Epoch 1/86, Loss: 0.2041407865472138
0.5577182085418159
Epoch 11/86, Loss: 0.26168037625029683
0.006371436757367224
Epoch 21/86, Loss: 0.25769098475575447
0.01610510818937342
Epoch 31/86, Loss: 0.256598262116313
0.01775585854056409
Epoch 41/86, Loss: 0.2568392110988498
0.014795050557898403
Epoch 51/86, Loss: 0.25671214796602726
0.01615442374516444
Epoch 61/86, Loss: 0.25662067579105496
0.008119845787181532
Epoch 71/86, Loss: 0.25944008911028504
0.0049583149253866945
Epoch 81/86, Loss: 0.26290437253192067
0.0007094657048882054


[I 2024-01-23 03:32:12,650] Trial 51 finished with value: 0.0007094657048882054 and parameters: {'hidden_dim_h': 37, 'dropout': 0.07153543750859274, 'batch_size': 752, 'n_epochs': 86}. Best is trial 32 with value: 0.6781286993983072.


Build model with 3 layers of attention
Epoch 1/125, Loss: 0.7335596116813453
0.43887897273618226
Epoch 11/125, Loss: 0.2891504152401074
0.6028328913366089
Epoch 21/125, Loss: 0.2120151745306479
0.6256492573704613
Epoch 31/125, Loss: 0.19071689288358432
0.6186581963850644
Epoch 41/125, Loss: 0.17056826320854393
0.6339985685298981
Epoch 51/125, Loss: 0.15069078070086403
0.6411013499983527
Epoch 61/125, Loss: 0.1372152697798368
0.6265815924546335
Epoch 71/125, Loss: 0.11948582006467355
0.649383654179629
Epoch 81/125, Loss: 0.10602094514949902
0.6738826770899877
Epoch 91/125, Loss: 0.09564131035192593
0.6702948972259972
Epoch 101/125, Loss: 0.09264250180205784
0.668233305541751
Epoch 111/125, Loss: 0.08258617528387018
0.68342513341385
Epoch 121/125, Loss: 0.26088400869756134
0.006477429384387552


[I 2024-01-23 03:33:07,725] Trial 52 finished with value: 0.006477429384387552 and parameters: {'hidden_dim_h': 23, 'dropout': 0.09444701523174454, 'batch_size': 650, 'n_epochs': 125}. Best is trial 32 with value: 0.6781286993983072.


Build model with 3 layers of attention
Epoch 1/73, Loss: 1.2732460547466666
0.3697420522929288
Epoch 11/73, Loss: 0.34355926392029745
0.5745437560148712
Epoch 21/73, Loss: 0.14671704081856474
0.6184670175565485
Epoch 31/73, Loss: 0.12375545319245786
0.6237096915118142
Epoch 41/73, Loss: 0.11652232341620387
0.6441670883541861
Epoch 51/73, Loss: 0.10988628027998672
0.6564458158466275
Epoch 61/73, Loss: 0.10340491682291031
0.6592450375138446
Epoch 71/73, Loss: 0.0981173218817127
0.667112597789364


[I 2024-01-23 03:33:38,916] Trial 53 finished with value: 0.667112597789364 and parameters: {'hidden_dim_h': 12, 'dropout': 0.11109060197621198, 'batch_size': 494, 'n_epochs': 73}. Best is trial 32 with value: 0.6781286993983072.


Build model with 3 layers of attention
Epoch 1/105, Loss: 0.24184820440507704
0.5216125481905831
Epoch 11/105, Loss: 0.1476527051098885
0.6008564164439768
Epoch 21/105, Loss: 0.10833369676143892
0.6336041797685811
Epoch 31/105, Loss: 0.09461369978324059
0.6440757333711522
Epoch 41/105, Loss: 0.09134715410970873
0.6467642175427344
Epoch 51/105, Loss: 0.08846545796240529
0.6598168744137576
Epoch 61/105, Loss: 0.08644293056380364
0.6624625733198978
Epoch 71/105, Loss: 0.08422830784993787
0.6673825881495037
Epoch 81/105, Loss: 0.08513783975954979
0.6739140742153673
Epoch 91/105, Loss: 0.08276774289627228
0.673462608485468
Epoch 101/105, Loss: 0.08108990507260445
0.6891898894503475


[I 2024-01-23 03:34:27,252] Trial 54 finished with value: 0.6891898894503475 and parameters: {'hidden_dim_h': 15, 'dropout': 0.05064440723340358, 'batch_size': 386, 'n_epochs': 105}. Best is trial 54 with value: 0.6891898894503475.


Found better hyperparameter, update model
Build model with 3 layers of attention
Epoch 1/196, Loss: 0.4417034983634949
0.5472270970515245
Epoch 11/196, Loss: 0.24767030531265696
0.5705663877454413
Epoch 21/196, Loss: 0.18036468058335978
0.6184330646531105
Epoch 31/196, Loss: 0.13315452745214837
0.6246686134830449
Epoch 41/196, Loss: 0.10813450727794992
0.6373357306715163
Epoch 51/196, Loss: 0.09543857247125907
0.6491961910291273
Epoch 61/196, Loss: 0.09135757667607948
0.6538753119160228
Epoch 71/196, Loss: 0.08818614910371968
0.6548087283417132
Epoch 81/196, Loss: 0.08888235749279866
0.6582577304565119
Epoch 91/196, Loss: 0.08879954612157384
0.6506218761094213
Epoch 101/196, Loss: 0.08336466917249023
0.6713385738118213
Epoch 111/196, Loss: 0.08079682998969907
0.6773123220358944
Epoch 121/196, Loss: 0.08141357060827192
0.6763124416834477
Epoch 131/196, Loss: 0.07879036029831307
0.6847200573094745
Epoch 141/196, Loss: 0.0834605791529671
0.679976751941048
Epoch 151/196, Loss: 0.0800609881

[I 2024-01-23 03:35:56,853] Trial 55 finished with value: 0.6918828928720429 and parameters: {'hidden_dim_h': 15, 'dropout': 0.05931365817440287, 'batch_size': 397, 'n_epochs': 196}. Best is trial 55 with value: 0.6918828928720429.


Found better hyperparameter, update model
Build model with 3 layers of attention
Epoch 1/201, Loss: 0.38697403864782365
0.5336117038023711
Epoch 11/201, Loss: 0.1803233586862439
0.6082303092013066
Epoch 21/201, Loss: 0.14940065096636287
0.6355230396783924
Epoch 31/201, Loss: 0.1233910758231507
0.6338866017886394
Epoch 41/201, Loss: 0.10347670719760363
0.6489380074353459
Epoch 51/201, Loss: 0.09113121948769835
0.6601197010987456
Epoch 61/201, Loss: 0.08624754369747444
0.6693698865615323
Epoch 71/201, Loss: 0.08491910456633958
0.6702822499744214
Epoch 81/201, Loss: 0.08336301856353635
0.6801873526555412
Epoch 91/201, Loss: 0.08096885901005542
0.6860275940236071
Epoch 101/201, Loss: 0.0846900111827694
0.6789047959606951
Epoch 111/201, Loss: 0.07849551615167837
0.687065611115311
Epoch 121/201, Loss: 0.08191681763187783
0.6787425161594434
Epoch 131/201, Loss: 0.08502339290790871
0.6709665479736648
Epoch 141/201, Loss: 0.0778642936808164
0.6949942123505695
Epoch 151/201, Loss: 0.082212211777

[I 2024-01-23 03:37:28,249] Trial 56 finished with value: 0.6996727337879708 and parameters: {'hidden_dim_h': 14, 'dropout': 0.05262626665709794, 'batch_size': 395, 'n_epochs': 201}. Best is trial 56 with value: 0.6996727337879708.


Epoch 201/201, Loss: 0.07309756776104208
0.6996727337879708
Found better hyperparameter, update model
Build model with 3 layers of attention
Epoch 1/235, Loss: 0.5996391034644583
0.5467693308677077
Epoch 11/235, Loss: 0.24633413034936655
0.022897134775418392
Epoch 21/235, Loss: 0.2637566645508227
0.0008292293911702567
Epoch 31/235, Loss: 0.2612097600232
0.0011903855352113897
Epoch 41/235, Loss: 0.2585173077557398
0.0008269067295895865
Epoch 51/235, Loss: 0.26191848629842635
0.0001894168959864732
Epoch 61/235, Loss: 0.26425452643762465
1.4056059575786008e-05
Epoch 71/235, Loss: 0.26205376373684924
0.0007412162478028946
Epoch 81/235, Loss: 0.259435019577327
0.0015279225582964947
Epoch 91/235, Loss: 0.258017771107995
0.0037981826940650604
Epoch 101/235, Loss: 0.2595772882518561
0.00028290794070419634
Epoch 111/235, Loss: 0.25929413201368373
0.0018578540280871148
Epoch 121/235, Loss: 0.25865268934032193
0.0017155713300529792
Epoch 131/235, Loss: 0.2610633851069471
0.00057372741353165
Epoch

[I 2024-01-23 03:39:38,288] Trial 57 finished with value: 0.009962856923625498 and parameters: {'hidden_dim_h': 14, 'dropout': 0.06625696024730376, 'batch_size': 260, 'n_epochs': 235}. Best is trial 56 with value: 0.6996727337879708.


Build model with 3 layers of attention
Epoch 1/197, Loss: 0.16536163964441844
0.5677106195227387
Epoch 11/197, Loss: 0.10526349587099892
0.6315127900391145
Epoch 21/197, Loss: 0.09259623012372425
0.649477004047403
Epoch 31/197, Loss: 0.09025339218122619
0.6561060827940769
Epoch 41/197, Loss: 0.2565767537270274
0.010722639580815416
Epoch 51/197, Loss: 0.25843232763665064
0.008293510191028456
Epoch 61/197, Loss: 0.26137731628758565
2.622174803416124e-05
Epoch 71/197, Loss: 0.2608521112373897
0.007733239103237498
Epoch 81/197, Loss: 0.2600003957748413
0.005683850096948893
Epoch 91/197, Loss: 0.2646451104964529
0.00029989145168759753
Epoch 101/197, Loss: 0.2639095566102437
0.003681872596582174
Epoch 111/197, Loss: 0.2625088042446545
0.0044913846957408245
Epoch 121/197, Loss: 0.26385707748787746
0.0015552357737325412
Epoch 131/197, Loss: 0.26216903754643034
0.0023326498455032525
Epoch 141/197, Loss: 0.2631880496229444
0.0018199126379548859
Epoch 151/197, Loss: 0.2620091461709568
0.001399912

[I 2024-01-23 03:41:17,924] Trial 58 finished with value: 0.009069018174127575 and parameters: {'hidden_dim_h': 20, 'dropout': 0.05004070065299202, 'batch_size': 343, 'n_epochs': 197}. Best is trial 56 with value: 0.6996727337879708.


Build model with 3 layers of attention
Epoch 1/226, Loss: 1.9925644640195168
0.5274876413976217
Epoch 11/226, Loss: 0.5123883359513041
0.6014989849140892
Epoch 21/226, Loss: 0.15691965704752228
0.6349806001971274
Epoch 31/226, Loss: 0.1126326201325756
0.6571486966470333
Epoch 41/226, Loss: 0.10797734192367327
0.6542029441665356
Epoch 51/226, Loss: 0.1041198898169954
0.646077594670312
Epoch 61/226, Loss: 0.09933713394201409
0.665273715135416
Epoch 71/226, Loss: 0.26133927879697183
0.007986048033916672
Epoch 81/226, Loss: 0.2640957241341219
0.0013901276286980787
Epoch 91/226, Loss: 0.2641041662733434
0.0014127203756103379
Epoch 101/226, Loss: 0.26654256223622014
0.001561091117712189
Epoch 111/226, Loss: 0.2663577368198815
0.00324305397563289
Epoch 121/226, Loss: 0.2631188163817939
0.0005761515739966706
Epoch 131/226, Loss: 0.26229705396345104
0.0002759079033109223
Epoch 141/226, Loss: 0.2618239658363795
0.0011923544315555925
Epoch 151/226, Loss: 0.26147046382144346
0.006340098341304645
E

[I 2024-01-23 03:43:05,091] Trial 59 finished with value: 0.002142500065820941 and parameters: {'hidden_dim_h': 18, 'dropout': 0.08052031594468757, 'batch_size': 407, 'n_epochs': 226}. Best is trial 56 with value: 0.6996727337879708.


Build model with 3 layers of attention
Epoch 1/201, Loss: 0.43099966296490205
0.5495421476271828
Epoch 11/201, Loss: 0.257319523774563
0.022377485982369504
Epoch 21/201, Loss: 0.2585072617264504
0.017368259253377476
Epoch 31/201, Loss: 0.2575742466018555
0.007826534181652927
Epoch 41/201, Loss: 0.25154646977465206
0.013283456349284168
Epoch 51/201, Loss: 0.24735270393021563
0.010722424290761028
Epoch 61/201, Loss: 0.25560681410926456
0.0034455003878298405
Epoch 71/201, Loss: 0.2594807390519913
0.009115230332117325
Epoch 81/201, Loss: 0.26343716665151273
0.000533404958068923
Epoch 91/201, Loss: 0.2616477833783373
0.0014474322490865893
Epoch 101/201, Loss: 0.26145383604663486
0.004404095712435485
Epoch 111/201, Loss: 0.26151516145848214
0.003122752715459836
Epoch 121/201, Loss: 0.2614589793250916
9.113148071052815e-07
Epoch 131/201, Loss: 0.25956394967246565
0.0011292736855438332
Epoch 141/201, Loss: 0.25974315071993687
0.0013641172905453564
Epoch 151/201, Loss: 0.2599077980886114
0.0012

[I 2024-01-23 03:44:58,101] Trial 60 finished with value: 0.00039483179729849817 and parameters: {'hidden_dim_h': 16, 'dropout': 0.06060394128204696, 'batch_size': 254, 'n_epochs': 201}. Best is trial 56 with value: 0.6996727337879708.


Epoch 201/201, Loss: 0.26178798555059635
0.00039483179729849817
Build model with 3 layers of attention
Epoch 1/169, Loss: 0.30923068835850687
0.4621201828348495
Epoch 11/169, Loss: 0.17799676089517533
0.6121847839046545
Epoch 21/169, Loss: 0.14157054489178042
0.6200802823287869
Epoch 31/169, Loss: 0.11388000112868124
0.6401629964235951
Epoch 41/169, Loss: 0.09788724431587804
0.6518327886029005
Epoch 51/169, Loss: 0.09044020971463572
0.6582183460568835
Epoch 61/169, Loss: 0.08717029866191649
0.6603902228498515
Epoch 71/169, Loss: 0.08416399696180897
0.6602006116212514
Epoch 81/169, Loss: 0.0837944682327009
0.6691318285851513
Epoch 91/169, Loss: 0.08197429896362367
0.671922891056674
Epoch 101/169, Loss: 0.08027114671084189
0.6865269991699547
Epoch 111/169, Loss: 0.08135802322818388
0.6756255384977867
Epoch 121/169, Loss: 0.07790573427994404
0.6817075447121229
Epoch 131/169, Loss: 0.08095819442983597
0.6804802976482434
Epoch 141/169, Loss: 0.07524509986321773
0.6965175526901287
Epoch 151/

[I 2024-01-23 03:46:14,508] Trial 61 finished with value: 0.6871099475093525 and parameters: {'hidden_dim_h': 11, 'dropout': 0.05051746767647149, 'batch_size': 385, 'n_epochs': 169}. Best is trial 56 with value: 0.6996727337879708.


Build model with 3 layers of attention
Epoch 1/188, Loss: 0.2938088913758596
0.5515644990498131
Epoch 11/188, Loss: 0.1809060571094354
0.6073090784737095
Epoch 21/188, Loss: 0.12548108759025733
0.623338902945228
Epoch 31/188, Loss: 0.10248517555495103
0.6381864889007488
Epoch 41/188, Loss: 0.09122339561581612
0.6533447752958803
Epoch 51/188, Loss: 0.09794004671275616
0.6384100476904615
Epoch 61/188, Loss: 0.08803291097283364
0.6630466141568481
Epoch 71/188, Loss: 0.08719308165212472
0.6604907356955226
Epoch 81/188, Loss: 0.08448386875291665
0.6700001319776011
Epoch 91/188, Loss: 0.09335963111370801
0.6471846650941863
Epoch 101/188, Loss: 0.08284709739188353
0.67320353368118
Epoch 111/188, Loss: 0.08533541734019916
0.6731726993277438
Epoch 121/188, Loss: 0.08064488247036934
0.6831166615855186
Epoch 131/188, Loss: 0.08323003550370535
0.6774659428165358
Epoch 141/188, Loss: 0.08171484358608723
0.6846026356710438
Epoch 151/188, Loss: 0.07818435269097487
0.6949615601055554
Epoch 161/188, Lo

[I 2024-01-23 03:47:37,949] Trial 62 finished with value: 0.6802915480271146 and parameters: {'hidden_dim_h': 12, 'dropout': 0.07523119344645024, 'batch_size': 403, 'n_epochs': 188}. Best is trial 56 with value: 0.6996727337879708.


Build model with 3 layers of attention
Epoch 1/168, Loss: 2.275922704127527
0.5169725735232729
Epoch 11/168, Loss: 0.7090648239658724
0.5897387352263493
Epoch 21/168, Loss: 0.25888809201217466
0.5920904975315233
Epoch 31/168, Loss: 0.1842208310000358
0.624363820546141
Epoch 41/168, Loss: 0.16560728175024833
0.6380686222627324
Epoch 51/168, Loss: 0.14772328878602675
0.6420876756241441
Epoch 61/168, Loss: 0.13071231351744744
0.6464922088727765
Epoch 71/168, Loss: 0.11231241134866592
0.6570843329825035
Epoch 81/168, Loss: 0.09999849166600935
0.6642639656881952
Epoch 91/168, Loss: 0.09338255863516562
0.6612220305018837
Epoch 101/168, Loss: 0.08535389121501677
0.6743489538547179
Epoch 111/168, Loss: 0.08367223196452664
0.6782828408384687
Epoch 121/168, Loss: 0.08146650452286966
0.685233975830279
Epoch 131/168, Loss: 0.07969117549157911
0.6888572314077905
Epoch 141/168, Loss: 0.08132617704329952
0.683864304093754
Epoch 151/168, Loss: 0.07745898106405812
0.6949283558638488
Epoch 161/168, Loss

[I 2024-01-23 03:48:53,964] Trial 63 finished with value: 0.6962206518882975 and parameters: {'hidden_dim_h': 11, 'dropout': 0.07386744313654117, 'batch_size': 389, 'n_epochs': 168}. Best is trial 56 with value: 0.6996727337879708.


Build model with 3 layers of attention
Epoch 1/267, Loss: 0.2803033074883164
0.5215948351079908
Epoch 11/267, Loss: 0.16745048321661402
0.5965536655077179
Epoch 21/267, Loss: 0.12463844653035773
0.6223888287508377
Epoch 31/267, Loss: 0.1023665472376542
0.6364551119695603
Epoch 41/267, Loss: 0.09401198427696697
0.6493005642027815
Epoch 51/267, Loss: 0.09092501318845593
0.6509097334979563
Epoch 61/267, Loss: 0.09261829980084153
0.6438649900417672
Epoch 71/267, Loss: 0.08776861710138008
0.6615634158102022
Epoch 81/267, Loss: 0.08619380156036283
0.6640847906695976
Epoch 91/267, Loss: 0.09261141961715261
0.6446478357453583
Epoch 101/267, Loss: 0.0875564486276908
0.6549432400972454
Epoch 111/267, Loss: 0.08578791896827885
0.6666155804883837
Epoch 121/267, Loss: 0.26166399800386586
0.0049955811667094785
Epoch 131/267, Loss: 0.25651906210868086
0.01849849326135007
Epoch 141/267, Loss: 0.25657215592313987
0.01752276021474488
Epoch 151/267, Loss: 0.25614506079525245
0.016861293208182696
Epoch 16

[I 2024-01-23 03:50:54,187] Trial 64 finished with value: 0.01627077450961061 and parameters: {'hidden_dim_h': 11, 'dropout': 0.07423760300658522, 'batch_size': 392, 'n_epochs': 267}. Best is trial 56 with value: 0.6996727337879708.


Build model with 3 layers of attention
Epoch 1/174, Loss: 2.6363013998254554
0.4951055018659454
Epoch 11/174, Loss: 0.6455285851057474
0.006663527871189092
Epoch 21/174, Loss: 0.27920520750733163
0.0008515104044707234
Epoch 31/174, Loss: 0.2597200048047227
0.0008515104044707227
Epoch 41/174, Loss: 0.2596384602320659
0.000851510404470724
Epoch 51/174, Loss: 0.25956202753178487
0.001379784866212053
Epoch 61/174, Loss: 0.2594649011825586
0.0013797814411749723
Epoch 71/174, Loss: 0.260014731582109
0.002214650562617551
Epoch 81/174, Loss: 0.25992075053902414
0.001894974402372858
Epoch 91/174, Loss: 0.26030864440775536
0.001077300489559043
Epoch 101/174, Loss: 0.26009520578694034
0.0007615707584943755
Epoch 111/174, Loss: 0.25999364314915296
0.0010879859990291392
Epoch 121/174, Loss: 0.2603094204292669
0.0008847452072565674
Epoch 131/174, Loss: 0.26009259266512735
0.0009696788490775691
Epoch 141/174, Loss: 0.2599125407732926
0.0009700710911784415
Epoch 151/174, Loss: 0.26004697169576374
0.00

[I 2024-01-23 03:52:21,737] Trial 65 finished with value: 0.0007003586869136341 and parameters: {'hidden_dim_h': 13, 'dropout': 0.08739703602265476, 'batch_size': 310, 'n_epochs': 174}. Best is trial 56 with value: 0.6996727337879708.


Build model with 3 layers of attention
Epoch 1/188, Loss: 0.5082313774093505
0.5039529733282753
Epoch 11/188, Loss: 0.1932446271661789
0.5994429873262632
Epoch 21/188, Loss: 0.16090645352678914
0.6103849321434791
Epoch 31/188, Loss: 0.13244799800938176
0.6280280048173806
Epoch 41/188, Loss: 0.11633151889808717
0.6136232404100785
Epoch 51/188, Loss: 0.10084627448551116
0.6363661321054942
Epoch 61/188, Loss: 0.09473272388981234
0.6479338504336993
Epoch 71/188, Loss: 0.09115974148434977
0.6503729363181723
Epoch 81/188, Loss: 0.09379219888679442
0.6445456593077874
Epoch 91/188, Loss: 0.08899978920817375
0.6554240633951258
Epoch 101/188, Loss: 0.08639648568726355
0.6618727553635392
Epoch 111/188, Loss: 0.08565247323243849
0.6683824611400948
Epoch 121/188, Loss: 0.26105309301807034
0.0032226962796597053
Epoch 131/188, Loss: 0.26070579429787977
0.0076980779670387
Epoch 141/188, Loss: 0.26656890300012404
0.006132739293610812
Epoch 151/188, Loss: 0.26429615193797695
0.0005175933420258668
Epoch 

[I 2024-01-23 03:53:48,490] Trial 66 finished with value: 0.0030734678686187883 and parameters: {'hidden_dim_h': 14, 'dropout': 0.33222508451744015, 'batch_size': 385, 'n_epochs': 188}. Best is trial 56 with value: 0.6996727337879708.


Build model with 3 layers of attention
Epoch 1/168, Loss: 1.0422185322023787
0.5074330096629995
Epoch 11/168, Loss: 0.36458280232717405
0.0023296583951265552
Epoch 21/168, Loss: 0.26364350909332057
0.012053987055650993
Epoch 31/168, Loss: 0.25608979587285025
0.005398766577653097
Epoch 41/168, Loss: 0.2595928803367435
0.00685550959686363
Epoch 51/168, Loss: 0.26449994967793516
0.005597764129189941
Epoch 61/168, Loss: 0.26340893641957697
0.0033990859241265774
Epoch 71/168, Loss: 0.261844591712052
0.0008345054778515396
Epoch 81/168, Loss: 0.2678555792795037
0.0002605800796473257
Epoch 91/168, Loss: 0.2667470402875037
0.0030362578264915277
Epoch 101/168, Loss: 0.26556274435430205
1.3265421078095724e-05
Epoch 111/168, Loss: 0.2643848668854192
1.0595447201175547e-05
Epoch 121/168, Loss: 0.26340377555703215
0.0007234308768210233
Epoch 131/168, Loss: 0.2625770259578273
0.001653549148161726
Epoch 141/168, Loss: 0.2647184206629699
0.0011023197671162829
Epoch 151/168, Loss: 0.26362207813083
9.319

[I 2024-01-23 03:55:32,831] Trial 67 finished with value: 0.0007827234773635499 and parameters: {'hidden_dim_h': 48, 'dropout': 0.062140751985818775, 'batch_size': 457, 'n_epochs': 168}. Best is trial 56 with value: 0.6996727337879708.


Build model with 3 layers of attention
Epoch 1/216, Loss: 0.17138764631840608
0.5596761673198094
Epoch 11/216, Loss: 0.10042659395424895
0.6252422260717471
Epoch 21/216, Loss: 0.09364775585573773
0.6343137570035556
Epoch 31/216, Loss: 0.08913594952156377
0.652869648643516
Epoch 41/216, Loss: 0.0866761205559091
0.6589167925003214
Epoch 51/216, Loss: 0.09121380908082621
0.6552623267104444
Epoch 61/216, Loss: 0.0879084018542785
0.662496942215928
Epoch 71/216, Loss: 0.08694448579073877
0.6615798679593465
Epoch 81/216, Loss: 0.08523316124035407
0.6703424611820276
Epoch 91/216, Loss: 0.08164235605865486
0.6717607857786332
Epoch 101/216, Loss: 0.08402843210239742
0.67268249789445
Epoch 111/216, Loss: 0.08420493011904318
0.6633861807630298
Epoch 121/216, Loss: 0.08139314872118854
0.6767418247818892
Epoch 131/216, Loss: 0.08105775049721547
0.6817446257312322
Epoch 141/216, Loss: 0.07826778318646342
0.6880132946026757
Epoch 151/216, Loss: 0.07784373154358347
0.6857987746805181
Epoch 161/216, Los

[I 2024-01-23 03:58:02,099] Trial 68 finished with value: 0.7065032353145217 and parameters: {'hidden_dim_h': 11, 'dropout': 0.07465451999985113, 'batch_size': 186, 'n_epochs': 216}. Best is trial 68 with value: 0.7065032353145217.


Found better hyperparameter, update model
Build model with 3 layers of attention
Epoch 1/214, Loss: 0.6015642337799072
0.5430473363587754
Epoch 11/214, Loss: 0.21406913220882415
0.6012606061913729
Epoch 21/214, Loss: 0.13249156856536864
0.6271330597196342
Epoch 31/214, Loss: 0.09957097285985947
0.6379997419457846
Epoch 41/214, Loss: 0.25442033660411834
0.009484524130232937
Epoch 51/214, Loss: 0.25055402863025666
0.01831627431593404
Epoch 61/214, Loss: 0.24000185239315033
0.01790399504215613
Epoch 71/214, Loss: 0.2540302778482437
0.009784270236327166
Epoch 81/214, Loss: 0.2513195734024048
0.0042118873355213755
Epoch 91/214, Loss: 0.2477721539735794
0.013226695047546071
Epoch 101/214, Loss: 0.2531439722776413
0.006938438133954941
Epoch 111/214, Loss: 0.2463823685646057
0.024173637718920175
Epoch 121/214, Loss: 0.2460023159980774
0.01645406394695593
Epoch 131/214, Loss: 0.26193045592308045
0.0005361938783989909
Epoch 141/214, Loss: 0.26096877217292785
0.0002369579359373705
Epoch 151/214, 

[I 2024-01-23 04:00:26,338] Trial 69 finished with value: 0.0021080286663230263 and parameters: {'hidden_dim_h': 11, 'dropout': 0.07414567074107337, 'batch_size': 192, 'n_epochs': 214}. Best is trial 68 with value: 0.7065032353145217.


Build model with 3 layers of attention
Epoch 1/195, Loss: 0.6175840708944532
0.5323980824464358
Epoch 11/195, Loss: 0.1321934118039078
0.6201813294936482
Epoch 21/195, Loss: 0.11602861227260695
0.6367801973393168
Epoch 31/195, Loss: 0.10233141606052716
0.6471268399233766
Epoch 41/195, Loss: 0.09286326559053527
0.6588982413438018
Epoch 51/195, Loss: 0.08865714147686958
0.6700604580623882
Epoch 61/195, Loss: 0.08718907667530908
0.6702139333624654
Epoch 71/195, Loss: 0.08844374178184403
0.6598093135868217
Epoch 81/195, Loss: 0.08671891763806343
0.6682891385435873
Epoch 91/195, Loss: 0.08456833751665221
0.6771798018077363
Epoch 101/195, Loss: 0.0817659906215138
0.6832157542514362
Epoch 111/195, Loss: 0.0854995829363664
0.686000238873552
Epoch 121/195, Loss: 0.08015951009260283
0.6892686826119232
Epoch 131/195, Loss: 0.08112447141773171
0.6881630225929536
Epoch 141/195, Loss: 0.07799779093927807
0.6989721372645806
Epoch 151/195, Loss: 0.08299222058720059
0.6885773419622366
Epoch 161/195, Lo

[I 2024-01-23 04:02:11,203] Trial 70 finished with value: 0.0011997992877422628 and parameters: {'hidden_dim_h': 10, 'dropout': 0.05945831190024985, 'batch_size': 268, 'n_epochs': 195}. Best is trial 68 with value: 0.7065032353145217.


Build model with 3 layers of attention
Epoch 1/205, Loss: 0.5763289556987043
0.5554649601110914
Epoch 11/205, Loss: 0.26118248269177863
0.011186758937875987
Epoch 21/205, Loss: 0.26119304761506507
0.0015440374950748085
Epoch 31/205, Loss: 0.26171455543110334
0.0005856506152231903
Epoch 41/205, Loss: 0.26238155364990234
0.0023538772900324897
Epoch 51/205, Loss: 0.26223798268947046
3.997216427033827e-05
Epoch 61/205, Loss: 0.26209765111190686
0.00043420074689668343
Epoch 71/205, Loss: 0.2618320549743763
0.0004242820207911894
Epoch 81/205, Loss: 0.2620146332875542
0.0008842398536132942
Epoch 91/205, Loss: 0.2615330674941989
0.0027939151005604477
Epoch 101/205, Loss: 0.2610714347034261
0.0008082679891270228
Epoch 111/205, Loss: 0.2622522297112838
0.0002789568212429133
Epoch 121/205, Loss: 0.2611491202876188
0.00496961898614649
Epoch 131/205, Loss: 0.2607076878564945
0.002843657659138966
Epoch 141/205, Loss: 0.26070221308348834
9.070233913296066e-05
Epoch 151/205, Loss: 0.26070008934407995


[I 2024-01-23 04:03:48,449] Trial 71 finished with value: 0.0005730448247928089 and parameters: {'hidden_dim_h': 14, 'dropout': 0.09755206890438775, 'batch_size': 348, 'n_epochs': 205}. Best is trial 68 with value: 0.7065032353145217.


Build model with 3 layers of attention
Epoch 1/221, Loss: 3.2084929457807965
0.4833579040720777
Epoch 11/221, Loss: 0.4453631808272505
0.5645383003737545
Epoch 21/221, Loss: 0.16014877159511093
0.6349718189820331
Epoch 31/221, Loss: 0.13659205538245428
0.6433740128881479
Epoch 41/221, Loss: 0.11864803058911214
0.641842594934549
Epoch 51/221, Loss: 0.10050441397238621
0.6490801242620824
Epoch 61/221, Loss: 0.08915932865533154
0.6614260446534664
Epoch 71/221, Loss: 0.08407801046835638
0.6798680689924137
Epoch 81/221, Loss: 0.08682470247808811
0.6728733414252711
Epoch 91/221, Loss: 0.08225452952680334
0.6825969357031736
Epoch 101/221, Loss: 0.08010327146775954
0.6846290199479625
Epoch 111/221, Loss: 0.07963826504033224
0.6869341760358568
Epoch 121/221, Loss: 0.24811109860913944
0.016016704289722065
Epoch 131/221, Loss: 0.24825644981017156
0.017433754152718917
Epoch 141/221, Loss: 0.2440715490189274
0.018254933088422232
Epoch 151/221, Loss: 0.24354148913273768
0.021157242848952675
Epoch 16

[I 2024-01-23 04:06:05,321] Trial 72 finished with value: 0.0017585487038571323 and parameters: {'hidden_dim_h': 12, 'dropout': 0.08541445220204719, 'batch_size': 212, 'n_epochs': 221}. Best is trial 68 with value: 0.7065032353145217.


Epoch 221/221, Loss: 0.2623999168915031
0.0017585487038571323
Build model with 3 layers of attention
Epoch 1/184, Loss: 0.2056288915387106
0.5607399077137285
Epoch 11/184, Loss: 0.099560263638313
0.6264990855425605
Epoch 21/184, Loss: 0.2686838448991603
0.004134796500657847
Epoch 31/184, Loss: 0.25444761397342336
0.001610390377813562
Epoch 41/184, Loss: 0.24848392909198866
0.007191914693125161
Epoch 51/184, Loss: 0.2487780540777008
0.011640428890340976
Epoch 61/184, Loss: 0.26771059902005606
0.0013679325242336845
Epoch 71/184, Loss: 0.25717855686515706
0.0008461251220827056
Epoch 81/184, Loss: 0.2574181863490273
0.0015187198538610485
Epoch 91/184, Loss: 0.254497860688969
2.3579558913166687e-05
Epoch 101/184, Loss: 0.25890205362263846
0.001174158612406031
Epoch 111/184, Loss: 0.25994759642969967
0.001164806027198318
Epoch 121/184, Loss: 0.2579695312011296
0.0011240331289870593
Epoch 131/184, Loss: 0.2567399283744631
0.0004751630817318433
Epoch 141/184, Loss: 0.2558305579073289
0.0022939

[I 2024-01-23 04:09:26,218] Trial 73 finished with value: 0.0009172818854148788 and parameters: {'hidden_dim_h': 13, 'dropout': 0.06763137823080248, 'batch_size': 108, 'n_epochs': 184}. Best is trial 68 with value: 0.7065032353145217.


Build model with 3 layers of attention
Epoch 1/161, Loss: 0.7939270604879428
0.5475209932719163
Epoch 11/161, Loss: 0.1261870868695088
0.6257164425523387
Epoch 21/161, Loss: 0.11297202912660745
0.617890016730071
Epoch 31/161, Loss: 0.10093958226916118
0.6448353772624732
Epoch 41/161, Loss: 0.09417360815673302
0.6445495107883379
Epoch 51/161, Loss: 0.0913862916521537
0.6559883375621124
Epoch 61/161, Loss: 0.08652768857203998
0.6559255307821111
Epoch 71/161, Loss: 0.08620601701430786
0.6641198761832926
Epoch 81/161, Loss: 0.08299786721666653
0.6750972026861006
Epoch 91/161, Loss: 0.1435578690889554
0.000449807404692657
Epoch 101/161, Loss: 0.25302876149996734
0.02397464114054256
Epoch 111/161, Loss: 0.26590499663964295
0.004141534771016856
Epoch 121/161, Loss: 0.2604816737465369
0.006451414039802599
Epoch 131/161, Loss: 0.25859991155373746
0.01118249475408664
Epoch 141/161, Loss: 0.25707118939130735
0.00934179144958594
Epoch 151/161, Loss: 0.2588509936363269
0.0037756616684101823


[I 2024-01-23 04:10:48,495] Trial 74 finished with value: 0.00965657630897417 and parameters: {'hidden_dim_h': 15, 'dropout': 0.05775989999406952, 'batch_size': 309, 'n_epochs': 161}. Best is trial 68 with value: 0.7065032353145217.


Epoch 161/161, Loss: 0.2566951761643092
0.00965657630897417
Build model with 3 layers of attention
Epoch 1/191, Loss: 0.6763856335119768
0.5482435814690599
Epoch 11/191, Loss: 0.1583045496182008
0.6278170298297969
Epoch 21/191, Loss: 0.2612221159718253
0.0011650493125958978
Epoch 31/191, Loss: 0.26398029191927475
0.002930156410537468
Epoch 41/191, Loss: 0.2590629434043711
0.0038114409629048445
Epoch 51/191, Loss: 0.25886607549407264
0.0029335454288613783
Epoch 61/191, Loss: 0.25849664075808093
0.005492646685068548
Epoch 71/191, Loss: 0.2566841168837114
0.009033860439623294
Epoch 81/191, Loss: 0.25413740466941487
0.008784788036033149
Epoch 91/191, Loss: 0.25539868094704365
0.003446198764068478
Epoch 101/191, Loss: 0.2540331843224439
0.0034042869077102076
Epoch 111/191, Loss: 0.2611776208335703
0.00040372527735769347
Epoch 121/191, Loss: 0.26105692278255116
0.0010703135636776636
Epoch 131/191, Loss: 0.2601449562744661
0.0026828351082197515
Epoch 141/191, Loss: 0.2584505281665108
0.002821

[I 2024-01-23 04:12:11,331] Trial 75 finished with value: 0.0015583241418454217 and parameters: {'hidden_dim_h': 11, 'dropout': 0.07813115480982269, 'batch_size': 437, 'n_epochs': 191}. Best is trial 68 with value: 0.7065032353145217.


Epoch 191/191, Loss: 0.25986840481107887
0.0015583241418454217
Build model with 3 layers of attention
Epoch 1/208, Loss: 0.29708009492605925
0.547181389976574
Epoch 11/208, Loss: 0.16462489101104438
0.5939622750423368
Epoch 21/208, Loss: 0.12998064688872546
0.6077782842031204
Epoch 31/208, Loss: 0.10427451028954238
0.626238346721921
Epoch 41/208, Loss: 0.09517463145311922
0.6443510459570834
Epoch 51/208, Loss: 0.09055700956378132
0.6559126336769503
Epoch 61/208, Loss: 0.09011533600278199
0.664670424946439
Epoch 71/208, Loss: 0.08841806289274246
0.6633435055444757
Epoch 81/208, Loss: 0.08616252930369228
0.6647936086603592
Epoch 91/208, Loss: 0.08482216438278556
0.6766530498671957
Epoch 101/208, Loss: 0.09154203243087977
0.6515215089504292
Epoch 111/208, Loss: 0.0858895406126976
0.6717108531248301
Epoch 121/208, Loss: 0.08675219456199557
0.6698179987089591
Epoch 131/208, Loss: 0.0827125406358391
0.6850940990023883
Epoch 141/208, Loss: 0.08591767225880176
0.6682816944343496
Epoch 151/208,

[I 2024-01-23 04:13:44,391] Trial 76 finished with value: 0.6904168232862807 and parameters: {'hidden_dim_h': 10, 'dropout': 0.1035868923886036, 'batch_size': 376, 'n_epochs': 208}. Best is trial 68 with value: 0.7065032353145217.


Build model with 3 layers of attention
Epoch 1/207, Loss: 0.1974183665588498
0.5534788723372359
Epoch 11/207, Loss: 0.13236589170992374
0.6143713435550795
Epoch 21/207, Loss: 0.10440150613430887
0.6456822728361044
Epoch 31/207, Loss: 0.09304332290776074
0.6513943276367207
Epoch 41/207, Loss: 0.09100741404108703
0.6459032518979317
Epoch 51/207, Loss: 0.08625627297442406
0.6652844422607083
Epoch 61/207, Loss: 0.081105284858495
0.6852938045675365
Epoch 71/207, Loss: 0.08528770494740456
0.6697337605346303
Epoch 81/207, Loss: 0.08059653698001057
0.6861067771753981
Epoch 91/207, Loss: 0.08059822826180607
0.6878165029651251
Epoch 101/207, Loss: 0.07681483612395823
0.6985050073997832
Epoch 111/207, Loss: 0.08773761545307934
0.6029529107034196
Epoch 121/207, Loss: 0.07680522400187328
0.6972512351361624
Epoch 131/207, Loss: 0.08153397450223565
0.6805654303217408
Epoch 141/207, Loss: 0.07404724502703175
0.6953932401634407
Epoch 151/207, Loss: 0.07141475233947858
0.7105268609575666
Epoch 161/207, 

[I 2024-01-23 04:15:20,156] Trial 77 finished with value: 0.010374573764138062 and parameters: {'hidden_dim_h': 16, 'dropout': 0.06860834770146963, 'batch_size': 374, 'n_epochs': 207}. Best is trial 68 with value: 0.7065032353145217.


Build model with 3 layers of attention
Epoch 1/224, Loss: 2.5992294403544642
0.4629334592018907
Epoch 11/224, Loss: 0.8839761468402126
0.5557154222243686
Epoch 21/224, Loss: 0.363164988003279
0.588904159110045
Epoch 31/224, Loss: 0.2590791504634054
0.601353544445832
Epoch 41/224, Loss: 0.23008713988881363
0.6151554787335067
Epoch 51/224, Loss: 0.2036944625147602
0.6294023681354421
Epoch 61/224, Loss: 0.17706807820420517
0.6279457781056271
Epoch 71/224, Loss: 0.14983026395764268
0.6406604937554145
Epoch 81/224, Loss: 0.1269234170516332
0.6551944584627751
Epoch 91/224, Loss: 0.10867759062532793
0.6590581827963862
Epoch 101/224, Loss: 0.09507303711092263
0.6718249472537097
Epoch 111/224, Loss: 0.09155891404340141
0.654472864119598
Epoch 121/224, Loss: 0.08715162177880605
0.6660456993454094
Epoch 131/224, Loss: 0.08342532251487698
0.6674398871584969
Epoch 141/224, Loss: 0.08437147239844005
0.6742473249446154
Epoch 151/224, Loss: 0.08152120651905997
0.6792084006459882
Epoch 161/224, Loss: 0

[I 2024-01-23 04:16:56,205] Trial 78 finished with value: 0.6927751225020813 and parameters: {'hidden_dim_h': 10, 'dropout': 0.1008087881626135, 'batch_size': 422, 'n_epochs': 224}. Best is trial 68 with value: 0.7065032353145217.


Build model with 3 layers of attention
Epoch 1/220, Loss: 0.3138699449744879
0.5405393478827376
Epoch 11/220, Loss: 0.14563630316771714
0.6067527364342042
Epoch 21/220, Loss: 0.12857902386024886
0.6212373823651463
Epoch 31/220, Loss: 0.11399503870337617
0.6296431492344019
Epoch 41/220, Loss: 0.10175591984800264
0.6444355515876028
Epoch 51/220, Loss: 0.09671354995054357
0.6418780141843394
Epoch 61/220, Loss: 0.09099766933450512
0.6526848963160563
Epoch 71/220, Loss: 0.08865088268237956
0.6599026688507984
Epoch 81/220, Loss: 0.08833271776344262
0.6234043126946415
Epoch 91/220, Loss: 0.08732569013156143
0.6668276808898246
Epoch 101/220, Loss: 0.08495458346955917
0.6737200795800293
Epoch 111/220, Loss: 0.08798780759759978
0.6571904863102651
Epoch 121/220, Loss: 0.08449635596251955
0.6776471550152179
Epoch 131/220, Loss: 0.08861148956359602
0.6570383282980803
Epoch 141/220, Loss: 0.08782128536818075
0.6611037170647213
Epoch 151/220, Loss: 0.08498361283073239
0.6718732065892419
Epoch 161/220

[I 2024-01-23 04:18:29,635] Trial 79 finished with value: 0.6930662408312099 and parameters: {'hidden_dim_h': 10, 'dropout': 0.10404821702342718, 'batch_size': 468, 'n_epochs': 220}. Best is trial 68 with value: 0.7065032353145217.


Build model with 3 layers of attention
Epoch 1/262, Loss: 0.4644539083572144
0.5359859224327328
Epoch 11/262, Loss: 0.2194733771872013
0.5891779230186268
Epoch 21/262, Loss: 0.18343358439333896
0.606531033331619
Epoch 31/262, Loss: 0.15048697146963566
0.6320505522229503
Epoch 41/262, Loss: 0.1268677445168191
0.6253551231875225
Epoch 51/262, Loss: 0.10989097775296962
0.6396128805023683
Epoch 61/262, Loss: 0.09804629566187555
0.65142291257078
Epoch 71/262, Loss: 0.09137908020552168
0.6533253138446634
Epoch 81/262, Loss: 0.08726655771123602
0.6644715483129906
Epoch 91/262, Loss: 0.0881652591076303
0.6562351981280454
Epoch 101/262, Loss: 0.08669104617326817
0.6648323548033105
Epoch 111/262, Loss: 0.08509002562533034
0.6660468803379054
Epoch 121/262, Loss: 0.08320169119124717
0.6692869779705822
Epoch 131/262, Loss: 0.0850520172017686
0.6693606547499855
Epoch 141/262, Loss: 0.08544630446332566
0.6720400380663312
Epoch 151/262, Loss: 0.08276614673594211
0.6769804188419823
Epoch 161/262, Loss:

[I 2024-01-23 04:20:18,385] Trial 80 finished with value: 0.011979777577901654 and parameters: {'hidden_dim_h': 10, 'dropout': 0.12126737270424215, 'batch_size': 516, 'n_epochs': 262}. Best is trial 68 with value: 0.7065032353145217.


Build model with 3 layers of attention
Epoch 1/231, Loss: 1.762921359539032
0.44892845305524176
Epoch 11/231, Loss: 0.6456407821178436
0.56641875454604
Epoch 21/231, Loss: 0.2551908469200134
0.6044839430007034
Epoch 31/231, Loss: 0.18457609444856643
0.616574475644413
Epoch 41/231, Loss: 0.16784190863370896
0.6230869828834791
Epoch 51/231, Loss: 0.1536608201265335
0.6280105866630249
Epoch 61/231, Loss: 0.1424648779630661
0.6157877430132279
Epoch 71/231, Loss: 0.1278691789507866
0.6275876358161177
Epoch 81/231, Loss: 0.11496676340699195
0.6338325973809568
Epoch 91/231, Loss: 0.2635959973931313
0.002558675216839859
Epoch 101/231, Loss: 0.2617592614889145
0.002523139437951648
Epoch 111/231, Loss: 0.26134826600551603
0.002921662031475443
Epoch 121/231, Loss: 0.260100856423378
0.006352871173632217
Epoch 131/231, Loss: 0.2596176940202713
0.006200333263227523
Epoch 141/231, Loss: 0.25955513715744016
0.006792736902259499
Epoch 151/231, Loss: 0.25852458119392396
0.009276826415627016
Epoch 161/23

[I 2024-01-23 04:21:55,131] Trial 81 finished with value: 0.012863666519005359 and parameters: {'hidden_dim_h': 10, 'dropout': 0.09925646460710089, 'batch_size': 482, 'n_epochs': 231}. Best is trial 68 with value: 0.7065032353145217.


Epoch 231/231, Loss: 0.2544063413143158
0.012863666519005359
Build model with 3 layers of attention
Epoch 1/220, Loss: 0.23033522536237555
0.5520241433767885
Epoch 11/220, Loss: 0.14694378731480565
0.6097707242462999
Epoch 21/220, Loss: 0.11021435512117593
0.6302266438997443
Epoch 31/220, Loss: 0.1991773296192468
0.001436721313411743
Epoch 41/220, Loss: 0.2405258100255426
0.023922120096790372
Epoch 51/220, Loss: 0.24238854341478233
0.03382823324439091
Epoch 61/220, Loss: 0.2382959263870515
0.032765353494984335
Epoch 71/220, Loss: 0.23233077863612808
0.0321378652872547
Epoch 81/220, Loss: 0.25501859044454184
0.012268114415521354
Epoch 91/220, Loss: 0.2542931015592024
0.020621136566735677
Epoch 101/220, Loss: 0.24492101128920016
0.018431738593438933
Epoch 111/220, Loss: 0.24586267935403858
0.018752338964498433
Epoch 121/220, Loss: 0.24830863862691155
0.009807499472220526
Epoch 131/220, Loss: 0.25413147512688694
0.012459083242719441
Epoch 141/220, Loss: 0.2540617800620665
0.01697925855936

[I 2024-01-23 04:23:49,385] Trial 82 finished with value: 0.008859717274954 and parameters: {'hidden_dim_h': 11, 'dropout': 0.08388102358931825, 'batch_size': 291, 'n_epochs': 220}. Best is trial 68 with value: 0.7065032353145217.


Build model with 3 layers of attention
Epoch 1/218, Loss: 0.3421544615012496
0.552392304960476
Epoch 11/218, Loss: 0.18766936771015622
0.5904019978581021
Epoch 21/218, Loss: 0.1370232099694992
0.6321633453898036
Epoch 31/218, Loss: 0.10852505533552881
0.6450906669096195
Epoch 41/218, Loss: 0.09406732556535237
0.6569727761300105
Epoch 51/218, Loss: 0.0884725363619292
0.663727033577599
Epoch 61/218, Loss: 0.08554355777911286
0.6636350893951853
Epoch 71/218, Loss: 0.08559024678682213
0.6614561954010307
Epoch 81/218, Loss: 0.08352592509629121
0.6678790489192704
Epoch 91/218, Loss: 0.08426162689479429
0.6705473224130417
Epoch 101/218, Loss: 0.08053896453843187
0.6798468083657566
Epoch 111/218, Loss: 0.07985689702318675
0.682598167272597
Epoch 121/218, Loss: 0.07972988836578469
0.670135658784549
Epoch 131/218, Loss: 0.0820493849355783
0.6760114860078917
Epoch 141/218, Loss: 0.07970446565035563
0.6767074714031907
Epoch 151/218, Loss: 0.07894142689322357
0.63714394700019
Epoch 161/218, Loss: 0

[I 2024-01-23 04:25:32,419] Trial 83 finished with value: 0.004024340038479231 and parameters: {'hidden_dim_h': 14, 'dropout': 0.05700283420885674, 'batch_size': 360, 'n_epochs': 218}. Best is trial 68 with value: 0.7065032353145217.


Build model with 3 layers of attention
Epoch 1/247, Loss: 0.31032882604682654
0.5495658141128417
Epoch 11/247, Loss: 0.19552016571948402
0.5935324385376595
Epoch 21/247, Loss: 0.13663614502078608
0.6308222626182886
Epoch 31/247, Loss: 0.10733386050713689
0.63359477654708
Epoch 41/247, Loss: 0.09399027259726274
0.6593969055596317
Epoch 51/247, Loss: 0.08862689869445667
0.6620219365438375
Epoch 61/247, Loss: 0.08382147249945424
0.6776370570359588
Epoch 71/247, Loss: 0.08008869449820435
0.6844352912095021
Epoch 81/247, Loss: 0.07925456971452947
0.6795401330567196
Epoch 91/247, Loss: 0.08104140233052404
0.6844256374385345
Epoch 101/247, Loss: 0.07804598031859648
0.6929853275383614
Epoch 111/247, Loss: 0.07554374877036664
0.6997907488631199
Epoch 121/247, Loss: 0.0807341101922487
0.6842385153371857
Epoch 131/247, Loss: 0.07958548280753587
0.6859482980459962
Epoch 141/247, Loss: 0.08502603675189771
0.6728688802819442
Epoch 151/247, Loss: 0.07618839019223263
0.6970729249910711
Epoch 161/247, 

[I 2024-01-23 04:27:21,504] Trial 84 finished with value: 0.6516121672705848 and parameters: {'hidden_dim_h': 13, 'dropout': 0.10355235821901296, 'batch_size': 419, 'n_epochs': 247}. Best is trial 68 with value: 0.7065032353145217.


Build model with 3 layers of attention
Epoch 1/235, Loss: 0.2212184750147768
0.5579326421854848
Epoch 11/235, Loss: 0.12745984452399048
0.6140743821452613
Epoch 21/235, Loss: 0.10076422828274804
0.6369199299780824
Epoch 31/235, Loss: 0.09493287501705659
0.6399035318372622
Epoch 41/235, Loss: 0.09386891197111155
0.6477953753964725
Epoch 51/235, Loss: 0.09052683735215986
0.6580320111661339
Epoch 61/235, Loss: 0.08833930047379958
0.6651506563018883
Epoch 71/235, Loss: 0.08929660050450144
0.6651334192539974
Epoch 81/235, Loss: 0.09248161174961038
0.6565776343350467
Epoch 91/235, Loss: 0.08646393879442602
0.6709765754610175
Epoch 101/235, Loss: 0.0926703980846985
0.6589122116856398
Epoch 111/235, Loss: 0.08538744356986638
0.6769544842818533
Epoch 121/235, Loss: 0.08283307393257683
0.684241872619286
Epoch 131/235, Loss: 0.0858623916031541
0.6759998313929398
Epoch 141/235, Loss: 0.08236052126095102
0.6853670870563278
Epoch 151/235, Loss: 0.08063903576820283
0.6875885052329216
Epoch 161/235, L

[I 2024-01-23 04:29:17,379] Trial 85 finished with value: 0.688176175848461 and parameters: {'hidden_dim_h': 11, 'dropout': 0.2862450256941816, 'batch_size': 326, 'n_epochs': 235}. Best is trial 68 with value: 0.7065032353145217.


Build model with 3 layers of attention
Epoch 1/237, Loss: 0.31065134806375877
0.5531854632223132
Epoch 11/237, Loss: 0.15510724813622587
0.6142553687143705
Epoch 21/237, Loss: 0.11546445148540478
0.6192076320961682
Epoch 31/237, Loss: 0.10013280114999004
0.6332499945237048
Epoch 41/237, Loss: 0.09573878209088363
0.6426400266934615
Epoch 51/237, Loss: 0.0976580852533088
0.6373603470288999
Epoch 61/237, Loss: 0.09311992098011222
0.6511748876450121
Epoch 71/237, Loss: 0.09138940654548944
0.6511954190229552
Epoch 81/237, Loss: 0.08942172522930537
0.6631986042414648
Epoch 91/237, Loss: 0.08766136501057475
0.670793587337919
Epoch 101/237, Loss: 0.27998178116246764
0.000271466474810728
Epoch 111/237, Loss: 0.26863547575240043
0.00019021120201189286
Epoch 121/237, Loss: 0.26740073485701693
0.00010701626661294873
Epoch 131/237, Loss: 0.2628773605706645
0.0008811283828258417
Epoch 141/237, Loss: 0.2622729534027623
0.00034669096743351614
Epoch 151/237, Loss: 0.26262226116423515
0.0015503608331213

[I 2024-01-23 04:31:36,929] Trial 86 finished with value: 0.0006058212488937875 and parameters: {'hidden_dim_h': 15, 'dropout': 0.27020309109094504, 'batch_size': 234, 'n_epochs': 237}. Best is trial 68 with value: 0.7065032353145217.


Build model with 3 layers of attention
Epoch 1/229, Loss: 0.3290879257807606
0.5044171640015968
Epoch 11/229, Loss: 0.17165418498610197
0.5850441935660584
Epoch 21/229, Loss: 0.12845020907882013
0.6091148717751196
Epoch 31/229, Loss: 0.10599843128339241
0.6230120675875154
Epoch 41/229, Loss: 0.10039313147334676
0.6224144929898977
Epoch 51/229, Loss: 0.09495062567293644
0.6375995897169633
Epoch 61/229, Loss: 0.09348497833860547
0.643650513662259
Epoch 71/229, Loss: 0.09129879133481729
0.6517968159033652
Epoch 81/229, Loss: 0.08931977939056723
0.6579167735643766
Epoch 91/229, Loss: 0.24432725596584773
0.0004889816101569816
Epoch 101/229, Loss: 0.2647529634598054
0.005186395438728844
Epoch 111/229, Loss: 0.2705991411287534
0.0029191698136129996
Epoch 121/229, Loss: 0.2625968203340706
0.013396352663388742
Epoch 131/229, Loss: 0.25799186900258064
0.019412046471468734
Epoch 141/229, Loss: 0.25610009835738884
0.020813411137061085
Epoch 151/229, Loss: 0.2549219437335667
0.019510555201129848
Ep

[I 2024-01-23 04:33:29,451] Trial 87 finished with value: 0.0022171789042398408 and parameters: {'hidden_dim_h': 10, 'dropout': 0.3252670323519042, 'batch_size': 314, 'n_epochs': 229}. Best is trial 68 with value: 0.7065032353145217.


Build model with 3 layers of attention
Epoch 1/257, Loss: 0.8600111455782086
0.5427179797507059
Epoch 11/257, Loss: 0.10841022220486445
0.6253713608791835
Epoch 21/257, Loss: 0.10054912502672655
0.63470674282977
Epoch 31/257, Loss: 0.09562762071054878
0.6377720347569384
Epoch 41/257, Loss: 0.09181692297999741
0.6529495604502242
Epoch 51/257, Loss: 0.09099631413077632
0.6637923749277246
Epoch 61/257, Loss: 0.09248493358175805
0.661194960725487
Epoch 71/257, Loss: 0.090194619135231
0.665832821110633
Epoch 81/257, Loss: 0.08579222659798379
0.6797437140313285
Epoch 91/257, Loss: 0.08495901672975391
0.6794338510463493
Epoch 101/257, Loss: 0.08943798858011869
0.6698896569605002
Epoch 111/257, Loss: 0.0828398475885814
0.6892957839060324
Epoch 121/257, Loss: 0.08614565479628583
0.6772104156588552
Epoch 131/257, Loss: 0.08066421467150357
0.6695146240921996
Epoch 141/257, Loss: 0.0863993986384243
0.6779721939335259
Epoch 151/257, Loss: 0.08069798086129182
0.6887235673042738
Epoch 161/257, Loss: 

[I 2024-01-23 04:36:40,380] Trial 88 finished with value: 0.0007751100836269108 and parameters: {'hidden_dim_h': 13, 'dropout': 0.22135415129474562, 'batch_size': 170, 'n_epochs': 257}. Best is trial 68 with value: 0.7065032353145217.


Build model with 3 layers of attention
Epoch 1/241, Loss: 0.18224272905634
0.5410659178130581
Epoch 11/241, Loss: 0.12551221781625196
0.6082743024079205
Epoch 21/241, Loss: 0.1043459209971703
0.6301139210133597
Epoch 31/241, Loss: 0.09505050872954038
0.6477681083335219
Epoch 41/241, Loss: 0.09062945240965256
0.6572706213253502
Epoch 51/241, Loss: 0.09052297859810866
0.6506240501303447
Epoch 61/241, Loss: 0.08905138304600349
0.6556308259955553
Epoch 71/241, Loss: 0.0875274259597063
0.6582855581841847
Epoch 81/241, Loss: 0.08588410570071293
0.6684422218549245
Epoch 91/241, Loss: 0.08578725345432758
0.665812375512709
Epoch 101/241, Loss: 0.08475344771376023
0.6673325404190363
Epoch 111/241, Loss: 0.2617001570761204
0.011327096180105318
Epoch 121/241, Loss: 0.25560063181015164
0.010540570280927693
Epoch 131/241, Loss: 0.25495291587251884
0.013967723641803778
Epoch 141/241, Loss: 0.25344113203195423
0.02190681689115457
Epoch 151/241, Loss: 0.2522219903767109
0.013896874748796783
Epoch 161/2

[I 2024-01-23 04:38:25,831] Trial 89 finished with value: 0.00517490690045801 and parameters: {'hidden_dim_h': 12, 'dropout': 0.1359064570359016, 'batch_size': 463, 'n_epochs': 241}. Best is trial 68 with value: 0.7065032353145217.


Epoch 241/241, Loss: 0.2635790737202534
0.00517490690045801
Build model with 3 layers of attention
Epoch 1/208, Loss: 0.23649921855505776
0.5431975357270055
Epoch 11/208, Loss: 0.13058186103315914
0.603756123299572
Epoch 21/208, Loss: 0.10394439039861454
0.6301277320297947
Epoch 31/208, Loss: 0.09521637374863905
0.6450816481100397
Epoch 41/208, Loss: 0.09288961151066949
0.638038246262542
Epoch 51/208, Loss: 0.09090481304070529
0.6472778907349169
Epoch 61/208, Loss: 0.09251892102115294
0.6487879602682597
Epoch 71/208, Loss: 0.08900665859965717
0.6582972763765521
Epoch 81/208, Loss: 0.08856805238653631
0.6605246124411683
Epoch 91/208, Loss: 0.08938022317255244
0.6599167378235984
Epoch 101/208, Loss: 0.08767775700372808
0.6568639843221602
Epoch 111/208, Loss: 0.08658012931837755
0.6652823625117017
Epoch 121/208, Loss: 0.0842813674141379
0.6728846456699411
Epoch 131/208, Loss: 0.08705215611878564
0.665218238964965
Epoch 141/208, Loss: 0.08353398915599375
0.6720980490671817
Epoch 151/208, L

[I 2024-01-23 04:40:14,549] Trial 90 finished with value: 0.6850595343431699 and parameters: {'hidden_dim_h': 10, 'dropout': 0.19617762377663409, 'batch_size': 282, 'n_epochs': 208}. Best is trial 68 with value: 0.7065032353145217.


Build model with 3 layers of attention
Epoch 1/224, Loss: 0.2621105240808951
0.5363489831027645
Epoch 11/224, Loss: 0.14968748491358114
0.6148174511043625
Epoch 21/224, Loss: 0.1089526821632643
0.6318910006007453
Epoch 31/224, Loss: 0.09638556676941949
0.641437181274995
Epoch 41/224, Loss: 0.09189303148839925
0.6536803678739529
Epoch 51/224, Loss: 0.09051617275218705
0.6515875441105762
Epoch 61/224, Loss: 0.0898669267425666
0.6593953597142551
Epoch 71/224, Loss: 0.2591429644742528
0.012367197677525721
Epoch 81/224, Loss: 0.2534163819776999
0.008163576985134367
Epoch 91/224, Loss: 0.2596974807816583
0.006305282494132125
Epoch 101/224, Loss: 0.25905531544137644
0.006700047401295331
Epoch 111/224, Loss: 0.2579688554680025
0.012953618056215999
Epoch 121/224, Loss: 0.2603266178756147
0.009233160786021752
Epoch 131/224, Loss: 0.2585361396541467
0.011152620463262704
Epoch 141/224, Loss: 0.2583899602696702
0.009153185005161999
Epoch 151/224, Loss: 0.2589655516518129
0.01258379546210983
Epoch 1

[I 2024-01-23 04:42:03,774] Trial 91 finished with value: 0.008742862214017079 and parameters: {'hidden_dim_h': 11, 'dropout': 0.2603090496825732, 'batch_size': 324, 'n_epochs': 224}. Best is trial 68 with value: 0.7065032353145217.


Build model with 3 layers of attention
Epoch 1/178, Loss: 1.6404503868586982
0.49525306357329263
Epoch 11/178, Loss: 0.432258602398545
0.5682874669794616
Epoch 21/178, Loss: 0.2697981398052244
0.5867939095057418
Epoch 31/178, Loss: 0.23229376927240572
0.5941126032172732
Epoch 41/178, Loss: 0.19498695277456027
0.6099100595635019
Epoch 51/178, Loss: 0.16155125956926772
0.6047424693496221
Epoch 61/178, Loss: 0.13157043984132027
0.6185865992612614
Epoch 71/178, Loss: 0.11202724489258296
0.6322026210025466
Epoch 81/178, Loss: 0.09942151383677525
0.6403597210766351
Epoch 91/178, Loss: 0.09256178262963224
0.6520818233035165
Epoch 101/178, Loss: 0.09316769397970456
0.6547146224394024
Epoch 111/178, Loss: 0.08874992403521467
0.6635803069510919
Epoch 121/178, Loss: 0.08602213370266246
0.6687951867245815
Epoch 131/178, Loss: 0.08572944442727673
0.6711931158269169
Epoch 141/178, Loss: 0.08351946566531908
0.6791621193634381
Epoch 151/178, Loss: 0.08131766541680294
0.6823256149655708
Epoch 161/178, 

[I 2024-01-23 04:43:27,209] Trial 92 finished with value: 0.6841252249294626 and parameters: {'hidden_dim_h': 11, 'dropout': 0.30024318170163666, 'batch_size': 360, 'n_epochs': 178}. Best is trial 68 with value: 0.7065032353145217.


Build model with 3 layers of attention
Epoch 1/203, Loss: 0.7694941738195586
0.4979284313432051
Epoch 11/203, Loss: 0.28837553973783525
0.6062381123782228
Epoch 21/203, Loss: 0.23467997579198135
0.6102044932281461
Epoch 31/203, Loss: 0.189999461696859
0.6326549563778906
Epoch 41/203, Loss: 0.15403191536142116
0.6227504687290498
Epoch 51/203, Loss: 0.12768364148704628
0.6190366541443033
Epoch 61/203, Loss: 0.10611223926146825
0.6495965913582836
Epoch 71/203, Loss: 0.09247543153009917
0.6599729884233693
Epoch 81/203, Loss: 0.0908929672941827
0.6566774028816017
Epoch 91/203, Loss: 0.08691236285264031
0.6663834908648096
Epoch 101/203, Loss: 0.08506666123867035
0.6498200219360148
Epoch 111/203, Loss: 0.08385631468212396
0.6713865245280239
Epoch 121/203, Loss: 0.0784970331087447
0.6862975174052341
Epoch 131/203, Loss: 0.08730848460343846
0.6590221893004293
Epoch 141/203, Loss: 0.08218521598661155
0.6766437004283886
Epoch 151/203, Loss: 0.0780498180211636
0.6840347331992047
Epoch 161/203, Los

[I 2024-01-23 04:44:56,694] Trial 93 finished with value: 0.6871353279806861 and parameters: {'hidden_dim_h': 14, 'dropout': 0.06485634029844575, 'batch_size': 425, 'n_epochs': 203}. Best is trial 68 with value: 0.7065032353145217.


Build model with 3 layers of attention
Epoch 1/200, Loss: 0.1789567462567772
0.5371626774244805
Epoch 11/200, Loss: 0.11284899924482618
0.6354085295272931
Epoch 21/200, Loss: 0.09601010289043188
0.6434389745147946
Epoch 31/200, Loss: 0.09216429639075484
0.6415081538584743
Epoch 41/200, Loss: 0.08993096037634782
0.6482525563070638
Epoch 51/200, Loss: 0.09049918130040169
0.6567952012273249
Epoch 61/200, Loss: 0.08566581817077738
0.6762891522693684
Epoch 71/200, Loss: 0.08713144582829305
0.6605416614433557
Epoch 81/200, Loss: 0.08398524219436306
0.6714999319125501
Epoch 91/200, Loss: 0.08851751618619476
0.6554762310863037
Epoch 101/200, Loss: 0.08476944932980197
0.6718480469266035
Epoch 111/200, Loss: 0.08347044525934118
0.6680233852836607
Epoch 121/200, Loss: 0.08073504189295429
0.6808743840079916
Epoch 131/200, Loss: 0.2669529704643147
0.003073431327499103
Epoch 141/200, Loss: 0.26542070587830885
0.004261617520437316
Epoch 151/200, Loss: 0.2618873632912125
0.004835665363094612
Epoch 161

[I 2024-01-23 04:46:24,258] Trial 94 finished with value: 0.003814036637972362 and parameters: {'hidden_dim_h': 14, 'dropout': 0.09204888487488055, 'batch_size': 431, 'n_epochs': 200}. Best is trial 68 with value: 0.7065032353145217.


Build model with 3 layers of attention
Epoch 1/212, Loss: 0.8668273585907956
0.5520842704466246
Epoch 11/212, Loss: 0.3408960833194408
0.0017453257260699902
Epoch 21/212, Loss: 0.26281999146684687
0.006969352992355696
Epoch 31/212, Loss: 0.25897877520703255
0.015482279479658724
Epoch 41/212, Loss: 0.2626954513661405
0.000840055627551233
Epoch 51/212, Loss: 0.26132707963598534
0.0018231835947481977
Epoch 61/212, Loss: 0.26101946894158706
0.002037047682253201
Epoch 71/212, Loss: 0.26114150120856916
0.006138154272616169
Epoch 81/212, Loss: 0.2610296534096941
0.003172762493437455
Epoch 91/212, Loss: 0.26122786516838886
0.0032356217470466634
Epoch 101/212, Loss: 0.2606298051615979
0.002774584617571884
Epoch 111/212, Loss: 0.2608331800141233
0.004964190708445558
Epoch 121/212, Loss: 0.2609187602362734
0.0027039356524397092
Epoch 131/212, Loss: 0.2605502446915241
0.006030704378461088
Epoch 141/212, Loss: 0.2607678685416567
0.002192635623711222
Epoch 151/212, Loss: 0.26100677885907764
0.003304

[I 2024-01-23 04:47:54,661] Trial 95 finished with value: 3.4966443663818275e-06 and parameters: {'hidden_dim_h': 16, 'dropout': 0.06439473021241973, 'batch_size': 512, 'n_epochs': 212}. Best is trial 68 with value: 0.7065032353145217.


Build model with 3 layers of attention
Epoch 1/253, Loss: 0.19561924895754568
0.5210455665913781
Epoch 11/253, Loss: 0.12176370468956453
0.6075695676978368
Epoch 21/253, Loss: 0.10666784533747921
0.6199818797624966
Epoch 31/253, Loss: 0.0946350134909153
0.6401260158280938
Epoch 41/253, Loss: 0.09209594516842454
0.6328189403597746
Epoch 51/253, Loss: 0.09098746337824398
0.6466627123258051
Epoch 61/253, Loss: 0.08794143823561845
0.6593464790835403
Epoch 71/253, Loss: 0.08872347890778824
0.6517088436252019
Epoch 81/253, Loss: 0.08849460562622105
0.659327913009278
Epoch 91/253, Loss: 0.08593597097529306
0.6637886870790447
Epoch 101/253, Loss: 0.08947611324213169
0.6446091525021613
Epoch 111/253, Loss: 0.08538241008365596
0.6716275606149738
Epoch 121/253, Loss: 0.08254403496781985
0.6757958610435466
Epoch 131/253, Loss: 0.08556395672537663
0.6713447951902354
Epoch 141/253, Loss: 0.08330964697179971
0.6785256241631873
Epoch 151/253, Loss: 0.08089146707896833
0.6838052517891418
Epoch 161/253,

[I 2024-01-23 04:49:43,450] Trial 96 finished with value: 0.003996886574430162 and parameters: {'hidden_dim_h': 12, 'dropout': 0.12020830724414769, 'batch_size': 447, 'n_epochs': 253}. Best is trial 68 with value: 0.7065032353145217.


Build model with 3 layers of attention
Epoch 1/216, Loss: 0.41262554608542346
0.5553254863184415
Epoch 11/216, Loss: 0.2065978972562428
0.5923884756955717
Epoch 21/216, Loss: 0.22847132179243812
0.006051785067350574
Epoch 31/216, Loss: 0.25905458860356234
0.007483972037194158
Epoch 41/216, Loss: 0.255138443975613
0.0147342554587328
Epoch 51/216, Loss: 0.2633967415012162
0.00014959668608033482
Epoch 61/216, Loss: 0.26263693154885853
0.00048239277745370384
Epoch 71/216, Loss: 0.2628178362702501
0.00039607991031556097
Epoch 81/216, Loss: 0.26261679725400333
0.0004571914085900279
Epoch 91/216, Loss: 0.26241782333316477
0.0002024733025703292
Epoch 101/216, Loss: 0.2610688155581211
0.004208223868941301
Epoch 111/216, Loss: 0.26215640044417876
0.0009495228436463747
Epoch 121/216, Loss: 0.2620881682839887
0.0002607983154397577
Epoch 131/216, Loss: 0.2616492121897895
0.0008114632568367605
Epoch 141/216, Loss: 0.2623482328550569
6.269984841525673e-05
Epoch 151/216, Loss: 0.26227252673486184
0.00

[I 2024-01-23 04:51:24,119] Trial 97 finished with value: 0.002129705480153266 and parameters: {'hidden_dim_h': 17, 'dropout': 0.3126669773241486, 'batch_size': 414, 'n_epochs': 216}. Best is trial 68 with value: 0.7065032353145217.


Build model with 3 layers of attention
Epoch 1/202, Loss: 1.7661092795577704
0.49279082890375636
Epoch 11/202, Loss: 0.5277707769590265
0.5968505305848281
Epoch 21/202, Loss: 0.18615666119491353
0.6329820768284498
Epoch 31/202, Loss: 0.13330487673189126
0.6385509393034696
Epoch 41/202, Loss: 0.12219806161581301
0.6564414627992565
Epoch 51/202, Loss: 0.11504150766368006
0.6653598948178107
Epoch 61/202, Loss: 0.11930411542747535
0.6239358966089928
Epoch 71/202, Loss: 0.103264579001595
0.6621281053322491
Epoch 81/202, Loss: 0.09564565604223925
0.6698899564968634
Epoch 91/202, Loss: 0.09462952949837142
0.6609086680771497
Epoch 101/202, Loss: 0.08806590458341673
0.6712400309460772
Epoch 111/202, Loss: 0.08514152788648419
0.6753617053854609
Epoch 121/202, Loss: 0.0895377309883342
0.6559272822634395
Epoch 131/202, Loss: 0.08347440394116383
0.6757158178845002
Epoch 141/202, Loss: 0.08803897161109775
0.630038974853075
Epoch 151/202, Loss: 0.08193517242576562
0.6795727213514895
Epoch 161/202, Lo

[I 2024-01-23 04:52:52,688] Trial 98 finished with value: 0.7011327808528077 and parameters: {'hidden_dim_h': 15, 'dropout': 0.10593948981510093, 'batch_size': 476, 'n_epochs': 202}. Best is trial 68 with value: 0.7065032353145217.


Build model with 3 layers of attention
Epoch 1/277, Loss: 1.0474617714975394
0.5285938940276091
Epoch 11/277, Loss: 0.2576439447847067
0.605015596602939
Epoch 21/277, Loss: 0.13663423733384
0.6394661676372178
Epoch 31/277, Loss: 0.12602398252370312
0.6279955098557034
Epoch 41/277, Loss: 0.11862332213158701
0.6393731274723471
Epoch 51/277, Loss: 0.11075767494884192
0.6484812743400655
Epoch 61/277, Loss: 0.10452645432715323
0.6499462165858525
Epoch 71/277, Loss: 0.10097301035535101
0.6444334954018083
Epoch 81/277, Loss: 0.09395430164009917
0.6620113477442353
Epoch 91/277, Loss: 0.09147543752310323
0.6601410451411567
Epoch 101/277, Loss: 0.09880964823213279
0.6207654095760897
Epoch 111/277, Loss: 0.09202530176616182
0.6447399554029816
Epoch 121/277, Loss: 0.08864021067525826
0.6627105361036578
Epoch 131/277, Loss: 0.08618324790515151
0.6763588102269746
Epoch 141/277, Loss: 0.08991417595568825
0.6664943486712841
Epoch 151/277, Loss: 0.08223463025163202
0.6878816172510481
Epoch 161/277, Los

[I 2024-01-23 04:55:00,228] Trial 99 finished with value: 0.7035475865294806 and parameters: {'hidden_dim_h': 19, 'dropout': 0.1325574984143695, 'batch_size': 473, 'n_epochs': 277}. Best is trial 68 with value: 0.7065032353145217.


Best Trial:
  Criterion: 0.7065
  Params: 
    hidden_dim_h: 11
    dropout: 0.07465451999985113
    batch_size: 186
    n_epochs: 216
TF_3 achieved R2 = 0.7067049249490138


In [90]:
# num_layers = 1
# model_name = "TF_" + str(num_layers)

# criterion_best = 0.
# study = optuna.create_study(direction='maximize')
# study.optimize(objective, n_trials=10)

# # Print the best hyperparameters
# best_trial = study.best_trial
# print("Best Trial:")
# print(f"  Criterion: {best_trial.value:.4f}")
# print("  Params: ")
# for key, value in best_trial.params.items():
#     print(f"    {key}: {value}")

# best_hyper_parameters = {}
# for key, value in best_trial.params.items():
#     best_hyper_parameters[key] = value

# model_best.eval()
# pred, true = model_best(X_val.flatten(1)).flatten().detach().cpu().numpy(), y_val.flatten().detach().cpu().numpy()

# r2_test = pearsonr(pred, true)[0]**2
# print(f"{model_name} achieved R2 = {r2_test}")

# # save test R2 score
# import csv
# with open(os.path.join(results_path, "R2s.csv"), mode='a', newline='') as file:
#     writer = csv.writer(file)
#     writer.writerows([[model_name, r2_test]])

# # save predictions
# pd.DataFrame({"prediction": pred, "true": true}).to_csv(os.path.join(results_path, model_name + "_predictions.csv"), index=False)

# # save best model
# torch.save(model_best, os.path.join(results_path, model_name + "_BestModel"))