In [1]:
## Standard libraries
import os
import numpy as np
import math
import json
from functools import partial

import random as rd

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import GPUtil
import torch
import torch.nn as nn
import sys
sys.path.append('../model')
from utils import amino_acid_to_number, tokenize

device = "cuda:0"

import sys
sys.path.append('../model')
from functions import get_A2N_list, tokenize, make_train_val_test_lists_rand, prepare_data
from models import ProtDataset

outpath = "../output/"

In [2]:
# os.makedirs(outpath + study_id + "_rep_" + str(0))

In [3]:
data_name = "Faure2023_1_lenient"
train_percent = 20

In [4]:
# make folder for storing analysis outputs

study_id = "_".join([data_name, str(train_percent) + "%"])

matching_folders = [folder for folder in os.listdir(outpath) if study_id in folder and os.path.isdir(os.path.join(outpath, folder)) ]

if len(matching_folders) == 0:
    rep = 0
else: rep = np.max([int(folder.split("_")[-1]) for folder in matching_folders]) + 1

results_path = outpath + "_".join([study_id, "rep", str(rep)])
os.makedirs(results_path)

In [5]:
R2s = pd.DataFrame(columns=['Model', 'R2'])
R2s.to_csv(os.path.join(results_path, 'R2s.csv'), index=False)

### Read in data

In [6]:
in_path = "../Data/Data_prepared/" + data_name + ".csv"
datafile = pd.read_csv(in_path, index_col=None)

In [7]:
phenotypes, seqs, seqs1h = prepare_data(datafile)

  seqs = seqs[:, sites_var]


In [8]:
_, L, AA_size = seqs1h.shape
print(f"sequence length = {L}; ", f"AA_size = {AA_size}")

sequence length = 34;  AA_size = 2


In [9]:
num_train = int(.01*train_percent*len(datafile))
num_test = 2000
train_list, val_list, test_list = make_train_val_test_lists_rand(datafile, num_train, num_test)    
print(num_train)

25864


### Linear model

In [10]:
model_name = "Linear"
from models import LinearModel

In [11]:
import torch.utils.data as data

X = seqs1h.float().to(device)
y = phenotypes.to(device)

X_train, y_train = X[train_list], y[train_list]
X_val, y_val = X[val_list], y[val_list]
X_test, y_test = X[test_list], y[test_list]


train_dataset = ProtDataset(X_train, y_train)
train_loader = data.DataLoader(train_dataset,
                               batch_size=1000,
                               shuffle=True,
                               drop_last=False)

In [12]:
dropout_p = 0.0
model = LinearModel(L, AA_size, dropout_p).cuda()

In [13]:
# from models import LinearModel

In [14]:
import torch.optim as optim
import torch
import torch.nn as nn


from scipy.stats import pearsonr
learning_rate = 0.01
epochs = 300

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(epochs):
    total_loss = 0
    for batch_inputs, batch_targets in train_loader:
        model.train()
        optimizer.zero_grad()
        outputs = model(batch_inputs)
        loss = criterion(outputs, batch_targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    if epoch % 10 == 0:
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(train_loader)}")
        model.eval()
        pred, true = model(X_val.flatten(1)).flatten().detach().cpu().numpy(), y_val.flatten().detach().cpu().numpy()
        print(pearsonr(pred, true)[0]**2)

Epoch 1/300, Loss: 0.3176604074736436
0.044063697893027075
Epoch 11/300, Loss: 0.11247230103860299
0.5934986206496009
Epoch 21/300, Loss: 0.11277942980329196
0.5940807199787502
Epoch 31/300, Loss: 0.11255488265305758
0.5927812725082092
Epoch 41/300, Loss: 0.11259598284959793
0.5933768256672256
Epoch 51/300, Loss: 0.11254267332454522
0.5941886584619791
Epoch 61/300, Loss: 0.11258724145591259
0.5943655054866587
Epoch 71/300, Loss: 0.11254456390937169
0.5940373214827782
Epoch 81/300, Loss: 0.11283398605883121
0.5934839689044148
Epoch 91/300, Loss: 0.11289972066879272
0.5920981122338687
Epoch 101/300, Loss: 0.11292321607470512
0.5940252018171155
Epoch 111/300, Loss: 0.11293516028672457
0.5917459719216663
Epoch 121/300, Loss: 0.11308929696679115
0.591352384183135
Epoch 131/300, Loss: 0.11260955625524123
0.5934266532800223
Epoch 141/300, Loss: 0.11282722186297178
0.5929229969971894
Epoch 151/300, Loss: 0.11261003309239943
0.5940281912245824
Epoch 161/300, Loss: 0.11280642574032147
0.59359601

In [15]:
model.eval()
pred, true = model(X_test.flatten(1)).flatten().detach().cpu().numpy(), y_test.flatten().detach().cpu().numpy()

r2_test = pearsonr(pred, true)[0]**2

print(f"{model_name} model achieved test R2 = {r2_test}")

Linear model achieved test R2 = 0.5449392844408051


In [16]:
import csv
with open(os.path.join(results_path, "R2s.csv"), mode='a', newline='') as file:
    writer = csv.writer(file)
    writer.writerows([[model_name, r2_test]])

### Transformer model

In [26]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from models import Transformer_torch_MHA, Transformer_2k

In [27]:
seqs_ex = seqs + AA_size*torch.tensor(range(L))
X = seqs_ex.to(device)
y = phenotypes.to(device)
X_train, y_train = X[train_list], y[train_list]
X_val, y_val = X[val_list], y[val_list]
X_test, y_test = X[test_list], y[test_list]
train_dataset = ProtDataset(X_train, y_train)

In [28]:
# Best Trial:
#   Value: 0.7601
#   Params: 
#     hidden_dim_h: 23
#     dropout: 0.12805161023112027
#     batch_size: 544


In [29]:
# sequence_length = L
# input_dim = AA_size*L
# output_dim = 1
# num_layers = 2
# num_heads = 4
# hidden_dim = 23*num_heads
# dropout = 0.12805161023112027

# model = Transformer_torch_MHA(L, input_dim, hidden_dim, num_layers, num_heads, dropout).to(device)

In [30]:
# from scipy.stats import pearsonr
# learning_rate = 0.001
# epochs = 500

# criterion = nn.MSELoss()
# optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# for epoch in range(epochs):
#     model.train()
#     total_loss = 0
#     for batch_inputs, batch_targets in train_loader:
#         optimizer.zero_grad()
#         outputs = model(batch_inputs)
#         loss = criterion(outputs, batch_targets)
#         loss.backward()
#         optimizer.step()
#         total_loss += loss.item()
    
#     if epoch % 20 == 0:
#         print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(train_loader)}")
#         model.eval()
#         pred, true = model(X_test.flatten(1)).flatten().detach().cpu().numpy(), y_test.flatten().detach().cpu().numpy()
#         print(pearsonr(pred, true)[0]**2)

In [31]:
import optuna
from scipy.stats import pearsonr

# learning_rate = 0.0001
num_heads = 4

sequence_length = L
input_dim = AA_size*L
output_dim = 1

def objective(trial):
    global criterion_best, model_best

    hidden_dim_h = trial.suggest_int('hidden_dim_h', 10, 50)
    dropout = trial.suggest_float('dropout', 0.05, 0.35)
    batch_size = trial.suggest_int('batch_size', 100, 1200)
    n_epochs = trial.suggest_int('n_epochs', 30, 300)
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
    # learning_rate = trial.suggest_float"learning_rate", 1e-5, 1e-2, log=True)
    
    print(f"Build model with {num_layers} layers of attention")
    model = Transformer_2k(L, input_dim, hidden_dim_h*num_heads, num_layers, num_heads, dropout).to(device)
    
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=batch_size,
                                   shuffle=True,
                                   drop_last=False)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    r2_test = []
    try: 
        for epoch in range(n_epochs):

                model.train()
                total_loss = 0
                for batch_inputs, batch_targets in train_loader:
                    optimizer.zero_grad()
                    outputs = model(batch_inputs)
                    loss = criterion(outputs, batch_targets)
                    loss.backward()
                    optimizer.step()
                    total_loss += loss.item()

                if epoch % 10 == 0:
                    print(f"Epoch {epoch+1}/{n_epochs}, Loss: {total_loss/len(train_loader)}")
                    model.eval()
                    pred, true = model(X_val.flatten(1)).flatten().detach().cpu().numpy(), y_val.flatten().detach().cpu().numpy()
                    print(pearsonr(pred, true)[0]**2)
                    if pearsonr(pred, true)[0]**2 == "nan":
                        break
                    r2_test.append(pearsonr(pred, true)[0]**2)
                    
    except: print("training failed")
    
    criterion = np.array(r2_test)[-1]
    if criterion > criterion_best:
        print("Found better hyperparameter, update model")
        criterion_best = criterion
        model_best = model
    
    return np.array(r2_test)[-1]

In [32]:
n_trials = 100
for num_layers in [3]:

    model_name = "TF_" + str(num_layers)

    criterion_best = 0.
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=100)

    # Print the best hyperparameters
    best_trial = study.best_trial
    print("Best Trial:")
    print(f"  Criterion: {best_trial.value:.4f}")
    print("  Params: ")
    for key, value in best_trial.params.items():
        print(f"    {key}: {value}")  

    best_hyper_parameters = {}
    for key, value in best_trial.params.items():
        best_hyper_parameters[key] = value

    model_best.eval()
    pred, true = model_best(X_val.flatten(1)).flatten().detach().cpu().numpy(), y_val.flatten().detach().cpu().numpy()

    r2_test = pearsonr(pred, true)[0]**2
    print(f"{model_name} achieved R2 = {r2_test}")

    # save test R2 score
    import csv
    with open(os.path.join(results_path, "R2s.csv"), mode='a', newline='') as file:
        writer = csv.writer(file)
        writer.writerows([[model_name, r2_test]])

    # save predictions
    pd.DataFrame({"prediction": pred, "true": true}).to_csv(os.path.join(results_path, model_name + "_predictions.csv"), index=False)

    # save best model
    torch.save(model_best, os.path.join(results_path, model_name + "_BestModel"))        

[I 2024-01-23 08:07:12,156] A new study created in memory with name: no-name-9ceacbef-9367-4cb3-a14b-0e722ec6a617
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)


Build model with 3 layers of attention
Epoch 1/123, Loss: 0.3468048758804798
0.5892912713597088
Epoch 11/123, Loss: 0.2642652206122875
0.6129965785127874
Epoch 21/123, Loss: 0.22934292983263732
0.6311927918564046
Epoch 31/123, Loss: 0.21113975830376147
0.6443502924711522
Epoch 41/123, Loss: 0.19708516132086515
0.6504993991633975
Epoch 51/123, Loss: 0.18342391336336733
0.6677516249362482
Epoch 61/123, Loss: 0.17090581953525544
0.6774703758202425
Epoch 71/123, Loss: 0.15834331437945365
0.6796323277559938
Epoch 81/123, Loss: 0.1463141399435699
0.6920782741135568
Epoch 91/123, Loss: 0.13601415921002627
0.6973881357264899
Epoch 101/123, Loss: 0.1265990188345313
0.702695653967123
Epoch 111/123, Loss: 0.11875486066564918
0.7080159577231476
Epoch 121/123, Loss: 0.11120947944000363
0.7028042272724704


[I 2024-01-23 08:08:29,822] Trial 0 finished with value: 0.7028042272724704 and parameters: {'hidden_dim_h': 36, 'dropout': 0.25742575781617977, 'batch_size': 301, 'n_epochs': 123, 'learning_rate': 0.00011240362450141765}. Best is trial 0 with value: 0.7028042272724704.


Found better hyperparameter, update model
Build model with 3 layers of attention
Epoch 1/90, Loss: 1.20313455419881
0.5440092898285732
Epoch 11/90, Loss: 0.16233487240970135
0.6477442230141487
Epoch 21/90, Loss: 0.12880157615457263
0.6719237341183601
Epoch 31/90, Loss: 0.10349343131695475
0.6881717250558703
Epoch 41/90, Loss: 0.0843707654092993
0.698508802319591
Epoch 51/90, Loss: 0.2746527566441468
0.00024100814353415667
Epoch 61/90, Loss: 0.2663124609206404
5.0388163711705656e-05
Epoch 71/90, Loss: 0.26712113874299187
0.005192823681819379


[I 2024-01-23 08:09:27,650] Trial 1 finished with value: 0.005192823681819379 and parameters: {'hidden_dim_h': 27, 'dropout': 0.32142519817814147, 'batch_size': 171, 'n_epochs': 90, 'learning_rate': 0.0007362933258734223}. Best is trial 0 with value: 0.7028042272724704.


training failed
Build model with 3 layers of attention
Epoch 1/204, Loss: 0.3495357741009105
0.5818015279955808
Epoch 11/204, Loss: 0.27513719553297217
0.616224074665219
Epoch 21/204, Loss: 0.24191308563405817
0.6056977605007944
Epoch 31/204, Loss: 0.22224012897773224
0.6439968560216938
Epoch 41/204, Loss: 0.21077478202906522
0.661683024495613
Epoch 51/204, Loss: 0.20100092616948215
0.6603140691334045
Epoch 61/204, Loss: 0.19217888604510913
0.6710633095918463
Epoch 71/204, Loss: 0.1859149675477635
0.6702180928065025
Epoch 81/204, Loss: 0.17319744215770203
0.6829139007536574
Epoch 91/204, Loss: 0.16945953125303442
0.6668164548995806
Epoch 101/204, Loss: 0.15807498043233698
0.6781190440636525
Epoch 111/204, Loss: 0.1490108607844873
0.6857967679996755
Epoch 121/204, Loss: 0.14058748768134552
0.6903260691065938
Epoch 131/204, Loss: 0.13247053900902922
0.6920062877241804
Epoch 141/204, Loss: 0.12466890635815533
0.6979170288029721
Epoch 151/204, Loss: 0.11721845750104297
0.7005924620159869
E

[I 2024-01-23 08:11:21,572] Trial 2 finished with value: 0.7121283937706205 and parameters: {'hidden_dim_h': 43, 'dropout': 0.2949059516884788, 'batch_size': 1093, 'n_epochs': 204, 'learning_rate': 0.00033198510793771327}. Best is trial 2 with value: 0.7121283937706205.


Found better hyperparameter, update model
Build model with 3 layers of attention
Epoch 1/271, Loss: 3.143383665518327
0.5257819947959567
Epoch 11/271, Loss: 1.1362451558763331
0.5669759863333228
Epoch 21/271, Loss: 0.4757066694172946
0.6141335388666388
Epoch 31/271, Loss: 0.2951907461339777
3.85399872712815e-05
Epoch 41/271, Loss: 0.2671243291009556
0.00010200771280196123
Epoch 51/271, Loss: 0.2666315700520169
0.00323696912806395
Epoch 61/271, Loss: 0.266037871891802
2.718967771032305e-06
Epoch 71/271, Loss: 0.26528323780406604
0.0005661678243277156
Epoch 81/271, Loss: 0.26599206165833905
7.521820799840894e-05


[I 2024-01-23 08:11:58,222] Trial 3 finished with value: 7.521820799840894e-05 and parameters: {'hidden_dim_h': 23, 'dropout': 0.0972184432826201, 'batch_size': 1132, 'n_epochs': 271, 'learning_rate': 0.002655230549368521}. Best is trial 2 with value: 0.7121283937706205.


training failed
Build model with 3 layers of attention
Epoch 1/158, Loss: 0.8756502013147613
0.5658927876819163
Epoch 11/158, Loss: 0.31404471802122796
0.6019108110518957
Epoch 21/158, Loss: 0.15655739881374217
0.6393839783382641
Epoch 31/158, Loss: 0.1299147156101686
0.6583572550783571
Epoch 41/158, Loss: 0.11786752957620739
0.6746272404419217
Epoch 51/158, Loss: 0.26723017313598113
0.0005571464282882584
Epoch 61/158, Loss: 0.26355808163866584
0.0022021102266038357
Epoch 71/158, Loss: 0.26430585612485435
0.007139855932926625
Epoch 81/158, Loss: 0.2639977913579823
0.005413809253816244
Epoch 91/158, Loss: 0.2657055794088929
0.0007392924372075156
Epoch 101/158, Loss: 0.2646020546003624
0.0020886074255198235
Epoch 111/158, Loss: 0.264394055178136
0.007666198923789941
Epoch 121/158, Loss: 0.26549031373895243
0.0027084002508932525
Epoch 131/158, Loss: 0.2659716948314949
0.00421742035500254
Epoch 141/158, Loss: 0.2651715059707194
0.003376865374778744
Epoch 151/158, Loss: 0.26601079327088817


[I 2024-01-23 08:13:51,631] Trial 4 finished with value: 0.0009439729250945863 and parameters: {'hidden_dim_h': 48, 'dropout': 0.21040069282551277, 'batch_size': 298, 'n_epochs': 158, 'learning_rate': 0.00042913008176501507}. Best is trial 2 with value: 0.7121283937706205.


Build model with 3 layers of attention
Epoch 1/295, Loss: 0.34811082290064904
0.577577978621156
Epoch 11/295, Loss: 0.2916498972523597
0.6125669560532853
Epoch 21/295, Loss: 0.26832950211340384
0.6313786870804446
Epoch 31/295, Loss: 0.25221195768925453
0.6308525061853492
Epoch 41/295, Loss: 0.2400469183921814
0.6376879971262801
Epoch 51/295, Loss: 0.22634796173341812
0.6478274714862008
Epoch 61/295, Loss: 0.21257136089186515
0.641912541079751
Epoch 71/295, Loss: 0.19963519275188446
0.6492871418424491
Epoch 81/295, Loss: 0.18898708253137528
0.6546844776311115
Epoch 91/295, Loss: 0.17787488381708821
0.6507374569480837
Epoch 101/295, Loss: 0.16628590610719496
0.6654010732103045
Epoch 111/295, Loss: 0.15644433854087705
0.6708363141528803
Epoch 121/295, Loss: 0.14804512839163503
0.6742767834211327
Epoch 131/295, Loss: 0.1388944637390875
0.6794424861192921
Epoch 141/295, Loss: 0.13046725623069272
0.6859676902534848
Epoch 151/295, Loss: 0.12447088548252659
0.6878204403846877
Epoch 161/295, Lo

[I 2024-01-23 08:16:24,572] Trial 5 finished with value: 0.7183267974858158 and parameters: {'hidden_dim_h': 33, 'dropout': 0.3242432805170107, 'batch_size': 791, 'n_epochs': 295, 'learning_rate': 0.00024686546211236896}. Best is trial 5 with value: 0.7183267974858158.


Found better hyperparameter, update model
Build model with 3 layers of attention
Epoch 1/252, Loss: 0.8951033060101495
0.5669988726479174
Epoch 11/252, Loss: 0.24484852193922235
0.6282437151598558
Epoch 21/252, Loss: 0.18758352403191553
0.6455299522935997
Epoch 31/252, Loss: 0.15421875836192697
0.6332248469411529
Epoch 41/252, Loss: 0.12015111949564754
0.659474833444599
Epoch 51/252, Loss: 0.09760421465920366
0.6713997734657393
Epoch 61/252, Loss: 0.08688699405478395
0.6895210871287657
Epoch 71/252, Loss: 0.08137905859536883
0.6826689366713586
Epoch 81/252, Loss: 0.07638554061776485
0.7033064554274504
Epoch 91/252, Loss: 0.07226499159266983
0.699490876406789
Epoch 101/252, Loss: 0.06884243201626383
0.7143437828143175
Epoch 111/252, Loss: 0.06688355338638244
0.7135375003933478
Epoch 121/252, Loss: 0.06241564919659193
0.7263190571204557
Epoch 131/252, Loss: 0.05990843685424846
0.7237473865102675
Epoch 141/252, Loss: 0.058024277073749596
0.7244657336337572
Epoch 151/252, Loss: 0.061913047

[I 2024-01-23 08:19:45,972] Trial 6 finished with value: 0.7321680749029571 and parameters: {'hidden_dim_h': 26, 'dropout': 0.3312762657923331, 'batch_size': 174, 'n_epochs': 252, 'learning_rate': 0.0004424259521257618}. Best is trial 6 with value: 0.7321680749029571.


Found better hyperparameter, update model
Build model with 3 layers of attention
Epoch 1/283, Loss: 0.5825660891003079
0.574282899727411
Epoch 11/283, Loss: 0.481645324991809
0.6177744540518237
Epoch 21/283, Loss: 0.4120524567034509
0.6292855649697908
Epoch 31/283, Loss: 0.361500583589077
0.639642499467817
Epoch 41/283, Loss: 0.32410771151383716
0.6391815068229878
Epoch 51/283, Loss: 0.29612816621859867
0.6597839485864824
Epoch 61/283, Loss: 0.2766990413268407
0.6736755825670522
Epoch 71/283, Loss: 0.2628622183369266
0.6751193706919437
Epoch 81/283, Loss: 0.25227103837662274
0.6850184807862164
Epoch 91/283, Loss: 0.24300996255543497
0.688444459574332
Epoch 101/283, Loss: 0.2356097073190742
0.667008173827768
Epoch 111/283, Loss: 0.2257057237956259
0.6870626541139024
Epoch 121/283, Loss: 0.21557452612453037
0.6944445632061469
Epoch 131/283, Loss: 0.20576785794562763
0.703182422964761
Epoch 141/283, Loss: 0.1954165432188246
0.7030093056011796
Epoch 151/283, Loss: 0.18614518352680737
0.704

[I 2024-01-23 08:22:25,663] Trial 7 finished with value: 0.7161565496891478 and parameters: {'hidden_dim_h': 43, 'dropout': 0.062412939490076313, 'batch_size': 674, 'n_epochs': 283, 'learning_rate': 0.00016278672106400215}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/115, Loss: 2.5335689577563056
0.00195628362246989
Epoch 11/115, Loss: 0.275962528483621
0.008073993528331304
Epoch 21/115, Loss: 0.26490471589154213
0.0018307150985434543
Epoch 31/115, Loss: 0.264835049879962
0.0001762250820693207
Epoch 41/115, Loss: 0.2646073164611027
0.0029710990545776168
Epoch 51/115, Loss: 0.26451463637680844
0.0008994087313450673
Epoch 61/115, Loss: 0.2646727027564213
0.00412999910955061
Epoch 71/115, Loss: 0.2647785744790373
0.0019783200417960644
Epoch 81/115, Loss: 0.26516166380767164
0.0008311773968276565
Epoch 91/115, Loss: 0.2649332685717221
0.00468356511160969
Epoch 101/115, Loss: 0.2642887522434366
0.003556345515009985
Epoch 111/115, Loss: 0.2640827277611042
0.016396005359172718


[I 2024-01-23 08:23:36,648] Trial 8 finished with value: 0.016396005359172718 and parameters: {'hidden_dim_h': 49, 'dropout': 0.3258310960270046, 'batch_size': 830, 'n_epochs': 115, 'learning_rate': 0.005906149941081174}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/279, Loss: 0.1977237829566002
0.5930294213829334
Epoch 11/279, Loss: 0.15589387953281403
0.6485471768608013
Epoch 21/279, Loss: 0.1391051785647869
0.6712045618707407
Epoch 31/279, Loss: 0.12226571023464203
0.689694066034794
Epoch 41/279, Loss: 0.11003571152687072
0.7034257209800285
Epoch 51/279, Loss: 0.09747184917330742
0.7079653157359258
Epoch 61/279, Loss: 0.08719767078757286
0.7168692773041797
Epoch 71/279, Loss: 0.0794942732155323
0.7186742367295536
Epoch 81/279, Loss: 0.07953867062926293
0.7139042289571348
Epoch 91/279, Loss: 0.06866290800273418
0.7255372593678497
Epoch 101/279, Loss: 0.06366414994001389
0.7275934550232649
Epoch 111/279, Loss: 0.06245414532721043
0.7223500505637013
Epoch 121/279, Loss: 0.05823981761932373
0.7291378034926866
Epoch 131/279, Loss: 0.056512833759188655
0.7256718022399681
Epoch 141/279, Loss: 0.05464756786823273
0.7245317339943164
Epoch 151/279, Loss: 0.2745732817053795
3.5081782666450935e-05
Epoch 161/27

[I 2024-01-23 08:26:39,605] Trial 9 finished with value: 0.0007750642851939705 and parameters: {'hidden_dim_h': 49, 'dropout': 0.29706380248573844, 'batch_size': 483, 'n_epochs': 279, 'learning_rate': 0.0003683542591582204}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/210, Loss: 1.5301054332046828
0.24275849963398535
Epoch 11/210, Loss: 1.1972093597116829
0.5577737678731198
Epoch 21/210, Loss: 0.9839668433536546
0.5507282818022006
Epoch 31/210, Loss: 0.8026235111088932
0.5617853918031898
Epoch 41/210, Loss: 0.6482482983976229
0.5684309840372156
Epoch 51/210, Loss: 0.5198547494710739
0.5812059773865711
Epoch 61/210, Loss: 0.4162901008977052
0.5941954630885937
Epoch 71/210, Loss: 0.33629232601391224
0.6027163523437208
Epoch 81/210, Loss: 0.28031507003506856
0.6131699898626489
Epoch 91/210, Loss: 0.24529997505652854
0.6173039520812914
Epoch 101/210, Loss: 0.226023856690738
0.6227992512570416
Epoch 111/210, Loss: 0.21185137917556524
0.6277024933204334
Epoch 121/210, Loss: 0.19922467367918423
0.6291081449019909
Epoch 131/210, Loss: 0.18844935297966003
0.6328319638746468
Epoch 141/210, Loss: 0.17895482265425527
0.637726954650694
Epoch 151/210, Loss: 0.17033195127752535
0.6427073730164354
Epoch 161/210, Loss: 

[I 2024-01-23 08:31:08,414] Trial 10 finished with value: 0.6530962205777873 and parameters: {'hidden_dim_h': 10, 'dropout': 0.15954301830418502, 'batch_size': 100, 'n_epochs': 210, 'learning_rate': 3.24947362619717e-05}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/40, Loss: 2.9093110725797455
0.06673148045084902
Epoch 11/40, Loss: 2.809119808262792
0.4903060034862904
Epoch 21/40, Loss: 2.772854410368821
0.5140246640198097
Epoch 31/40, Loss: 2.7430160538903596
0.5211032248040727


[I 2024-01-23 08:31:25,325] Trial 11 finished with value: 0.5211032248040727 and parameters: {'hidden_dim_h': 19, 'dropout': 0.23849797086891023, 'batch_size': 848, 'n_epochs': 40, 'learning_rate': 1.2767916948095839e-05}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/233, Loss: 0.3559092234733493
0.5684172632726285
Epoch 11/233, Loss: 0.2652839165787364
0.0010616071902894782
Epoch 21/233, Loss: 0.26533910112325537
0.0010429877981194256
Epoch 31/233, Loss: 0.26526101002859515
0.0004563122355863861
Epoch 41/233, Loss: 0.26526409768780995
0.002850531101221537
Epoch 51/233, Loss: 0.26520945547625074
0.001371976310904593
Epoch 61/233, Loss: 0.2650786194690438
0.001290007341433545
Epoch 71/233, Loss: 0.2654106883808624
0.00022371154961367738
Epoch 81/233, Loss: 0.26522019297577615
0.0004515996514137078
Epoch 91/233, Loss: 0.264947522171708
0.0002454772633258135
Epoch 101/233, Loss: 0.26524125628693157
0.0016976115806931638
Epoch 111/233, Loss: 0.264678324377814
0.0005263064246468347
Epoch 121/233, Loss: 0.26482925026915793
0.0021477274810682966
Epoch 131/233, Loss: 0.2649106573919917
0.0027205854426588616
Epoch 141/233, Loss: 0.2648605541434399
0.0011919167967758477
Epoch 151/233, Loss: 0.26418716547101045
0

[I 2024-01-23 08:33:28,758] Trial 12 finished with value: 0.0035531065883653897 and parameters: {'hidden_dim_h': 34, 'dropout': 0.34423648436713805, 'batch_size': 560, 'n_epochs': 233, 'learning_rate': 0.0012866946762227357}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/238, Loss: 0.2735949055901889
0.48142663190507873
Epoch 11/238, Loss: 0.25504770412527283
0.5987525197656817
Epoch 21/238, Loss: 0.24743993785874596
0.6074220231441757
Epoch 31/238, Loss: 0.24092122347190462
0.6241769720967765
Epoch 41/238, Loss: 0.2347971822681098
0.6333661503914602
Epoch 51/238, Loss: 0.22865167619853183
0.641050509188024
Epoch 61/238, Loss: 0.2228032910618289
0.6500394456910072
Epoch 71/238, Loss: 0.2177359220282785
0.6517759447992075
Epoch 81/238, Loss: 0.21254017332504535
0.6529644856454002
Epoch 91/238, Loss: 0.20769667008827472
0.6571860590467908
Epoch 101/238, Loss: 0.2028859232006402
0.6606102491960555
Epoch 111/238, Loss: 0.1984740773151661
0.6580481011978038
Epoch 121/238, Loss: 0.19425350016561047
0.6609370949627876
Epoch 131/238, Loss: 0.18959749824014202
0.6661533225135888
Epoch 141/238, Loss: 0.18571510088854823
0.6682826357542405
Epoch 151/238, Loss: 0.18159063314569407
0.6724928547525028
Epoch 161/238, Los

[I 2024-01-23 08:35:16,719] Trial 13 finished with value: 0.6885815317506884 and parameters: {'hidden_dim_h': 30, 'dropout': 0.26580350080273274, 'batch_size': 829, 'n_epochs': 238, 'learning_rate': 8.484872860644318e-05}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/249, Loss: 1.4418955659866333
0.565307587907034
Epoch 11/249, Loss: 0.6830218529701233
0.5878125567254359
Epoch 21/249, Loss: 0.3564320755004883
0.0002840337714824416
Epoch 31/249, Loss: 0.28136419773101806
3.7050257718328702e-06
Epoch 41/249, Loss: 0.2663614785671234
9.113766477533853e-05
Epoch 51/249, Loss: 0.2646291989088059
0.000782521865125174
Epoch 61/249, Loss: 0.2640005028247833
0.003889414130045298
Epoch 71/249, Loss: 0.2653381872177124
0.0004714384440191904
Epoch 81/249, Loss: 0.2652725303173065
0.0010230846607134161
Epoch 91/249, Loss: 0.264341334104538
0.0012691567291150748
Epoch 101/249, Loss: 0.26457072257995606
0.004614654985603795
Epoch 111/249, Loss: 0.2640287137031555
0.0032924464884199313
Epoch 121/249, Loss: 0.2639226520061493
0.0033116910305649276
Epoch 131/249, Loss: 0.26440369844436645
0.004056069071980739
Epoch 141/249, Loss: 0.26419253706932067
0.0038847515797189734
Epoch 151/249, Loss: 0.264202721118927
0.00432115

[I 2024-01-23 08:36:59,100] Trial 14 finished with value: 0.004198490575350982 and parameters: {'hidden_dim_h': 17, 'dropout': 0.15642867055954884, 'batch_size': 962, 'n_epochs': 249, 'learning_rate': 0.001379482332618753}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/293, Loss: 1.0352464961378198
0.4974656667120992
Epoch 11/293, Loss: 0.9819064532455645
0.546958481654357
Epoch 21/293, Loss: 0.9537328073852941
0.5465544055280785
Epoch 31/293, Loss: 0.9157984492025877
0.5488917224355251
Epoch 41/293, Loss: 0.8801972238641036
0.01886559551857382
Epoch 51/293, Loss: 0.8352311771166953
0.5514815577209341
Epoch 61/293, Loss: 0.8029857622949701
0.5648625566210245
Epoch 71/293, Loss: 0.7645865145482516
0.575254013866132
Epoch 81/293, Loss: 0.7276135369351036
0.5831659998426325
Epoch 91/293, Loss: 0.6914694622943276
0.5943130294982409
Epoch 101/293, Loss: 0.661059131747798
0.5974363506124691
Epoch 111/293, Loss: 0.6261898843865645
0.603836625148372
Epoch 121/293, Loss: 0.6056801300299796
0.6061465093252506
Epoch 131/293, Loss: 0.5731807602079291
0.6107048365575685
Epoch 141/293, Loss: 0.5433714468228189
0.6147062268379905
Epoch 151/293, Loss: 0.5206265676962701
0.6206099275185466
Epoch 161/293, Loss: 0.49496576

[I 2024-01-23 08:39:33,199] Trial 15 finished with value: 0.6589925616168143 and parameters: {'hidden_dim_h': 38, 'dropout': 0.3486661087533102, 'batch_size': 644, 'n_epochs': 293, 'learning_rate': 4.172385090992121e-05}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/179, Loss: 1.1490572428299208
0.5318707069059085
Epoch 11/179, Loss: 0.8164060843192925
0.5478094419094056
Epoch 21/179, Loss: 0.5893873829962843
0.5815610333030631
Epoch 31/179, Loss: 0.4181107105845112
0.6055628793890506
Epoch 41/179, Loss: 0.29357456504288365
0.6175060707959688
Epoch 51/179, Loss: 0.20543449132119196
0.6415807514962915
Epoch 61/179, Loss: 0.14853433330180282
0.6643708014143083
Epoch 71/179, Loss: 0.1147686322614298
0.6789664792904084
Epoch 81/179, Loss: 0.0970720411357233
0.6836817041391424
Epoch 91/179, Loss: 0.09023438343557261
0.6925196912135039
Epoch 101/179, Loss: 0.0868784179121761
0.6941307052042365
Epoch 111/179, Loss: 0.08478346252340381
0.6984877854081959
Epoch 121/179, Loss: 0.0834355797555487
0.7015919538528295
Epoch 131/179, Loss: 0.08245317789457612
0.7014490105224588
Epoch 141/179, Loss: 0.0801497346768945
0.7047726961473314
Epoch 151/179, Loss: 0.07794427480232918
0.707543450734907
Epoch 161/179, Loss: 0

[I 2024-01-23 08:41:05,769] Trial 16 finished with value: 0.7086635527624605 and parameters: {'hidden_dim_h': 28, 'dropout': 0.2870028596202202, 'batch_size': 407, 'n_epochs': 179, 'learning_rate': 0.00018501746135717563}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/253, Loss: 0.21478554737918518
0.0009535789521288843
Epoch 11/253, Loss: 0.2662455312469426
9.814551504980874e-05
Epoch 21/253, Loss: 0.2652537252973108
0.0003173983518297938
Epoch 31/253, Loss: 0.2653770692208234
0.00015322642649076822
Epoch 41/253, Loss: 0.2654041630380294
0.00022694088381734748
Epoch 51/253, Loss: 0.2656025592895115
0.0011459590633595559
Epoch 61/253, Loss: 0.26514724817346125
3.068714785462141e-05
Epoch 71/253, Loss: 0.265416021732723
3.0381385829227135e-05
Epoch 81/253, Loss: 0.26529883593320847
0.0048100577591003325
Epoch 91/253, Loss: 0.2652309072368285
0.0008693734922957167
Epoch 101/253, Loss: 0.2652192847693668
0.00043358572177096966
Epoch 111/253, Loss: 0.26523193673175927
2.448308627627422e-05
Epoch 121/253, Loss: 0.26543963262263465
0.0027685850133739006
Epoch 131/253, Loss: 0.2646326405160567
0.004824950653620119
Epoch 141/253, Loss: 0.2652389933081234
0.001330186859935473
Epoch 151/253, Loss: 0.2644337626064

[I 2024-01-23 08:42:56,828] Trial 17 finished with value: 0.0007706985484153308 and parameters: {'hidden_dim_h': 23, 'dropout': 0.19635181222670234, 'batch_size': 703, 'n_epochs': 253, 'learning_rate': 0.008536256972113352}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/299, Loss: 1.4191928958892823
0.576205062668281
Epoch 11/299, Loss: 0.8542240381240844
0.0006999883964329157
Epoch 21/299, Loss: 0.5340891695022583
0.0007688146722564625
Epoch 31/299, Loss: 0.3625427329540253
0.001755547423917373
Epoch 41/299, Loss: 0.30129335045814515
0.003517130110441756
Epoch 51/299, Loss: 0.2730030608177185
0.0014580229336249126
Epoch 61/299, Loss: 0.26636853098869323
0.002154916034611173
Epoch 71/299, Loss: 0.2670546990633011
6.620091678696957e-05
Epoch 81/299, Loss: 0.2674161946773529
0.00022121796577929603
Epoch 91/299, Loss: 0.2636208862066269
0.00011196688143752814
Epoch 101/299, Loss: 0.2646785855293274
0.0022111348842879476
Epoch 111/299, Loss: 0.2633621245622635
0.0028165344022542385
Epoch 121/299, Loss: 0.26765028238296507
0.0015463946003667457
Epoch 131/299, Loss: 0.2644843566417694
0.00031224766360118185
Epoch 141/299, Loss: 0.2665450930595398
0.0007351660859251723
Epoch 151/299, Loss: 0.26943443179130555
0.

[I 2024-01-23 08:45:32,430] Trial 18 finished with value: 0.0007290710481845191 and parameters: {'hidden_dim_h': 33, 'dropout': 0.211605258178062, 'batch_size': 993, 'n_epochs': 299, 'learning_rate': 0.0008752482048305134}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/207, Loss: 1.6577929649780045
0.5655659457174251
Epoch 11/207, Loss: 1.4769891251378984
0.5495524939133417
Epoch 21/207, Loss: 1.3330333126125051
0.5720514778675698
Epoch 31/207, Loss: 1.199682244613989
0.5795870160560838
Epoch 41/207, Loss: 1.0767775738417213
0.595337927418502
Epoch 51/207, Loss: 0.9617016146432108
0.6045683004440741
Epoch 61/207, Loss: 0.8558156454741065
0.6024343477037312
Epoch 71/207, Loss: 0.7581997504874841
0.6112647662457268
Epoch 81/207, Loss: 0.6677026668591286
0.6222457152681273
Epoch 91/207, Loss: 0.5845980528575271
0.628575694289083
Epoch 101/207, Loss: 0.5097114385953591
0.632181909930921
Epoch 111/207, Loss: 0.44151312646581165
0.6400646100281485
Epoch 121/207, Loss: 0.3801347822395723
0.6453848443200554
Epoch 131/207, Loss: 0.32568792353815107
0.6558519429682994
Epoch 141/207, Loss: 0.27787812365524805
0.6630101938666012
Epoch 151/207, Loss: 0.23701761685200592
0.6670117329040837
Epoch 161/207, Loss: 0.20351

[I 2024-01-23 08:47:46,202] Trial 19 finished with value: 0.6885334224849663 and parameters: {'hidden_dim_h': 41, 'dropout': 0.318040884965266, 'batch_size': 358, 'n_epochs': 207, 'learning_rate': 6.398016383148326e-05}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/174, Loss: 0.5432449195120069
0.003367916489949733
Epoch 11/174, Loss: 0.2642504228485955
0.00917413446161674
Epoch 21/174, Loss: 0.2635947306950887
0.01185751381692741
Epoch 31/174, Loss: 0.2632877720726861
0.006463209825751166
Epoch 41/174, Loss: 0.2649473034673267
0.0005818834606619675
Epoch 51/174, Loss: 0.2647055112653308
0.00012095201772597896
Epoch 61/174, Loss: 0.2646576911211014
0.0018982289522346688
Epoch 71/174, Loss: 0.2650765422317717
0.0009971161695738063
Epoch 81/174, Loss: 0.2643953657812542
0.0025855632945499143
Epoch 91/174, Loss: 0.26440175076325734
0.001030557980013214
Epoch 101/174, Loss: 0.2643864724371168
0.002781096065900541
Epoch 111/174, Loss: 0.26479606959554886
0.00027155150655532316
Epoch 121/174, Loss: 0.2647881660196516
0.002877617579233263
Epoch 131/174, Loss: 0.2645844989352756
0.0005457967029840106
Epoch 141/174, Loss: 0.2644847790400187
0.004245629802331667
Epoch 151/174, Loss: 0.2644355333513684
0.003496

[I 2024-01-23 08:49:00,291] Trial 20 finished with value: 0.0032985319433402006 and parameters: {'hidden_dim_h': 14, 'dropout': 0.24784684384709693, 'batch_size': 536, 'n_epochs': 174, 'learning_rate': 0.0028298892424130423}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/272, Loss: 0.22825060521855073
0.5810568213899652
Epoch 11/272, Loss: 0.19406785333857818
0.6234311353900164
Epoch 21/272, Loss: 0.18427751432446873
0.6365644707384838
Epoch 31/272, Loss: 0.17689724836279364
0.6427665245786349
Epoch 41/272, Loss: 0.16918685155756333
0.6559939662988069
Epoch 51/272, Loss: 0.1606935753541834
0.668020693922326
Epoch 61/272, Loss: 0.26546385446015525
0.0036292991061141616
Epoch 71/272, Loss: 0.2626914246117367
0.001537280111254012
Epoch 81/272, Loss: 0.2630598203224294
0.005238668272482093
Epoch 91/272, Loss: 0.26272953082533446
0.005982340104493419
Epoch 101/272, Loss: 0.26229313892476697
0.009180118912867342
Epoch 111/272, Loss: 0.2636249227558865
0.006517886314813214
Epoch 121/272, Loss: 0.2604014641221832
0.011704829956294967
Epoch 131/272, Loss: 0.25959983292747946
0.01522504561558388
Epoch 141/272, Loss: 0.25574593158329234
0.016379459116224536
Epoch 151/272, Loss: 0.2549608415540527
0.018815485946128967

[I 2024-01-23 08:51:36,630] Trial 21 finished with value: 0.01935506426026652 and parameters: {'hidden_dim_h': 43, 'dropout': 0.056330288226097355, 'batch_size': 709, 'n_epochs': 272, 'learning_rate': 0.00021516286561348334}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/300, Loss: 0.27348766554343074
0.5647822098429568
Epoch 11/300, Loss: 0.20539185051855288
0.5938116499845697
Epoch 21/300, Loss: 0.17718065530061722
0.6086886895655629
Epoch 31/300, Loss: 0.1593451899917502
0.6206290052840268
Epoch 41/300, Loss: 0.14784716069698334
0.6344435051862682
Epoch 51/300, Loss: 0.14403039922839717
0.6368530594071681
Epoch 61/300, Loss: 0.13827426574732127
0.6486012178713333
Epoch 71/300, Loss: 0.1345574022515824
0.6518849368210319
Epoch 81/300, Loss: 0.13191842875982585
0.6568137495976858
Epoch 91/300, Loss: 0.12965425986208415
0.6537964565159218
Epoch 101/300, Loss: 0.12586253450105064
0.6579508181156184
Epoch 111/300, Loss: 0.12268821071637304
0.6626357493169677
Epoch 121/300, Loss: 0.11923233105948097
0.6623860324804397
Epoch 131/300, Loss: 0.11544655243817128
0.6682683194492531
Epoch 141/300, Loss: 0.11197940750341666
0.6705485369810622
Epoch 151/300, Loss: 0.10850173725109351
0.6745013202521243
Epoch 161/300,

[I 2024-01-23 08:53:48,590] Trial 22 finished with value: 0.6185141766254182 and parameters: {'hidden_dim_h': 24, 'dropout': 0.10460781173847945, 'batch_size': 636, 'n_epochs': 300, 'learning_rate': 0.00013744221779672143}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/262, Loss: 1.030371418222785
0.5623926768065336
Epoch 11/262, Loss: 0.6054773218929768
0.573867694264846
Epoch 21/262, Loss: 0.40646678395569324
0.3443087643661435
Epoch 31/262, Loss: 0.3015893967822194
0.00018896431260404686
Epoch 41/262, Loss: 0.2716337516903877
0.00014288588003418605
Epoch 51/262, Loss: 0.2653652182780206
2.9759379566482063e-05
Epoch 61/262, Loss: 0.2643978507257998
0.0026742666185520297
Epoch 71/262, Loss: 0.26518545486032963
0.0038580995596419984
Epoch 81/262, Loss: 0.26452733809128404
0.0021348516260888163
Epoch 91/262, Loss: 0.2642447059042752
0.0018262954415106001
Epoch 101/262, Loss: 0.2649744818918407
0.00038190032369116026
Epoch 111/262, Loss: 0.26366060972213745
0.0036870330453473666
Epoch 121/262, Loss: 0.2649481389671564
0.005227606056772516
Epoch 131/262, Loss: 0.2647259789519012
0.005374178119728088
Epoch 141/262, Loss: 0.2645644792355597
0.003406569245920773
Epoch 151/262, Loss: 0.2659157537855208
0.000233

[I 2024-01-23 08:56:07,918] Trial 23 finished with value: 0.004618355252326637 and parameters: {'hidden_dim_h': 39, 'dropout': 0.05884679793843805, 'batch_size': 765, 'n_epochs': 262, 'learning_rate': 0.000699774400310937}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/230, Loss: 0.32168391117682826
0.5635339824777237
Epoch 11/230, Loss: 0.24299166122308144
0.6158901146482838
Epoch 21/230, Loss: 0.21281074445981246
0.6449407962878524
Epoch 31/230, Loss: 0.19660135645132798
0.6478538955878285
Epoch 41/230, Loss: 0.18583563944468132
0.6589707088633201
Epoch 51/230, Loss: 0.17703582461063677
0.6701476994103511
Epoch 61/230, Loss: 0.17084142164542124
0.6658389921306876
Epoch 71/230, Loss: 0.1643720681850727
0.676525865408478
Epoch 81/230, Loss: 0.16162865265057638
0.6555573728009567
Epoch 91/230, Loss: 0.1530048967554019
0.6751167527128826
Epoch 101/230, Loss: 0.14496187693797624
0.6848994640592724
Epoch 111/230, Loss: 0.13827471521038276
0.6879048819959726
Epoch 121/230, Loss: 0.13540969445155218
0.6672935454048088
Epoch 131/230, Loss: 0.1268851966238939
0.6838267775628721
Epoch 141/230, Loss: 0.1215011950295705
0.6873006178023857
Epoch 151/230, Loss: 0.26309118878382903
0.013767617632345618
Epoch 161/230, 

[I 2024-01-23 08:57:53,998] Trial 24 finished with value: 0.011977012211180842 and parameters: {'hidden_dim_h': 32, 'dropout': 0.15096219527936838, 'batch_size': 946, 'n_epochs': 230, 'learning_rate': 0.00023515305841947845}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/284, Loss: 2.492584385522982
0.5673672488415972
Epoch 11/284, Loss: 2.380877070310639
0.5282459557334399
Epoch 21/284, Loss: 2.2834810222067485
0.5203110599536583
Epoch 31/284, Loss: 2.190358272412928
0.5348725716979705
Epoch 41/284, Loss: 2.100803003078554
0.5418130988267787
Epoch 51/284, Loss: 2.0136095808773504
0.5480416112492849
Epoch 61/284, Loss: 1.9299253428854592
0.5612183714869569
Epoch 71/284, Loss: 1.848835363620665
0.565041936909833
Epoch 81/284, Loss: 1.769739988373547
0.5714762824984533
Epoch 91/284, Loss: 1.6843595010478323
0.00048225747876701365
Epoch 101/284, Loss: 1.6011288020668961
0.0015082755651732833
Epoch 111/284, Loss: 1.5210356188983452
0.0014384622728692202
Epoch 121/284, Loss: 1.445500705300308
0.007747462759322874
Epoch 131/284, Loss: 1.3725570294915177
0.011503715662808266
Epoch 141/284, Loss: 1.301788644092839
0.024824316432369136
Epoch 151/284, Loss: 1.2331814853156484
0.04439594925470987
Epoch 161/284, Loss:

[I 2024-01-23 09:00:40,418] Trial 25 finished with value: 0.6169049983190792 and parameters: {'hidden_dim_h': 45, 'dropout': 0.11666284428286088, 'batch_size': 584, 'n_epochs': 284, 'learning_rate': 5.75122368366984e-05}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/259, Loss: 0.28106353234271614
0.3365614869242797
Epoch 11/259, Loss: 0.2527162320151621
0.5873428488634748
Epoch 21/259, Loss: 0.24993331852007886
0.589061046617945
Epoch 31/259, Loss: 0.24692269673152845
0.5922970130695772
Epoch 41/259, Loss: 0.24431823437311212
0.5947538538610087
Epoch 51/259, Loss: 0.2415402966494463
0.5987565274356295
Epoch 61/259, Loss: 0.23930658248006081
0.6033849840578094
Epoch 71/259, Loss: 0.23718322022837035
0.6061210893004545
Epoch 81/259, Loss: 0.23430937346147032
0.6101211088117747
Epoch 91/259, Loss: 0.23234401126297152
0.6126100284269066
Epoch 101/259, Loss: 0.2298921428772868
0.6164536344257016
Epoch 111/259, Loss: 0.2275094216575428
0.6194009562919719
Epoch 121/259, Loss: 0.22492844565790526
0.6235138371346466
Epoch 131/259, Loss: 0.22293246126904778
0.6263566589248036
Epoch 141/259, Loss: 0.22076453846328112
0.6282748878430626
Epoch 151/259, Loss: 0.21853521952823718
0.6310408602643853
Epoch 161/259, Lo

[I 2024-01-23 09:02:48,042] Trial 26 finished with value: 0.6425802501816867 and parameters: {'hidden_dim_h': 28, 'dropout': 0.18028915314849067, 'batch_size': 493, 'n_epochs': 259, 'learning_rate': 1.932683739542185e-05}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/229, Loss: 0.4381159433221395
0.5779473195630861
Epoch 11/229, Loss: 0.28280471238942273
0.6293274622719524
Epoch 21/229, Loss: 0.23261282810595182
0.6381365855257711
Epoch 31/229, Loss: 0.20941730872192213
0.6593116102832912
Epoch 41/229, Loss: 0.18659428652145166
0.6637268316716102
Epoch 51/229, Loss: 0.16596529744895158
0.6717478516139949
Epoch 61/229, Loss: 0.14845634416668815
0.6779911051448132
Epoch 71/229, Loss: 0.13150603238460237
0.6820476953837149
Epoch 81/229, Loss: 0.11893782758079799
0.6853216008152129
Epoch 91/229, Loss: 0.10716013940034715
0.6924143920720544
Epoch 101/229, Loss: 0.10061331488917359
0.6981165750100574
Epoch 111/229, Loss: 0.09189662963679407
0.7053138708767879
Epoch 121/229, Loss: 0.08623842492831492
0.7029852162462285
Epoch 131/229, Loss: 0.07998378756167614
0.7034771675116771
Epoch 141/229, Loss: 0.07705721529447927
0.7101521318579317
Epoch 151/229, Loss: 0.07406755691740365
0.7122255149745333
Epoch 161/229

[I 2024-01-23 09:05:32,304] Trial 27 finished with value: 0.7269946562540881 and parameters: {'hidden_dim_h': 36, 'dropout': 0.27844253803844543, 'batch_size': 213, 'n_epochs': 229, 'learning_rate': 0.00014573147736626855}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/193, Loss: 1.056277184873014
0.538830076886982
Epoch 11/193, Loss: 0.2748328839000818
0.0010183330458535802
Epoch 21/193, Loss: 0.2651766495326081
9.287365968604331e-07
Epoch 31/193, Loss: 0.2651361468675974
2.5647990409200453e-05
Epoch 41/193, Loss: 0.26445767796925596
0.0012291644816078207
Epoch 51/193, Loss: 0.26410035768876206
0.00015541458082395633
Epoch 61/193, Loss: 0.2642036862671375
1.8022838576724953e-06
Epoch 71/193, Loss: 0.26411690653578657
0.0014028107928071353
Epoch 81/193, Loss: 0.26277818561003013
0.0035544713507724484
Epoch 91/193, Loss: 0.26383130806120664
0.0032009132986314864
Epoch 101/193, Loss: 0.26386120190491547
3.511372537187521e-07
Epoch 111/193, Loss: 0.263272207129646
0.005112471164755057
Epoch 121/193, Loss: 0.2633361031074782
0.001880097803884272
Epoch 131/193, Loss: 0.26276648719165774
0.0006797918567995919
Epoch 141/193, Loss: 0.26304609751379165
0.003960944625943696
Epoch 151/193, Loss: 0.26241791177843066

[I 2024-01-23 09:08:16,866] Trial 28 finished with value: 0.004208449577726561 and parameters: {'hidden_dim_h': 36, 'dropout': 0.28068958927671395, 'batch_size': 162, 'n_epochs': 193, 'learning_rate': 0.000626582416651316}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/157, Loss: 0.8248024293042104
0.5458429906573524
Epoch 11/157, Loss: 0.5171983753322461
0.6056067746518915
Epoch 21/157, Loss: 0.32385862406787524
0.6320243764730429
Epoch 31/157, Loss: 0.20614591199870502
0.6477535170871261
Epoch 41/157, Loss: 0.14226948346840132
0.6666266669069864
Epoch 51/157, Loss: 0.11606756549909575
0.6774846211939606
Epoch 61/157, Loss: 0.10682825033270985
0.6831977554397679
Epoch 71/157, Loss: 0.10380647222109891
0.6873093704107162
Epoch 81/157, Loss: 0.09914956043619629
0.6947416436539685
Epoch 91/157, Loss: 0.09444313803944018
0.6952085995722407
Epoch 101/157, Loss: 0.0915666830238946
0.6977171827834827
Epoch 111/157, Loss: 0.08684240889932038
0.7029460057381841
Epoch 121/157, Loss: 0.08435567345367659
0.7020136537724267
Epoch 131/157, Loss: 0.0822551623670333
0.7067298135433422
Epoch 141/157, Loss: 0.08060530494522611
0.7110704877061539
Epoch 151/157, Loss: 0.07788066501054194
0.7134442211872293


[I 2024-01-23 09:10:08,000] Trial 29 finished with value: 0.7134442211872293 and parameters: {'hidden_dim_h': 36, 'dropout': 0.26456254676198054, 'batch_size': 220, 'n_epochs': 157, 'learning_rate': 0.00012834243186925624}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/225, Loss: 0.5548697528482853
0.5836637869706557
Epoch 11/225, Loss: 0.4037720687773036
0.6073922607111347
Epoch 21/225, Loss: 0.30624649082792216
0.6244011231676069
Epoch 31/225, Loss: 0.23783296192514486
0.6361847162053504
Epoch 41/225, Loss: 0.19240307139939275
0.6475212783201209
Epoch 51/225, Loss: 0.16492204042686814
0.6578414797827573
Epoch 61/225, Loss: 0.15133959791441073
0.6627056498387556
Epoch 71/225, Loss: 0.14258890033795915
0.6661515327138126
Epoch 81/225, Loss: 0.13715795992777266
0.6726988645459864
Epoch 91/225, Loss: 0.1320096266338195
0.6760236449640075
Epoch 101/225, Loss: 0.12607249292148942
0.6758605869116321
Epoch 111/225, Loss: 0.12018048823222346
0.6791045401788768
Epoch 121/225, Loss: 0.11412069972219138
0.6821560994785453
Epoch 131/225, Loss: 0.10877665628989537
0.6843172271840059
Epoch 141/225, Loss: 0.10442768491205127
0.6877878022637613
Epoch 151/225, Loss: 0.09906439166301968
0.6911772691487413
Epoch 161/225, 

[I 2024-01-23 09:12:24,979] Trial 30 finished with value: 0.7069639124734833 and parameters: {'hidden_dim_h': 31, 'dropout': 0.312423412307248, 'batch_size': 276, 'n_epochs': 225, 'learning_rate': 9.369761314774037e-05}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/279, Loss: 0.5319468195239703
0.5600732625133099
Epoch 11/279, Loss: 0.3801539530356725
0.5985107595494171
Epoch 21/279, Loss: 0.2866353844602903
0.6249493013396794
Epoch 31/279, Loss: 0.22691820661226908
0.6364876775974481
Epoch 41/279, Loss: 0.19292346363266308
0.6497928469926831
Epoch 51/279, Loss: 0.17518426080544788
0.6517595346226415
Epoch 61/279, Loss: 0.1650541769961516
0.6596711265927996
Epoch 71/279, Loss: 0.15870805233716964
0.6625920178183182
Epoch 81/279, Loss: 0.15186525757114092
0.6647635062719056
Epoch 91/279, Loss: 0.14654248331983885
0.6709719721999353
Epoch 101/279, Loss: 0.13909279567499955
0.6758754446618918
Epoch 111/279, Loss: 0.13090300597250462
0.6875398696558709
Epoch 121/279, Loss: 0.12229697108268738
0.6931379848508497
Epoch 131/279, Loss: 0.11489681663612525
0.694387116704467
Epoch 141/279, Loss: 0.10861709415912628
0.6996510478803105
Epoch 151/279, Loss: 0.10170226469635964
0.7065161482844725
Epoch 161/279, Lo

[I 2024-01-23 09:15:07,341] Trial 31 finished with value: 0.7260556472001223 and parameters: {'hidden_dim_h': 39, 'dropout': 0.22966358786610375, 'batch_size': 404, 'n_epochs': 279, 'learning_rate': 0.00015980723339466922}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/252, Loss: 0.420120186863407
0.5602514746905091
Epoch 11/252, Loss: 0.19592546214980464
0.6299751199607088
Epoch 21/252, Loss: 0.1559482791250752
0.6564560499883811
Epoch 31/252, Loss: 0.14168035803783324
0.6644665819565917
Epoch 41/252, Loss: 0.13423127656982792
0.6655231143136581
Epoch 51/252, Loss: 0.11969016552452118
0.6752796723830164
Epoch 61/252, Loss: 0.10572615938801919
0.6952197235080361
Epoch 71/252, Loss: 0.09708235112409438
0.6952524974832014
Epoch 81/252, Loss: 0.08758862004164726
0.7119220447287166
Epoch 91/252, Loss: 0.07797201831013925
0.7146702297023423
Epoch 101/252, Loss: 0.07267267317060501
0.7219606308772236
Epoch 111/252, Loss: 0.06805306823263245
0.7180505231090397
Epoch 121/252, Loss: 0.06924796693267361
0.7156246648857254
Epoch 131/252, Loss: 0.06710708982521488
0.7271234787506059
Epoch 141/252, Loss: 0.059274894756174853
0.7343238622430598
Epoch 151/252, Loss: 0.06473921455683247
0.7261662523442625
Epoch 161/252,

[I 2024-01-23 09:17:36,832] Trial 32 finished with value: 0.735869700940863 and parameters: {'hidden_dim_h': 39, 'dropout': 0.22548915994336155, 'batch_size': 391, 'n_epochs': 252, 'learning_rate': 0.00040760326902778793}. Best is trial 32 with value: 0.735869700940863.


Found better hyperparameter, update model
Build model with 3 layers of attention
Epoch 1/245, Loss: 2.0138760590162432
0.5631020251785067
Epoch 11/245, Loss: 1.0603836696656024
0.5882052001509794
Epoch 21/245, Loss: 0.527906920577659
0.618237269127995
Epoch 31/245, Loss: 0.2665057419264903
0.6640486359817503
Epoch 41/245, Loss: 0.16256861974958514
0.6661915665810462
Epoch 51/245, Loss: 0.130238556226746
0.674209096155905
Epoch 61/245, Loss: 0.1233379592905279
0.6797561435007817
Epoch 71/245, Loss: 0.1153412679912614
0.6890189523083008
Epoch 81/245, Loss: 0.10727526662779636
0.6986074506549356
Epoch 91/245, Loss: 0.2673265015492674
0.0013035644680485768
Epoch 101/245, Loss: 0.2669936401433632
0.003212996464674156
Epoch 111/245, Loss: 0.2673961493324061
0.0030322803496575634
Epoch 121/245, Loss: 0.2664600004915331
0.0008346842138748206
Epoch 131/245, Loss: 0.2658524711112507
0.0018050433746710406
Epoch 141/245, Loss: 0.26643919187491055
3.7237964002537006e-05
Epoch 151/245, Loss: 0.26577

[I 2024-01-23 09:20:00,329] Trial 33 finished with value: 0.0016457019572732828 and parameters: {'hidden_dim_h': 39, 'dropout': 0.22866278324359052, 'batch_size': 393, 'n_epochs': 245, 'learning_rate': 0.00048040984470997715}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/220, Loss: 0.2968337474146275
0.5959418997084502
Epoch 11/220, Loss: 0.26394886862147937
0.0012196783098965334
Epoch 21/220, Loss: 0.26462186617080613
0.0031485285253817627
Epoch 31/220, Loss: 0.2642828855249617
2.5650632975225528e-05
Epoch 41/220, Loss: 0.26478691534562543
0.0013776143036569077
Epoch 51/220, Loss: 0.2641433310328108
0.00136393203781701
Epoch 61/220, Loss: 0.26415695340344403
0.0015326721593367084
Epoch 71/220, Loss: 0.26483500169383156
0.00020501371488043142
Epoch 81/220, Loss: 0.2647967499614966
6.543088206891242e-06
Epoch 91/220, Loss: 0.26449573235680357
0.001910931468676614
Epoch 101/220, Loss: 0.26516088662725507
9.97777710620627e-08
Epoch 111/220, Loss: 0.2647774320359182
0.00020102789024044408
Epoch 121/220, Loss: 0.264814785935662
0.0009956142377207887
Epoch 131/220, Loss: 0.26482929274289296
4.8894076547876846e-05
Epoch 141/220, Loss: 0.2652742409645909
0.0020334540652618277
Epoch 151/220, Loss: 0.264844285117255

[I 2024-01-23 09:22:31,038] Trial 34 finished with value: 0.0037112287931952478 and parameters: {'hidden_dim_h': 36, 'dropout': 0.23131050410211593, 'batch_size': 242, 'n_epochs': 220, 'learning_rate': 0.0012784729502292513}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/192, Loss: 1.6846208470208304
0.5447131660892557
Epoch 11/192, Loss: 0.9390778703348978
0.5976100303996074
Epoch 21/192, Loss: 0.49792703092098234
0.6367837813918634
Epoch 31/192, Loss: 0.26066737515585764
0.6684241350940163
Epoch 41/192, Loss: 0.15414138351167953
0.6901473012757828
Epoch 51/192, Loss: 0.11444519736937114
0.7008403089364214
Epoch 61/192, Loss: 0.10426135701792581
0.6966106113617422
Epoch 71/192, Loss: 0.09945444007005011
0.7047813851519492
Epoch 81/192, Loss: 0.09165485809956278
0.7179358401163846
Epoch 91/192, Loss: 0.08672140570623534
0.7096034214577327
Epoch 101/192, Loss: 0.079059970166002
0.7186580477742461
Epoch 111/192, Loss: 0.07206390627792904
0.7201081003262624
Epoch 121/192, Loss: 0.06608063408306666
0.7251050178128862
Epoch 131/192, Loss: 0.06283549160829612
0.7184411645068876
Epoch 141/192, Loss: 0.05902986739362989
0.7183751951328956
Epoch 151/192, Loss: 0.055443543355379786
0.7198643515764777
Epoch 161/192, 

[I 2024-01-23 09:24:38,717] Trial 35 finished with value: 0.7189072588972484 and parameters: {'hidden_dim_h': 46, 'dropout': 0.2699743495426901, 'batch_size': 343, 'n_epochs': 192, 'learning_rate': 0.00034808806981486833}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/139, Loss: 0.36401541218871164
0.5980899938042106
Epoch 11/139, Loss: 0.17982274464198522
0.646722088281565
Epoch 21/139, Loss: 0.13030665570071764
0.669037553388103
Epoch 31/139, Loss: 0.09965514178786959
0.6923886460107926
Epoch 41/139, Loss: 0.08467922813835599
0.702035698510336
Epoch 51/139, Loss: 0.07767785370704673
0.7072040369622313
Epoch 61/139, Loss: 0.07261098664076555
0.7094511476630333
Epoch 71/139, Loss: 0.07042393535375595
0.7166679397235592
Epoch 81/139, Loss: 0.06732029590223516
0.7176625726778265
Epoch 91/139, Loss: 0.06469475380366757
0.724325724629975
Epoch 101/139, Loss: 0.062312290959414984
0.7236508926916355
Epoch 111/139, Loss: 0.061105016406093324
0.722741346754673
Epoch 121/139, Loss: 0.05919888101163365
0.7225786089539209
Epoch 131/139, Loss: 0.05750587263277599
0.7152510211006118


[I 2024-01-23 09:27:17,951] Trial 36 finished with value: 0.7152510211006118 and parameters: {'hidden_dim_h': 24, 'dropout': 0.24821141425297047, 'batch_size': 114, 'n_epochs': 139, 'learning_rate': 0.00028499850247870156}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/266, Loss: 0.8620514495898102
0.5411868949561356
Epoch 11/266, Loss: 0.24807021287033112
0.6368948553985183
Epoch 21/266, Loss: 0.26335591456647645
0.0004840889566664366
Epoch 31/266, Loss: 0.26279211410526504
0.003741496989555066
Epoch 41/266, Loss: 0.2636177267058421
0.0008594053618241914
Epoch 51/266, Loss: 0.2630235441660477
0.0017620020461287542
Epoch 61/266, Loss: 0.26358287175328043
0.0005201412977166496
Epoch 71/266, Loss: 0.2630234143491519
0.004960721266998875
Epoch 81/266, Loss: 0.2631607192047572
0.005661348236667715
Epoch 91/266, Loss: 0.2626278891654338
0.0025997781235868912
Epoch 101/266, Loss: 0.26391195695279007
6.377658254921854e-08
Epoch 111/266, Loss: 0.263716160745944
0.003520290753477293
Epoch 121/266, Loss: 0.2627178695747408
0.00026920464050696376
Epoch 131/266, Loss: 0.26291084251666474
0.0006266448049987941
Epoch 141/266, Loss: 0.26244509156982776
0.0015899076238759388
Epoch 151/266, Loss: 0.26247164948006807
0.00

[I 2024-01-23 09:30:42,842] Trial 37 finished with value: 0.003031039115904061 and parameters: {'hidden_dim_h': 41, 'dropout': 0.21584200470101306, 'batch_size': 203, 'n_epochs': 266, 'learning_rate': 0.0005228330912876873}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/54, Loss: 0.4318383812904358
0.5915551603060116
Epoch 11/54, Loss: 0.2646643289110877
0.0013025835883268132
Epoch 21/54, Loss: 0.2646439714865251
0.004464477343484298
Epoch 31/54, Loss: 0.2647139484232122
0.0021794636837298926
Epoch 41/54, Loss: 0.2647622788494283
0.002404910068867992
Epoch 51/54, Loss: 0.26449261199344287
0.005726626301272029


[I 2024-01-23 09:31:09,842] Trial 38 finished with value: 0.005726626301272029 and parameters: {'hidden_dim_h': 26, 'dropout': 0.30526248837069087, 'batch_size': 435, 'n_epochs': 54, 'learning_rate': 0.002260026352644526}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/215, Loss: 0.6042437179883321
0.56855246124153
Epoch 11/215, Loss: 0.4819310216108958
0.5744156041405911
Epoch 21/215, Loss: 0.40299400766690574
0.5845644661539598
Epoch 31/215, Loss: 0.35065007090568545
0.5879243323121496
Epoch 41/215, Loss: 0.2859564435482025
0.5810001407608248
Epoch 51/215, Loss: 0.24690272510051728
0.6026540359834198
Epoch 61/215, Loss: 0.22643932779630024
0.6255078233053807
Epoch 71/215, Loss: 0.21511283040046691
0.6402911085717645
Epoch 81/215, Loss: 0.20650709172089896
0.6443967061842472
Epoch 91/215, Loss: 0.19762380242347719
0.6526416150137061
Epoch 101/215, Loss: 0.18857099036375682
0.6540187889052522
Epoch 111/215, Loss: 0.1787739098072052
0.6609366213412019
Epoch 121/215, Loss: 0.16904547890027363
0.658214532452451
Epoch 131/215, Loss: 0.15996005455652873
0.6649463105681982
Epoch 141/215, Loss: 0.1511890866359075
0.6675135542291494
Epoch 151/215, Loss: 0.14345971405506133
0.6703115647867359
Epoch 161/215, Loss:

[I 2024-01-23 09:33:04,451] Trial 39 finished with value: 0.6792358562692914 and parameters: {'hidden_dim_h': 20, 'dropout': 0.18993891898433468, 'batch_size': 320, 'n_epochs': 215, 'learning_rate': 0.00011078603024265119}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/242, Loss: 0.30219987836377377
0.5890422883559175
Epoch 11/242, Loss: 0.2710975553455024
5.405226514111679e-08
Epoch 21/242, Loss: 0.2686536357320588
0.0010295150965257064
Epoch 31/242, Loss: 0.26683701410375793
0.0001072621923508609
Epoch 41/242, Loss: 0.2638469253120751
0.0032994336037507907
Epoch 51/242, Loss: 0.26507300436496734
0.0010665222048132652
Epoch 61/242, Loss: 0.26568419686679184
0.00012748288136863934
Epoch 71/242, Loss: 0.265649463082182
0.000972511553978817
Epoch 81/242, Loss: 0.2641692403061637
0.0005048017325909052
Epoch 91/242, Loss: 0.2642256168455913
0.0015374968476981094
Epoch 101/242, Loss: 0.2641108091535239
0.0004335556969680856
Epoch 111/242, Loss: 0.26429375327866655
0.0019641780671431426
Epoch 121/242, Loss: 0.2644649935179743
0.0035476425297245426
Epoch 131/242, Loss: 0.2645600380568669
0.00468269703385878
Epoch 141/242, Loss: 0.26407060057952486
2.9994772460163005e-05
Epoch 151/242, Loss: 0.2642318752305261
0

[I 2024-01-23 09:36:28,259] Trial 40 finished with value: 0.0016875981261806447 and parameters: {'hidden_dim_h': 34, 'dropout': 0.33665586423848554, 'batch_size': 165, 'n_epochs': 242, 'learning_rate': 0.0009546968488976103}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/197, Loss: 1.3445422375524365
0.5452880614459172
Epoch 11/197, Loss: 0.7526080922500508
0.0008691666607357707
Epoch 21/197, Loss: 0.4451802924678132
0.0013530858855065225
Epoch 31/197, Loss: 0.31355818622821086
4.45679972050111e-05
Epoch 41/197, Loss: 0.2735510234897201
0.00219482915941301
Epoch 51/197, Loss: 0.26509905546098145
0.0003495976869256719
Epoch 61/197, Loss: 0.264208291229364
0.001642718763189728
Epoch 71/197, Loss: 0.2641060839633684
0.0039426077846137445
Epoch 81/197, Loss: 0.2642246335744858
0.0015807266860917392
Epoch 91/197, Loss: 0.26492548935316707
1.9963271835705464e-05
Epoch 101/197, Loss: 0.2644119341228459
0.0001796763521993803
Epoch 111/197, Loss: 0.26396913826465607
0.0034614434705547924
Epoch 121/197, Loss: 0.26334185052562403
0.002038454089591666
Epoch 131/197, Loss: 0.26397955276676127
0.0011153092479629797
Epoch 141/197, Loss: 0.2630641975918332
0.0018869387790145422
Epoch 151/197, Loss: 0.2631176998083656
0.00

[I 2024-01-23 09:38:42,788] Trial 41 finished with value: 0.0001789032556944285 and parameters: {'hidden_dim_h': 47, 'dropout': 0.27666752642808345, 'batch_size': 323, 'n_epochs': 197, 'learning_rate': 0.0003278778527098577}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/188, Loss: 0.40411799140723353
0.5858166636745088
Epoch 11/188, Loss: 0.18000830799700265
0.6569908158673652
Epoch 21/188, Loss: 0.15414779175476856
0.6548207453275507
Epoch 31/188, Loss: 0.1356396823223815
0.6842925760538079
Epoch 41/188, Loss: 0.12027765433472323
0.6914719063716898
Epoch 51/188, Loss: 0.10441641410790294
0.6992041063657626
Epoch 61/188, Loss: 0.09160740565822785
0.7024899214990873
Epoch 71/188, Loss: 0.0800753360591739
0.7102299157449383
Epoch 81/188, Loss: 0.07302686890744302
0.7115052933606045
Epoch 91/188, Loss: 0.07413152385368404
0.7117379943206804
Epoch 101/188, Loss: 0.06332331850945232
0.720545039730359
Epoch 111/188, Loss: 0.273296893181571
0.0056252839967365935
Epoch 121/188, Loss: 0.2702596695442875
0.0016464046630543135
Epoch 131/188, Loss: 0.2726371740720358
0.0003993666171547674
Epoch 141/188, Loss: 0.2709925409900137
0.005740902905126467
Epoch 151/188, Loss: 0.2704172166715185
0.0019644046835566686
Epoch 1

[I 2024-01-23 09:40:54,818] Trial 42 finished with value: 9.678546528968646e-05 and parameters: {'hidden_dim_h': 46, 'dropout': 0.25350885729520845, 'batch_size': 291, 'n_epochs': 188, 'learning_rate': 0.00038675984009643683}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/280, Loss: 0.20132789100116155
0.5840368669027876
Epoch 11/280, Loss: 0.16631907112193559
0.6315193495740039
Epoch 21/280, Loss: 0.15234765297961686
0.6503051041856575
Epoch 31/280, Loss: 0.20408133487656432
0.31211326422751506
Epoch 41/280, Loss: 0.27018976886317414
0.0013033891413621044
Epoch 51/280, Loss: 0.26711956566234807
0.002000681789955984
Epoch 61/280, Loss: 0.2641292422447564
0.0059201798186091885
Epoch 71/280, Loss: 0.26272704353872334
0.0074262682945053626
Epoch 81/280, Loss: 0.26354863339999934
0.010865596892474655
Epoch 91/280, Loss: 0.26384599000777836
0.004374460202438766
Epoch 101/280, Loss: 0.26120525739102995
0.007805144291856482
Epoch 111/280, Loss: 0.26166476927838234
0.006960218568253994
Epoch 121/280, Loss: 0.2620727551995583
0.007934521529956384
Epoch 131/280, Loss: 0.2619898648194547
0.0085575088936727
Epoch 141/280, Loss: 0.2626292146039459
0.008278044614950773
Epoch 151/280, Loss: 0.2637940842025685
0.0016476332

[I 2024-01-23 09:43:56,872] Trial 43 finished with value: 0.00993921251907562 and parameters: {'hidden_dim_h': 50, 'dropout': 0.2976689052690173, 'batch_size': 457, 'n_epochs': 280, 'learning_rate': 0.0001679230212987706}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/224, Loss: 0.4834523657336831
0.5702914186135644
Epoch 11/224, Loss: 0.26918957591988146
0.6503720044841109
Epoch 21/224, Loss: 0.27768779755569994
0.00010967550286966011
Epoch 31/224, Loss: 0.26536340778693557
0.0009348932009370274
Epoch 41/224, Loss: 0.26450538937933743
0.001951318855577935
Epoch 51/224, Loss: 0.26347872242331505
0.001176739476133336
Epoch 61/224, Loss: 0.2621887547429651
0.004390442358583749
Epoch 71/224, Loss: 0.26212438754737377
0.005263836866369229
Epoch 81/224, Loss: 0.2620135813485831
0.001313523912056403
Epoch 91/224, Loss: 0.26245846203528345
0.0005312618940871528
Epoch 101/224, Loss: 0.26264024106785655
0.0034420644591828193
Epoch 111/224, Loss: 0.2617175383493304
0.002615909003902981
Epoch 121/224, Loss: 0.26174792810343206
0.005026807166592096
Epoch 131/224, Loss: 0.26198718743398786
0.009746626715783187
Epoch 141/224, Loss: 0.2620281921699643
0.00337453274740098
Epoch 151/224, Loss: 0.26285555795766413
0.0058

[I 2024-01-23 09:46:17,851] Trial 44 finished with value: 0.011413952415481118 and parameters: {'hidden_dim_h': 41, 'dropout': 0.22183737109270796, 'batch_size': 374, 'n_epochs': 224, 'learning_rate': 0.0002891901477928048}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/238, Loss: 0.22618289589881896
0.5712174176298767
Epoch 11/238, Loss: 0.15386606603860856
0.6244951053647785
Epoch 21/238, Loss: 0.13301359824836254
0.6434714849625036
Epoch 31/238, Loss: 0.1247731514275074
0.656048427343824
Epoch 41/238, Loss: 0.12122629098594188
0.6678121918670291
Epoch 51/238, Loss: 0.11633003205060959
0.6598513791918674
Epoch 61/238, Loss: 0.11085006445646287
0.676192844277278
Epoch 71/238, Loss: 0.10959621630609036
0.6753407250261834
Epoch 81/238, Loss: 0.10330209620296955
0.6809939541378753
Epoch 91/238, Loss: 0.09778980873525142
0.6918835185979392
Epoch 101/238, Loss: 0.09282382503151894
0.6944869090291653
Epoch 111/238, Loss: 0.08844304047524928
0.7008613477823006
Epoch 121/238, Loss: 0.08397854901850224
0.7028936214254232
Epoch 131/238, Loss: 0.08189337886869907
0.7018102851665994
Epoch 141/238, Loss: 0.07649771496653557
0.7060447339468459
Epoch 151/238, Loss: 0.07307582311332225
0.7093182452766684
Epoch 161/238, 

[I 2024-01-23 09:48:30,169] Trial 45 finished with value: 0.7247142201279388 and parameters: {'hidden_dim_h': 44, 'dropout': 0.2725501903354426, 'batch_size': 1194, 'n_epochs': 238, 'learning_rate': 0.00044711797721777363}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/257, Loss: 2.085735691560281
0.5313950933488085
Epoch 11/257, Loss: 1.5815757848121024
0.5557762720268498
Epoch 21/257, Loss: 1.1700316632116163
0.5931015015877981
Epoch 31/257, Loss: 0.8354319118164681
0.6061437027432447
Epoch 41/257, Loss: 0.5715608920599963
0.6330753396541098
Epoch 51/257, Loss: 0.3756124673662959
0.6636621110657287
Epoch 61/257, Loss: 0.27995782663693297
0.5220601470839185
Epoch 71/257, Loss: 0.1723743261517705
0.6445476078853158
Epoch 81/257, Loss: 0.13726076769667703
0.6613525347904949
Epoch 91/257, Loss: 0.12805694043636323
0.6731053712978247
Epoch 101/257, Loss: 0.11839831427142426
0.6839059276130564
Epoch 111/257, Loss: 0.11006197844808166
0.698156007232118
Epoch 121/257, Loss: 0.10321001849464469
0.6953006947983479
Epoch 131/257, Loss: 0.09664467615452972
0.7081226261009294
Epoch 141/257, Loss: 0.09322579811150963
0.7106342251537896
Epoch 151/257, Loss: 0.10439317836552053
0.42824821519066025
Epoch 161/257, Loss:

[I 2024-01-23 09:52:57,514] Trial 46 finished with value: 0.6922693440424444 and parameters: {'hidden_dim_h': 44, 'dropout': 0.24172199373891584, 'batch_size': 129, 'n_epochs': 257, 'learning_rate': 7.099429578752019e-05}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/272, Loss: 0.2654209414666349
0.5761376250369569
Epoch 11/272, Loss: 0.17406478388742966
0.6495927466484132
Epoch 21/272, Loss: 0.14954365146431056
0.6658012180779702
Epoch 31/272, Loss: 0.14252598786895926
0.670644281666402
Epoch 41/272, Loss: 0.13376601006497035
0.6833689891039151
Epoch 51/272, Loss: 0.12673482129519636
0.6824449100968284
Epoch 61/272, Loss: 0.12151411446658048
0.6808684072058889
Epoch 71/272, Loss: 0.11417292938991026
0.687379128399229
Epoch 81/272, Loss: 0.10591419752348553
0.6900906241708804
Epoch 91/272, Loss: 0.09885674850507216
0.696594818070334
Epoch 101/272, Loss: 0.09321432967077602
0.6997209044332425
Epoch 111/272, Loss: 0.08727934177626263
0.7028721965867668
Epoch 121/272, Loss: 0.08596353673122147
0.7047995378236176
Epoch 131/272, Loss: 0.08032617142254656
0.706854913220157
Epoch 141/272, Loss: 0.07719142497940497
0.7121383229741521
Epoch 151/272, Loss: 0.0733329447155649
0.713201463603455
Epoch 161/272, Loss

[I 2024-01-23 09:55:18,382] Trial 47 finished with value: 0.7167041860858777 and parameters: {'hidden_dim_h': 38, 'dropout': 0.29011660337672274, 'batch_size': 1128, 'n_epochs': 272, 'learning_rate': 0.0005006476598958318}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/96, Loss: 0.7324866169974917
0.5593684858565793
Epoch 11/96, Loss: 0.5812537698518663
0.5815036479261566
Epoch 21/96, Loss: 0.49396799433799016
0.6069916689112074
Epoch 31/96, Loss: 0.42075211235455107
0.6186038462141074
Epoch 41/96, Loss: 0.358467888264429
0.6325188038086963
Epoch 51/96, Loss: 0.3036617324465797
0.6437906059033154
Epoch 61/96, Loss: 0.2589656618379411
0.6494026360254064
Epoch 71/96, Loss: 0.22001474244253977
0.662567743821608
Epoch 81/96, Loss: 0.1911046483686992
0.6682639425747553
Epoch 91/96, Loss: 0.16838949705873216
0.6592707264349958


[I 2024-01-23 09:56:11,557] Trial 48 finished with value: 0.6592707264349958 and parameters: {'hidden_dim_h': 42, 'dropout': 0.3315743018455075, 'batch_size': 1167, 'n_epochs': 96, 'learning_rate': 0.00021528413396066815}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/241, Loss: 0.3430199737244464
0.5941160647481198
Epoch 11/241, Loss: 0.2626404860552321
0.009613459532260142
Epoch 21/241, Loss: 0.2617579289573304
0.0033091832695640215
Epoch 31/241, Loss: 0.26343952151055033
0.0010674273962621105
Epoch 41/241, Loss: 0.2634061963317242
4.831003189040147e-05
Epoch 51/241, Loss: 0.2647011597105797
0.004336627903377207
Epoch 61/241, Loss: 0.26363026287327423
0.0017826716319535917
Epoch 71/241, Loss: 0.26283707707486254
5.1335976319971194e-05
Epoch 81/241, Loss: 0.2628996343688762
2.0527350521377735e-08
Epoch 91/241, Loss: 0.26236661650398946
0.00015918769690291985
Epoch 101/241, Loss: 0.2616180362219506
0.006071051781255213
Epoch 111/241, Loss: 0.26261789700452315
5.9518799227273896e-05
Epoch 121/241, Loss: 0.26263294147050126
0.0006601453679749793
Epoch 131/241, Loss: 0.26280694597579063
0.0049132772730447255
Epoch 141/241, Loss: 0.26213789953196304
0.006119294544317577
Epoch 151/241, Loss: 0.26231264941235

[I 2024-01-23 09:58:55,385] Trial 49 finished with value: 0.0002048462752616068 and parameters: {'hidden_dim_h': 38, 'dropout': 0.17846951816309745, 'batch_size': 254, 'n_epochs': 241, 'learning_rate': 0.0009498722693919247}. Best is trial 32 with value: 0.735869700940863.


Epoch 241/241, Loss: 0.2644238589291877
0.0002048462752616068
Build model with 3 layers of attention
Epoch 1/284, Loss: 0.4321105397762136
0.4890100667364678
Epoch 11/284, Loss: 0.3824718112641193
0.5781221950500725
Epoch 21/284, Loss: 0.3610202435483324
0.5931300890567137
Epoch 31/284, Loss: 0.34241309698591843
0.6049433567269298
Epoch 41/284, Loss: 0.32607735471522553
0.6177337317643677
Epoch 51/284, Loss: 0.3124194671498968
0.6241279017148779
Epoch 61/284, Loss: 0.2999525678918717
0.6282685142462718
Epoch 71/284, Loss: 0.28945319449647944
0.6345739021052037
Epoch 81/284, Loss: 0.2805150398548613
0.6380464541399505
Epoch 91/284, Loss: 0.27281729876995087
0.6376759567106209
Epoch 101/284, Loss: 0.26554694169379295
0.6436232445765194
Epoch 111/284, Loss: 0.2600670425181693
0.6458851948802833
Epoch 121/284, Loss: 0.2548967739369007
0.6445496830721366
Epoch 131/284, Loss: 0.250174712944538
0.6486765072356135
Epoch 141/284, Loss: 0.24499415526998805
0.6491246207247654
Epoch 151/284, Loss:

[I 2024-01-23 10:01:15,951] Trial 50 finished with value: 0.6737772891076174 and parameters: {'hidden_dim_h': 30, 'dropout': 0.26013168367841993, 'batch_size': 514, 'n_epochs': 284, 'learning_rate': 4.4688828156069894e-05}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/205, Loss: 1.300530425139836
0.5350748810249437
Epoch 11/205, Loss: 0.9686946477208819
0.5763967972891902
Epoch 21/205, Loss: 0.7300325555460794
0.6174771998369161
Epoch 31/205, Loss: 0.553015029004642
0.6493825762905338
Epoch 41/205, Loss: 0.42661238355296
0.647999971775911
Epoch 51/205, Loss: 0.3400216341018677
0.658199524492902
Epoch 61/205, Loss: 0.2858593972665923
0.6659474906221395
Epoch 71/205, Loss: 0.25491843500307626
0.6738681093625183
Epoch 81/205, Loss: 0.23646865572248185
0.6784613751900487
Epoch 91/205, Loss: 0.22184580372912543
0.6792711964019202
Epoch 101/205, Loss: 0.2072397178837231
0.6844599251814844
Epoch 111/205, Loss: 0.19165320800883429
0.6779565492262234
Epoch 121/205, Loss: 0.1752938824040549
0.6868456177010395
Epoch 131/205, Loss: 0.15940118612987655
0.6965593862935952
Epoch 141/205, Loss: 0.14581456684640476
0.6984761700183114
Epoch 151/205, Loss: 0.13238372334412166
0.7022229888983885
Epoch 161/205, Loss: 0.1213

[I 2024-01-23 10:03:32,392] Trial 51 finished with value: 0.7217369928703768 and parameters: {'hidden_dim_h': 45, 'dropout': 0.2723050270157892, 'batch_size': 342, 'n_epochs': 205, 'learning_rate': 0.0001646778802060433}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/208, Loss: 0.7075048038592705
0.5644623934741666
Epoch 11/208, Loss: 0.3366818288197884
0.6313399721621468
Epoch 21/208, Loss: 0.1796513387790093
0.6576578142113941
Epoch 31/208, Loss: 0.12970705404877664
0.6719924258724282
Epoch 41/208, Loss: 0.1185332028911664
0.6835378249966686
Epoch 51/208, Loss: 0.11107541976066736
0.6840974387855546
Epoch 61/208, Loss: 0.10103940109793956
0.6963973757423757
Epoch 71/208, Loss: 0.09202646114505254
0.6946928269005614
Epoch 81/208, Loss: 0.08466207459568978
0.707161541552486
Epoch 91/208, Loss: 0.07817225582324541
0.719864025353921
Epoch 101/208, Loss: 0.0743706737859891
0.7153473547311364
Epoch 111/208, Loss: 0.07110238788792721
0.7189314849707252
Epoch 121/208, Loss: 0.06825552468116466
0.7227754621587895
Epoch 131/208, Loss: 0.06563522916000623
0.7223858969364085
Epoch 141/208, Loss: 0.06399844440703209
0.7217626473157612
Epoch 151/208, Loss: 0.061907900210756525
0.724791333594929
Epoch 161/208, Loss

[I 2024-01-23 10:06:16,768] Trial 52 finished with value: 0.7327824775209302 and parameters: {'hidden_dim_h': 40, 'dropout': 0.20702835611598297, 'batch_size': 184, 'n_epochs': 208, 'learning_rate': 0.00016650195900128322}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/233, Loss: 0.6964484031001726
0.548644301781926
Epoch 11/233, Loss: 0.4964584931731224
0.6058601032395903
Epoch 21/233, Loss: 0.3712695337831974
0.6296599443277785
Epoch 31/233, Loss: 0.2934178291509549
0.6427199056150199
Epoch 41/233, Loss: 0.2500223105152448
0.6609680871549004
Epoch 51/233, Loss: 0.22693190177281697
0.6563313969838849
Epoch 61/233, Loss: 0.20977486222982406
0.6585105809713475
Epoch 71/233, Loss: 0.1921870137254397
0.665130521219039
Epoch 81/233, Loss: 0.17600255322953065
0.6660837499774046
Epoch 91/233, Loss: 0.1615383869037032
0.6769167523650602
Epoch 101/233, Loss: 0.1482706737394134
0.6774398309064014
Epoch 111/233, Loss: 0.13706308621913194
0.6859886263684221
Epoch 121/233, Loss: 0.1274394765496254
0.6851329299117302
Epoch 131/233, Loss: 0.11813923108081023
0.6921090050363004
Epoch 141/233, Loss: 0.10947491178909938
0.6967676378004388
Epoch 151/233, Loss: 0.10212169978767634
0.7000965298346554
Epoch 161/233, Loss: 0.

[I 2024-01-23 10:09:12,791] Trial 53 finished with value: 0.7230472891874393 and parameters: {'hidden_dim_h': 40, 'dropout': 0.20275439012332208, 'batch_size': 200, 'n_epochs': 233, 'learning_rate': 9.198453827332333e-05}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/251, Loss: 0.197223582615455
0.5797938484777206
Epoch 11/251, Loss: 0.17500227006773153
0.6217162550554383
Epoch 21/251, Loss: 0.16817601894338927
0.6263688394966755
Epoch 31/251, Loss: 0.1623063584168752
0.6418015827726424
Epoch 41/251, Loss: 0.15964392013847828
0.6493374516914314
Epoch 51/251, Loss: 0.15530223647753397
0.6467783934838801
Epoch 61/251, Loss: 0.15154750210543474
0.6525255446510058
Epoch 71/251, Loss: 0.14720135740935802
0.6582956896949353
Epoch 81/251, Loss: 0.14407062654693922
0.6641756904220266
Epoch 91/251, Loss: 0.14052992748717466
0.6684628862924857
Epoch 101/251, Loss: 0.13698008935898542
0.6731867925516777
Epoch 111/251, Loss: 0.13332256643722454
0.6780882304001796
Epoch 121/251, Loss: 0.1303686617563168
0.6734907967915927
Epoch 131/251, Loss: 0.12935854773968458
0.6701098499478827
Epoch 141/251, Loss: 0.12568053882569075
0.6762798007825925
Epoch 151/251, Loss: 0.12251139711588621
0.6823933066354343
Epoch 161/251, L

[I 2024-01-23 10:11:32,747] Trial 54 finished with value: 0.6900555140676057 and parameters: {'hidden_dim_h': 43, 'dropout': 0.23818512467608657, 'batch_size': 1024, 'n_epochs': 251, 'learning_rate': 0.000129668625308857}. Best is trial 32 with value: 0.735869700940863.


Epoch 251/251, Loss: 0.09975179377943277
0.6900555140676057
Build model with 3 layers of attention
Epoch 1/216, Loss: 0.2615802187047549
0.6042150191164392


[I 2024-01-23 10:11:34,997] Trial 55 finished with value: 0.6042150191164392 and parameters: {'hidden_dim_h': 35, 'dropout': 0.2064254647427, 'batch_size': 161, 'n_epochs': 216, 'learning_rate': 0.0002597281003453839}. Best is trial 32 with value: 0.735869700940863.


training failed
Build model with 3 layers of attention
Epoch 1/268, Loss: 1.1786091380649142
0.5507874825587576


[I 2024-01-23 10:11:36,903] Trial 56 finished with value: 0.5507874825587576 and parameters: {'hidden_dim_h': 38, 'dropout': 0.2255104565773971, 'batch_size': 267, 'n_epochs': 268, 'learning_rate': 0.0006175308358076706}. Best is trial 32 with value: 0.735869700940863.


training failed
Build model with 3 layers of attention
training failed


[W 2024-01-23 10:11:37,094] Trial 57 failed with parameters: {'hidden_dim_h': 40, 'dropout': 0.171378915294019, 'batch_size': 595, 'n_epochs': 233, 'learning_rate': 0.00020076739042322988} because of the following error: IndexError('index -1 is out of bounds for axis 0 with size 0').
Traceback (most recent call last):
  File "/home/juannanzhou/.local/lib/python3.10/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/scratch/local/22019942/ipykernel_2825595/727483821.py", line 55, in objective
    criterion = np.array(r2_test)[-1]
IndexError: index -1 is out of bounds for axis 0 with size 0
[W 2024-01-23 10:11:37,105] Trial 57 failed with value None.

KeyboardInterrupt



In [90]:
# num_layers = 1
# model_name = "TF_" + str(num_layers)

# criterion_best = 0.
# study = optuna.create_study(direction='maximize')
# study.optimize(objective, n_trials=10)

# # Print the best hyperparameters
# best_trial = study.best_trial
# print("Best Trial:")
# print(f"  Criterion: {best_trial.value:.4f}")
# print("  Params: ")
# for key, value in best_trial.params.items():
#     print(f"    {key}: {value}")

# best_hyper_parameters = {}
# for key, value in best_trial.params.items():
#     best_hyper_parameters[key] = value

# model_best.eval()
# pred, true = model_best(X_val.flatten(1)).flatten().detach().cpu().numpy(), y_val.flatten().detach().cpu().numpy()

# r2_test = pearsonr(pred, true)[0]**2
# print(f"{model_name} achieved R2 = {r2_test}")

# # save test R2 score
# import csv
# with open(os.path.join(results_path, "R2s.csv"), mode='a', newline='') as file:
#     writer = csv.writer(file)
#     writer.writerows([[model_name, r2_test]])

# # save predictions
# pd.DataFrame({"prediction": pred, "true": true}).to_csv(os.path.join(results_path, model_name + "_predictions.csv"), index=False)

# # save best model
# torch.save(model_best, os.path.join(results_path, model_name + "_BestModel"))

#### Use 2 layer model weights for 3 layer model

In [260]:
hidden_dim_h = 20
dropout = .3
num_heads = 4

In [261]:
# load trained 2 layer model

In [262]:
#blue_jz/ProteinLLE/output/Faure2023_1_lenient_20%_rep_1/TF_2_BestModel

In [265]:
model2 = torch.load("../output/Faure2023_1_lenient_20%_rep_1/TF_1_BestModel")
model2.eval()
pred, true = model2(X_test.flatten(1)).flatten().detach().cpu().numpy(), y_test.flatten().detach().cpu().numpy()
r2_test = pearsonr(pred, true)[0]**2
print(r2_test)
hidden_dim_h = int(model2.state_dict()['embedding.weight'].shape[1]/num_heads)

0.6438946561100509


In [266]:
model = Transformer_2k(L, input_dim, hidden_dim_h*num_heads, 3, num_heads, dropout).to(device)

In [252]:
# parameter list to replace
param_names = []
for name, _ in model2.named_parameters():
    param_names.append(name)

In [254]:
param_names = param_names[:-6] #keep intermediate layers
print(param_names)

['embedding.weight', 'transformer_layers.0.in_proj_weight', 'transformer_layers.0.in_proj_bias', 'transformer_layers.0.out_proj.weight', 'transformer_layers.0.out_proj.bias']


In [255]:
para_dict = model.state_dict()

for param_name in param_names:
    para_dict[param_name] = model2.state_dict()[param_name]

model.load_state_dict(para_dict)

for param_name in param_names:
    print((model.state_dict()[param_name] - model2.state_dict()[param_name]).flatten().abs().max())

tensor(0., device='cuda:0')
tensor(0., device='cuda:0')
tensor(0., device='cuda:0')
tensor(0., device='cuda:0')
tensor(0., device='cuda:0')


In [256]:
for name, param in model.named_parameters():
    if name in param_names:
        param.requires_grad = True

In [268]:
learning_rate = 0.001

n_epochs = 200

train_loader = data.DataLoader(train_dataset,
                               batch_size=1000,
                               shuffle=True,
                               drop_last=False)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

r2_test = []

for epoch in range(n_epochs):

        model.train()
        total_loss = 0
        for batch_inputs, batch_targets in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_inputs)
            loss = criterion(outputs, batch_targets)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        if epoch % 10 == 0:
            print(f"Epoch {epoch+1}/{n_epochs}, Loss: {total_loss/len(train_loader)}")
            model.eval()
            pred, true = model(X_val.flatten(1)).flatten().detach().cpu().numpy(), y_val.flatten().detach().cpu().numpy()
            print(pearsonr(pred, true)[0]**2)
            if pearsonr(pred, true)[0]**2 == "nan":
                break
            r2_test.append(pearsonr(pred, true)[0]**2)



Epoch 1/200, Loss: 0.35701390480001766
0.5821588211461421
Epoch 11/200, Loss: 0.15426477789878845
0.6320004774059087
Epoch 21/200, Loss: 0.11442591591427724
0.6584617570833626
Epoch 31/200, Loss: 0.10740993513415258
0.6764735015607805
Epoch 41/200, Loss: 0.2711897399276495
0.0015172391296805815
Epoch 51/200, Loss: 0.26785799985130626
0.0007570187323048151
Epoch 61/200, Loss: 0.26740775133172673
0.005492701101539478
Epoch 71/200, Loss: 0.2667521846791108
0.0017937879931389051
Epoch 81/200, Loss: 0.2667158283293247
0.0005081054030514672
Epoch 91/200, Loss: 0.26644781480232876
0.0004852651432631466


KeyboardInterrupt: 

In [31]:
import optuna
from scipy.stats import pearsonr

# learning_rate = 0.0001
num_heads = 4

sequence_length = L
input_dim = AA_size*L
output_dim = 1

def objective(trial):
    global criterion_best, model_best

    # hidden_dim_h = trial.suggest_int('hidden_dim_h', 10, 50)
    dropout = trial.suggest_float('dropout', 0.05, 0.35)
    batch_size = trial.suggest_int('batch_size', 100, 1200)
    n_epochs = trial.suggest_int('n_epochs', 30, 300)
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
    # learning_rate = trial.suggest_float"learning_rate", 1e-5, 1e-2, log=True)
    
    print(f"Build model with {num_layers} layers of attention")
    model = Transformer_2k(L, input_dim, hidden_dim_h*num_heads, num_layers, num_heads, dropout).to(device)
    
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=batch_size,
                                   shuffle=True,
                                   drop_last=False)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    r2_test = []
    try: 
        for epoch in range(n_epochs):

                model.train()
                total_loss = 0
                for batch_inputs, batch_targets in train_loader:
                    optimizer.zero_grad()
                    outputs = model(batch_inputs)
                    loss = criterion(outputs, batch_targets)
                    loss.backward()
                    optimizer.step()
                    total_loss += loss.item()

                if epoch % 10 == 0:
                    print(f"Epoch {epoch+1}/{n_epochs}, Loss: {total_loss/len(train_loader)}")
                    model.eval()
                    pred, true = model(X_val.flatten(1)).flatten().detach().cpu().numpy(), y_val.flatten().detach().cpu().numpy()
                    print(pearsonr(pred, true)[0]**2)
                    if pearsonr(pred, true)[0]**2 == "nan":
                        break
                    r2_test.append(pearsonr(pred, true)[0]**2)
                    
    except: print("training failed")
    
    criterion = np.array(r2_test)[-1]
    if criterion > criterion_best:
        print("Found better hyperparameter, update model")
        criterion_best = criterion
        model_best = model
    
    return np.array(r2_test)[-1]

In [32]:
n_trials = 100
for num_layers in [3]:

    model_name = "TF_" + str(num_layers)

    criterion_best = 0.
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=100)

    # Print the best hyperparameters
    best_trial = study.best_trial
    print("Best Trial:")
    print(f"  Criterion: {best_trial.value:.4f}")
    print("  Params: ")
    for key, value in best_trial.params.items():
        print(f"    {key}: {value}")  

    best_hyper_parameters = {}
    for key, value in best_trial.params.items():
        best_hyper_parameters[key] = value

    model_best.eval()
    pred, true = model_best(X_val.flatten(1)).flatten().detach().cpu().numpy(), y_val.flatten().detach().cpu().numpy()

    r2_test = pearsonr(pred, true)[0]**2
    print(f"{model_name} achieved R2 = {r2_test}")

    # save test R2 score
    import csv
    with open(os.path.join(results_path, "R2s.csv"), mode='a', newline='') as file:
        writer = csv.writer(file)
        writer.writerows([[model_name, r2_test]])

    # save predictions
    pd.DataFrame({"prediction": pred, "true": true}).to_csv(os.path.join(results_path, model_name + "_predictions.csv"), index=False)

    # save best model
    torch.save(model_best, os.path.join(results_path, model_name + "_BestModel"))        

[I 2024-01-23 08:07:12,156] A new study created in memory with name: no-name-9ceacbef-9367-4cb3-a14b-0e722ec6a617
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)


Build model with 3 layers of attention
Epoch 1/123, Loss: 0.3468048758804798
0.5892912713597088
Epoch 11/123, Loss: 0.2642652206122875
0.6129965785127874
Epoch 21/123, Loss: 0.22934292983263732
0.6311927918564046
Epoch 31/123, Loss: 0.21113975830376147
0.6443502924711522
Epoch 41/123, Loss: 0.19708516132086515
0.6504993991633975
Epoch 51/123, Loss: 0.18342391336336733
0.6677516249362482
Epoch 61/123, Loss: 0.17090581953525544
0.6774703758202425
Epoch 71/123, Loss: 0.15834331437945365
0.6796323277559938
Epoch 81/123, Loss: 0.1463141399435699
0.6920782741135568
Epoch 91/123, Loss: 0.13601415921002627
0.6973881357264899
Epoch 101/123, Loss: 0.1265990188345313
0.702695653967123
Epoch 111/123, Loss: 0.11875486066564918
0.7080159577231476
Epoch 121/123, Loss: 0.11120947944000363
0.7028042272724704


[I 2024-01-23 08:08:29,822] Trial 0 finished with value: 0.7028042272724704 and parameters: {'hidden_dim_h': 36, 'dropout': 0.25742575781617977, 'batch_size': 301, 'n_epochs': 123, 'learning_rate': 0.00011240362450141765}. Best is trial 0 with value: 0.7028042272724704.


Found better hyperparameter, update model
Build model with 3 layers of attention
Epoch 1/90, Loss: 1.20313455419881
0.5440092898285732
Epoch 11/90, Loss: 0.16233487240970135
0.6477442230141487
Epoch 21/90, Loss: 0.12880157615457263
0.6719237341183601
Epoch 31/90, Loss: 0.10349343131695475
0.6881717250558703
Epoch 41/90, Loss: 0.0843707654092993
0.698508802319591
Epoch 51/90, Loss: 0.2746527566441468
0.00024100814353415667
Epoch 61/90, Loss: 0.2663124609206404
5.0388163711705656e-05
Epoch 71/90, Loss: 0.26712113874299187
0.005192823681819379


[I 2024-01-23 08:09:27,650] Trial 1 finished with value: 0.005192823681819379 and parameters: {'hidden_dim_h': 27, 'dropout': 0.32142519817814147, 'batch_size': 171, 'n_epochs': 90, 'learning_rate': 0.0007362933258734223}. Best is trial 0 with value: 0.7028042272724704.


training failed
Build model with 3 layers of attention
Epoch 1/204, Loss: 0.3495357741009105
0.5818015279955808
Epoch 11/204, Loss: 0.27513719553297217
0.616224074665219
Epoch 21/204, Loss: 0.24191308563405817
0.6056977605007944
Epoch 31/204, Loss: 0.22224012897773224
0.6439968560216938
Epoch 41/204, Loss: 0.21077478202906522
0.661683024495613
Epoch 51/204, Loss: 0.20100092616948215
0.6603140691334045
Epoch 61/204, Loss: 0.19217888604510913
0.6710633095918463
Epoch 71/204, Loss: 0.1859149675477635
0.6702180928065025
Epoch 81/204, Loss: 0.17319744215770203
0.6829139007536574
Epoch 91/204, Loss: 0.16945953125303442
0.6668164548995806
Epoch 101/204, Loss: 0.15807498043233698
0.6781190440636525
Epoch 111/204, Loss: 0.1490108607844873
0.6857967679996755
Epoch 121/204, Loss: 0.14058748768134552
0.6903260691065938
Epoch 131/204, Loss: 0.13247053900902922
0.6920062877241804
Epoch 141/204, Loss: 0.12466890635815533
0.6979170288029721
Epoch 151/204, Loss: 0.11721845750104297
0.7005924620159869
E

[I 2024-01-23 08:11:21,572] Trial 2 finished with value: 0.7121283937706205 and parameters: {'hidden_dim_h': 43, 'dropout': 0.2949059516884788, 'batch_size': 1093, 'n_epochs': 204, 'learning_rate': 0.00033198510793771327}. Best is trial 2 with value: 0.7121283937706205.


Found better hyperparameter, update model
Build model with 3 layers of attention
Epoch 1/271, Loss: 3.143383665518327
0.5257819947959567
Epoch 11/271, Loss: 1.1362451558763331
0.5669759863333228
Epoch 21/271, Loss: 0.4757066694172946
0.6141335388666388
Epoch 31/271, Loss: 0.2951907461339777
3.85399872712815e-05
Epoch 41/271, Loss: 0.2671243291009556
0.00010200771280196123
Epoch 51/271, Loss: 0.2666315700520169
0.00323696912806395
Epoch 61/271, Loss: 0.266037871891802
2.718967771032305e-06
Epoch 71/271, Loss: 0.26528323780406604
0.0005661678243277156
Epoch 81/271, Loss: 0.26599206165833905
7.521820799840894e-05


[I 2024-01-23 08:11:58,222] Trial 3 finished with value: 7.521820799840894e-05 and parameters: {'hidden_dim_h': 23, 'dropout': 0.0972184432826201, 'batch_size': 1132, 'n_epochs': 271, 'learning_rate': 0.002655230549368521}. Best is trial 2 with value: 0.7121283937706205.


training failed
Build model with 3 layers of attention
Epoch 1/158, Loss: 0.8756502013147613
0.5658927876819163
Epoch 11/158, Loss: 0.31404471802122796
0.6019108110518957
Epoch 21/158, Loss: 0.15655739881374217
0.6393839783382641
Epoch 31/158, Loss: 0.1299147156101686
0.6583572550783571
Epoch 41/158, Loss: 0.11786752957620739
0.6746272404419217
Epoch 51/158, Loss: 0.26723017313598113
0.0005571464282882584
Epoch 61/158, Loss: 0.26355808163866584
0.0022021102266038357
Epoch 71/158, Loss: 0.26430585612485435
0.007139855932926625
Epoch 81/158, Loss: 0.2639977913579823
0.005413809253816244
Epoch 91/158, Loss: 0.2657055794088929
0.0007392924372075156
Epoch 101/158, Loss: 0.2646020546003624
0.0020886074255198235
Epoch 111/158, Loss: 0.264394055178136
0.007666198923789941
Epoch 121/158, Loss: 0.26549031373895243
0.0027084002508932525
Epoch 131/158, Loss: 0.2659716948314949
0.00421742035500254
Epoch 141/158, Loss: 0.2651715059707194
0.003376865374778744
Epoch 151/158, Loss: 0.26601079327088817


[I 2024-01-23 08:13:51,631] Trial 4 finished with value: 0.0009439729250945863 and parameters: {'hidden_dim_h': 48, 'dropout': 0.21040069282551277, 'batch_size': 298, 'n_epochs': 158, 'learning_rate': 0.00042913008176501507}. Best is trial 2 with value: 0.7121283937706205.


Build model with 3 layers of attention
Epoch 1/295, Loss: 0.34811082290064904
0.577577978621156
Epoch 11/295, Loss: 0.2916498972523597
0.6125669560532853
Epoch 21/295, Loss: 0.26832950211340384
0.6313786870804446
Epoch 31/295, Loss: 0.25221195768925453
0.6308525061853492
Epoch 41/295, Loss: 0.2400469183921814
0.6376879971262801
Epoch 51/295, Loss: 0.22634796173341812
0.6478274714862008
Epoch 61/295, Loss: 0.21257136089186515
0.641912541079751
Epoch 71/295, Loss: 0.19963519275188446
0.6492871418424491
Epoch 81/295, Loss: 0.18898708253137528
0.6546844776311115
Epoch 91/295, Loss: 0.17787488381708821
0.6507374569480837
Epoch 101/295, Loss: 0.16628590610719496
0.6654010732103045
Epoch 111/295, Loss: 0.15644433854087705
0.6708363141528803
Epoch 121/295, Loss: 0.14804512839163503
0.6742767834211327
Epoch 131/295, Loss: 0.1388944637390875
0.6794424861192921
Epoch 141/295, Loss: 0.13046725623069272
0.6859676902534848
Epoch 151/295, Loss: 0.12447088548252659
0.6878204403846877
Epoch 161/295, Lo

[I 2024-01-23 08:16:24,572] Trial 5 finished with value: 0.7183267974858158 and parameters: {'hidden_dim_h': 33, 'dropout': 0.3242432805170107, 'batch_size': 791, 'n_epochs': 295, 'learning_rate': 0.00024686546211236896}. Best is trial 5 with value: 0.7183267974858158.


Found better hyperparameter, update model
Build model with 3 layers of attention
Epoch 1/252, Loss: 0.8951033060101495
0.5669988726479174
Epoch 11/252, Loss: 0.24484852193922235
0.6282437151598558
Epoch 21/252, Loss: 0.18758352403191553
0.6455299522935997
Epoch 31/252, Loss: 0.15421875836192697
0.6332248469411529
Epoch 41/252, Loss: 0.12015111949564754
0.659474833444599
Epoch 51/252, Loss: 0.09760421465920366
0.6713997734657393
Epoch 61/252, Loss: 0.08688699405478395
0.6895210871287657
Epoch 71/252, Loss: 0.08137905859536883
0.6826689366713586
Epoch 81/252, Loss: 0.07638554061776485
0.7033064554274504
Epoch 91/252, Loss: 0.07226499159266983
0.699490876406789
Epoch 101/252, Loss: 0.06884243201626383
0.7143437828143175
Epoch 111/252, Loss: 0.06688355338638244
0.7135375003933478
Epoch 121/252, Loss: 0.06241564919659193
0.7263190571204557
Epoch 131/252, Loss: 0.05990843685424846
0.7237473865102675
Epoch 141/252, Loss: 0.058024277073749596
0.7244657336337572
Epoch 151/252, Loss: 0.061913047

[I 2024-01-23 08:19:45,972] Trial 6 finished with value: 0.7321680749029571 and parameters: {'hidden_dim_h': 26, 'dropout': 0.3312762657923331, 'batch_size': 174, 'n_epochs': 252, 'learning_rate': 0.0004424259521257618}. Best is trial 6 with value: 0.7321680749029571.


Found better hyperparameter, update model
Build model with 3 layers of attention
Epoch 1/283, Loss: 0.5825660891003079
0.574282899727411
Epoch 11/283, Loss: 0.481645324991809
0.6177744540518237
Epoch 21/283, Loss: 0.4120524567034509
0.6292855649697908
Epoch 31/283, Loss: 0.361500583589077
0.639642499467817
Epoch 41/283, Loss: 0.32410771151383716
0.6391815068229878
Epoch 51/283, Loss: 0.29612816621859867
0.6597839485864824
Epoch 61/283, Loss: 0.2766990413268407
0.6736755825670522
Epoch 71/283, Loss: 0.2628622183369266
0.6751193706919437
Epoch 81/283, Loss: 0.25227103837662274
0.6850184807862164
Epoch 91/283, Loss: 0.24300996255543497
0.688444459574332
Epoch 101/283, Loss: 0.2356097073190742
0.667008173827768
Epoch 111/283, Loss: 0.2257057237956259
0.6870626541139024
Epoch 121/283, Loss: 0.21557452612453037
0.6944445632061469
Epoch 131/283, Loss: 0.20576785794562763
0.703182422964761
Epoch 141/283, Loss: 0.1954165432188246
0.7030093056011796
Epoch 151/283, Loss: 0.18614518352680737
0.704

[I 2024-01-23 08:22:25,663] Trial 7 finished with value: 0.7161565496891478 and parameters: {'hidden_dim_h': 43, 'dropout': 0.062412939490076313, 'batch_size': 674, 'n_epochs': 283, 'learning_rate': 0.00016278672106400215}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/115, Loss: 2.5335689577563056
0.00195628362246989
Epoch 11/115, Loss: 0.275962528483621
0.008073993528331304
Epoch 21/115, Loss: 0.26490471589154213
0.0018307150985434543
Epoch 31/115, Loss: 0.264835049879962
0.0001762250820693207
Epoch 41/115, Loss: 0.2646073164611027
0.0029710990545776168
Epoch 51/115, Loss: 0.26451463637680844
0.0008994087313450673
Epoch 61/115, Loss: 0.2646727027564213
0.00412999910955061
Epoch 71/115, Loss: 0.2647785744790373
0.0019783200417960644
Epoch 81/115, Loss: 0.26516166380767164
0.0008311773968276565
Epoch 91/115, Loss: 0.2649332685717221
0.00468356511160969
Epoch 101/115, Loss: 0.2642887522434366
0.003556345515009985
Epoch 111/115, Loss: 0.2640827277611042
0.016396005359172718


[I 2024-01-23 08:23:36,648] Trial 8 finished with value: 0.016396005359172718 and parameters: {'hidden_dim_h': 49, 'dropout': 0.3258310960270046, 'batch_size': 830, 'n_epochs': 115, 'learning_rate': 0.005906149941081174}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/279, Loss: 0.1977237829566002
0.5930294213829334
Epoch 11/279, Loss: 0.15589387953281403
0.6485471768608013
Epoch 21/279, Loss: 0.1391051785647869
0.6712045618707407
Epoch 31/279, Loss: 0.12226571023464203
0.689694066034794
Epoch 41/279, Loss: 0.11003571152687072
0.7034257209800285
Epoch 51/279, Loss: 0.09747184917330742
0.7079653157359258
Epoch 61/279, Loss: 0.08719767078757286
0.7168692773041797
Epoch 71/279, Loss: 0.0794942732155323
0.7186742367295536
Epoch 81/279, Loss: 0.07953867062926293
0.7139042289571348
Epoch 91/279, Loss: 0.06866290800273418
0.7255372593678497
Epoch 101/279, Loss: 0.06366414994001389
0.7275934550232649
Epoch 111/279, Loss: 0.06245414532721043
0.7223500505637013
Epoch 121/279, Loss: 0.05823981761932373
0.7291378034926866
Epoch 131/279, Loss: 0.056512833759188655
0.7256718022399681
Epoch 141/279, Loss: 0.05464756786823273
0.7245317339943164
Epoch 151/279, Loss: 0.2745732817053795
3.5081782666450935e-05
Epoch 161/27

[I 2024-01-23 08:26:39,605] Trial 9 finished with value: 0.0007750642851939705 and parameters: {'hidden_dim_h': 49, 'dropout': 0.29706380248573844, 'batch_size': 483, 'n_epochs': 279, 'learning_rate': 0.0003683542591582204}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/210, Loss: 1.5301054332046828
0.24275849963398535
Epoch 11/210, Loss: 1.1972093597116829
0.5577737678731198
Epoch 21/210, Loss: 0.9839668433536546
0.5507282818022006
Epoch 31/210, Loss: 0.8026235111088932
0.5617853918031898
Epoch 41/210, Loss: 0.6482482983976229
0.5684309840372156
Epoch 51/210, Loss: 0.5198547494710739
0.5812059773865711
Epoch 61/210, Loss: 0.4162901008977052
0.5941954630885937
Epoch 71/210, Loss: 0.33629232601391224
0.6027163523437208
Epoch 81/210, Loss: 0.28031507003506856
0.6131699898626489
Epoch 91/210, Loss: 0.24529997505652854
0.6173039520812914
Epoch 101/210, Loss: 0.226023856690738
0.6227992512570416
Epoch 111/210, Loss: 0.21185137917556524
0.6277024933204334
Epoch 121/210, Loss: 0.19922467367918423
0.6291081449019909
Epoch 131/210, Loss: 0.18844935297966003
0.6328319638746468
Epoch 141/210, Loss: 0.17895482265425527
0.637726954650694
Epoch 151/210, Loss: 0.17033195127752535
0.6427073730164354
Epoch 161/210, Loss: 

[I 2024-01-23 08:31:08,414] Trial 10 finished with value: 0.6530962205777873 and parameters: {'hidden_dim_h': 10, 'dropout': 0.15954301830418502, 'batch_size': 100, 'n_epochs': 210, 'learning_rate': 3.24947362619717e-05}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/40, Loss: 2.9093110725797455
0.06673148045084902
Epoch 11/40, Loss: 2.809119808262792
0.4903060034862904
Epoch 21/40, Loss: 2.772854410368821
0.5140246640198097
Epoch 31/40, Loss: 2.7430160538903596
0.5211032248040727


[I 2024-01-23 08:31:25,325] Trial 11 finished with value: 0.5211032248040727 and parameters: {'hidden_dim_h': 19, 'dropout': 0.23849797086891023, 'batch_size': 848, 'n_epochs': 40, 'learning_rate': 1.2767916948095839e-05}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/233, Loss: 0.3559092234733493
0.5684172632726285
Epoch 11/233, Loss: 0.2652839165787364
0.0010616071902894782
Epoch 21/233, Loss: 0.26533910112325537
0.0010429877981194256
Epoch 31/233, Loss: 0.26526101002859515
0.0004563122355863861
Epoch 41/233, Loss: 0.26526409768780995
0.002850531101221537
Epoch 51/233, Loss: 0.26520945547625074
0.001371976310904593
Epoch 61/233, Loss: 0.2650786194690438
0.001290007341433545
Epoch 71/233, Loss: 0.2654106883808624
0.00022371154961367738
Epoch 81/233, Loss: 0.26522019297577615
0.0004515996514137078
Epoch 91/233, Loss: 0.264947522171708
0.0002454772633258135
Epoch 101/233, Loss: 0.26524125628693157
0.0016976115806931638
Epoch 111/233, Loss: 0.264678324377814
0.0005263064246468347
Epoch 121/233, Loss: 0.26482925026915793
0.0021477274810682966
Epoch 131/233, Loss: 0.2649106573919917
0.0027205854426588616
Epoch 141/233, Loss: 0.2648605541434399
0.0011919167967758477
Epoch 151/233, Loss: 0.26418716547101045
0

[I 2024-01-23 08:33:28,758] Trial 12 finished with value: 0.0035531065883653897 and parameters: {'hidden_dim_h': 34, 'dropout': 0.34423648436713805, 'batch_size': 560, 'n_epochs': 233, 'learning_rate': 0.0012866946762227357}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/238, Loss: 0.2735949055901889
0.48142663190507873
Epoch 11/238, Loss: 0.25504770412527283
0.5987525197656817
Epoch 21/238, Loss: 0.24743993785874596
0.6074220231441757
Epoch 31/238, Loss: 0.24092122347190462
0.6241769720967765
Epoch 41/238, Loss: 0.2347971822681098
0.6333661503914602
Epoch 51/238, Loss: 0.22865167619853183
0.641050509188024
Epoch 61/238, Loss: 0.2228032910618289
0.6500394456910072
Epoch 71/238, Loss: 0.2177359220282785
0.6517759447992075
Epoch 81/238, Loss: 0.21254017332504535
0.6529644856454002
Epoch 91/238, Loss: 0.20769667008827472
0.6571860590467908
Epoch 101/238, Loss: 0.2028859232006402
0.6606102491960555
Epoch 111/238, Loss: 0.1984740773151661
0.6580481011978038
Epoch 121/238, Loss: 0.19425350016561047
0.6609370949627876
Epoch 131/238, Loss: 0.18959749824014202
0.6661533225135888
Epoch 141/238, Loss: 0.18571510088854823
0.6682826357542405
Epoch 151/238, Loss: 0.18159063314569407
0.6724928547525028
Epoch 161/238, Los

[I 2024-01-23 08:35:16,719] Trial 13 finished with value: 0.6885815317506884 and parameters: {'hidden_dim_h': 30, 'dropout': 0.26580350080273274, 'batch_size': 829, 'n_epochs': 238, 'learning_rate': 8.484872860644318e-05}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/249, Loss: 1.4418955659866333
0.565307587907034
Epoch 11/249, Loss: 0.6830218529701233
0.5878125567254359
Epoch 21/249, Loss: 0.3564320755004883
0.0002840337714824416
Epoch 31/249, Loss: 0.28136419773101806
3.7050257718328702e-06
Epoch 41/249, Loss: 0.2663614785671234
9.113766477533853e-05
Epoch 51/249, Loss: 0.2646291989088059
0.000782521865125174
Epoch 61/249, Loss: 0.2640005028247833
0.003889414130045298
Epoch 71/249, Loss: 0.2653381872177124
0.0004714384440191904
Epoch 81/249, Loss: 0.2652725303173065
0.0010230846607134161
Epoch 91/249, Loss: 0.264341334104538
0.0012691567291150748
Epoch 101/249, Loss: 0.26457072257995606
0.004614654985603795
Epoch 111/249, Loss: 0.2640287137031555
0.0032924464884199313
Epoch 121/249, Loss: 0.2639226520061493
0.0033116910305649276
Epoch 131/249, Loss: 0.26440369844436645
0.004056069071980739
Epoch 141/249, Loss: 0.26419253706932067
0.0038847515797189734
Epoch 151/249, Loss: 0.264202721118927
0.00432115

[I 2024-01-23 08:36:59,100] Trial 14 finished with value: 0.004198490575350982 and parameters: {'hidden_dim_h': 17, 'dropout': 0.15642867055954884, 'batch_size': 962, 'n_epochs': 249, 'learning_rate': 0.001379482332618753}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/293, Loss: 1.0352464961378198
0.4974656667120992
Epoch 11/293, Loss: 0.9819064532455645
0.546958481654357
Epoch 21/293, Loss: 0.9537328073852941
0.5465544055280785
Epoch 31/293, Loss: 0.9157984492025877
0.5488917224355251
Epoch 41/293, Loss: 0.8801972238641036
0.01886559551857382
Epoch 51/293, Loss: 0.8352311771166953
0.5514815577209341
Epoch 61/293, Loss: 0.8029857622949701
0.5648625566210245
Epoch 71/293, Loss: 0.7645865145482516
0.575254013866132
Epoch 81/293, Loss: 0.7276135369351036
0.5831659998426325
Epoch 91/293, Loss: 0.6914694622943276
0.5943130294982409
Epoch 101/293, Loss: 0.661059131747798
0.5974363506124691
Epoch 111/293, Loss: 0.6261898843865645
0.603836625148372
Epoch 121/293, Loss: 0.6056801300299796
0.6061465093252506
Epoch 131/293, Loss: 0.5731807602079291
0.6107048365575685
Epoch 141/293, Loss: 0.5433714468228189
0.6147062268379905
Epoch 151/293, Loss: 0.5206265676962701
0.6206099275185466
Epoch 161/293, Loss: 0.49496576

[I 2024-01-23 08:39:33,199] Trial 15 finished with value: 0.6589925616168143 and parameters: {'hidden_dim_h': 38, 'dropout': 0.3486661087533102, 'batch_size': 644, 'n_epochs': 293, 'learning_rate': 4.172385090992121e-05}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/179, Loss: 1.1490572428299208
0.5318707069059085
Epoch 11/179, Loss: 0.8164060843192925
0.5478094419094056
Epoch 21/179, Loss: 0.5893873829962843
0.5815610333030631
Epoch 31/179, Loss: 0.4181107105845112
0.6055628793890506
Epoch 41/179, Loss: 0.29357456504288365
0.6175060707959688
Epoch 51/179, Loss: 0.20543449132119196
0.6415807514962915
Epoch 61/179, Loss: 0.14853433330180282
0.6643708014143083
Epoch 71/179, Loss: 0.1147686322614298
0.6789664792904084
Epoch 81/179, Loss: 0.0970720411357233
0.6836817041391424
Epoch 91/179, Loss: 0.09023438343557261
0.6925196912135039
Epoch 101/179, Loss: 0.0868784179121761
0.6941307052042365
Epoch 111/179, Loss: 0.08478346252340381
0.6984877854081959
Epoch 121/179, Loss: 0.0834355797555487
0.7015919538528295
Epoch 131/179, Loss: 0.08245317789457612
0.7014490105224588
Epoch 141/179, Loss: 0.0801497346768945
0.7047726961473314
Epoch 151/179, Loss: 0.07794427480232918
0.707543450734907
Epoch 161/179, Loss: 0

[I 2024-01-23 08:41:05,769] Trial 16 finished with value: 0.7086635527624605 and parameters: {'hidden_dim_h': 28, 'dropout': 0.2870028596202202, 'batch_size': 407, 'n_epochs': 179, 'learning_rate': 0.00018501746135717563}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/253, Loss: 0.21478554737918518
0.0009535789521288843
Epoch 11/253, Loss: 0.2662455312469426
9.814551504980874e-05
Epoch 21/253, Loss: 0.2652537252973108
0.0003173983518297938
Epoch 31/253, Loss: 0.2653770692208234
0.00015322642649076822
Epoch 41/253, Loss: 0.2654041630380294
0.00022694088381734748
Epoch 51/253, Loss: 0.2656025592895115
0.0011459590633595559
Epoch 61/253, Loss: 0.26514724817346125
3.068714785462141e-05
Epoch 71/253, Loss: 0.265416021732723
3.0381385829227135e-05
Epoch 81/253, Loss: 0.26529883593320847
0.0048100577591003325
Epoch 91/253, Loss: 0.2652309072368285
0.0008693734922957167
Epoch 101/253, Loss: 0.2652192847693668
0.00043358572177096966
Epoch 111/253, Loss: 0.26523193673175927
2.448308627627422e-05
Epoch 121/253, Loss: 0.26543963262263465
0.0027685850133739006
Epoch 131/253, Loss: 0.2646326405160567
0.004824950653620119
Epoch 141/253, Loss: 0.2652389933081234
0.001330186859935473
Epoch 151/253, Loss: 0.2644337626064

[I 2024-01-23 08:42:56,828] Trial 17 finished with value: 0.0007706985484153308 and parameters: {'hidden_dim_h': 23, 'dropout': 0.19635181222670234, 'batch_size': 703, 'n_epochs': 253, 'learning_rate': 0.008536256972113352}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/299, Loss: 1.4191928958892823
0.576205062668281
Epoch 11/299, Loss: 0.8542240381240844
0.0006999883964329157
Epoch 21/299, Loss: 0.5340891695022583
0.0007688146722564625
Epoch 31/299, Loss: 0.3625427329540253
0.001755547423917373
Epoch 41/299, Loss: 0.30129335045814515
0.003517130110441756
Epoch 51/299, Loss: 0.2730030608177185
0.0014580229336249126
Epoch 61/299, Loss: 0.26636853098869323
0.002154916034611173
Epoch 71/299, Loss: 0.2670546990633011
6.620091678696957e-05
Epoch 81/299, Loss: 0.2674161946773529
0.00022121796577929603
Epoch 91/299, Loss: 0.2636208862066269
0.00011196688143752814
Epoch 101/299, Loss: 0.2646785855293274
0.0022111348842879476
Epoch 111/299, Loss: 0.2633621245622635
0.0028165344022542385
Epoch 121/299, Loss: 0.26765028238296507
0.0015463946003667457
Epoch 131/299, Loss: 0.2644843566417694
0.00031224766360118185
Epoch 141/299, Loss: 0.2665450930595398
0.0007351660859251723
Epoch 151/299, Loss: 0.26943443179130555
0.

[I 2024-01-23 08:45:32,430] Trial 18 finished with value: 0.0007290710481845191 and parameters: {'hidden_dim_h': 33, 'dropout': 0.211605258178062, 'batch_size': 993, 'n_epochs': 299, 'learning_rate': 0.0008752482048305134}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/207, Loss: 1.6577929649780045
0.5655659457174251
Epoch 11/207, Loss: 1.4769891251378984
0.5495524939133417
Epoch 21/207, Loss: 1.3330333126125051
0.5720514778675698
Epoch 31/207, Loss: 1.199682244613989
0.5795870160560838
Epoch 41/207, Loss: 1.0767775738417213
0.595337927418502
Epoch 51/207, Loss: 0.9617016146432108
0.6045683004440741
Epoch 61/207, Loss: 0.8558156454741065
0.6024343477037312
Epoch 71/207, Loss: 0.7581997504874841
0.6112647662457268
Epoch 81/207, Loss: 0.6677026668591286
0.6222457152681273
Epoch 91/207, Loss: 0.5845980528575271
0.628575694289083
Epoch 101/207, Loss: 0.5097114385953591
0.632181909930921
Epoch 111/207, Loss: 0.44151312646581165
0.6400646100281485
Epoch 121/207, Loss: 0.3801347822395723
0.6453848443200554
Epoch 131/207, Loss: 0.32568792353815107
0.6558519429682994
Epoch 141/207, Loss: 0.27787812365524805
0.6630101938666012
Epoch 151/207, Loss: 0.23701761685200592
0.6670117329040837
Epoch 161/207, Loss: 0.20351

[I 2024-01-23 08:47:46,202] Trial 19 finished with value: 0.6885334224849663 and parameters: {'hidden_dim_h': 41, 'dropout': 0.318040884965266, 'batch_size': 358, 'n_epochs': 207, 'learning_rate': 6.398016383148326e-05}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/174, Loss: 0.5432449195120069
0.003367916489949733
Epoch 11/174, Loss: 0.2642504228485955
0.00917413446161674
Epoch 21/174, Loss: 0.2635947306950887
0.01185751381692741
Epoch 31/174, Loss: 0.2632877720726861
0.006463209825751166
Epoch 41/174, Loss: 0.2649473034673267
0.0005818834606619675
Epoch 51/174, Loss: 0.2647055112653308
0.00012095201772597896
Epoch 61/174, Loss: 0.2646576911211014
0.0018982289522346688
Epoch 71/174, Loss: 0.2650765422317717
0.0009971161695738063
Epoch 81/174, Loss: 0.2643953657812542
0.0025855632945499143
Epoch 91/174, Loss: 0.26440175076325734
0.001030557980013214
Epoch 101/174, Loss: 0.2643864724371168
0.002781096065900541
Epoch 111/174, Loss: 0.26479606959554886
0.00027155150655532316
Epoch 121/174, Loss: 0.2647881660196516
0.002877617579233263
Epoch 131/174, Loss: 0.2645844989352756
0.0005457967029840106
Epoch 141/174, Loss: 0.2644847790400187
0.004245629802331667
Epoch 151/174, Loss: 0.2644355333513684
0.003496

[I 2024-01-23 08:49:00,291] Trial 20 finished with value: 0.0032985319433402006 and parameters: {'hidden_dim_h': 14, 'dropout': 0.24784684384709693, 'batch_size': 536, 'n_epochs': 174, 'learning_rate': 0.0028298892424130423}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/272, Loss: 0.22825060521855073
0.5810568213899652
Epoch 11/272, Loss: 0.19406785333857818
0.6234311353900164
Epoch 21/272, Loss: 0.18427751432446873
0.6365644707384838
Epoch 31/272, Loss: 0.17689724836279364
0.6427665245786349
Epoch 41/272, Loss: 0.16918685155756333
0.6559939662988069
Epoch 51/272, Loss: 0.1606935753541834
0.668020693922326
Epoch 61/272, Loss: 0.26546385446015525
0.0036292991061141616
Epoch 71/272, Loss: 0.2626914246117367
0.001537280111254012
Epoch 81/272, Loss: 0.2630598203224294
0.005238668272482093
Epoch 91/272, Loss: 0.26272953082533446
0.005982340104493419
Epoch 101/272, Loss: 0.26229313892476697
0.009180118912867342
Epoch 111/272, Loss: 0.2636249227558865
0.006517886314813214
Epoch 121/272, Loss: 0.2604014641221832
0.011704829956294967
Epoch 131/272, Loss: 0.25959983292747946
0.01522504561558388
Epoch 141/272, Loss: 0.25574593158329234
0.016379459116224536
Epoch 151/272, Loss: 0.2549608415540527
0.018815485946128967

[I 2024-01-23 08:51:36,630] Trial 21 finished with value: 0.01935506426026652 and parameters: {'hidden_dim_h': 43, 'dropout': 0.056330288226097355, 'batch_size': 709, 'n_epochs': 272, 'learning_rate': 0.00021516286561348334}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/300, Loss: 0.27348766554343074
0.5647822098429568
Epoch 11/300, Loss: 0.20539185051855288
0.5938116499845697
Epoch 21/300, Loss: 0.17718065530061722
0.6086886895655629
Epoch 31/300, Loss: 0.1593451899917502
0.6206290052840268
Epoch 41/300, Loss: 0.14784716069698334
0.6344435051862682
Epoch 51/300, Loss: 0.14403039922839717
0.6368530594071681
Epoch 61/300, Loss: 0.13827426574732127
0.6486012178713333
Epoch 71/300, Loss: 0.1345574022515824
0.6518849368210319
Epoch 81/300, Loss: 0.13191842875982585
0.6568137495976858
Epoch 91/300, Loss: 0.12965425986208415
0.6537964565159218
Epoch 101/300, Loss: 0.12586253450105064
0.6579508181156184
Epoch 111/300, Loss: 0.12268821071637304
0.6626357493169677
Epoch 121/300, Loss: 0.11923233105948097
0.6623860324804397
Epoch 131/300, Loss: 0.11544655243817128
0.6682683194492531
Epoch 141/300, Loss: 0.11197940750341666
0.6705485369810622
Epoch 151/300, Loss: 0.10850173725109351
0.6745013202521243
Epoch 161/300,

[I 2024-01-23 08:53:48,590] Trial 22 finished with value: 0.6185141766254182 and parameters: {'hidden_dim_h': 24, 'dropout': 0.10460781173847945, 'batch_size': 636, 'n_epochs': 300, 'learning_rate': 0.00013744221779672143}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/262, Loss: 1.030371418222785
0.5623926768065336
Epoch 11/262, Loss: 0.6054773218929768
0.573867694264846
Epoch 21/262, Loss: 0.40646678395569324
0.3443087643661435
Epoch 31/262, Loss: 0.3015893967822194
0.00018896431260404686
Epoch 41/262, Loss: 0.2716337516903877
0.00014288588003418605
Epoch 51/262, Loss: 0.2653652182780206
2.9759379566482063e-05
Epoch 61/262, Loss: 0.2643978507257998
0.0026742666185520297
Epoch 71/262, Loss: 0.26518545486032963
0.0038580995596419984
Epoch 81/262, Loss: 0.26452733809128404
0.0021348516260888163
Epoch 91/262, Loss: 0.2642447059042752
0.0018262954415106001
Epoch 101/262, Loss: 0.2649744818918407
0.00038190032369116026
Epoch 111/262, Loss: 0.26366060972213745
0.0036870330453473666
Epoch 121/262, Loss: 0.2649481389671564
0.005227606056772516
Epoch 131/262, Loss: 0.2647259789519012
0.005374178119728088
Epoch 141/262, Loss: 0.2645644792355597
0.003406569245920773
Epoch 151/262, Loss: 0.2659157537855208
0.000233

[I 2024-01-23 08:56:07,918] Trial 23 finished with value: 0.004618355252326637 and parameters: {'hidden_dim_h': 39, 'dropout': 0.05884679793843805, 'batch_size': 765, 'n_epochs': 262, 'learning_rate': 0.000699774400310937}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/230, Loss: 0.32168391117682826
0.5635339824777237
Epoch 11/230, Loss: 0.24299166122308144
0.6158901146482838
Epoch 21/230, Loss: 0.21281074445981246
0.6449407962878524
Epoch 31/230, Loss: 0.19660135645132798
0.6478538955878285
Epoch 41/230, Loss: 0.18583563944468132
0.6589707088633201
Epoch 51/230, Loss: 0.17703582461063677
0.6701476994103511
Epoch 61/230, Loss: 0.17084142164542124
0.6658389921306876
Epoch 71/230, Loss: 0.1643720681850727
0.676525865408478
Epoch 81/230, Loss: 0.16162865265057638
0.6555573728009567
Epoch 91/230, Loss: 0.1530048967554019
0.6751167527128826
Epoch 101/230, Loss: 0.14496187693797624
0.6848994640592724
Epoch 111/230, Loss: 0.13827471521038276
0.6879048819959726
Epoch 121/230, Loss: 0.13540969445155218
0.6672935454048088
Epoch 131/230, Loss: 0.1268851966238939
0.6838267775628721
Epoch 141/230, Loss: 0.1215011950295705
0.6873006178023857
Epoch 151/230, Loss: 0.26309118878382903
0.013767617632345618
Epoch 161/230, 

[I 2024-01-23 08:57:53,998] Trial 24 finished with value: 0.011977012211180842 and parameters: {'hidden_dim_h': 32, 'dropout': 0.15096219527936838, 'batch_size': 946, 'n_epochs': 230, 'learning_rate': 0.00023515305841947845}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/284, Loss: 2.492584385522982
0.5673672488415972
Epoch 11/284, Loss: 2.380877070310639
0.5282459557334399
Epoch 21/284, Loss: 2.2834810222067485
0.5203110599536583
Epoch 31/284, Loss: 2.190358272412928
0.5348725716979705
Epoch 41/284, Loss: 2.100803003078554
0.5418130988267787
Epoch 51/284, Loss: 2.0136095808773504
0.5480416112492849
Epoch 61/284, Loss: 1.9299253428854592
0.5612183714869569
Epoch 71/284, Loss: 1.848835363620665
0.565041936909833
Epoch 81/284, Loss: 1.769739988373547
0.5714762824984533
Epoch 91/284, Loss: 1.6843595010478323
0.00048225747876701365
Epoch 101/284, Loss: 1.6011288020668961
0.0015082755651732833
Epoch 111/284, Loss: 1.5210356188983452
0.0014384622728692202
Epoch 121/284, Loss: 1.445500705300308
0.007747462759322874
Epoch 131/284, Loss: 1.3725570294915177
0.011503715662808266
Epoch 141/284, Loss: 1.301788644092839
0.024824316432369136
Epoch 151/284, Loss: 1.2331814853156484
0.04439594925470987
Epoch 161/284, Loss:

[I 2024-01-23 09:00:40,418] Trial 25 finished with value: 0.6169049983190792 and parameters: {'hidden_dim_h': 45, 'dropout': 0.11666284428286088, 'batch_size': 584, 'n_epochs': 284, 'learning_rate': 5.75122368366984e-05}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/259, Loss: 0.28106353234271614
0.3365614869242797
Epoch 11/259, Loss: 0.2527162320151621
0.5873428488634748
Epoch 21/259, Loss: 0.24993331852007886
0.589061046617945
Epoch 31/259, Loss: 0.24692269673152845
0.5922970130695772
Epoch 41/259, Loss: 0.24431823437311212
0.5947538538610087
Epoch 51/259, Loss: 0.2415402966494463
0.5987565274356295
Epoch 61/259, Loss: 0.23930658248006081
0.6033849840578094
Epoch 71/259, Loss: 0.23718322022837035
0.6061210893004545
Epoch 81/259, Loss: 0.23430937346147032
0.6101211088117747
Epoch 91/259, Loss: 0.23234401126297152
0.6126100284269066
Epoch 101/259, Loss: 0.2298921428772868
0.6164536344257016
Epoch 111/259, Loss: 0.2275094216575428
0.6194009562919719
Epoch 121/259, Loss: 0.22492844565790526
0.6235138371346466
Epoch 131/259, Loss: 0.22293246126904778
0.6263566589248036
Epoch 141/259, Loss: 0.22076453846328112
0.6282748878430626
Epoch 151/259, Loss: 0.21853521952823718
0.6310408602643853
Epoch 161/259, Lo

[I 2024-01-23 09:02:48,042] Trial 26 finished with value: 0.6425802501816867 and parameters: {'hidden_dim_h': 28, 'dropout': 0.18028915314849067, 'batch_size': 493, 'n_epochs': 259, 'learning_rate': 1.932683739542185e-05}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/229, Loss: 0.4381159433221395
0.5779473195630861
Epoch 11/229, Loss: 0.28280471238942273
0.6293274622719524
Epoch 21/229, Loss: 0.23261282810595182
0.6381365855257711
Epoch 31/229, Loss: 0.20941730872192213
0.6593116102832912
Epoch 41/229, Loss: 0.18659428652145166
0.6637268316716102
Epoch 51/229, Loss: 0.16596529744895158
0.6717478516139949
Epoch 61/229, Loss: 0.14845634416668815
0.6779911051448132
Epoch 71/229, Loss: 0.13150603238460237
0.6820476953837149
Epoch 81/229, Loss: 0.11893782758079799
0.6853216008152129
Epoch 91/229, Loss: 0.10716013940034715
0.6924143920720544
Epoch 101/229, Loss: 0.10061331488917359
0.6981165750100574
Epoch 111/229, Loss: 0.09189662963679407
0.7053138708767879
Epoch 121/229, Loss: 0.08623842492831492
0.7029852162462285
Epoch 131/229, Loss: 0.07998378756167614
0.7034771675116771
Epoch 141/229, Loss: 0.07705721529447927
0.7101521318579317
Epoch 151/229, Loss: 0.07406755691740365
0.7122255149745333
Epoch 161/229

[I 2024-01-23 09:05:32,304] Trial 27 finished with value: 0.7269946562540881 and parameters: {'hidden_dim_h': 36, 'dropout': 0.27844253803844543, 'batch_size': 213, 'n_epochs': 229, 'learning_rate': 0.00014573147736626855}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/193, Loss: 1.056277184873014
0.538830076886982
Epoch 11/193, Loss: 0.2748328839000818
0.0010183330458535802
Epoch 21/193, Loss: 0.2651766495326081
9.287365968604331e-07
Epoch 31/193, Loss: 0.2651361468675974
2.5647990409200453e-05
Epoch 41/193, Loss: 0.26445767796925596
0.0012291644816078207
Epoch 51/193, Loss: 0.26410035768876206
0.00015541458082395633
Epoch 61/193, Loss: 0.2642036862671375
1.8022838576724953e-06
Epoch 71/193, Loss: 0.26411690653578657
0.0014028107928071353
Epoch 81/193, Loss: 0.26277818561003013
0.0035544713507724484
Epoch 91/193, Loss: 0.26383130806120664
0.0032009132986314864
Epoch 101/193, Loss: 0.26386120190491547
3.511372537187521e-07
Epoch 111/193, Loss: 0.263272207129646
0.005112471164755057
Epoch 121/193, Loss: 0.2633361031074782
0.001880097803884272
Epoch 131/193, Loss: 0.26276648719165774
0.0006797918567995919
Epoch 141/193, Loss: 0.26304609751379165
0.003960944625943696
Epoch 151/193, Loss: 0.26241791177843066

[I 2024-01-23 09:08:16,866] Trial 28 finished with value: 0.004208449577726561 and parameters: {'hidden_dim_h': 36, 'dropout': 0.28068958927671395, 'batch_size': 162, 'n_epochs': 193, 'learning_rate': 0.000626582416651316}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/157, Loss: 0.8248024293042104
0.5458429906573524
Epoch 11/157, Loss: 0.5171983753322461
0.6056067746518915
Epoch 21/157, Loss: 0.32385862406787524
0.6320243764730429
Epoch 31/157, Loss: 0.20614591199870502
0.6477535170871261
Epoch 41/157, Loss: 0.14226948346840132
0.6666266669069864
Epoch 51/157, Loss: 0.11606756549909575
0.6774846211939606
Epoch 61/157, Loss: 0.10682825033270985
0.6831977554397679
Epoch 71/157, Loss: 0.10380647222109891
0.6873093704107162
Epoch 81/157, Loss: 0.09914956043619629
0.6947416436539685
Epoch 91/157, Loss: 0.09444313803944018
0.6952085995722407
Epoch 101/157, Loss: 0.0915666830238946
0.6977171827834827
Epoch 111/157, Loss: 0.08684240889932038
0.7029460057381841
Epoch 121/157, Loss: 0.08435567345367659
0.7020136537724267
Epoch 131/157, Loss: 0.0822551623670333
0.7067298135433422
Epoch 141/157, Loss: 0.08060530494522611
0.7110704877061539
Epoch 151/157, Loss: 0.07788066501054194
0.7134442211872293


[I 2024-01-23 09:10:08,000] Trial 29 finished with value: 0.7134442211872293 and parameters: {'hidden_dim_h': 36, 'dropout': 0.26456254676198054, 'batch_size': 220, 'n_epochs': 157, 'learning_rate': 0.00012834243186925624}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/225, Loss: 0.5548697528482853
0.5836637869706557
Epoch 11/225, Loss: 0.4037720687773036
0.6073922607111347
Epoch 21/225, Loss: 0.30624649082792216
0.6244011231676069
Epoch 31/225, Loss: 0.23783296192514486
0.6361847162053504
Epoch 41/225, Loss: 0.19240307139939275
0.6475212783201209
Epoch 51/225, Loss: 0.16492204042686814
0.6578414797827573
Epoch 61/225, Loss: 0.15133959791441073
0.6627056498387556
Epoch 71/225, Loss: 0.14258890033795915
0.6661515327138126
Epoch 81/225, Loss: 0.13715795992777266
0.6726988645459864
Epoch 91/225, Loss: 0.1320096266338195
0.6760236449640075
Epoch 101/225, Loss: 0.12607249292148942
0.6758605869116321
Epoch 111/225, Loss: 0.12018048823222346
0.6791045401788768
Epoch 121/225, Loss: 0.11412069972219138
0.6821560994785453
Epoch 131/225, Loss: 0.10877665628989537
0.6843172271840059
Epoch 141/225, Loss: 0.10442768491205127
0.6877878022637613
Epoch 151/225, Loss: 0.09906439166301968
0.6911772691487413
Epoch 161/225, 

[I 2024-01-23 09:12:24,979] Trial 30 finished with value: 0.7069639124734833 and parameters: {'hidden_dim_h': 31, 'dropout': 0.312423412307248, 'batch_size': 276, 'n_epochs': 225, 'learning_rate': 9.369761314774037e-05}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/279, Loss: 0.5319468195239703
0.5600732625133099
Epoch 11/279, Loss: 0.3801539530356725
0.5985107595494171
Epoch 21/279, Loss: 0.2866353844602903
0.6249493013396794
Epoch 31/279, Loss: 0.22691820661226908
0.6364876775974481
Epoch 41/279, Loss: 0.19292346363266308
0.6497928469926831
Epoch 51/279, Loss: 0.17518426080544788
0.6517595346226415
Epoch 61/279, Loss: 0.1650541769961516
0.6596711265927996
Epoch 71/279, Loss: 0.15870805233716964
0.6625920178183182
Epoch 81/279, Loss: 0.15186525757114092
0.6647635062719056
Epoch 91/279, Loss: 0.14654248331983885
0.6709719721999353
Epoch 101/279, Loss: 0.13909279567499955
0.6758754446618918
Epoch 111/279, Loss: 0.13090300597250462
0.6875398696558709
Epoch 121/279, Loss: 0.12229697108268738
0.6931379848508497
Epoch 131/279, Loss: 0.11489681663612525
0.694387116704467
Epoch 141/279, Loss: 0.10861709415912628
0.6996510478803105
Epoch 151/279, Loss: 0.10170226469635964
0.7065161482844725
Epoch 161/279, Lo

[I 2024-01-23 09:15:07,341] Trial 31 finished with value: 0.7260556472001223 and parameters: {'hidden_dim_h': 39, 'dropout': 0.22966358786610375, 'batch_size': 404, 'n_epochs': 279, 'learning_rate': 0.00015980723339466922}. Best is trial 6 with value: 0.7321680749029571.


Build model with 3 layers of attention
Epoch 1/252, Loss: 0.420120186863407
0.5602514746905091
Epoch 11/252, Loss: 0.19592546214980464
0.6299751199607088
Epoch 21/252, Loss: 0.1559482791250752
0.6564560499883811
Epoch 31/252, Loss: 0.14168035803783324
0.6644665819565917
Epoch 41/252, Loss: 0.13423127656982792
0.6655231143136581
Epoch 51/252, Loss: 0.11969016552452118
0.6752796723830164
Epoch 61/252, Loss: 0.10572615938801919
0.6952197235080361
Epoch 71/252, Loss: 0.09708235112409438
0.6952524974832014
Epoch 81/252, Loss: 0.08758862004164726
0.7119220447287166
Epoch 91/252, Loss: 0.07797201831013925
0.7146702297023423
Epoch 101/252, Loss: 0.07267267317060501
0.7219606308772236
Epoch 111/252, Loss: 0.06805306823263245
0.7180505231090397
Epoch 121/252, Loss: 0.06924796693267361
0.7156246648857254
Epoch 131/252, Loss: 0.06710708982521488
0.7271234787506059
Epoch 141/252, Loss: 0.059274894756174853
0.7343238622430598
Epoch 151/252, Loss: 0.06473921455683247
0.7261662523442625
Epoch 161/252,

[I 2024-01-23 09:17:36,832] Trial 32 finished with value: 0.735869700940863 and parameters: {'hidden_dim_h': 39, 'dropout': 0.22548915994336155, 'batch_size': 391, 'n_epochs': 252, 'learning_rate': 0.00040760326902778793}. Best is trial 32 with value: 0.735869700940863.


Found better hyperparameter, update model
Build model with 3 layers of attention
Epoch 1/245, Loss: 2.0138760590162432
0.5631020251785067
Epoch 11/245, Loss: 1.0603836696656024
0.5882052001509794
Epoch 21/245, Loss: 0.527906920577659
0.618237269127995
Epoch 31/245, Loss: 0.2665057419264903
0.6640486359817503
Epoch 41/245, Loss: 0.16256861974958514
0.6661915665810462
Epoch 51/245, Loss: 0.130238556226746
0.674209096155905
Epoch 61/245, Loss: 0.1233379592905279
0.6797561435007817
Epoch 71/245, Loss: 0.1153412679912614
0.6890189523083008
Epoch 81/245, Loss: 0.10727526662779636
0.6986074506549356
Epoch 91/245, Loss: 0.2673265015492674
0.0013035644680485768
Epoch 101/245, Loss: 0.2669936401433632
0.003212996464674156
Epoch 111/245, Loss: 0.2673961493324061
0.0030322803496575634
Epoch 121/245, Loss: 0.2664600004915331
0.0008346842138748206
Epoch 131/245, Loss: 0.2658524711112507
0.0018050433746710406
Epoch 141/245, Loss: 0.26643919187491055
3.7237964002537006e-05
Epoch 151/245, Loss: 0.26577

[I 2024-01-23 09:20:00,329] Trial 33 finished with value: 0.0016457019572732828 and parameters: {'hidden_dim_h': 39, 'dropout': 0.22866278324359052, 'batch_size': 393, 'n_epochs': 245, 'learning_rate': 0.00048040984470997715}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/220, Loss: 0.2968337474146275
0.5959418997084502
Epoch 11/220, Loss: 0.26394886862147937
0.0012196783098965334
Epoch 21/220, Loss: 0.26462186617080613
0.0031485285253817627
Epoch 31/220, Loss: 0.2642828855249617
2.5650632975225528e-05
Epoch 41/220, Loss: 0.26478691534562543
0.0013776143036569077
Epoch 51/220, Loss: 0.2641433310328108
0.00136393203781701
Epoch 61/220, Loss: 0.26415695340344403
0.0015326721593367084
Epoch 71/220, Loss: 0.26483500169383156
0.00020501371488043142
Epoch 81/220, Loss: 0.2647967499614966
6.543088206891242e-06
Epoch 91/220, Loss: 0.26449573235680357
0.001910931468676614
Epoch 101/220, Loss: 0.26516088662725507
9.97777710620627e-08
Epoch 111/220, Loss: 0.2647774320359182
0.00020102789024044408
Epoch 121/220, Loss: 0.264814785935662
0.0009956142377207887
Epoch 131/220, Loss: 0.26482929274289296
4.8894076547876846e-05
Epoch 141/220, Loss: 0.2652742409645909
0.0020334540652618277
Epoch 151/220, Loss: 0.264844285117255

[I 2024-01-23 09:22:31,038] Trial 34 finished with value: 0.0037112287931952478 and parameters: {'hidden_dim_h': 36, 'dropout': 0.23131050410211593, 'batch_size': 242, 'n_epochs': 220, 'learning_rate': 0.0012784729502292513}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/192, Loss: 1.6846208470208304
0.5447131660892557
Epoch 11/192, Loss: 0.9390778703348978
0.5976100303996074
Epoch 21/192, Loss: 0.49792703092098234
0.6367837813918634
Epoch 31/192, Loss: 0.26066737515585764
0.6684241350940163
Epoch 41/192, Loss: 0.15414138351167953
0.6901473012757828
Epoch 51/192, Loss: 0.11444519736937114
0.7008403089364214
Epoch 61/192, Loss: 0.10426135701792581
0.6966106113617422
Epoch 71/192, Loss: 0.09945444007005011
0.7047813851519492
Epoch 81/192, Loss: 0.09165485809956278
0.7179358401163846
Epoch 91/192, Loss: 0.08672140570623534
0.7096034214577327
Epoch 101/192, Loss: 0.079059970166002
0.7186580477742461
Epoch 111/192, Loss: 0.07206390627792904
0.7201081003262624
Epoch 121/192, Loss: 0.06608063408306666
0.7251050178128862
Epoch 131/192, Loss: 0.06283549160829612
0.7184411645068876
Epoch 141/192, Loss: 0.05902986739362989
0.7183751951328956
Epoch 151/192, Loss: 0.055443543355379786
0.7198643515764777
Epoch 161/192, 

[I 2024-01-23 09:24:38,717] Trial 35 finished with value: 0.7189072588972484 and parameters: {'hidden_dim_h': 46, 'dropout': 0.2699743495426901, 'batch_size': 343, 'n_epochs': 192, 'learning_rate': 0.00034808806981486833}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/139, Loss: 0.36401541218871164
0.5980899938042106
Epoch 11/139, Loss: 0.17982274464198522
0.646722088281565
Epoch 21/139, Loss: 0.13030665570071764
0.669037553388103
Epoch 31/139, Loss: 0.09965514178786959
0.6923886460107926
Epoch 41/139, Loss: 0.08467922813835599
0.702035698510336
Epoch 51/139, Loss: 0.07767785370704673
0.7072040369622313
Epoch 61/139, Loss: 0.07261098664076555
0.7094511476630333
Epoch 71/139, Loss: 0.07042393535375595
0.7166679397235592
Epoch 81/139, Loss: 0.06732029590223516
0.7176625726778265
Epoch 91/139, Loss: 0.06469475380366757
0.724325724629975
Epoch 101/139, Loss: 0.062312290959414984
0.7236508926916355
Epoch 111/139, Loss: 0.061105016406093324
0.722741346754673
Epoch 121/139, Loss: 0.05919888101163365
0.7225786089539209
Epoch 131/139, Loss: 0.05750587263277599
0.7152510211006118


[I 2024-01-23 09:27:17,951] Trial 36 finished with value: 0.7152510211006118 and parameters: {'hidden_dim_h': 24, 'dropout': 0.24821141425297047, 'batch_size': 114, 'n_epochs': 139, 'learning_rate': 0.00028499850247870156}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/266, Loss: 0.8620514495898102
0.5411868949561356
Epoch 11/266, Loss: 0.24807021287033112
0.6368948553985183
Epoch 21/266, Loss: 0.26335591456647645
0.0004840889566664366
Epoch 31/266, Loss: 0.26279211410526504
0.003741496989555066
Epoch 41/266, Loss: 0.2636177267058421
0.0008594053618241914
Epoch 51/266, Loss: 0.2630235441660477
0.0017620020461287542
Epoch 61/266, Loss: 0.26358287175328043
0.0005201412977166496
Epoch 71/266, Loss: 0.2630234143491519
0.004960721266998875
Epoch 81/266, Loss: 0.2631607192047572
0.005661348236667715
Epoch 91/266, Loss: 0.2626278891654338
0.0025997781235868912
Epoch 101/266, Loss: 0.26391195695279007
6.377658254921854e-08
Epoch 111/266, Loss: 0.263716160745944
0.003520290753477293
Epoch 121/266, Loss: 0.2627178695747408
0.00026920464050696376
Epoch 131/266, Loss: 0.26291084251666474
0.0006266448049987941
Epoch 141/266, Loss: 0.26244509156982776
0.0015899076238759388
Epoch 151/266, Loss: 0.26247164948006807
0.00

[I 2024-01-23 09:30:42,842] Trial 37 finished with value: 0.003031039115904061 and parameters: {'hidden_dim_h': 41, 'dropout': 0.21584200470101306, 'batch_size': 203, 'n_epochs': 266, 'learning_rate': 0.0005228330912876873}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/54, Loss: 0.4318383812904358
0.5915551603060116
Epoch 11/54, Loss: 0.2646643289110877
0.0013025835883268132
Epoch 21/54, Loss: 0.2646439714865251
0.004464477343484298
Epoch 31/54, Loss: 0.2647139484232122
0.0021794636837298926
Epoch 41/54, Loss: 0.2647622788494283
0.002404910068867992
Epoch 51/54, Loss: 0.26449261199344287
0.005726626301272029


[I 2024-01-23 09:31:09,842] Trial 38 finished with value: 0.005726626301272029 and parameters: {'hidden_dim_h': 26, 'dropout': 0.30526248837069087, 'batch_size': 435, 'n_epochs': 54, 'learning_rate': 0.002260026352644526}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/215, Loss: 0.6042437179883321
0.56855246124153
Epoch 11/215, Loss: 0.4819310216108958
0.5744156041405911
Epoch 21/215, Loss: 0.40299400766690574
0.5845644661539598
Epoch 31/215, Loss: 0.35065007090568545
0.5879243323121496
Epoch 41/215, Loss: 0.2859564435482025
0.5810001407608248
Epoch 51/215, Loss: 0.24690272510051728
0.6026540359834198
Epoch 61/215, Loss: 0.22643932779630024
0.6255078233053807
Epoch 71/215, Loss: 0.21511283040046691
0.6402911085717645
Epoch 81/215, Loss: 0.20650709172089896
0.6443967061842472
Epoch 91/215, Loss: 0.19762380242347719
0.6526416150137061
Epoch 101/215, Loss: 0.18857099036375682
0.6540187889052522
Epoch 111/215, Loss: 0.1787739098072052
0.6609366213412019
Epoch 121/215, Loss: 0.16904547890027363
0.658214532452451
Epoch 131/215, Loss: 0.15996005455652873
0.6649463105681982
Epoch 141/215, Loss: 0.1511890866359075
0.6675135542291494
Epoch 151/215, Loss: 0.14345971405506133
0.6703115647867359
Epoch 161/215, Loss:

[I 2024-01-23 09:33:04,451] Trial 39 finished with value: 0.6792358562692914 and parameters: {'hidden_dim_h': 20, 'dropout': 0.18993891898433468, 'batch_size': 320, 'n_epochs': 215, 'learning_rate': 0.00011078603024265119}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/242, Loss: 0.30219987836377377
0.5890422883559175
Epoch 11/242, Loss: 0.2710975553455024
5.405226514111679e-08
Epoch 21/242, Loss: 0.2686536357320588
0.0010295150965257064
Epoch 31/242, Loss: 0.26683701410375793
0.0001072621923508609
Epoch 41/242, Loss: 0.2638469253120751
0.0032994336037507907
Epoch 51/242, Loss: 0.26507300436496734
0.0010665222048132652
Epoch 61/242, Loss: 0.26568419686679184
0.00012748288136863934
Epoch 71/242, Loss: 0.265649463082182
0.000972511553978817
Epoch 81/242, Loss: 0.2641692403061637
0.0005048017325909052
Epoch 91/242, Loss: 0.2642256168455913
0.0015374968476981094
Epoch 101/242, Loss: 0.2641108091535239
0.0004335556969680856
Epoch 111/242, Loss: 0.26429375327866655
0.0019641780671431426
Epoch 121/242, Loss: 0.2644649935179743
0.0035476425297245426
Epoch 131/242, Loss: 0.2645600380568669
0.00468269703385878
Epoch 141/242, Loss: 0.26407060057952486
2.9994772460163005e-05
Epoch 151/242, Loss: 0.2642318752305261
0

[I 2024-01-23 09:36:28,259] Trial 40 finished with value: 0.0016875981261806447 and parameters: {'hidden_dim_h': 34, 'dropout': 0.33665586423848554, 'batch_size': 165, 'n_epochs': 242, 'learning_rate': 0.0009546968488976103}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/197, Loss: 1.3445422375524365
0.5452880614459172
Epoch 11/197, Loss: 0.7526080922500508
0.0008691666607357707
Epoch 21/197, Loss: 0.4451802924678132
0.0013530858855065225
Epoch 31/197, Loss: 0.31355818622821086
4.45679972050111e-05
Epoch 41/197, Loss: 0.2735510234897201
0.00219482915941301
Epoch 51/197, Loss: 0.26509905546098145
0.0003495976869256719
Epoch 61/197, Loss: 0.264208291229364
0.001642718763189728
Epoch 71/197, Loss: 0.2641060839633684
0.0039426077846137445
Epoch 81/197, Loss: 0.2642246335744858
0.0015807266860917392
Epoch 91/197, Loss: 0.26492548935316707
1.9963271835705464e-05
Epoch 101/197, Loss: 0.2644119341228459
0.0001796763521993803
Epoch 111/197, Loss: 0.26396913826465607
0.0034614434705547924
Epoch 121/197, Loss: 0.26334185052562403
0.002038454089591666
Epoch 131/197, Loss: 0.26397955276676127
0.0011153092479629797
Epoch 141/197, Loss: 0.2630641975918332
0.0018869387790145422
Epoch 151/197, Loss: 0.2631176998083656
0.00

[I 2024-01-23 09:38:42,788] Trial 41 finished with value: 0.0001789032556944285 and parameters: {'hidden_dim_h': 47, 'dropout': 0.27666752642808345, 'batch_size': 323, 'n_epochs': 197, 'learning_rate': 0.0003278778527098577}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/188, Loss: 0.40411799140723353
0.5858166636745088
Epoch 11/188, Loss: 0.18000830799700265
0.6569908158673652
Epoch 21/188, Loss: 0.15414779175476856
0.6548207453275507
Epoch 31/188, Loss: 0.1356396823223815
0.6842925760538079
Epoch 41/188, Loss: 0.12027765433472323
0.6914719063716898
Epoch 51/188, Loss: 0.10441641410790294
0.6992041063657626
Epoch 61/188, Loss: 0.09160740565822785
0.7024899214990873
Epoch 71/188, Loss: 0.0800753360591739
0.7102299157449383
Epoch 81/188, Loss: 0.07302686890744302
0.7115052933606045
Epoch 91/188, Loss: 0.07413152385368404
0.7117379943206804
Epoch 101/188, Loss: 0.06332331850945232
0.720545039730359
Epoch 111/188, Loss: 0.273296893181571
0.0056252839967365935
Epoch 121/188, Loss: 0.2702596695442875
0.0016464046630543135
Epoch 131/188, Loss: 0.2726371740720358
0.0003993666171547674
Epoch 141/188, Loss: 0.2709925409900137
0.005740902905126467
Epoch 151/188, Loss: 0.2704172166715185
0.0019644046835566686
Epoch 1

[I 2024-01-23 09:40:54,818] Trial 42 finished with value: 9.678546528968646e-05 and parameters: {'hidden_dim_h': 46, 'dropout': 0.25350885729520845, 'batch_size': 291, 'n_epochs': 188, 'learning_rate': 0.00038675984009643683}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/280, Loss: 0.20132789100116155
0.5840368669027876
Epoch 11/280, Loss: 0.16631907112193559
0.6315193495740039
Epoch 21/280, Loss: 0.15234765297961686
0.6503051041856575
Epoch 31/280, Loss: 0.20408133487656432
0.31211326422751506
Epoch 41/280, Loss: 0.27018976886317414
0.0013033891413621044
Epoch 51/280, Loss: 0.26711956566234807
0.002000681789955984
Epoch 61/280, Loss: 0.2641292422447564
0.0059201798186091885
Epoch 71/280, Loss: 0.26272704353872334
0.0074262682945053626
Epoch 81/280, Loss: 0.26354863339999934
0.010865596892474655
Epoch 91/280, Loss: 0.26384599000777836
0.004374460202438766
Epoch 101/280, Loss: 0.26120525739102995
0.007805144291856482
Epoch 111/280, Loss: 0.26166476927838234
0.006960218568253994
Epoch 121/280, Loss: 0.2620727551995583
0.007934521529956384
Epoch 131/280, Loss: 0.2619898648194547
0.0085575088936727
Epoch 141/280, Loss: 0.2626292146039459
0.008278044614950773
Epoch 151/280, Loss: 0.2637940842025685
0.0016476332

[I 2024-01-23 09:43:56,872] Trial 43 finished with value: 0.00993921251907562 and parameters: {'hidden_dim_h': 50, 'dropout': 0.2976689052690173, 'batch_size': 457, 'n_epochs': 280, 'learning_rate': 0.0001679230212987706}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/224, Loss: 0.4834523657336831
0.5702914186135644
Epoch 11/224, Loss: 0.26918957591988146
0.6503720044841109
Epoch 21/224, Loss: 0.27768779755569994
0.00010967550286966011
Epoch 31/224, Loss: 0.26536340778693557
0.0009348932009370274
Epoch 41/224, Loss: 0.26450538937933743
0.001951318855577935
Epoch 51/224, Loss: 0.26347872242331505
0.001176739476133336
Epoch 61/224, Loss: 0.2621887547429651
0.004390442358583749
Epoch 71/224, Loss: 0.26212438754737377
0.005263836866369229
Epoch 81/224, Loss: 0.2620135813485831
0.001313523912056403
Epoch 91/224, Loss: 0.26245846203528345
0.0005312618940871528
Epoch 101/224, Loss: 0.26264024106785655
0.0034420644591828193
Epoch 111/224, Loss: 0.2617175383493304
0.002615909003902981
Epoch 121/224, Loss: 0.26174792810343206
0.005026807166592096
Epoch 131/224, Loss: 0.26198718743398786
0.009746626715783187
Epoch 141/224, Loss: 0.2620281921699643
0.00337453274740098
Epoch 151/224, Loss: 0.26285555795766413
0.0058

[I 2024-01-23 09:46:17,851] Trial 44 finished with value: 0.011413952415481118 and parameters: {'hidden_dim_h': 41, 'dropout': 0.22183737109270796, 'batch_size': 374, 'n_epochs': 224, 'learning_rate': 0.0002891901477928048}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/238, Loss: 0.22618289589881896
0.5712174176298767
Epoch 11/238, Loss: 0.15386606603860856
0.6244951053647785
Epoch 21/238, Loss: 0.13301359824836254
0.6434714849625036
Epoch 31/238, Loss: 0.1247731514275074
0.656048427343824
Epoch 41/238, Loss: 0.12122629098594188
0.6678121918670291
Epoch 51/238, Loss: 0.11633003205060959
0.6598513791918674
Epoch 61/238, Loss: 0.11085006445646287
0.676192844277278
Epoch 71/238, Loss: 0.10959621630609036
0.6753407250261834
Epoch 81/238, Loss: 0.10330209620296955
0.6809939541378753
Epoch 91/238, Loss: 0.09778980873525142
0.6918835185979392
Epoch 101/238, Loss: 0.09282382503151894
0.6944869090291653
Epoch 111/238, Loss: 0.08844304047524928
0.7008613477823006
Epoch 121/238, Loss: 0.08397854901850224
0.7028936214254232
Epoch 131/238, Loss: 0.08189337886869907
0.7018102851665994
Epoch 141/238, Loss: 0.07649771496653557
0.7060447339468459
Epoch 151/238, Loss: 0.07307582311332225
0.7093182452766684
Epoch 161/238, 

[I 2024-01-23 09:48:30,169] Trial 45 finished with value: 0.7247142201279388 and parameters: {'hidden_dim_h': 44, 'dropout': 0.2725501903354426, 'batch_size': 1194, 'n_epochs': 238, 'learning_rate': 0.00044711797721777363}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/257, Loss: 2.085735691560281
0.5313950933488085
Epoch 11/257, Loss: 1.5815757848121024
0.5557762720268498
Epoch 21/257, Loss: 1.1700316632116163
0.5931015015877981
Epoch 31/257, Loss: 0.8354319118164681
0.6061437027432447
Epoch 41/257, Loss: 0.5715608920599963
0.6330753396541098
Epoch 51/257, Loss: 0.3756124673662959
0.6636621110657287
Epoch 61/257, Loss: 0.27995782663693297
0.5220601470839185
Epoch 71/257, Loss: 0.1723743261517705
0.6445476078853158
Epoch 81/257, Loss: 0.13726076769667703
0.6613525347904949
Epoch 91/257, Loss: 0.12805694043636323
0.6731053712978247
Epoch 101/257, Loss: 0.11839831427142426
0.6839059276130564
Epoch 111/257, Loss: 0.11006197844808166
0.698156007232118
Epoch 121/257, Loss: 0.10321001849464469
0.6953006947983479
Epoch 131/257, Loss: 0.09664467615452972
0.7081226261009294
Epoch 141/257, Loss: 0.09322579811150963
0.7106342251537896
Epoch 151/257, Loss: 0.10439317836552053
0.42824821519066025
Epoch 161/257, Loss:

[I 2024-01-23 09:52:57,514] Trial 46 finished with value: 0.6922693440424444 and parameters: {'hidden_dim_h': 44, 'dropout': 0.24172199373891584, 'batch_size': 129, 'n_epochs': 257, 'learning_rate': 7.099429578752019e-05}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/272, Loss: 0.2654209414666349
0.5761376250369569
Epoch 11/272, Loss: 0.17406478388742966
0.6495927466484132
Epoch 21/272, Loss: 0.14954365146431056
0.6658012180779702
Epoch 31/272, Loss: 0.14252598786895926
0.670644281666402
Epoch 41/272, Loss: 0.13376601006497035
0.6833689891039151
Epoch 51/272, Loss: 0.12673482129519636
0.6824449100968284
Epoch 61/272, Loss: 0.12151411446658048
0.6808684072058889
Epoch 71/272, Loss: 0.11417292938991026
0.687379128399229
Epoch 81/272, Loss: 0.10591419752348553
0.6900906241708804
Epoch 91/272, Loss: 0.09885674850507216
0.696594818070334
Epoch 101/272, Loss: 0.09321432967077602
0.6997209044332425
Epoch 111/272, Loss: 0.08727934177626263
0.7028721965867668
Epoch 121/272, Loss: 0.08596353673122147
0.7047995378236176
Epoch 131/272, Loss: 0.08032617142254656
0.706854913220157
Epoch 141/272, Loss: 0.07719142497940497
0.7121383229741521
Epoch 151/272, Loss: 0.0733329447155649
0.713201463603455
Epoch 161/272, Loss

[I 2024-01-23 09:55:18,382] Trial 47 finished with value: 0.7167041860858777 and parameters: {'hidden_dim_h': 38, 'dropout': 0.29011660337672274, 'batch_size': 1128, 'n_epochs': 272, 'learning_rate': 0.0005006476598958318}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/96, Loss: 0.7324866169974917
0.5593684858565793
Epoch 11/96, Loss: 0.5812537698518663
0.5815036479261566
Epoch 21/96, Loss: 0.49396799433799016
0.6069916689112074
Epoch 31/96, Loss: 0.42075211235455107
0.6186038462141074
Epoch 41/96, Loss: 0.358467888264429
0.6325188038086963
Epoch 51/96, Loss: 0.3036617324465797
0.6437906059033154
Epoch 61/96, Loss: 0.2589656618379411
0.6494026360254064
Epoch 71/96, Loss: 0.22001474244253977
0.662567743821608
Epoch 81/96, Loss: 0.1911046483686992
0.6682639425747553
Epoch 91/96, Loss: 0.16838949705873216
0.6592707264349958


[I 2024-01-23 09:56:11,557] Trial 48 finished with value: 0.6592707264349958 and parameters: {'hidden_dim_h': 42, 'dropout': 0.3315743018455075, 'batch_size': 1167, 'n_epochs': 96, 'learning_rate': 0.00021528413396066815}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/241, Loss: 0.3430199737244464
0.5941160647481198
Epoch 11/241, Loss: 0.2626404860552321
0.009613459532260142
Epoch 21/241, Loss: 0.2617579289573304
0.0033091832695640215
Epoch 31/241, Loss: 0.26343952151055033
0.0010674273962621105
Epoch 41/241, Loss: 0.2634061963317242
4.831003189040147e-05
Epoch 51/241, Loss: 0.2647011597105797
0.004336627903377207
Epoch 61/241, Loss: 0.26363026287327423
0.0017826716319535917
Epoch 71/241, Loss: 0.26283707707486254
5.1335976319971194e-05
Epoch 81/241, Loss: 0.2628996343688762
2.0527350521377735e-08
Epoch 91/241, Loss: 0.26236661650398946
0.00015918769690291985
Epoch 101/241, Loss: 0.2616180362219506
0.006071051781255213
Epoch 111/241, Loss: 0.26261789700452315
5.9518799227273896e-05
Epoch 121/241, Loss: 0.26263294147050126
0.0006601453679749793
Epoch 131/241, Loss: 0.26280694597579063
0.0049132772730447255
Epoch 141/241, Loss: 0.26213789953196304
0.006119294544317577
Epoch 151/241, Loss: 0.26231264941235

[I 2024-01-23 09:58:55,385] Trial 49 finished with value: 0.0002048462752616068 and parameters: {'hidden_dim_h': 38, 'dropout': 0.17846951816309745, 'batch_size': 254, 'n_epochs': 241, 'learning_rate': 0.0009498722693919247}. Best is trial 32 with value: 0.735869700940863.


Epoch 241/241, Loss: 0.2644238589291877
0.0002048462752616068
Build model with 3 layers of attention
Epoch 1/284, Loss: 0.4321105397762136
0.4890100667364678
Epoch 11/284, Loss: 0.3824718112641193
0.5781221950500725
Epoch 21/284, Loss: 0.3610202435483324
0.5931300890567137
Epoch 31/284, Loss: 0.34241309698591843
0.6049433567269298
Epoch 41/284, Loss: 0.32607735471522553
0.6177337317643677
Epoch 51/284, Loss: 0.3124194671498968
0.6241279017148779
Epoch 61/284, Loss: 0.2999525678918717
0.6282685142462718
Epoch 71/284, Loss: 0.28945319449647944
0.6345739021052037
Epoch 81/284, Loss: 0.2805150398548613
0.6380464541399505
Epoch 91/284, Loss: 0.27281729876995087
0.6376759567106209
Epoch 101/284, Loss: 0.26554694169379295
0.6436232445765194
Epoch 111/284, Loss: 0.2600670425181693
0.6458851948802833
Epoch 121/284, Loss: 0.2548967739369007
0.6445496830721366
Epoch 131/284, Loss: 0.250174712944538
0.6486765072356135
Epoch 141/284, Loss: 0.24499415526998805
0.6491246207247654
Epoch 151/284, Loss:

[I 2024-01-23 10:01:15,951] Trial 50 finished with value: 0.6737772891076174 and parameters: {'hidden_dim_h': 30, 'dropout': 0.26013168367841993, 'batch_size': 514, 'n_epochs': 284, 'learning_rate': 4.4688828156069894e-05}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/205, Loss: 1.300530425139836
0.5350748810249437
Epoch 11/205, Loss: 0.9686946477208819
0.5763967972891902
Epoch 21/205, Loss: 0.7300325555460794
0.6174771998369161
Epoch 31/205, Loss: 0.553015029004642
0.6493825762905338
Epoch 41/205, Loss: 0.42661238355296
0.647999971775911
Epoch 51/205, Loss: 0.3400216341018677
0.658199524492902
Epoch 61/205, Loss: 0.2858593972665923
0.6659474906221395
Epoch 71/205, Loss: 0.25491843500307626
0.6738681093625183
Epoch 81/205, Loss: 0.23646865572248185
0.6784613751900487
Epoch 91/205, Loss: 0.22184580372912543
0.6792711964019202
Epoch 101/205, Loss: 0.2072397178837231
0.6844599251814844
Epoch 111/205, Loss: 0.19165320800883429
0.6779565492262234
Epoch 121/205, Loss: 0.1752938824040549
0.6868456177010395
Epoch 131/205, Loss: 0.15940118612987655
0.6965593862935952
Epoch 141/205, Loss: 0.14581456684640476
0.6984761700183114
Epoch 151/205, Loss: 0.13238372334412166
0.7022229888983885
Epoch 161/205, Loss: 0.1213

[I 2024-01-23 10:03:32,392] Trial 51 finished with value: 0.7217369928703768 and parameters: {'hidden_dim_h': 45, 'dropout': 0.2723050270157892, 'batch_size': 342, 'n_epochs': 205, 'learning_rate': 0.0001646778802060433}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/208, Loss: 0.7075048038592705
0.5644623934741666
Epoch 11/208, Loss: 0.3366818288197884
0.6313399721621468
Epoch 21/208, Loss: 0.1796513387790093
0.6576578142113941
Epoch 31/208, Loss: 0.12970705404877664
0.6719924258724282
Epoch 41/208, Loss: 0.1185332028911664
0.6835378249966686
Epoch 51/208, Loss: 0.11107541976066736
0.6840974387855546
Epoch 61/208, Loss: 0.10103940109793956
0.6963973757423757
Epoch 71/208, Loss: 0.09202646114505254
0.6946928269005614
Epoch 81/208, Loss: 0.08466207459568978
0.707161541552486
Epoch 91/208, Loss: 0.07817225582324541
0.719864025353921
Epoch 101/208, Loss: 0.0743706737859891
0.7153473547311364
Epoch 111/208, Loss: 0.07110238788792721
0.7189314849707252
Epoch 121/208, Loss: 0.06825552468116466
0.7227754621587895
Epoch 131/208, Loss: 0.06563522916000623
0.7223858969364085
Epoch 141/208, Loss: 0.06399844440703209
0.7217626473157612
Epoch 151/208, Loss: 0.061907900210756525
0.724791333594929
Epoch 161/208, Loss

[I 2024-01-23 10:06:16,768] Trial 52 finished with value: 0.7327824775209302 and parameters: {'hidden_dim_h': 40, 'dropout': 0.20702835611598297, 'batch_size': 184, 'n_epochs': 208, 'learning_rate': 0.00016650195900128322}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/233, Loss: 0.6964484031001726
0.548644301781926
Epoch 11/233, Loss: 0.4964584931731224
0.6058601032395903
Epoch 21/233, Loss: 0.3712695337831974
0.6296599443277785
Epoch 31/233, Loss: 0.2934178291509549
0.6427199056150199
Epoch 41/233, Loss: 0.2500223105152448
0.6609680871549004
Epoch 51/233, Loss: 0.22693190177281697
0.6563313969838849
Epoch 61/233, Loss: 0.20977486222982406
0.6585105809713475
Epoch 71/233, Loss: 0.1921870137254397
0.665130521219039
Epoch 81/233, Loss: 0.17600255322953065
0.6660837499774046
Epoch 91/233, Loss: 0.1615383869037032
0.6769167523650602
Epoch 101/233, Loss: 0.1482706737394134
0.6774398309064014
Epoch 111/233, Loss: 0.13706308621913194
0.6859886263684221
Epoch 121/233, Loss: 0.1274394765496254
0.6851329299117302
Epoch 131/233, Loss: 0.11813923108081023
0.6921090050363004
Epoch 141/233, Loss: 0.10947491178909938
0.6967676378004388
Epoch 151/233, Loss: 0.10212169978767634
0.7000965298346554
Epoch 161/233, Loss: 0.

[I 2024-01-23 10:09:12,791] Trial 53 finished with value: 0.7230472891874393 and parameters: {'hidden_dim_h': 40, 'dropout': 0.20275439012332208, 'batch_size': 200, 'n_epochs': 233, 'learning_rate': 9.198453827332333e-05}. Best is trial 32 with value: 0.735869700940863.


Build model with 3 layers of attention
Epoch 1/251, Loss: 0.197223582615455
0.5797938484777206
Epoch 11/251, Loss: 0.17500227006773153
0.6217162550554383
Epoch 21/251, Loss: 0.16817601894338927
0.6263688394966755
Epoch 31/251, Loss: 0.1623063584168752
0.6418015827726424
Epoch 41/251, Loss: 0.15964392013847828
0.6493374516914314
Epoch 51/251, Loss: 0.15530223647753397
0.6467783934838801
Epoch 61/251, Loss: 0.15154750210543474
0.6525255446510058
Epoch 71/251, Loss: 0.14720135740935802
0.6582956896949353
Epoch 81/251, Loss: 0.14407062654693922
0.6641756904220266
Epoch 91/251, Loss: 0.14052992748717466
0.6684628862924857
Epoch 101/251, Loss: 0.13698008935898542
0.6731867925516777
Epoch 111/251, Loss: 0.13332256643722454
0.6780882304001796
Epoch 121/251, Loss: 0.1303686617563168
0.6734907967915927
Epoch 131/251, Loss: 0.12935854773968458
0.6701098499478827
Epoch 141/251, Loss: 0.12568053882569075
0.6762798007825925
Epoch 151/251, Loss: 0.12251139711588621
0.6823933066354343
Epoch 161/251, L

[I 2024-01-23 10:11:32,747] Trial 54 finished with value: 0.6900555140676057 and parameters: {'hidden_dim_h': 43, 'dropout': 0.23818512467608657, 'batch_size': 1024, 'n_epochs': 251, 'learning_rate': 0.000129668625308857}. Best is trial 32 with value: 0.735869700940863.


Epoch 251/251, Loss: 0.09975179377943277
0.6900555140676057
Build model with 3 layers of attention
Epoch 1/216, Loss: 0.2615802187047549
0.6042150191164392


[I 2024-01-23 10:11:34,997] Trial 55 finished with value: 0.6042150191164392 and parameters: {'hidden_dim_h': 35, 'dropout': 0.2064254647427, 'batch_size': 161, 'n_epochs': 216, 'learning_rate': 0.0002597281003453839}. Best is trial 32 with value: 0.735869700940863.


training failed
Build model with 3 layers of attention
Epoch 1/268, Loss: 1.1786091380649142
0.5507874825587576


[I 2024-01-23 10:11:36,903] Trial 56 finished with value: 0.5507874825587576 and parameters: {'hidden_dim_h': 38, 'dropout': 0.2255104565773971, 'batch_size': 267, 'n_epochs': 268, 'learning_rate': 0.0006175308358076706}. Best is trial 32 with value: 0.735869700940863.


training failed
Build model with 3 layers of attention
training failed


[W 2024-01-23 10:11:37,094] Trial 57 failed with parameters: {'hidden_dim_h': 40, 'dropout': 0.171378915294019, 'batch_size': 595, 'n_epochs': 233, 'learning_rate': 0.00020076739042322988} because of the following error: IndexError('index -1 is out of bounds for axis 0 with size 0').
Traceback (most recent call last):
  File "/home/juannanzhou/.local/lib/python3.10/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/scratch/local/22019942/ipykernel_2825595/727483821.py", line 55, in objective
    criterion = np.array(r2_test)[-1]
IndexError: index -1 is out of bounds for axis 0 with size 0
[W 2024-01-23 10:11:37,105] Trial 57 failed with value None.

KeyboardInterrupt

