# Import Libraries

In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns

import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from torchvision import datasets, transforms
from typing import Union
import random
from pathlib import Path

In [2]:
from Return_dataloader import pass_dataloader
from Transformer_model import TabTransformer
from Plot_Accuracies import plot_loss_curves
from engine import train

## Device Agnostic

In [3]:
# Clear Cache
torch.cuda.empty_cache()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Available device is: {device}")

Available device is: cpu


## Dataloader

In [4]:
train_dataloader, test_dataloader = pass_dataloader()

# Hyperparameter Training

In [5]:
from copy import deepcopy

def hyperparameter_tuning(param_grid,
                          train_dataloader,
                          test_dataloader,
                          device,
                          epochs=50):

    best_val_score = -float('inf')
    best_params = None
    best_results = None

    for params in param_grid:
        print(f"Training with d_model={params['d_model']}, nhead={params['nhead']}, "
              f"num_layers={params['num_layers']}, dropout={params['dropout']}, lr={params['lr']}")

        # Create model with given hyperparameters
        model = TabTransformer(input_dim=100,
                               d_model=params['d_model'],
                               nhead=params['nhead'],
                               num_layers=params['num_layers'],
                               dropout=params['dropout']).to(device)

        loss_fn = nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=params['lr'],
                                     weight_decay=1e-4)

        # Train model and get results dictionary
        results = train(model=model,
                        train_dataloader=train_dataloader,
                        test_dataloader=test_dataloader,
                        optimizer=optimizer,
                        loss_fn=loss_fn,
                        device=device,
                        epochs=epochs)

        # Pick best validation score (ROC-AUC here) from all epochs
        max_val_roc_auc = max(results['test_roc_auc'])

        print(f"Max val ROC-AUC: {max_val_roc_auc:.4f}")

        if max_val_roc_auc > best_val_score:
            best_val_score = max_val_roc_auc
            best_params = params
            best_results = deepcopy(results)

    print(f"\nBest params: {best_params}")
    print(f"Best validation ROC-AUC: {best_val_score:.4f}")

    return best_params, best_results


In [6]:
from itertools import product
NUM_EPOCHS = 10


# param_grid = {
#     "d_model": [64, 128, 25],
#     "nhead": [2, 4, 8, 16],
#     "num_layers": [i * 2 for i in range(1,51)],
#     "dropout": [0.0, 0.1 , 0.3 , 0.5],
#     "lr": [0.1, 0.01, 0.001],
#     "weight_decay": [0, 1e-4],
#     "batch_size": [4, 8, 16, 32, 64]
# }
param_grid = {
    "d_model": [64, 128],
    "nhead": [2, 4],
    "num_layers": [2, 4, 6],
    "dropout": [0.1, 0.3],
    "lr": [0.001, 0.0001],
    "weight_decay": [0, 1e-4],
    "batch_size": [16, 32]
}


param_grid = [
    dict(zip(param_grid.keys(), values))
    for values in product(*param_grid.values())
]
best_params, best_results = hyperparameter_tuning(
    param_grid,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    device=device,
    epochs=NUM_EPOCHS
)

Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.001




  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0311 | Acc: 0.9899 | Prec: 0.9898 | Recall: 0.9884 | F1: 0.9891 | ROC-AUC: 0.9993 || Test Loss: 0.3068 | Acc: 0.9349 | Prec: 0.9769 | Recall: 0.9524 | F1: 0.9645 | ROC-AUC: 0.9320
Max val ROC-AUC: 0.9563
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0168 | Acc: 0.9963 | Prec: 0.9964 | Recall: 0.9956 | F1: 0.9960 | ROC-AUC: 0.9993 || Test Loss: 0.3794 | Acc: 0.9372 | Prec: 0.9794 | Recall: 0.9524 | F1: 0.9657 | ROC-AUC: 0.9188
Max val ROC-AUC: 0.9715
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0168 | Acc: 0.9943 | Prec: 0.9956 | Recall: 0.9920 | F1: 0.9938 | ROC-AUC: 0.9998 || Test Loss: 0.3226 | Acc: 0.9279 | Prec: 0.9718 | Recall: 0.9499 | F1: 0.9607 | ROC-AUC: 0.9425
Max val ROC-AUC: 0.9533
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0300 | Acc: 0.9919 | Prec: 0.9934 | Recall: 0.9891 | F1: 0.9913 | ROC-AUC: 0.9988 || Test Loss: 0.3569 | Acc: 0.9419 | Prec: 0.9795 | Recall: 0.9574 | F1: 0.9683 | ROC-AUC: 0.9445
Max val ROC-AUC: 0.9559
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0312 | Acc: 0.9923 | Prec: 0.9949 | Recall: 0.9884 | F1: 0.9916 | ROC-AUC: 0.9987 || Test Loss: 0.3355 | Acc: 0.9209 | Prec: 0.9765 | Recall: 0.9373 | F1: 0.9565 | ROC-AUC: 0.9142
Max val ROC-AUC: 0.9498
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0276 | Acc: 0.9929 | Prec: 0.9942 | Recall: 0.9906 | F1: 0.9924 | ROC-AUC: 0.9993 || Test Loss: 0.2423 | Acc: 0.9419 | Prec: 0.9845 | Recall: 0.9524 | F1: 0.9682 | ROC-AUC: 0.9436
Max val ROC-AUC: 0.9585
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0357 | Acc: 0.9902 | Prec: 0.9905 | Recall: 0.9884 | F1: 0.9895 | ROC-AUC: 0.9991 || Test Loss: 0.2262 | Acc: 0.9395 | Prec: 0.9844 | Recall: 0.9499 | F1: 0.9668 | ROC-AUC: 0.9649
Max val ROC-AUC: 0.9649
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0218 | Acc: 0.9949 | Prec: 0.9971 | Recall: 0.9920 | F1: 0.9945 | ROC-AUC: 0.9994 || Test Loss: 0.2362 | Acc: 0.9372 | Prec: 0.9794 | Recall: 0.9524 | F1: 0.9657 | ROC-AUC: 0.9496
Max val ROC-AUC: 0.9510
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0448 | Acc: 0.9882 | Prec: 0.9934 | Recall: 0.9811 | F1: 0.9872 | ROC-AUC: 0.9972 || Test Loss: 0.4506 | Acc: 0.9349 | Prec: 0.9720 | Recall: 0.9574 | F1: 0.9646 | ROC-AUC: 0.8998
Max val ROC-AUC: 0.9503
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0445 | Acc: 0.9892 | Prec: 0.9927 | Recall: 0.9840 | F1: 0.9883 | ROC-AUC: 0.9979 || Test Loss: 0.3500 | Acc: 0.9163 | Prec: 0.9789 | Recall: 0.9298 | F1: 0.9537 | ROC-AUC: 0.9458
Max val ROC-AUC: 0.9531
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0334 | Acc: 0.9892 | Prec: 0.9884 | Recall: 0.9884 | F1: 0.9884 | ROC-AUC: 0.9991 || Test Loss: 0.4569 | Acc: 0.9256 | Prec: 0.9791 | Recall: 0.9398 | F1: 0.9591 | ROC-AUC: 0.9169
Max val ROC-AUC: 0.9567
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0274 | Acc: 0.9902 | Prec: 0.9934 | Recall: 0.9855 | F1: 0.9894 | ROC-AUC: 0.9992 || Test Loss: 0.4240 | Acc: 0.9372 | Prec: 0.9844 | Recall: 0.9474 | F1: 0.9655 | ROC-AUC: 0.9209
Max val ROC-AUC: 0.9469
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0806 | Acc: 0.9734 | Prec: 0.9779 | Recall: 0.9644 | F1: 0.9711 | ROC-AUC: 0.9946 || Test Loss: 0.3007 | Acc: 0.9163 | Prec: 0.9866 | Recall: 0.9223 | F1: 0.9534 | ROC-AUC: 0.9371
Max val ROC-AUC: 0.9475
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0650 | Acc: 0.9788 | Prec: 0.9761 | Recall: 0.9782 | F1: 0.9771 | ROC-AUC: 0.9969 || Test Loss: 0.2933 | Acc: 0.9233 | Prec: 0.9841 | Recall: 0.9323 | F1: 0.9575 | ROC-AUC: 0.9288
Max val ROC-AUC: 0.9501
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0905 | Acc: 0.9704 | Prec: 0.9701 | Recall: 0.9659 | F1: 0.9680 | ROC-AUC: 0.9933 || Test Loss: 0.2862 | Acc: 0.9209 | Prec: 0.9841 | Recall: 0.9298 | F1: 0.9562 | ROC-AUC: 0.9305
Max val ROC-AUC: 0.9416
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0791 | Acc: 0.9724 | Prec: 0.9772 | Recall: 0.9630 | F1: 0.9700 | ROC-AUC: 0.9959 || Test Loss: 0.3170 | Acc: 0.9093 | Prec: 0.9839 | Recall: 0.9173 | F1: 0.9494 | ROC-AUC: 0.9392
Max val ROC-AUC: 0.9618
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0274 | Acc: 0.9912 | Prec: 0.9913 | Recall: 0.9898 | F1: 0.9906 | ROC-AUC: 0.9994 || Test Loss: 0.4652 | Acc: 0.9000 | Prec: 0.9759 | Recall: 0.9148 | F1: 0.9444 | ROC-AUC: 0.9366
Max val ROC-AUC: 0.9583
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0312 | Acc: 0.9879 | Prec: 0.9891 | Recall: 0.9847 | F1: 0.9869 | ROC-AUC: 0.9989 || Test Loss: 0.4762 | Acc: 0.9279 | Prec: 0.9742 | Recall: 0.9474 | F1: 0.9606 | ROC-AUC: 0.9341
Max val ROC-AUC: 0.9520
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0269 | Acc: 0.9912 | Prec: 0.9927 | Recall: 0.9884 | F1: 0.9905 | ROC-AUC: 0.9994 || Test Loss: 0.3524 | Acc: 0.9302 | Prec: 0.9767 | Recall: 0.9474 | F1: 0.9618 | ROC-AUC: 0.8938
Max val ROC-AUC: 0.9484
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0448 | Acc: 0.9855 | Prec: 0.9904 | Recall: 0.9782 | F1: 0.9843 | ROC-AUC: 0.9983 || Test Loss: 0.3533 | Acc: 0.9302 | Prec: 0.9767 | Recall: 0.9474 | F1: 0.9618 | ROC-AUC: 0.9333
Max val ROC-AUC: 0.9550
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0244 | Acc: 0.9933 | Prec: 0.9949 | Recall: 0.9906 | F1: 0.9927 | ROC-AUC: 0.9994 || Test Loss: 0.2798 | Acc: 0.9326 | Prec: 0.9768 | Recall: 0.9499 | F1: 0.9632 | ROC-AUC: 0.9057
Max val ROC-AUC: 0.9398
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0235 | Acc: 0.9949 | Prec: 0.9978 | Recall: 0.9913 | F1: 0.9945 | ROC-AUC: 0.9993 || Test Loss: 0.2960 | Acc: 0.9372 | Prec: 0.9769 | Recall: 0.9549 | F1: 0.9658 | ROC-AUC: 0.9313
Max val ROC-AUC: 0.9539
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0327 | Acc: 0.9916 | Prec: 0.9927 | Recall: 0.9891 | F1: 0.9909 | ROC-AUC: 0.9982 || Test Loss: 0.3715 | Acc: 0.9186 | Prec: 0.9789 | Recall: 0.9323 | F1: 0.9551 | ROC-AUC: 0.9372
Max val ROC-AUC: 0.9617
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0348 | Acc: 0.9902 | Prec: 0.9927 | Recall: 0.9862 | F1: 0.9894 | ROC-AUC: 0.9985 || Test Loss: 0.3542 | Acc: 0.9302 | Prec: 0.9743 | Recall: 0.9499 | F1: 0.9619 | ROC-AUC: 0.9140
Max val ROC-AUC: 0.9454
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0504 | Acc: 0.9855 | Prec: 0.9876 | Recall: 0.9811 | F1: 0.9843 | ROC-AUC: 0.9972 || Test Loss: 0.4677 | Acc: 0.9070 | Prec: 0.9787 | Recall: 0.9198 | F1: 0.9483 | ROC-AUC: 0.9389
Max val ROC-AUC: 0.9425
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0515 | Acc: 0.9828 | Prec: 0.9797 | Recall: 0.9833 | F1: 0.9815 | ROC-AUC: 0.9976 || Test Loss: 0.4192 | Acc: 0.9302 | Prec: 0.9719 | Recall: 0.9524 | F1: 0.9620 | ROC-AUC: 0.9168
Max val ROC-AUC: 0.9479
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0467 | Acc: 0.9892 | Prec: 0.9934 | Recall: 0.9833 | F1: 0.9883 | ROC-AUC: 0.9974 || Test Loss: 0.3354 | Acc: 0.9256 | Prec: 0.9791 | Recall: 0.9398 | F1: 0.9591 | ROC-AUC: 0.9173
Max val ROC-AUC: 0.9470
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0367 | Acc: 0.9892 | Prec: 0.9956 | Recall: 0.9811 | F1: 0.9883 | ROC-AUC: 0.9987 || Test Loss: 0.4207 | Acc: 0.9023 | Prec: 0.9837 | Recall: 0.9098 | F1: 0.9453 | ROC-AUC: 0.9377
Max val ROC-AUC: 0.9528
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0684 | Acc: 0.9811 | Prec: 0.9846 | Recall: 0.9746 | F1: 0.9796 | ROC-AUC: 0.9946 || Test Loss: 0.2961 | Acc: 0.9302 | Prec: 0.9843 | Recall: 0.9398 | F1: 0.9615 | ROC-AUC: 0.9111
Max val ROC-AUC: 0.9439
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0687 | Acc: 0.9798 | Prec: 0.9853 | Recall: 0.9710 | F1: 0.9781 | ROC-AUC: 0.9955 || Test Loss: 0.3181 | Acc: 0.9186 | Prec: 0.9815 | Recall: 0.9298 | F1: 0.9550 | ROC-AUC: 0.9149
Max val ROC-AUC: 0.9523
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0738 | Acc: 0.9761 | Prec: 0.9794 | Recall: 0.9688 | F1: 0.9741 | ROC-AUC: 0.9951 || Test Loss: 0.3145 | Acc: 0.9186 | Prec: 0.9840 | Recall: 0.9273 | F1: 0.9548 | ROC-AUC: 0.9187
Max val ROC-AUC: 0.9456
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0625 | Acc: 0.9801 | Prec: 0.9838 | Recall: 0.9731 | F1: 0.9785 | ROC-AUC: 0.9962 || Test Loss: 0.3231 | Acc: 0.9023 | Prec: 0.9864 | Recall: 0.9073 | F1: 0.9452 | ROC-AUC: 0.9343
Max val ROC-AUC: 0.9521
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0431 | Acc: 0.9885 | Prec: 0.9877 | Recall: 0.9877 | F1: 0.9877 | ROC-AUC: 0.9973 || Test Loss: 0.3835 | Acc: 0.9163 | Prec: 0.9840 | Recall: 0.9248 | F1: 0.9535 | ROC-AUC: 0.9486
Max val ROC-AUC: 0.9515
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0408 | Acc: 0.9882 | Prec: 0.9912 | Recall: 0.9833 | F1: 0.9872 | ROC-AUC: 0.9982 || Test Loss: 0.3718 | Acc: 0.9233 | Prec: 0.9791 | Recall: 0.9373 | F1: 0.9577 | ROC-AUC: 0.9386
Max val ROC-AUC: 0.9671
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0215 | Acc: 0.9936 | Prec: 0.9956 | Recall: 0.9906 | F1: 0.9931 | ROC-AUC: 0.9997 || Test Loss: 0.3266 | Acc: 0.8814 | Prec: 0.9807 | Recall: 0.8897 | F1: 0.9330 | ROC-AUC: 0.9014
Max val ROC-AUC: 0.9394
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0120 | Acc: 0.9970 | Prec: 0.9971 | Recall: 0.9964 | F1: 0.9967 | ROC-AUC: 0.9999 || Test Loss: 0.3364 | Acc: 0.9326 | Prec: 0.9768 | Recall: 0.9499 | F1: 0.9632 | ROC-AUC: 0.9587
Max val ROC-AUC: 0.9588
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0333 | Acc: 0.9892 | Prec: 0.9884 | Recall: 0.9884 | F1: 0.9884 | ROC-AUC: 0.9988 || Test Loss: 0.2579 | Acc: 0.9465 | Prec: 0.9870 | Recall: 0.9549 | F1: 0.9707 | ROC-AUC: 0.9624
Max val ROC-AUC: 0.9687
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0246 | Acc: 0.9929 | Prec: 0.9934 | Recall: 0.9913 | F1: 0.9924 | ROC-AUC: 0.9992 || Test Loss: 0.3161 | Acc: 0.9209 | Prec: 0.9841 | Recall: 0.9298 | F1: 0.9562 | ROC-AUC: 0.9390
Max val ROC-AUC: 0.9468
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0173 | Acc: 0.9949 | Prec: 0.9956 | Recall: 0.9935 | F1: 0.9945 | ROC-AUC: 0.9999 || Test Loss: 0.3638 | Acc: 0.9279 | Prec: 0.9767 | Recall: 0.9449 | F1: 0.9605 | ROC-AUC: 0.9236
Max val ROC-AUC: 0.9500
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0228 | Acc: 0.9929 | Prec: 0.9934 | Recall: 0.9913 | F1: 0.9924 | ROC-AUC: 0.9993 || Test Loss: 0.2536 | Acc: 0.9442 | Prec: 0.9820 | Recall: 0.9574 | F1: 0.9695 | ROC-AUC: 0.9067
Max val ROC-AUC: 0.9610
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0301 | Acc: 0.9926 | Prec: 0.9949 | Recall: 0.9891 | F1: 0.9920 | ROC-AUC: 0.9989 || Test Loss: 0.5762 | Acc: 0.9209 | Prec: 0.9740 | Recall: 0.9398 | F1: 0.9566 | ROC-AUC: 0.9195
Max val ROC-AUC: 0.9552
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0430 | Acc: 0.9885 | Prec: 0.9891 | Recall: 0.9862 | F1: 0.9876 | ROC-AUC: 0.9976 || Test Loss: 0.3863 | Acc: 0.9256 | Prec: 0.9842 | Recall: 0.9348 | F1: 0.9589 | ROC-AUC: 0.9235
Max val ROC-AUC: 0.9625
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0390 | Acc: 0.9902 | Prec: 0.9963 | Recall: 0.9826 | F1: 0.9894 | ROC-AUC: 0.9977 || Test Loss: 0.4673 | Acc: 0.9209 | Prec: 0.9790 | Recall: 0.9348 | F1: 0.9564 | ROC-AUC: 0.9216
Max val ROC-AUC: 0.9478
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0401 | Acc: 0.9889 | Prec: 0.9905 | Recall: 0.9855 | F1: 0.9880 | ROC-AUC: 0.9978 || Test Loss: 0.4250 | Acc: 0.9256 | Prec: 0.9766 | Recall: 0.9424 | F1: 0.9592 | ROC-AUC: 0.9193
Max val ROC-AUC: 0.9413
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0652 | Acc: 0.9798 | Prec: 0.9845 | Recall: 0.9717 | F1: 0.9781 | ROC-AUC: 0.9962 || Test Loss: 0.3349 | Acc: 0.9256 | Prec: 0.9816 | Recall: 0.9373 | F1: 0.9590 | ROC-AUC: 0.9107
Max val ROC-AUC: 0.9552
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0750 | Acc: 0.9811 | Prec: 0.9889 | Recall: 0.9702 | F1: 0.9795 | ROC-AUC: 0.9937 || Test Loss: 0.3071 | Acc: 0.9256 | Prec: 0.9867 | Recall: 0.9323 | F1: 0.9588 | ROC-AUC: 0.9369
Max val ROC-AUC: 0.9663
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0598 | Acc: 0.9825 | Prec: 0.9861 | Recall: 0.9760 | F1: 0.9810 | ROC-AUC: 0.9971 || Test Loss: 0.3553 | Acc: 0.9186 | Prec: 0.9815 | Recall: 0.9298 | F1: 0.9550 | ROC-AUC: 0.9125
Max val ROC-AUC: 0.9456
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0622 | Acc: 0.9791 | Prec: 0.9817 | Recall: 0.9731 | F1: 0.9774 | ROC-AUC: 0.9967 || Test Loss: 0.3522 | Acc: 0.9209 | Prec: 0.9790 | Recall: 0.9348 | F1: 0.9564 | ROC-AUC: 0.9238
Max val ROC-AUC: 0.9550
Training with d_model=64, nhead=4, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0345 | Acc: 0.9916 | Prec: 0.9942 | Recall: 0.9877 | F1: 0.9909 | ROC-AUC: 0.9988 || Test Loss: 0.2325 | Acc: 0.9326 | Prec: 0.9818 | Recall: 0.9449 | F1: 0.9630 | ROC-AUC: 0.9365
Max val ROC-AUC: 0.9532
Training with d_model=64, nhead=4, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0264 | Acc: 0.9943 | Prec: 0.9935 | Recall: 0.9942 | F1: 0.9938 | ROC-AUC: 0.9990 || Test Loss: 0.4738 | Acc: 0.9372 | Prec: 0.9769 | Recall: 0.9549 | F1: 0.9658 | ROC-AUC: 0.9162
Max val ROC-AUC: 0.9475
Training with d_model=64, nhead=4, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0339 | Acc: 0.9899 | Prec: 0.9927 | Recall: 0.9855 | F1: 0.9891 | ROC-AUC: 0.9986 || Test Loss: 0.3318 | Acc: 0.9372 | Prec: 0.9794 | Recall: 0.9524 | F1: 0.9657 | ROC-AUC: 0.9235
Max val ROC-AUC: 0.9572
Training with d_model=64, nhead=4, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0115 | Acc: 0.9993 | Prec: 0.9993 | Recall: 0.9993 | F1: 0.9993 | ROC-AUC: 0.9995 || Test Loss: 0.4184 | Acc: 0.9372 | Prec: 0.9769 | Recall: 0.9549 | F1: 0.9658 | ROC-AUC: 0.9453
Max val ROC-AUC: 0.9646
Training with d_model=64, nhead=4, num_layers=2, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0319 | Acc: 0.9906 | Prec: 0.9913 | Recall: 0.9884 | F1: 0.9898 | ROC-AUC: 0.9991 || Test Loss: 0.2923 | Acc: 0.9256 | Prec: 0.9791 | Recall: 0.9398 | F1: 0.9591 | ROC-AUC: 0.9289
Max val ROC-AUC: 0.9594
Training with d_model=64, nhead=4, num_layers=2, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0308 | Acc: 0.9916 | Prec: 0.9906 | Recall: 0.9913 | F1: 0.9909 | ROC-AUC: 0.9994 || Test Loss: 0.2720 | Acc: 0.9326 | Prec: 0.9843 | Recall: 0.9424 | F1: 0.9629 | ROC-AUC: 0.9445
Max val ROC-AUC: 0.9544
Training with d_model=64, nhead=4, num_layers=2, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0348 | Acc: 0.9892 | Prec: 0.9877 | Recall: 0.9891 | F1: 0.9884 | ROC-AUC: 0.9990 || Test Loss: 0.2849 | Acc: 0.9395 | Prec: 0.9819 | Recall: 0.9524 | F1: 0.9669 | ROC-AUC: 0.9476
Max val ROC-AUC: 0.9615
Training with d_model=64, nhead=4, num_layers=2, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0242 | Acc: 0.9929 | Prec: 0.9927 | Recall: 0.9920 | F1: 0.9924 | ROC-AUC: 0.9996 || Test Loss: 0.2731 | Acc: 0.9372 | Prec: 0.9769 | Recall: 0.9549 | F1: 0.9658 | ROC-AUC: 0.9521
Max val ROC-AUC: 0.9521
Training with d_model=64, nhead=4, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0245 | Acc: 0.9912 | Prec: 0.9963 | Recall: 0.9847 | F1: 0.9905 | ROC-AUC: 0.9996 || Test Loss: 0.4600 | Acc: 0.9093 | Prec: 0.9787 | Recall: 0.9223 | F1: 0.9497 | ROC-AUC: 0.9175
Max val ROC-AUC: 0.9569
Training with d_model=64, nhead=4, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0328 | Acc: 0.9912 | Prec: 0.9956 | Recall: 0.9855 | F1: 0.9905 | ROC-AUC: 0.9989 || Test Loss: 0.6615 | Acc: 0.9000 | Prec: 0.9785 | Recall: 0.9123 | F1: 0.9442 | ROC-AUC: 0.9034
Max val ROC-AUC: 0.9507
Training with d_model=64, nhead=4, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0406 | Acc: 0.9899 | Prec: 0.9884 | Recall: 0.9898 | F1: 0.9891 | ROC-AUC: 0.9981 || Test Loss: 0.4017 | Acc: 0.9279 | Prec: 0.9767 | Recall: 0.9449 | F1: 0.9605 | ROC-AUC: 0.9305
Max val ROC-AUC: 0.9542
Training with d_model=64, nhead=4, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0278 | Acc: 0.9919 | Prec: 0.9934 | Recall: 0.9891 | F1: 0.9913 | ROC-AUC: 0.9993 || Test Loss: 0.4765 | Acc: 0.9442 | Prec: 0.9771 | Recall: 0.9624 | F1: 0.9697 | ROC-AUC: 0.9207
Max val ROC-AUC: 0.9584
Training with d_model=64, nhead=4, num_layers=2, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0813 | Acc: 0.9724 | Prec: 0.9765 | Recall: 0.9637 | F1: 0.9700 | ROC-AUC: 0.9952 || Test Loss: 0.2487 | Acc: 0.9279 | Prec: 0.9842 | Recall: 0.9373 | F1: 0.9602 | ROC-AUC: 0.9410
Max val ROC-AUC: 0.9719
Training with d_model=64, nhead=4, num_layers=2, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0713 | Acc: 0.9761 | Prec: 0.9801 | Recall: 0.9680 | F1: 0.9741 | ROC-AUC: 0.9964 || Test Loss: 0.3068 | Acc: 0.8953 | Prec: 0.9836 | Recall: 0.9023 | F1: 0.9412 | ROC-AUC: 0.9378
Max val ROC-AUC: 0.9591
Training with d_model=64, nhead=4, num_layers=2, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0858 | Acc: 0.9734 | Prec: 0.9758 | Recall: 0.9666 | F1: 0.9712 | ROC-AUC: 0.9936 || Test Loss: 0.2764 | Acc: 0.9209 | Prec: 0.9867 | Recall: 0.9273 | F1: 0.9561 | ROC-AUC: 0.9394
Max val ROC-AUC: 0.9581
Training with d_model=64, nhead=4, num_layers=2, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0657 | Acc: 0.9805 | Prec: 0.9860 | Recall: 0.9717 | F1: 0.9788 | ROC-AUC: 0.9964 || Test Loss: 0.3075 | Acc: 0.9093 | Prec: 0.9813 | Recall: 0.9198 | F1: 0.9495 | ROC-AUC: 0.9406
Max val ROC-AUC: 0.9610
Training with d_model=64, nhead=4, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0140 | Acc: 0.9956 | Prec: 0.9964 | Recall: 0.9942 | F1: 0.9953 | ROC-AUC: 0.9999 || Test Loss: 0.4644 | Acc: 0.9419 | Prec: 0.9746 | Recall: 0.9624 | F1: 0.9685 | ROC-AUC: 0.9086
Max val ROC-AUC: 0.9432
Training with d_model=64, nhead=4, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0217 | Acc: 0.9939 | Prec: 0.9949 | Recall: 0.9920 | F1: 0.9935 | ROC-AUC: 0.9995 || Test Loss: 0.3783 | Acc: 0.9279 | Prec: 0.9817 | Recall: 0.9398 | F1: 0.9603 | ROC-AUC: 0.9252
Max val ROC-AUC: 0.9343
Training with d_model=64, nhead=4, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0469 | Acc: 0.9855 | Prec: 0.9854 | Recall: 0.9833 | F1: 0.9844 | ROC-AUC: 0.9979 || Test Loss: 0.2029 | Acc: 0.9372 | Prec: 0.9721 | Recall: 0.9599 | F1: 0.9660 | ROC-AUC: 0.9442
Max val ROC-AUC: 0.9572
Training with d_model=64, nhead=4, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0362 | Acc: 0.9892 | Prec: 0.9927 | Recall: 0.9840 | F1: 0.9883 | ROC-AUC: 0.9988 || Test Loss: 0.3750 | Acc: 0.9163 | Prec: 0.9866 | Recall: 0.9223 | F1: 0.9534 | ROC-AUC: 0.9390
Max val ROC-AUC: 0.9566
Training with d_model=64, nhead=4, num_layers=4, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0329 | Acc: 0.9906 | Prec: 0.9934 | Recall: 0.9862 | F1: 0.9898 | ROC-AUC: 0.9986 || Test Loss: 0.3489 | Acc: 0.9256 | Prec: 0.9791 | Recall: 0.9398 | F1: 0.9591 | ROC-AUC: 0.9317
Max val ROC-AUC: 0.9637
Training with d_model=64, nhead=4, num_layers=4, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0231 | Acc: 0.9929 | Prec: 0.9927 | Recall: 0.9920 | F1: 0.9924 | ROC-AUC: 0.9997 || Test Loss: 0.3174 | Acc: 0.9372 | Prec: 0.9819 | Recall: 0.9499 | F1: 0.9656 | ROC-AUC: 0.9232
Max val ROC-AUC: 0.9569
Training with d_model=64, nhead=4, num_layers=4, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0279 | Acc: 0.9916 | Prec: 0.9927 | Recall: 0.9891 | F1: 0.9909 | ROC-AUC: 0.9989 || Test Loss: 0.2973 | Acc: 0.9326 | Prec: 0.9818 | Recall: 0.9449 | F1: 0.9630 | ROC-AUC: 0.9508
Max val ROC-AUC: 0.9605
Training with d_model=64, nhead=4, num_layers=4, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0169 | Acc: 0.9970 | Prec: 0.9985 | Recall: 0.9949 | F1: 0.9967 | ROC-AUC: 0.9995 || Test Loss: 0.2858 | Acc: 0.9349 | Prec: 0.9744 | Recall: 0.9549 | F1: 0.9646 | ROC-AUC: 0.9190
Max val ROC-AUC: 0.9631
Training with d_model=64, nhead=4, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0422 | Acc: 0.9862 | Prec: 0.9926 | Recall: 0.9775 | F1: 0.9850 | ROC-AUC: 0.9984 || Test Loss: 0.4622 | Acc: 0.9163 | Prec: 0.9866 | Recall: 0.9223 | F1: 0.9534 | ROC-AUC: 0.9341
Max val ROC-AUC: 0.9377
Training with d_model=64, nhead=4, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0396 | Acc: 0.9899 | Prec: 0.9920 | Recall: 0.9862 | F1: 0.9891 | ROC-AUC: 0.9981 || Test Loss: 0.4954 | Acc: 0.9186 | Prec: 0.9840 | Recall: 0.9273 | F1: 0.9548 | ROC-AUC: 0.9299
Max val ROC-AUC: 0.9534
Training with d_model=64, nhead=4, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0471 | Acc: 0.9859 | Prec: 0.9855 | Recall: 0.9840 | F1: 0.9847 | ROC-AUC: 0.9979 || Test Loss: 0.3903 | Acc: 0.9163 | Prec: 0.9764 | Recall: 0.9323 | F1: 0.9538 | ROC-AUC: 0.9063
Max val ROC-AUC: 0.9501
Training with d_model=64, nhead=4, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0547 | Acc: 0.9791 | Prec: 0.9874 | Recall: 0.9673 | F1: 0.9773 | ROC-AUC: 0.9976 || Test Loss: 0.5390 | Acc: 0.8930 | Prec: 0.9862 | Recall: 0.8972 | F1: 0.9396 | ROC-AUC: 0.9118
Max val ROC-AUC: 0.9443
Training with d_model=64, nhead=4, num_layers=4, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0677 | Acc: 0.9781 | Prec: 0.9838 | Recall: 0.9688 | F1: 0.9762 | ROC-AUC: 0.9955 || Test Loss: 0.3284 | Acc: 0.9093 | Prec: 0.9839 | Recall: 0.9173 | F1: 0.9494 | ROC-AUC: 0.9329
Max val ROC-AUC: 0.9489
Training with d_model=64, nhead=4, num_layers=4, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0700 | Acc: 0.9815 | Prec: 0.9867 | Recall: 0.9731 | F1: 0.9799 | ROC-AUC: 0.9939 || Test Loss: 0.3136 | Acc: 0.9116 | Prec: 0.9839 | Recall: 0.9198 | F1: 0.9508 | ROC-AUC: 0.9293
Max val ROC-AUC: 0.9547
Training with d_model=64, nhead=4, num_layers=4, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0701 | Acc: 0.9788 | Prec: 0.9874 | Recall: 0.9666 | F1: 0.9769 | ROC-AUC: 0.9947 || Test Loss: 0.2947 | Acc: 0.9186 | Prec: 0.9789 | Recall: 0.9323 | F1: 0.9551 | ROC-AUC: 0.9306
Max val ROC-AUC: 0.9569
Training with d_model=64, nhead=4, num_layers=4, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0628 | Acc: 0.9821 | Prec: 0.9889 | Recall: 0.9724 | F1: 0.9806 | ROC-AUC: 0.9957 || Test Loss: 0.4006 | Acc: 0.8953 | Prec: 0.9836 | Recall: 0.9023 | F1: 0.9412 | ROC-AUC: 0.9286
Max val ROC-AUC: 0.9325
Training with d_model=64, nhead=4, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0255 | Acc: 0.9912 | Prec: 0.9927 | Recall: 0.9884 | F1: 0.9905 | ROC-AUC: 0.9988 || Test Loss: 0.4251 | Acc: 0.9326 | Prec: 0.9818 | Recall: 0.9449 | F1: 0.9630 | ROC-AUC: 0.9268
Max val ROC-AUC: 0.9485
Training with d_model=64, nhead=4, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0360 | Acc: 0.9882 | Prec: 0.9912 | Recall: 0.9833 | F1: 0.9872 | ROC-AUC: 0.9984 || Test Loss: 0.3731 | Acc: 0.9279 | Prec: 0.9792 | Recall: 0.9424 | F1: 0.9604 | ROC-AUC: 0.9259
Max val ROC-AUC: 0.9534
Training with d_model=64, nhead=4, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0373 | Acc: 0.9882 | Prec: 0.9948 | Recall: 0.9797 | F1: 0.9872 | ROC-AUC: 0.9988 || Test Loss: 0.3736 | Acc: 0.9256 | Prec: 0.9766 | Recall: 0.9424 | F1: 0.9592 | ROC-AUC: 0.9344
Max val ROC-AUC: 0.9590
Training with d_model=64, nhead=4, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0519 | Acc: 0.9832 | Prec: 0.9875 | Recall: 0.9760 | F1: 0.9817 | ROC-AUC: 0.9972 || Test Loss: 0.2363 | Acc: 0.9395 | Prec: 0.9746 | Recall: 0.9599 | F1: 0.9672 | ROC-AUC: 0.9251
Max val ROC-AUC: 0.9457
Training with d_model=64, nhead=4, num_layers=6, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0440 | Acc: 0.9882 | Prec: 0.9919 | Recall: 0.9826 | F1: 0.9872 | ROC-AUC: 0.9977 || Test Loss: 0.2796 | Acc: 0.9349 | Prec: 0.9793 | Recall: 0.9499 | F1: 0.9644 | ROC-AUC: 0.9274
Max val ROC-AUC: 0.9592
Training with d_model=64, nhead=4, num_layers=6, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0323 | Acc: 0.9926 | Prec: 0.9927 | Recall: 0.9913 | F1: 0.9920 | ROC-AUC: 0.9984 || Test Loss: 0.2715 | Acc: 0.9442 | Prec: 0.9795 | Recall: 0.9599 | F1: 0.9696 | ROC-AUC: 0.9092
Max val ROC-AUC: 0.9661
Training with d_model=64, nhead=4, num_layers=6, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0281 | Acc: 0.9929 | Prec: 0.9956 | Recall: 0.9891 | F1: 0.9923 | ROC-AUC: 0.9987 || Test Loss: 0.3640 | Acc: 0.9140 | Prec: 0.9840 | Recall: 0.9223 | F1: 0.9521 | ROC-AUC: 0.8767
Max val ROC-AUC: 0.9531
Training with d_model=64, nhead=4, num_layers=6, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0236 | Acc: 0.9943 | Prec: 0.9949 | Recall: 0.9927 | F1: 0.9938 | ROC-AUC: 0.9992 || Test Loss: 0.3406 | Acc: 0.9279 | Prec: 0.9767 | Recall: 0.9449 | F1: 0.9605 | ROC-AUC: 0.9314
Max val ROC-AUC: 0.9447
Training with d_model=64, nhead=4, num_layers=6, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0472 | Acc: 0.9872 | Prec: 0.9898 | Recall: 0.9826 | F1: 0.9862 | ROC-AUC: 0.9978 || Test Loss: 0.4267 | Acc: 0.9279 | Prec: 0.9817 | Recall: 0.9398 | F1: 0.9603 | ROC-AUC: 0.9439
Max val ROC-AUC: 0.9531
Training with d_model=64, nhead=4, num_layers=6, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0289 | Acc: 0.9916 | Prec: 0.9942 | Recall: 0.9877 | F1: 0.9909 | ROC-AUC: 0.9988 || Test Loss: 0.5687 | Acc: 0.9209 | Prec: 0.9790 | Recall: 0.9348 | F1: 0.9564 | ROC-AUC: 0.9426
Max val ROC-AUC: 0.9663
Training with d_model=64, nhead=4, num_layers=6, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0385 | Acc: 0.9919 | Prec: 0.9942 | Recall: 0.9884 | F1: 0.9913 | ROC-AUC: 0.9979 || Test Loss: 0.3421 | Acc: 0.9233 | Prec: 0.9766 | Recall: 0.9398 | F1: 0.9579 | ROC-AUC: 0.9381
Max val ROC-AUC: 0.9487
Training with d_model=64, nhead=4, num_layers=6, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0444 | Acc: 0.9869 | Prec: 0.9905 | Recall: 0.9811 | F1: 0.9858 | ROC-AUC: 0.9978 || Test Loss: 0.4390 | Acc: 0.9093 | Prec: 0.9762 | Recall: 0.9248 | F1: 0.9498 | ROC-AUC: 0.9057
Max val ROC-AUC: 0.9574
Training with d_model=64, nhead=4, num_layers=6, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0589 | Acc: 0.9845 | Prec: 0.9911 | Recall: 0.9753 | F1: 0.9832 | ROC-AUC: 0.9953 || Test Loss: 0.3403 | Acc: 0.9163 | Prec: 0.9840 | Recall: 0.9248 | F1: 0.9535 | ROC-AUC: 0.9376
Max val ROC-AUC: 0.9508
Training with d_model=64, nhead=4, num_layers=6, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0762 | Acc: 0.9788 | Prec: 0.9838 | Recall: 0.9702 | F1: 0.9770 | ROC-AUC: 0.9938 || Test Loss: 0.3902 | Acc: 0.8930 | Prec: 0.9809 | Recall: 0.9023 | F1: 0.9399 | ROC-AUC: 0.9270
Max val ROC-AUC: 0.9428
Training with d_model=64, nhead=4, num_layers=6, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0779 | Acc: 0.9771 | Prec: 0.9788 | Recall: 0.9717 | F1: 0.9752 | ROC-AUC: 0.9945 || Test Loss: 0.3509 | Acc: 0.9186 | Prec: 0.9840 | Recall: 0.9273 | F1: 0.9548 | ROC-AUC: 0.8852
Max val ROC-AUC: 0.9592
Training with d_model=64, nhead=4, num_layers=6, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0653 | Acc: 0.9815 | Prec: 0.9846 | Recall: 0.9753 | F1: 0.9799 | ROC-AUC: 0.9960 || Test Loss: 0.3643 | Acc: 0.9186 | Prec: 0.9866 | Recall: 0.9248 | F1: 0.9547 | ROC-AUC: 0.9205
Max val ROC-AUC: 0.9605
Training with d_model=128, nhead=2, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0402 | Acc: 0.9875 | Prec: 0.9912 | Recall: 0.9818 | F1: 0.9865 | ROC-AUC: 0.9983 || Test Loss: 0.4128 | Acc: 0.9209 | Prec: 0.9716 | Recall: 0.9424 | F1: 0.9567 | ROC-AUC: 0.9100
Max val ROC-AUC: 0.9541
Training with d_model=128, nhead=2, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0350 | Acc: 0.9923 | Prec: 0.9934 | Recall: 0.9898 | F1: 0.9916 | ROC-AUC: 0.9982 || Test Loss: 0.3666 | Acc: 0.9302 | Prec: 0.9792 | Recall: 0.9449 | F1: 0.9617 | ROC-AUC: 0.9258
Max val ROC-AUC: 0.9567
Training with d_model=128, nhead=2, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0644 | Acc: 0.9811 | Prec: 0.9875 | Recall: 0.9717 | F1: 0.9795 | ROC-AUC: 0.9959 || Test Loss: 0.2949 | Acc: 0.9372 | Prec: 0.9819 | Recall: 0.9499 | F1: 0.9656 | ROC-AUC: 0.9431
Max val ROC-AUC: 0.9592
Training with d_model=128, nhead=2, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0429 | Acc: 0.9848 | Prec: 0.9890 | Recall: 0.9782 | F1: 0.9836 | ROC-AUC: 0.9988 || Test Loss: 0.3200 | Acc: 0.9302 | Prec: 0.9817 | Recall: 0.9424 | F1: 0.9616 | ROC-AUC: 0.9415
Max val ROC-AUC: 0.9671
Training with d_model=128, nhead=2, num_layers=2, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0226 | Acc: 0.9936 | Prec: 0.9942 | Recall: 0.9920 | F1: 0.9931 | ROC-AUC: 0.9996 || Test Loss: 0.2628 | Acc: 0.9372 | Prec: 0.9844 | Recall: 0.9474 | F1: 0.9655 | ROC-AUC: 0.9213
Max val ROC-AUC: 0.9671
Training with d_model=128, nhead=2, num_layers=2, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0182 | Acc: 0.9949 | Prec: 0.9956 | Recall: 0.9935 | F1: 0.9945 | ROC-AUC: 0.9996 || Test Loss: 0.2799 | Acc: 0.9349 | Prec: 0.9720 | Recall: 0.9574 | F1: 0.9646 | ROC-AUC: 0.9310
Max val ROC-AUC: 0.9576
Training with d_model=128, nhead=2, num_layers=2, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0173 | Acc: 0.9963 | Prec: 0.9971 | Recall: 0.9949 | F1: 0.9960 | ROC-AUC: 0.9999 || Test Loss: 0.2583 | Acc: 0.9488 | Prec: 0.9796 | Recall: 0.9649 | F1: 0.9722 | ROC-AUC: 0.9116
Max val ROC-AUC: 0.9544
Training with d_model=128, nhead=2, num_layers=2, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0242 | Acc: 0.9939 | Prec: 0.9949 | Recall: 0.9920 | F1: 0.9935 | ROC-AUC: 0.9994 || Test Loss: 0.2438 | Acc: 0.9512 | Prec: 0.9821 | Recall: 0.9649 | F1: 0.9735 | ROC-AUC: 0.9172
Max val ROC-AUC: 0.9627
Training with d_model=128, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0322 | Acc: 0.9896 | Prec: 0.9898 | Recall: 0.9877 | F1: 0.9887 | ROC-AUC: 0.9991 || Test Loss: 0.6610 | Acc: 0.9070 | Prec: 0.9838 | Recall: 0.9148 | F1: 0.9481 | ROC-AUC: 0.9164
Max val ROC-AUC: 0.9480
Training with d_model=128, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0344 | Acc: 0.9889 | Prec: 0.9934 | Recall: 0.9826 | F1: 0.9880 | ROC-AUC: 0.9987 || Test Loss: 0.5257 | Acc: 0.9070 | Prec: 0.9761 | Recall: 0.9223 | F1: 0.9485 | ROC-AUC: 0.9318
Max val ROC-AUC: 0.9504
Training with d_model=128, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0302 | Acc: 0.9909 | Prec: 0.9941 | Recall: 0.9862 | F1: 0.9902 | ROC-AUC: 0.9988 || Test Loss: 0.5130 | Acc: 0.9209 | Prec: 0.9740 | Recall: 0.9398 | F1: 0.9566 | ROC-AUC: 0.9000
Max val ROC-AUC: 0.9584
Training with d_model=128, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0634 | Acc: 0.9821 | Prec: 0.9926 | Recall: 0.9688 | F1: 0.9805 | ROC-AUC: 0.9960 || Test Loss: 0.4168 | Acc: 0.9070 | Prec: 0.9864 | Recall: 0.9123 | F1: 0.9479 | ROC-AUC: 0.9521
Max val ROC-AUC: 0.9609
Training with d_model=128, nhead=2, num_layers=2, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0489 | Acc: 0.9838 | Prec: 0.9854 | Recall: 0.9797 | F1: 0.9825 | ROC-AUC: 0.9981 || Test Loss: 0.2706 | Acc: 0.9395 | Prec: 0.9819 | Recall: 0.9524 | F1: 0.9669 | ROC-AUC: 0.9403
Max val ROC-AUC: 0.9660
Training with d_model=128, nhead=2, num_layers=2, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0440 | Acc: 0.9869 | Prec: 0.9876 | Recall: 0.9840 | F1: 0.9858 | ROC-AUC: 0.9982 || Test Loss: 0.2543 | Acc: 0.9395 | Prec: 0.9819 | Recall: 0.9524 | F1: 0.9669 | ROC-AUC: 0.9374
Max val ROC-AUC: 0.9640
Training with d_model=128, nhead=2, num_layers=2, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0453 | Acc: 0.9879 | Prec: 0.9912 | Recall: 0.9826 | F1: 0.9869 | ROC-AUC: 0.9982 || Test Loss: 0.3223 | Acc: 0.9279 | Prec: 0.9792 | Recall: 0.9424 | F1: 0.9604 | ROC-AUC: 0.9045
Max val ROC-AUC: 0.9516
Training with d_model=128, nhead=2, num_layers=2, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0588 | Acc: 0.9805 | Prec: 0.9839 | Recall: 0.9739 | F1: 0.9788 | ROC-AUC: 0.9974 || Test Loss: 0.3099 | Acc: 0.9233 | Prec: 0.9816 | Recall: 0.9348 | F1: 0.9576 | ROC-AUC: 0.9156
Max val ROC-AUC: 0.9550
Training with d_model=128, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0267 | Acc: 0.9929 | Prec: 0.9956 | Recall: 0.9891 | F1: 0.9923 | ROC-AUC: 0.9988 || Test Loss: 0.3983 | Acc: 0.9256 | Prec: 0.9816 | Recall: 0.9373 | F1: 0.9590 | ROC-AUC: 0.9286
Max val ROC-AUC: 0.9441
Training with d_model=128, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0346 | Acc: 0.9896 | Prec: 0.9912 | Recall: 0.9862 | F1: 0.9887 | ROC-AUC: 0.9988 || Test Loss: 0.4051 | Acc: 0.9326 | Prec: 0.9744 | Recall: 0.9524 | F1: 0.9632 | ROC-AUC: 0.9201
Max val ROC-AUC: 0.9326
Training with d_model=128, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0288 | Acc: 0.9943 | Prec: 0.9956 | Recall: 0.9920 | F1: 0.9938 | ROC-AUC: 0.9985 || Test Loss: 0.2958 | Acc: 0.9395 | Prec: 0.9794 | Recall: 0.9549 | F1: 0.9670 | ROC-AUC: 0.9059
Max val ROC-AUC: 0.9614
Training with d_model=128, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0182 | Acc: 0.9936 | Prec: 0.9927 | Recall: 0.9935 | F1: 0.9931 | ROC-AUC: 0.9998 || Test Loss: 0.3560 | Acc: 0.9349 | Prec: 0.9793 | Recall: 0.9499 | F1: 0.9644 | ROC-AUC: 0.9180
Max val ROC-AUC: 0.9481
Training with d_model=128, nhead=2, num_layers=4, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0158 | Acc: 0.9976 | Prec: 0.9993 | Recall: 0.9956 | F1: 0.9975 | ROC-AUC: 0.9996 || Test Loss: 0.2800 | Acc: 0.9395 | Prec: 0.9819 | Recall: 0.9524 | F1: 0.9669 | ROC-AUC: 0.9294
Max val ROC-AUC: 0.9605
Training with d_model=128, nhead=2, num_layers=4, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0212 | Acc: 0.9943 | Prec: 0.9949 | Recall: 0.9927 | F1: 0.9938 | ROC-AUC: 0.9996 || Test Loss: 0.4229 | Acc: 0.9163 | Prec: 0.9840 | Recall: 0.9248 | F1: 0.9535 | ROC-AUC: 0.9559
Max val ROC-AUC: 0.9559
Training with d_model=128, nhead=2, num_layers=4, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0213 | Acc: 0.9960 | Prec: 0.9978 | Recall: 0.9935 | F1: 0.9956 | ROC-AUC: 0.9989 || Test Loss: 0.3487 | Acc: 0.9372 | Prec: 0.9819 | Recall: 0.9499 | F1: 0.9656 | ROC-AUC: 0.9269
Max val ROC-AUC: 0.9399
Training with d_model=128, nhead=2, num_layers=4, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0116 | Acc: 0.9976 | Prec: 0.9978 | Recall: 0.9971 | F1: 0.9975 | ROC-AUC: 0.9998 || Test Loss: 0.3230 | Acc: 0.9419 | Prec: 0.9770 | Recall: 0.9599 | F1: 0.9684 | ROC-AUC: 0.9229
Max val ROC-AUC: 0.9500
Training with d_model=128, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0512 | Acc: 0.9855 | Prec: 0.9876 | Recall: 0.9811 | F1: 0.9843 | ROC-AUC: 0.9979 || Test Loss: 0.5726 | Acc: 0.9070 | Prec: 0.9812 | Recall: 0.9173 | F1: 0.9482 | ROC-AUC: 0.9060
Max val ROC-AUC: 0.9483
Training with d_model=128, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0594 | Acc: 0.9865 | Prec: 0.9876 | Recall: 0.9833 | F1: 0.9854 | ROC-AUC: 0.9964 || Test Loss: 0.6353 | Acc: 0.8930 | Prec: 0.9862 | Recall: 0.8972 | F1: 0.9396 | ROC-AUC: 0.9292
Max val ROC-AUC: 0.9503
Training with d_model=128, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0784 | Acc: 0.9774 | Prec: 0.9859 | Recall: 0.9651 | F1: 0.9754 | ROC-AUC: 0.9946 || Test Loss: 0.5131 | Acc: 0.8930 | Prec: 0.9809 | Recall: 0.9023 | F1: 0.9399 | ROC-AUC: 0.9152
Max val ROC-AUC: 0.9590
Training with d_model=128, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0418 | Acc: 0.9889 | Prec: 0.9905 | Recall: 0.9855 | F1: 0.9880 | ROC-AUC: 0.9971 || Test Loss: 0.4623 | Acc: 0.9070 | Prec: 0.9761 | Recall: 0.9223 | F1: 0.9485 | ROC-AUC: 0.9180
Max val ROC-AUC: 0.9450
Training with d_model=128, nhead=2, num_layers=4, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0466 | Acc: 0.9865 | Prec: 0.9912 | Recall: 0.9797 | F1: 0.9854 | ROC-AUC: 0.9973 || Test Loss: 0.3441 | Acc: 0.9140 | Prec: 0.9840 | Recall: 0.9223 | F1: 0.9521 | ROC-AUC: 0.9433
Max val ROC-AUC: 0.9588
Training with d_model=128, nhead=2, num_layers=4, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0511 | Acc: 0.9835 | Prec: 0.9890 | Recall: 0.9753 | F1: 0.9821 | ROC-AUC: 0.9978 || Test Loss: 0.3085 | Acc: 0.9372 | Prec: 0.9819 | Recall: 0.9499 | F1: 0.9656 | ROC-AUC: 0.9317
Max val ROC-AUC: 0.9390
Training with d_model=128, nhead=2, num_layers=4, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0453 | Acc: 0.9885 | Prec: 0.9891 | Recall: 0.9862 | F1: 0.9876 | ROC-AUC: 0.9971 || Test Loss: 0.2894 | Acc: 0.9442 | Prec: 0.9845 | Recall: 0.9549 | F1: 0.9695 | ROC-AUC: 0.9187
Max val ROC-AUC: 0.9647
Training with d_model=128, nhead=2, num_layers=4, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0361 | Acc: 0.9906 | Prec: 0.9941 | Recall: 0.9855 | F1: 0.9898 | ROC-AUC: 0.9991 || Test Loss: 0.4087 | Acc: 0.9070 | Prec: 0.9838 | Recall: 0.9148 | F1: 0.9481 | ROC-AUC: 0.9459
Max val ROC-AUC: 0.9521
Training with d_model=128, nhead=2, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0465 | Acc: 0.9879 | Prec: 0.9934 | Recall: 0.9804 | F1: 0.9868 | ROC-AUC: 0.9963 || Test Loss: 0.4500 | Acc: 0.9163 | Prec: 0.9814 | Recall: 0.9273 | F1: 0.9536 | ROC-AUC: 0.9087
Max val ROC-AUC: 0.9527
Training with d_model=128, nhead=2, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0385 | Acc: 0.9875 | Prec: 0.9919 | Recall: 0.9811 | F1: 0.9865 | ROC-AUC: 0.9988 || Test Loss: 0.4035 | Acc: 0.9140 | Prec: 0.9866 | Recall: 0.9198 | F1: 0.9520 | ROC-AUC: 0.9365
Max val ROC-AUC: 0.9677
Training with d_model=128, nhead=2, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0269 | Acc: 0.9923 | Prec: 0.9956 | Recall: 0.9877 | F1: 0.9916 | ROC-AUC: 0.9990 || Test Loss: 0.4073 | Acc: 0.9302 | Prec: 0.9817 | Recall: 0.9424 | F1: 0.9616 | ROC-AUC: 0.9031
Max val ROC-AUC: 0.9500
Training with d_model=128, nhead=2, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0412 | Acc: 0.9899 | Prec: 0.9920 | Recall: 0.9862 | F1: 0.9891 | ROC-AUC: 0.9975 || Test Loss: 0.2944 | Acc: 0.9256 | Prec: 0.9842 | Recall: 0.9348 | F1: 0.9589 | ROC-AUC: 0.9458
Max val ROC-AUC: 0.9586
Training with d_model=128, nhead=2, num_layers=6, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0120 | Acc: 0.9976 | Prec: 0.9971 | Recall: 0.9978 | F1: 0.9975 | ROC-AUC: 0.9999 || Test Loss: 0.3588 | Acc: 0.9279 | Prec: 0.9817 | Recall: 0.9398 | F1: 0.9603 | ROC-AUC: 0.9378
Max val ROC-AUC: 0.9606
Training with d_model=128, nhead=2, num_layers=6, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0195 | Acc: 0.9946 | Prec: 0.9964 | Recall: 0.9920 | F1: 0.9942 | ROC-AUC: 0.9995 || Test Loss: 0.3191 | Acc: 0.9302 | Prec: 0.9843 | Recall: 0.9398 | F1: 0.9615 | ROC-AUC: 0.9560
Max val ROC-AUC: 0.9617
Training with d_model=128, nhead=2, num_layers=6, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0171 | Acc: 0.9946 | Prec: 0.9956 | Recall: 0.9927 | F1: 0.9942 | ROC-AUC: 0.9994 || Test Loss: 0.4167 | Acc: 0.9186 | Prec: 0.9764 | Recall: 0.9348 | F1: 0.9552 | ROC-AUC: 0.9371
Max val ROC-AUC: 0.9529
Training with d_model=128, nhead=2, num_layers=6, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0122 | Acc: 0.9976 | Prec: 0.9985 | Recall: 0.9964 | F1: 0.9975 | ROC-AUC: 0.9997 || Test Loss: 0.2973 | Acc: 0.9442 | Prec: 0.9771 | Recall: 0.9624 | F1: 0.9697 | ROC-AUC: 0.9251
Max val ROC-AUC: 0.9525
Training with d_model=128, nhead=2, num_layers=6, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0556 | Acc: 0.9832 | Prec: 0.9904 | Recall: 0.9731 | F1: 0.9817 | ROC-AUC: 0.9967 || Test Loss: 0.5005 | Acc: 0.8907 | Prec: 0.9889 | Recall: 0.8922 | F1: 0.9381 | ROC-AUC: 0.9465
Max val ROC-AUC: 0.9499
Training with d_model=128, nhead=2, num_layers=6, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0591 | Acc: 0.9859 | Prec: 0.9890 | Recall: 0.9804 | F1: 0.9847 | ROC-AUC: 0.9958 || Test Loss: 0.3998 | Acc: 0.9256 | Prec: 0.9791 | Recall: 0.9398 | F1: 0.9591 | ROC-AUC: 0.9375
Max val ROC-AUC: 0.9538
Training with d_model=128, nhead=2, num_layers=6, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0463 | Acc: 0.9852 | Prec: 0.9926 | Recall: 0.9753 | F1: 0.9839 | ROC-AUC: 0.9980 || Test Loss: 0.4756 | Acc: 0.9186 | Prec: 0.9840 | Recall: 0.9273 | F1: 0.9548 | ROC-AUC: 0.9225
Max val ROC-AUC: 0.9492
Training with d_model=128, nhead=2, num_layers=6, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0524 | Acc: 0.9862 | Prec: 0.9926 | Recall: 0.9775 | F1: 0.9850 | ROC-AUC: 0.9969 || Test Loss: 0.4230 | Acc: 0.9093 | Prec: 0.9865 | Recall: 0.9148 | F1: 0.9493 | ROC-AUC: 0.9250
Max val ROC-AUC: 0.9610
Training with d_model=128, nhead=2, num_layers=6, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0451 | Acc: 0.9879 | Prec: 0.9948 | Recall: 0.9789 | F1: 0.9868 | ROC-AUC: 0.9976 || Test Loss: 0.3167 | Acc: 0.9256 | Prec: 0.9816 | Recall: 0.9373 | F1: 0.9590 | ROC-AUC: 0.9141
Max val ROC-AUC: 0.9515
Training with d_model=128, nhead=2, num_layers=6, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0459 | Acc: 0.9879 | Prec: 0.9912 | Recall: 0.9826 | F1: 0.9869 | ROC-AUC: 0.9971 || Test Loss: 0.3029 | Acc: 0.9209 | Prec: 0.9815 | Recall: 0.9323 | F1: 0.9563 | ROC-AUC: 0.9407
Max val ROC-AUC: 0.9477
Training with d_model=128, nhead=2, num_layers=6, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0391 | Acc: 0.9885 | Prec: 0.9891 | Recall: 0.9862 | F1: 0.9876 | ROC-AUC: 0.9983 || Test Loss: 0.3572 | Acc: 0.9023 | Prec: 0.9837 | Recall: 0.9098 | F1: 0.9453 | ROC-AUC: 0.9357
Max val ROC-AUC: 0.9525
Training with d_model=128, nhead=2, num_layers=6, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0382 | Acc: 0.9912 | Prec: 0.9941 | Recall: 0.9869 | F1: 0.9905 | ROC-AUC: 0.9980 || Test Loss: 0.3492 | Acc: 0.9163 | Prec: 0.9866 | Recall: 0.9223 | F1: 0.9534 | ROC-AUC: 0.9457
Max val ROC-AUC: 0.9544
Training with d_model=128, nhead=4, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0437 | Acc: 0.9848 | Prec: 0.9912 | Recall: 0.9760 | F1: 0.9835 | ROC-AUC: 0.9985 || Test Loss: 0.3892 | Acc: 0.9279 | Prec: 0.9817 | Recall: 0.9398 | F1: 0.9603 | ROC-AUC: 0.9097
Max val ROC-AUC: 0.9455
Training with d_model=128, nhead=4, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0310 | Acc: 0.9919 | Prec: 0.9956 | Recall: 0.9869 | F1: 0.9912 | ROC-AUC: 0.9987 || Test Loss: 0.3931 | Acc: 0.9349 | Prec: 0.9793 | Recall: 0.9499 | F1: 0.9644 | ROC-AUC: 0.9487
Max val ROC-AUC: 0.9561
Training with d_model=128, nhead=4, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0238 | Acc: 0.9936 | Prec: 0.9963 | Recall: 0.9898 | F1: 0.9931 | ROC-AUC: 0.9993 || Test Loss: 0.3830 | Acc: 0.9233 | Prec: 0.9791 | Recall: 0.9373 | F1: 0.9577 | ROC-AUC: 0.9473
Max val ROC-AUC: 0.9555
Training with d_model=128, nhead=4, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0367 | Acc: 0.9875 | Prec: 0.9912 | Recall: 0.9818 | F1: 0.9865 | ROC-AUC: 0.9988 || Test Loss: 0.3159 | Acc: 0.9279 | Prec: 0.9817 | Recall: 0.9398 | F1: 0.9603 | ROC-AUC: 0.9552
Max val ROC-AUC: 0.9552
Training with d_model=128, nhead=4, num_layers=2, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0215 | Acc: 0.9960 | Prec: 0.9971 | Recall: 0.9942 | F1: 0.9956 | ROC-AUC: 0.9989 || Test Loss: 0.2579 | Acc: 0.9395 | Prec: 0.9819 | Recall: 0.9524 | F1: 0.9669 | ROC-AUC: 0.9529
Max val ROC-AUC: 0.9648
Training with d_model=128, nhead=4, num_layers=2, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0214 | Acc: 0.9933 | Prec: 0.9949 | Recall: 0.9906 | F1: 0.9927 | ROC-AUC: 0.9995 || Test Loss: 0.3007 | Acc: 0.9442 | Prec: 0.9795 | Recall: 0.9599 | F1: 0.9696 | ROC-AUC: 0.9116
Max val ROC-AUC: 0.9567
Training with d_model=128, nhead=4, num_layers=2, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0188 | Acc: 0.9939 | Prec: 0.9942 | Recall: 0.9927 | F1: 0.9935 | ROC-AUC: 0.9997 || Test Loss: 0.2808 | Acc: 0.9349 | Prec: 0.9769 | Recall: 0.9524 | F1: 0.9645 | ROC-AUC: 0.9263
Max val ROC-AUC: 0.9540
Training with d_model=128, nhead=4, num_layers=2, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0163 | Acc: 0.9956 | Prec: 0.9971 | Recall: 0.9935 | F1: 0.9953 | ROC-AUC: 0.9994 || Test Loss: 0.2730 | Acc: 0.9372 | Prec: 0.9844 | Recall: 0.9474 | F1: 0.9655 | ROC-AUC: 0.9496
Max val ROC-AUC: 0.9526
Training with d_model=128, nhead=4, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0315 | Acc: 0.9902 | Prec: 0.9941 | Recall: 0.9847 | F1: 0.9894 | ROC-AUC: 0.9990 || Test Loss: 0.4867 | Acc: 0.9000 | Prec: 0.9785 | Recall: 0.9123 | F1: 0.9442 | ROC-AUC: 0.9499
Max val ROC-AUC: 0.9583
Training with d_model=128, nhead=4, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0408 | Acc: 0.9875 | Prec: 0.9926 | Recall: 0.9804 | F1: 0.9865 | ROC-AUC: 0.9987 || Test Loss: 0.4670 | Acc: 0.9186 | Prec: 0.9764 | Recall: 0.9348 | F1: 0.9552 | ROC-AUC: 0.9131
Max val ROC-AUC: 0.9351
Training with d_model=128, nhead=4, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0276 | Acc: 0.9919 | Prec: 0.9942 | Recall: 0.9884 | F1: 0.9913 | ROC-AUC: 0.9993 || Test Loss: 0.5691 | Acc: 0.9047 | Prec: 0.9812 | Recall: 0.9148 | F1: 0.9468 | ROC-AUC: 0.9240
Max val ROC-AUC: 0.9429
Training with d_model=128, nhead=4, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0458 | Acc: 0.9882 | Prec: 0.9891 | Recall: 0.9855 | F1: 0.9873 | ROC-AUC: 0.9974 || Test Loss: 0.4517 | Acc: 0.9093 | Prec: 0.9839 | Recall: 0.9173 | F1: 0.9494 | ROC-AUC: 0.9452
Max val ROC-AUC: 0.9452
Training with d_model=128, nhead=4, num_layers=2, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0516 | Acc: 0.9859 | Prec: 0.9862 | Recall: 0.9833 | F1: 0.9847 | ROC-AUC: 0.9973 || Test Loss: 0.2636 | Acc: 0.9349 | Prec: 0.9818 | Recall: 0.9474 | F1: 0.9643 | ROC-AUC: 0.9130
Max val ROC-AUC: 0.9634
Training with d_model=128, nhead=4, num_layers=2, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0485 | Acc: 0.9848 | Prec: 0.9897 | Recall: 0.9775 | F1: 0.9836 | ROC-AUC: 0.9984 || Test Loss: 0.2761 | Acc: 0.9256 | Prec: 0.9842 | Recall: 0.9348 | F1: 0.9589 | ROC-AUC: 0.9462
Max val ROC-AUC: 0.9561
Training with d_model=128, nhead=4, num_layers=2, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0587 | Acc: 0.9805 | Prec: 0.9824 | Recall: 0.9753 | F1: 0.9789 | ROC-AUC: 0.9974 || Test Loss: 0.3102 | Acc: 0.9302 | Prec: 0.9843 | Recall: 0.9398 | F1: 0.9615 | ROC-AUC: 0.9317
Max val ROC-AUC: 0.9611
Training with d_model=128, nhead=4, num_layers=2, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0564 | Acc: 0.9818 | Prec: 0.9875 | Recall: 0.9731 | F1: 0.9802 | ROC-AUC: 0.9974 || Test Loss: 0.2622 | Acc: 0.9256 | Prec: 0.9816 | Recall: 0.9373 | F1: 0.9590 | ROC-AUC: 0.9267
Max val ROC-AUC: 0.9645
Training with d_model=128, nhead=4, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0317 | Acc: 0.9889 | Prec: 0.9891 | Recall: 0.9869 | F1: 0.9880 | ROC-AUC: 0.9989 || Test Loss: 0.5424 | Acc: 0.8930 | Prec: 0.9809 | Recall: 0.9023 | F1: 0.9399 | ROC-AUC: 0.9128
Max val ROC-AUC: 0.9473
Training with d_model=128, nhead=4, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0309 | Acc: 0.9919 | Prec: 0.9942 | Recall: 0.9884 | F1: 0.9913 | ROC-AUC: 0.9983 || Test Loss: 0.2743 | Acc: 0.9349 | Prec: 0.9869 | Recall: 0.9424 | F1: 0.9641 | ROC-AUC: 0.9594
Max val ROC-AUC: 0.9594
Training with d_model=128, nhead=4, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0329 | Acc: 0.9912 | Prec: 0.9941 | Recall: 0.9869 | F1: 0.9905 | ROC-AUC: 0.9991 || Test Loss: 0.2915 | Acc: 0.9302 | Prec: 0.9743 | Recall: 0.9499 | F1: 0.9619 | ROC-AUC: 0.9341
Max val ROC-AUC: 0.9513
Training with d_model=128, nhead=4, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0327 | Acc: 0.9923 | Prec: 0.9934 | Recall: 0.9898 | F1: 0.9916 | ROC-AUC: 0.9988 || Test Loss: 0.3717 | Acc: 0.9256 | Prec: 0.9842 | Recall: 0.9348 | F1: 0.9589 | ROC-AUC: 0.9339
Max val ROC-AUC: 0.9543
Training with d_model=128, nhead=4, num_layers=4, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0213 | Acc: 0.9949 | Prec: 0.9964 | Recall: 0.9927 | F1: 0.9945 | ROC-AUC: 0.9989 || Test Loss: 0.3743 | Acc: 0.9140 | Prec: 0.9866 | Recall: 0.9198 | F1: 0.9520 | ROC-AUC: 0.9387
Max val ROC-AUC: 0.9723
Training with d_model=128, nhead=4, num_layers=4, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0286 | Acc: 0.9909 | Prec: 0.9920 | Recall: 0.9884 | F1: 0.9902 | ROC-AUC: 0.9993 || Test Loss: 0.4477 | Acc: 0.9070 | Prec: 0.9812 | Recall: 0.9173 | F1: 0.9482 | ROC-AUC: 0.9171
Max val ROC-AUC: 0.9603
Training with d_model=128, nhead=4, num_layers=4, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0271 | Acc: 0.9919 | Prec: 0.9942 | Recall: 0.9884 | F1: 0.9913 | ROC-AUC: 0.9991 || Test Loss: 0.2943 | Acc: 0.9419 | Prec: 0.9795 | Recall: 0.9574 | F1: 0.9683 | ROC-AUC: 0.9084
Max val ROC-AUC: 0.9542
Training with d_model=128, nhead=4, num_layers=4, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0181 | Acc: 0.9973 | Prec: 0.9978 | Recall: 0.9964 | F1: 0.9971 | ROC-AUC: 0.9996 || Test Loss: 0.3942 | Acc: 0.9279 | Prec: 0.9817 | Recall: 0.9398 | F1: 0.9603 | ROC-AUC: 0.9197
Max val ROC-AUC: 0.9426
Training with d_model=128, nhead=4, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0383 | Acc: 0.9892 | Prec: 0.9912 | Recall: 0.9855 | F1: 0.9883 | ROC-AUC: 0.9984 || Test Loss: 0.3635 | Acc: 0.9372 | Prec: 0.9794 | Recall: 0.9524 | F1: 0.9657 | ROC-AUC: 0.8984
Max val ROC-AUC: 0.9567
Training with d_model=128, nhead=4, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0576 | Acc: 0.9811 | Prec: 0.9911 | Recall: 0.9680 | F1: 0.9794 | ROC-AUC: 0.9968 || Test Loss: 0.3227 | Acc: 0.9256 | Prec: 0.9867 | Recall: 0.9323 | F1: 0.9588 | ROC-AUC: 0.9599
Max val ROC-AUC: 0.9599
Training with d_model=128, nhead=4, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0530 | Acc: 0.9852 | Prec: 0.9926 | Recall: 0.9753 | F1: 0.9839 | ROC-AUC: 0.9971 || Test Loss: 0.4218 | Acc: 0.9116 | Prec: 0.9813 | Recall: 0.9223 | F1: 0.9509 | ROC-AUC: 0.9020
Max val ROC-AUC: 0.9459
Training with d_model=128, nhead=4, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0430 | Acc: 0.9865 | Prec: 0.9848 | Recall: 0.9862 | F1: 0.9855 | ROC-AUC: 0.9983 || Test Loss: 0.4444 | Acc: 0.9233 | Prec: 0.9716 | Recall: 0.9449 | F1: 0.9581 | ROC-AUC: 0.9295
Max val ROC-AUC: 0.9618
Training with d_model=128, nhead=4, num_layers=4, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0337 | Acc: 0.9906 | Prec: 0.9956 | Recall: 0.9840 | F1: 0.9898 | ROC-AUC: 0.9986 || Test Loss: 0.2816 | Acc: 0.9465 | Prec: 0.9821 | Recall: 0.9599 | F1: 0.9708 | ROC-AUC: 0.9297
Max val ROC-AUC: 0.9540
Training with d_model=128, nhead=4, num_layers=4, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0509 | Acc: 0.9875 | Prec: 0.9934 | Recall: 0.9797 | F1: 0.9865 | ROC-AUC: 0.9965 || Test Loss: 0.3112 | Acc: 0.9349 | Prec: 0.9843 | Recall: 0.9449 | F1: 0.9642 | ROC-AUC: 0.9489
Max val ROC-AUC: 0.9569
Training with d_model=128, nhead=4, num_layers=4, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0447 | Acc: 0.9855 | Prec: 0.9904 | Recall: 0.9782 | F1: 0.9843 | ROC-AUC: 0.9982 || Test Loss: 0.3835 | Acc: 0.8977 | Prec: 0.9863 | Recall: 0.9023 | F1: 0.9424 | ROC-AUC: 0.9629
Max val ROC-AUC: 0.9629
Training with d_model=128, nhead=4, num_layers=4, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0379 | Acc: 0.9892 | Prec: 0.9912 | Recall: 0.9855 | F1: 0.9883 | ROC-AUC: 0.9989 || Test Loss: 0.3707 | Acc: 0.9116 | Prec: 0.9839 | Recall: 0.9198 | F1: 0.9508 | ROC-AUC: 0.9512
Max val ROC-AUC: 0.9605
Training with d_model=128, nhead=4, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0242 | Acc: 0.9946 | Prec: 0.9949 | Recall: 0.9935 | F1: 0.9942 | ROC-AUC: 0.9993 || Test Loss: 0.4290 | Acc: 0.9279 | Prec: 0.9817 | Recall: 0.9398 | F1: 0.9603 | ROC-AUC: 0.9485
Max val ROC-AUC: 0.9605
Training with d_model=128, nhead=4, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0577 | Acc: 0.9848 | Prec: 0.9861 | Recall: 0.9811 | F1: 0.9836 | ROC-AUC: 0.9970 || Test Loss: 0.2622 | Acc: 0.9163 | Prec: 0.9764 | Recall: 0.9323 | F1: 0.9538 | ROC-AUC: 0.8935
Max val ROC-AUC: 0.9572
Training with d_model=128, nhead=4, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0421 | Acc: 0.9875 | Prec: 0.9912 | Recall: 0.9818 | F1: 0.9865 | ROC-AUC: 0.9982 || Test Loss: 0.3484 | Acc: 0.9140 | Prec: 0.9788 | Recall: 0.9273 | F1: 0.9524 | ROC-AUC: 0.9516
Max val ROC-AUC: 0.9516
Training with d_model=128, nhead=4, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0417 | Acc: 0.9885 | Prec: 0.9905 | Recall: 0.9847 | F1: 0.9876 | ROC-AUC: 0.9982 || Test Loss: 0.3654 | Acc: 0.8814 | Prec: 0.9833 | Recall: 0.8872 | F1: 0.9328 | ROC-AUC: 0.9310
Max val ROC-AUC: 0.9491
Training with d_model=128, nhead=4, num_layers=6, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0318 | Acc: 0.9912 | Prec: 0.9941 | Recall: 0.9869 | F1: 0.9905 | ROC-AUC: 0.9983 || Test Loss: 0.2870 | Acc: 0.9488 | Prec: 0.9772 | Recall: 0.9674 | F1: 0.9723 | ROC-AUC: 0.9488
Max val ROC-AUC: 0.9628
Training with d_model=128, nhead=4, num_layers=6, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0136 | Acc: 0.9980 | Prec: 1.0000 | Recall: 0.9956 | F1: 0.9978 | ROC-AUC: 0.9996 || Test Loss: 0.2969 | Acc: 0.9465 | Prec: 0.9845 | Recall: 0.9574 | F1: 0.9708 | ROC-AUC: 0.9080
Max val ROC-AUC: 0.9541
Training with d_model=128, nhead=4, num_layers=6, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0433 | Acc: 0.9872 | Prec: 0.9912 | Recall: 0.9811 | F1: 0.9861 | ROC-AUC: 0.9970 || Test Loss: 0.3435 | Acc: 0.9186 | Prec: 0.9840 | Recall: 0.9273 | F1: 0.9548 | ROC-AUC: 0.9390
Max val ROC-AUC: 0.9576
Training with d_model=128, nhead=4, num_layers=6, dropout=0.1, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0130 | Acc: 0.9973 | Prec: 0.9993 | Recall: 0.9949 | F1: 0.9971 | ROC-AUC: 0.9998 || Test Loss: 0.3459 | Acc: 0.9302 | Prec: 0.9817 | Recall: 0.9424 | F1: 0.9616 | ROC-AUC: 0.9522
Max val ROC-AUC: 0.9586
Training with d_model=128, nhead=4, num_layers=6, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0523 | Acc: 0.9852 | Prec: 0.9955 | Recall: 0.9724 | F1: 0.9838 | ROC-AUC: 0.9967 || Test Loss: 0.3325 | Acc: 0.9326 | Prec: 0.9868 | Recall: 0.9398 | F1: 0.9628 | ROC-AUC: 0.9428
Max val ROC-AUC: 0.9480
Training with d_model=128, nhead=4, num_layers=6, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0527 | Acc: 0.9855 | Prec: 0.9955 | Recall: 0.9731 | F1: 0.9842 | ROC-AUC: 0.9967 || Test Loss: 0.4291 | Acc: 0.9140 | Prec: 0.9814 | Recall: 0.9248 | F1: 0.9523 | ROC-AUC: 0.9323
Max val ROC-AUC: 0.9396
Training with d_model=128, nhead=4, num_layers=6, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0523 | Acc: 0.9818 | Prec: 0.9846 | Recall: 0.9760 | F1: 0.9803 | ROC-AUC: 0.9976 || Test Loss: 0.3798 | Acc: 0.9209 | Prec: 0.9815 | Recall: 0.9323 | F1: 0.9563 | ROC-AUC: 0.9251
Max val ROC-AUC: 0.9553
Training with d_model=128, nhead=4, num_layers=6, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0658 | Acc: 0.9774 | Prec: 0.9903 | Recall: 0.9608 | F1: 0.9753 | ROC-AUC: 0.9964 || Test Loss: 0.4778 | Acc: 0.8698 | Prec: 0.9886 | Recall: 0.8697 | F1: 0.9253 | ROC-AUC: 0.9238
Max val ROC-AUC: 0.9436
Training with d_model=128, nhead=4, num_layers=6, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0565 | Acc: 0.9825 | Prec: 0.9882 | Recall: 0.9739 | F1: 0.9810 | ROC-AUC: 0.9975 || Test Loss: 0.3058 | Acc: 0.9302 | Prec: 0.9843 | Recall: 0.9398 | F1: 0.9615 | ROC-AUC: 0.9436
Max val ROC-AUC: 0.9586
Training with d_model=128, nhead=4, num_layers=6, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0510 | Acc: 0.9848 | Prec: 0.9904 | Recall: 0.9768 | F1: 0.9835 | ROC-AUC: 0.9977 || Test Loss: 0.3717 | Acc: 0.9163 | Prec: 0.9840 | Recall: 0.9248 | F1: 0.9535 | ROC-AUC: 0.9365
Max val ROC-AUC: 0.9698
Training with d_model=128, nhead=4, num_layers=6, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0571 | Acc: 0.9848 | Prec: 0.9868 | Recall: 0.9804 | F1: 0.9836 | ROC-AUC: 0.9958 || Test Loss: 0.3753 | Acc: 0.9186 | Prec: 0.9840 | Recall: 0.9273 | F1: 0.9548 | ROC-AUC: 0.9210
Max val ROC-AUC: 0.9577
Training with d_model=128, nhead=4, num_layers=6, dropout=0.3, lr=0.0001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0337 | Acc: 0.9906 | Prec: 0.9941 | Recall: 0.9855 | F1: 0.9898 | ROC-AUC: 0.9985 || Test Loss: 0.3949 | Acc: 0.9186 | Prec: 0.9866 | Recall: 0.9248 | F1: 0.9547 | ROC-AUC: 0.9394
Max val ROC-AUC: 0.9550

Best params: {'d_model': 128, 'nhead': 4, 'num_layers': 4, 'dropout': 0.1, 'lr': 0.0001, 'weight_decay': 0, 'batch_size': 16}
Best validation ROC-AUC: 0.9723


### Best params: 
Best params: {'d_model': 128, 'nhead': 4, 'num_layers': 4, 'dropout': 0.1, 'lr': 0.0001, 'weight_decay': 0, 'batch_size': 16}
Best validation ROC-AUC: 0.9723