# Import Libraries

In [7]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns

import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from torchvision import datasets, transforms
from typing import Union
import random
from pathlib import Path

## Import Models and Data Loaders

In [8]:
from Return_dataloader import pass_dataloader
from Transformer_model import TabTransformer
from Plot_Accuracies import plot_loss_curves
from engine import train

# Device Agnostics

In [9]:
# Clear Cache
torch.cuda.empty_cache()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Available device is: {device}")

Available device is: cpu


### Data Loader

In [10]:
train_dataloader, test_dataloader = pass_dataloader()

### Parameter Tune Function

In [11]:
from copy import deepcopy

def hyperparameter_tuning(param_grid,
                          train_dataloader,
                          test_dataloader,
                          device,
                          epochs=50):

    best_val_score = -float('inf')
    best_params = None
    best_results = None

    for params in param_grid:
        print(f"Training with d_model={params['d_model']}, nhead={params['nhead']}, "
              f"num_layers={params['num_layers']}, dropout={params['dropout']}, lr={params['lr']}")

        # Create model with given hyperparameters
        model = TabTransformer(input_dim=100,
                               d_model=params['d_model'],
                               nhead=params['nhead'],
                               num_layers=params['num_layers'],
                               dropout=params['dropout']).to(device)

        loss_fn = nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=params['lr'],
                                     weight_decay=1e-4)

        # Train model and get results dictionary
        results = train(model=model,
                        train_dataloader=train_dataloader,
                        test_dataloader=test_dataloader,
                        optimizer=optimizer,
                        loss_fn=loss_fn,
                        device=device,
                        epochs=epochs)

        # Pick best validation score (ROC-AUC here) from all epochs
        max_val_roc_auc = max(results['test_roc_auc'])

        print(f"Max val ROC-AUC: {max_val_roc_auc:.4f}")

        if max_val_roc_auc > best_val_score:
            best_val_score = max_val_roc_auc
            best_params = params
            best_results = deepcopy(results)

    print(f"\nBest params: {best_params}")
    print(f"Best validation ROC-AUC: {best_val_score:.4f}")

    return best_params, best_results


## Hyperparameter Tuning

In [None]:
from itertools import product
NUM_EPOCHS = 10


param_grid = {
    "d_model": [64, 128, 25],
    "nhead": [2, 4, 8, 16],
    "num_layers": [i * 2 for i in range(1,51)],
    "dropout": [0.0, 0.1 , 0.3 , 0.5],
    "lr": [0.1, 0.01, 0.001],
    "weight_decay": [0, 1e-4],
    "batch_size": [4, 8, 16, 32, 64]
}

param_grid = [
    dict(zip(param_grid.keys(), values))
    for values in product(*param_grid.values())
]
best_params, best_results = hyperparameter_tuning(
    param_grid,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    device=device,
    epochs=NUM_EPOCHS
)

Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.1




  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1702 | Acc: 0.9434 | Prec: 0.9549 | Recall: 0.9216 | F1: 0.9379 | ROC-AUC: 0.9691 || Test Loss: 0.2882 | Acc: 0.9047 | Prec: 0.9812 | Recall: 0.9148 | F1: 0.9468 | ROC-AUC: 0.9220
Max val ROC-AUC: 0.9276
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1830 | Acc: 0.9400 | Prec: 0.9622 | Recall: 0.9063 | F1: 0.9334 | ROC-AUC: 0.9783 || Test Loss: 0.2551 | Acc: 0.9372 | Prec: 0.9869 | Recall: 0.9449 | F1: 0.9654 | ROC-AUC: 0.9601
Max val ROC-AUC: 0.9620
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1418 | Acc: 0.9569 | Prec: 0.9664 | Recall: 0.9397 | F1: 0.9529 | ROC-AUC: 0.9866 || Test Loss: 0.4261 | Acc: 0.8698 | Prec: 0.9942 | Recall: 0.8647 | F1: 0.9249 | ROC-AUC: 0.9546
Max val ROC-AUC: 0.9690
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1755 | Acc: 0.9424 | Prec: 0.9561 | Recall: 0.9179 | F1: 0.9366 | ROC-AUC: 0.9783 || Test Loss: 0.3036 | Acc: 0.8884 | Prec: 0.9835 | Recall: 0.8947 | F1: 0.9370 | ROC-AUC: 0.9295
Max val ROC-AUC: 0.9302
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1739 | Acc: 0.9438 | Prec: 0.9727 | Recall: 0.9041 | F1: 0.9371 | ROC-AUC: 0.9701 || Test Loss: 0.4443 | Acc: 0.8442 | Prec: 0.9826 | Recall: 0.8471 | F1: 0.9098 | ROC-AUC: 0.8909
Max val ROC-AUC: 0.9507
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1390 | Acc: 0.9603 | Prec: 0.9744 | Recall: 0.9390 | F1: 0.9564 | ROC-AUC: 0.9776 || Test Loss: 0.8087 | Acc: 0.7116 | Prec: 0.9928 | Recall: 0.6942 | F1: 0.8171 | ROC-AUC: 0.9337
Max val ROC-AUC: 0.9431
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2186 | Acc: 0.9299 | Prec: 0.9570 | Recall: 0.8889 | F1: 0.9217 | ROC-AUC: 0.9618 || Test Loss: 0.2795 | Acc: 0.9233 | Prec: 0.9841 | Recall: 0.9323 | F1: 0.9575 | ROC-AUC: 0.9053
Max val ROC-AUC: 0.9517
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1539 | Acc: 0.9518 | Prec: 0.9710 | Recall: 0.9237 | F1: 0.9468 | ROC-AUC: 0.9786 || Test Loss: 0.4201 | Acc: 0.8395 | Prec: 0.9911 | Recall: 0.8346 | F1: 0.9061 | ROC-AUC: 0.9432
Max val ROC-AUC: 0.9743
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1818 | Acc: 0.9431 | Prec: 0.9726 | Recall: 0.9027 | F1: 0.9363 | ROC-AUC: 0.9698 || Test Loss: 0.4845 | Acc: 0.8791 | Prec: 0.9806 | Recall: 0.8872 | F1: 0.9316 | ROC-AUC: 0.8995
Max val ROC-AUC: 0.9562
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1580 | Acc: 0.9502 | Prec: 0.9694 | Recall: 0.9216 | F1: 0.9449 | ROC-AUC: 0.9775 || Test Loss: 0.3985 | Acc: 0.8814 | Prec: 0.9807 | Recall: 0.8897 | F1: 0.9330 | ROC-AUC: 0.8773
Max val ROC-AUC: 0.9600
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0406 | Acc: 0.9879 | Prec: 0.9862 | Recall: 0.9877 | F1: 0.9869 | ROC-AUC: 0.9986 || Test Loss: 0.6322 | Acc: 0.8651 | Prec: 0.9858 | Recall: 0.8672 | F1: 0.9227 | ROC-AUC: 0.9273
Max val ROC-AUC: 0.9466
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0526 | Acc: 0.9842 | Prec: 0.9812 | Recall: 0.9847 | F1: 0.9830 | ROC-AUC: 0.9973 || Test Loss: 0.3243 | Acc: 0.9163 | Prec: 0.9919 | Recall: 0.9173 | F1: 0.9531 | ROC-AUC: 0.9613
Max val ROC-AUC: 0.9639
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0496 | Acc: 0.9865 | Prec: 0.9897 | Recall: 0.9811 | F1: 0.9854 | ROC-AUC: 0.9973 || Test Loss: 0.4243 | Acc: 0.9116 | Prec: 0.9892 | Recall: 0.9148 | F1: 0.9505 | ROC-AUC: 0.9552
Max val ROC-AUC: 0.9758
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0336 | Acc: 0.9899 | Prec: 0.9891 | Recall: 0.9891 | F1: 0.9891 | ROC-AUC: 0.9992 || Test Loss: 0.2670 | Acc: 0.9442 | Prec: 0.9820 | Recall: 0.9574 | F1: 0.9695 | ROC-AUC: 0.9171
Max val ROC-AUC: 0.9335
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0399 | Acc: 0.9869 | Prec: 0.9876 | Recall: 0.9840 | F1: 0.9858 | ROC-AUC: 0.9984 || Test Loss: 0.3978 | Acc: 0.9279 | Prec: 0.9920 | Recall: 0.9298 | F1: 0.9599 | ROC-AUC: 0.9626
Max val ROC-AUC: 0.9724
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0545 | Acc: 0.9845 | Prec: 0.9904 | Recall: 0.9760 | F1: 0.9832 | ROC-AUC: 0.9956 || Test Loss: 0.4507 | Acc: 0.9116 | Prec: 0.9918 | Recall: 0.9123 | F1: 0.9504 | ROC-AUC: 0.9573
Max val ROC-AUC: 0.9711
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0334 | Acc: 0.9889 | Prec: 0.9905 | Recall: 0.9855 | F1: 0.9880 | ROC-AUC: 0.9991 || Test Loss: 0.3684 | Acc: 0.9093 | Prec: 0.9891 | Recall: 0.9123 | F1: 0.9492 | ROC-AUC: 0.9542
Max val ROC-AUC: 0.9764
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0483 | Acc: 0.9848 | Prec: 0.9861 | Recall: 0.9811 | F1: 0.9836 | ROC-AUC: 0.9981 || Test Loss: 0.3822 | Acc: 0.9070 | Prec: 0.9918 | Recall: 0.9073 | F1: 0.9476 | ROC-AUC: 0.9464
Max val ROC-AUC: 0.9698
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0799 | Acc: 0.9717 | Prec: 0.9879 | Recall: 0.9506 | F1: 0.9689 | ROC-AUC: 0.9939 || Test Loss: 0.3408 | Acc: 0.8767 | Prec: 0.9943 | Recall: 0.8722 | F1: 0.9292 | ROC-AUC: 0.9584
Max val ROC-AUC: 0.9592
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0470 | Acc: 0.9845 | Prec: 0.9819 | Recall: 0.9847 | F1: 0.9833 | ROC-AUC: 0.9980 || Test Loss: 0.2475 | Acc: 0.9209 | Prec: 0.9867 | Recall: 0.9273 | F1: 0.9561 | ROC-AUC: 0.9681
Max val ROC-AUC: 0.9750
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0242 | Acc: 0.9923 | Prec: 0.9934 | Recall: 0.9898 | F1: 0.9916 | ROC-AUC: 0.9996 || Test Loss: 0.2944 | Acc: 0.9302 | Prec: 0.9743 | Recall: 0.9499 | F1: 0.9619 | ROC-AUC: 0.9149
Max val ROC-AUC: 0.9440
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0121 | Acc: 0.9963 | Prec: 0.9964 | Recall: 0.9956 | F1: 0.9960 | ROC-AUC: 0.9998 || Test Loss: 0.4405 | Acc: 0.9140 | Prec: 0.9840 | Recall: 0.9223 | F1: 0.9521 | ROC-AUC: 0.9416
Max val ROC-AUC: 0.9515
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0299 | Acc: 0.9919 | Prec: 0.9906 | Recall: 0.9920 | F1: 0.9913 | ROC-AUC: 0.9989 || Test Loss: 0.2912 | Acc: 0.9349 | Prec: 0.9818 | Recall: 0.9474 | F1: 0.9643 | ROC-AUC: 0.9538
Max val ROC-AUC: 0.9656
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0150 | Acc: 0.9963 | Prec: 0.9964 | Recall: 0.9956 | F1: 0.9960 | ROC-AUC: 0.9998 || Test Loss: 0.4879 | Acc: 0.9233 | Prec: 0.9841 | Recall: 0.9323 | F1: 0.9575 | ROC-AUC: 0.9240
Max val ROC-AUC: 0.9421
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0225 | Acc: 0.9939 | Prec: 0.9956 | Recall: 0.9913 | F1: 0.9934 | ROC-AUC: 0.9994 || Test Loss: 0.4022 | Acc: 0.9256 | Prec: 0.9791 | Recall: 0.9398 | F1: 0.9591 | ROC-AUC: 0.9310
Max val ROC-AUC: 0.9445
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0202 | Acc: 0.9946 | Prec: 0.9935 | Recall: 0.9949 | F1: 0.9942 | ROC-AUC: 0.9993 || Test Loss: 0.3058 | Acc: 0.9279 | Prec: 0.9894 | Recall: 0.9323 | F1: 0.9600 | ROC-AUC: 0.9486
Max val ROC-AUC: 0.9765
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0087 | Acc: 0.9987 | Prec: 0.9985 | Recall: 0.9985 | F1: 0.9985 | ROC-AUC: 0.9995 || Test Loss: 0.3513 | Acc: 0.9442 | Prec: 0.9820 | Recall: 0.9574 | F1: 0.9695 | ROC-AUC: 0.9238
Max val ROC-AUC: 0.9548
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0256 | Acc: 0.9906 | Prec: 0.9927 | Recall: 0.9869 | F1: 0.9898 | ROC-AUC: 0.9995 || Test Loss: 0.3533 | Acc: 0.9233 | Prec: 0.9791 | Recall: 0.9373 | F1: 0.9577 | ROC-AUC: 0.9320
Max val ROC-AUC: 0.9588
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0430 | Acc: 0.9865 | Prec: 0.9834 | Recall: 0.9877 | F1: 0.9855 | ROC-AUC: 0.9979 || Test Loss: 0.3902 | Acc: 0.9233 | Prec: 0.9841 | Recall: 0.9323 | F1: 0.9575 | ROC-AUC: 0.9602
Max val ROC-AUC: 0.9602
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0240 | Acc: 0.9933 | Prec: 0.9963 | Recall: 0.9891 | F1: 0.9927 | ROC-AUC: 0.9989 || Test Loss: 0.3633 | Acc: 0.9349 | Prec: 0.9720 | Recall: 0.9574 | F1: 0.9646 | ROC-AUC: 0.9204
Max val ROC-AUC: 0.9558
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2987 | Acc: 0.8963 | Prec: 0.9465 | Recall: 0.8228 | F1: 0.8803 | ROC-AUC: 0.9401 || Test Loss: 0.3389 | Acc: 0.8395 | Prec: 0.9825 | Recall: 0.8421 | F1: 0.9069 | ROC-AUC: 0.8800
Max val ROC-AUC: 0.9603
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3700 | Acc: 0.8545 | Prec: 0.8783 | Recall: 0.7967 | F1: 0.8355 | ROC-AUC: 0.9290 || Test Loss: 0.2264 | Acc: 0.9163 | Prec: 0.9814 | Recall: 0.9273 | F1: 0.9536 | ROC-AUC: 0.9325
Max val ROC-AUC: 0.9325
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2548 | Acc: 0.9074 | Prec: 0.9561 | Recall: 0.8388 | F1: 0.8936 | ROC-AUC: 0.9491 || Test Loss: 0.4484 | Acc: 0.8465 | Prec: 0.9912 | Recall: 0.8421 | F1: 0.9106 | ROC-AUC: 0.9028
Max val ROC-AUC: 0.9547
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2343 | Acc: 0.9155 | Prec: 0.9364 | Recall: 0.8773 | F1: 0.9059 | ROC-AUC: 0.9659 || Test Loss: 0.3882 | Acc: 0.7930 | Prec: 0.9905 | Recall: 0.7845 | F1: 0.8755 | ROC-AUC: 0.9098
Max val ROC-AUC: 0.9601
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2486 | Acc: 0.9225 | Prec: 0.9220 | Recall: 0.9099 | F1: 0.9159 | ROC-AUC: 0.9597 || Test Loss: 0.5585 | Acc: 0.8023 | Prec: 0.9906 | Recall: 0.7945 | F1: 0.8818 | ROC-AUC: 0.9243
Max val ROC-AUC: 0.9531
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3551 | Acc: 0.8575 | Prec: 0.8816 | Recall: 0.8003 | F1: 0.8390 | ROC-AUC: 0.9279 || Test Loss: 0.4075 | Acc: 0.8000 | Prec: 0.9937 | Recall: 0.7895 | F1: 0.8799 | ROC-AUC: 0.9326
Max val ROC-AUC: 0.9390
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2410 | Acc: 0.9205 | Prec: 0.9496 | Recall: 0.8751 | F1: 0.9108 | ROC-AUC: 0.9484 || Test Loss: 1.9476 | Acc: 0.7930 | Prec: 0.9936 | Recall: 0.7820 | F1: 0.8752 | ROC-AUC: 0.8866
Max val ROC-AUC: 0.9337
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3488 | Acc: 0.8892 | Prec: 0.8738 | Recall: 0.8896 | F1: 0.8816 | ROC-AUC: 0.9207 || Test Loss: 0.3662 | Acc: 0.8674 | Prec: 0.9914 | Recall: 0.8647 | F1: 0.9237 | ROC-AUC: 0.9024
Max val ROC-AUC: 0.9525
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3004 | Acc: 0.9091 | Prec: 0.9140 | Recall: 0.8874 | F1: 0.9005 | ROC-AUC: 0.9426 || Test Loss: 0.4304 | Acc: 0.8930 | Prec: 0.9732 | Recall: 0.9098 | F1: 0.9404 | ROC-AUC: 0.8085
Max val ROC-AUC: 0.9570
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2937 | Acc: 0.9000 | Prec: 0.9463 | Recall: 0.8315 | F1: 0.8852 | ROC-AUC: 0.9367 || Test Loss: 1.0761 | Acc: 0.8442 | Prec: 0.9911 | Recall: 0.8396 | F1: 0.9091 | ROC-AUC: 0.9020
Max val ROC-AUC: 0.9302
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1398 | Acc: 0.9555 | Prec: 0.9698 | Recall: 0.9332 | F1: 0.9511 | ROC-AUC: 0.9843 || Test Loss: 0.3751 | Acc: 0.9047 | Prec: 0.9864 | Recall: 0.9098 | F1: 0.9465 | ROC-AUC: 0.9155
Max val ROC-AUC: 0.9340
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1068 | Acc: 0.9633 | Prec: 0.9767 | Recall: 0.9434 | F1: 0.9597 | ROC-AUC: 0.9907 || Test Loss: 0.4058 | Acc: 0.8930 | Prec: 0.9916 | Recall: 0.8922 | F1: 0.9393 | ROC-AUC: 0.9481
Max val ROC-AUC: 0.9533
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1639 | Acc: 0.9481 | Prec: 0.9500 | Recall: 0.9375 | F1: 0.9437 | ROC-AUC: 0.9820 || Test Loss: 0.3019 | Acc: 0.9256 | Prec: 0.9742 | Recall: 0.9449 | F1: 0.9593 | ROC-AUC: 0.9095
Max val ROC-AUC: 0.9577
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1020 | Acc: 0.9670 | Prec: 0.9848 | Recall: 0.9434 | F1: 0.9636 | ROC-AUC: 0.9912 || Test Loss: 0.3841 | Acc: 0.9093 | Prec: 0.9839 | Recall: 0.9173 | F1: 0.9494 | ROC-AUC: 0.9224
Max val ROC-AUC: 0.9561
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1830 | Acc: 0.9353 | Prec: 0.9533 | Recall: 0.9049 | F1: 0.9285 | ROC-AUC: 0.9759 || Test Loss: 0.1982 | Acc: 0.9209 | Prec: 0.9765 | Recall: 0.9373 | F1: 0.9565 | ROC-AUC: 0.9379
Max val ROC-AUC: 0.9728
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1128 | Acc: 0.9616 | Prec: 0.9802 | Recall: 0.9361 | F1: 0.9577 | ROC-AUC: 0.9896 || Test Loss: 0.4068 | Acc: 0.8744 | Prec: 0.9887 | Recall: 0.8747 | F1: 0.9282 | ROC-AUC: 0.9136
Max val ROC-AUC: 0.9428
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1080 | Acc: 0.9609 | Prec: 0.9824 | Recall: 0.9325 | F1: 0.9568 | ROC-AUC: 0.9900 || Test Loss: 0.4238 | Acc: 0.8791 | Prec: 0.9860 | Recall: 0.8822 | F1: 0.9312 | ROC-AUC: 0.9397
Max val ROC-AUC: 0.9580
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1069 | Acc: 0.9630 | Prec: 0.9840 | Recall: 0.9354 | F1: 0.9590 | ROC-AUC: 0.9904 || Test Loss: 0.3203 | Acc: 0.9093 | Prec: 0.9813 | Recall: 0.9198 | F1: 0.9495 | ROC-AUC: 0.9445
Max val ROC-AUC: 0.9517
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1256 | Acc: 0.9495 | Prec: 0.9782 | Recall: 0.9114 | F1: 0.9436 | ROC-AUC: 0.9879 || Test Loss: 0.4048 | Acc: 0.8907 | Prec: 0.9731 | Recall: 0.9073 | F1: 0.9390 | ROC-AUC: 0.8432
Max val ROC-AUC: 0.9479
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1176 | Acc: 0.9596 | Prec: 0.9743 | Recall: 0.9375 | F1: 0.9556 | ROC-AUC: 0.9891 || Test Loss: 0.3521 | Acc: 0.9349 | Prec: 0.9843 | Recall: 0.9449 | F1: 0.9642 | ROC-AUC: 0.9354
Max val ROC-AUC: 0.9533
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0309 | Acc: 0.9929 | Prec: 0.9920 | Recall: 0.9927 | F1: 0.9924 | ROC-AUC: 0.9990 || Test Loss: 0.3549 | Acc: 0.9116 | Prec: 0.9688 | Recall: 0.9348 | F1: 0.9515 | ROC-AUC: 0.9334
Max val ROC-AUC: 0.9480
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0157 | Acc: 0.9953 | Prec: 0.9949 | Recall: 0.9949 | F1: 0.9949 | ROC-AUC: 0.9994 || Test Loss: 0.4524 | Acc: 0.9140 | Prec: 0.9738 | Recall: 0.9323 | F1: 0.9526 | ROC-AUC: 0.9496
Max val ROC-AUC: 0.9496
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0212 | Acc: 0.9939 | Prec: 0.9935 | Recall: 0.9935 | F1: 0.9935 | ROC-AUC: 0.9994 || Test Loss: 0.4705 | Acc: 0.9256 | Prec: 0.9842 | Recall: 0.9348 | F1: 0.9589 | ROC-AUC: 0.9471
Max val ROC-AUC: 0.9741
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0196 | Acc: 0.9939 | Prec: 0.9963 | Recall: 0.9906 | F1: 0.9934 | ROC-AUC: 0.9998 || Test Loss: 0.4177 | Acc: 0.9395 | Prec: 0.9674 | Recall: 0.9674 | F1: 0.9674 | ROC-AUC: 0.8790
Max val ROC-AUC: 0.9500
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0403 | Acc: 0.9875 | Prec: 0.9898 | Recall: 0.9833 | F1: 0.9865 | ROC-AUC: 0.9987 || Test Loss: 0.3759 | Acc: 0.9349 | Prec: 0.9696 | Recall: 0.9599 | F1: 0.9647 | ROC-AUC: 0.9342
Max val ROC-AUC: 0.9477
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0147 | Acc: 0.9963 | Prec: 0.9956 | Recall: 0.9964 | F1: 0.9960 | ROC-AUC: 0.9998 || Test Loss: 0.4367 | Acc: 0.9349 | Prec: 0.9744 | Recall: 0.9549 | F1: 0.9646 | ROC-AUC: 0.9367
Max val ROC-AUC: 0.9550
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0147 | Acc: 0.9949 | Prec: 0.9964 | Recall: 0.9927 | F1: 0.9945 | ROC-AUC: 0.9999 || Test Loss: 0.4384 | Acc: 0.9302 | Prec: 0.9767 | Recall: 0.9474 | F1: 0.9618 | ROC-AUC: 0.9331
Max val ROC-AUC: 0.9503
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0246 | Acc: 0.9939 | Prec: 0.9942 | Recall: 0.9927 | F1: 0.9935 | ROC-AUC: 0.9993 || Test Loss: 0.3100 | Acc: 0.9349 | Prec: 0.9696 | Recall: 0.9599 | F1: 0.9647 | ROC-AUC: 0.9445
Max val ROC-AUC: 0.9517
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0095 | Acc: 0.9976 | Prec: 0.9985 | Recall: 0.9964 | F1: 0.9975 | ROC-AUC: 0.9999 || Test Loss: 0.4288 | Acc: 0.9256 | Prec: 0.9867 | Recall: 0.9323 | F1: 0.9588 | ROC-AUC: 0.9518
Max val ROC-AUC: 0.9639
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0147 | Acc: 0.9963 | Prec: 0.9971 | Recall: 0.9949 | F1: 0.9960 | ROC-AUC: 0.9996 || Test Loss: 0.3367 | Acc: 0.9488 | Prec: 0.9796 | Recall: 0.9649 | F1: 0.9722 | ROC-AUC: 0.9573
Max val ROC-AUC: 0.9582
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6765 | Acc: 0.6820 | Prec: 0.8336 | Recall: 0.3929 | F1: 0.5341 | ROC-AUC: 0.7242 || Test Loss: 0.8410 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.9441
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6950 | Acc: 0.5298 | Prec: 0.4828 | Recall: 0.1939 | F1: 0.2767 | ROC-AUC: 0.4893 || Test Loss: 0.9733 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.7698
Max val ROC-AUC: 0.9536
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.4496 | Acc: 0.8346 | Prec: 0.7685 | Recall: 0.9208 | F1: 0.8378 | ROC-AUC: 0.8585 || Test Loss: 0.4239 | Acc: 0.9302 | Prec: 0.9671 | Recall: 0.9574 | F1: 0.9622 | ROC-AUC: 0.7693
Max val ROC-AUC: 0.8974
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.4986 | Acc: 0.8107 | Prec: 0.7529 | Recall: 0.8809 | F1: 0.8119 | ROC-AUC: 0.8455 || Test Loss: 0.5667 | Acc: 0.8302 | Prec: 0.9880 | Recall: 0.8271 | F1: 0.9004 | ROC-AUC: 0.8501
Max val ROC-AUC: 0.9500
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.4973 | Acc: 0.7885 | Prec: 0.9652 | Recall: 0.5643 | F1: 0.7122 | ROC-AUC: 0.7879 || Test Loss: 0.6128 | Acc: 0.6140 | Prec: 0.9957 | Recall: 0.5865 | F1: 0.7382 | ROC-AUC: 0.8898
Max val ROC-AUC: 0.9412
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6558 | Acc: 0.6541 | Prec: 0.6328 | Recall: 0.6057 | F1: 0.6189 | ROC-AUC: 0.6867 || Test Loss: 0.7038 | Acc: 0.7442 | Prec: 0.9966 | Recall: 0.7268 | F1: 0.8406 | ROC-AUC: 0.9169
Max val ROC-AUC: 0.9676
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3828 | Acc: 0.8703 | Prec: 0.8833 | Recall: 0.8301 | F1: 0.8559 | ROC-AUC: 0.9037 || Test Loss: 0.4298 | Acc: 0.8163 | Prec: 0.9938 | Recall: 0.8070 | F1: 0.8907 | ROC-AUC: 0.8713
Max val ROC-AUC: 0.9048
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.5547 | Acc: 0.7504 | Prec: 0.6695 | Recall: 0.9121 | F1: 0.7722 | ROC-AUC: 0.7820 || Test Loss: 0.8070 | Acc: 0.7791 | Prec: 0.9967 | Recall: 0.7644 | F1: 0.8652 | ROC-AUC: 0.9020
Max val ROC-AUC: 0.9426
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3979 | Acc: 0.8430 | Prec: 0.9376 | Recall: 0.7088 | F1: 0.8073 | ROC-AUC: 0.8962 || Test Loss: 0.4488 | Acc: 0.8302 | Prec: 0.9851 | Recall: 0.8296 | F1: 0.9007 | ROC-AUC: 0.9027
Max val ROC-AUC: 0.9470
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.7005 | Acc: 0.5470 | Prec: 0.5537 | Recall: 0.1198 | F1: 0.1970 | ROC-AUC: 0.5257 || Test Loss: 0.5802 | Acc: 0.7837 | Prec: 0.9904 | Recall: 0.7744 | F1: 0.8692 | ROC-AUC: 0.9139
Max val ROC-AUC: 0.9467
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1426 | Acc: 0.9539 | Prec: 0.9791 | Recall: 0.9201 | F1: 0.9487 | ROC-AUC: 0.9832 || Test Loss: 0.3705 | Acc: 0.8837 | Prec: 0.9915 | Recall: 0.8822 | F1: 0.9337 | ROC-AUC: 0.9398
Max val ROC-AUC: 0.9451
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1735 | Acc: 0.9444 | Prec: 0.9727 | Recall: 0.9056 | F1: 0.9379 | ROC-AUC: 0.9777 || Test Loss: 0.2625 | Acc: 0.8977 | Prec: 0.9733 | Recall: 0.9148 | F1: 0.9432 | ROC-AUC: 0.8954
Max val ROC-AUC: 0.9271
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1333 | Acc: 0.9626 | Prec: 0.9795 | Recall: 0.9390 | F1: 0.9588 | ROC-AUC: 0.9847 || Test Loss: 0.3237 | Acc: 0.8814 | Prec: 0.9807 | Recall: 0.8897 | F1: 0.9330 | ROC-AUC: 0.9062
Max val ROC-AUC: 0.9397
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1445 | Acc: 0.9555 | Prec: 0.9822 | Recall: 0.9208 | F1: 0.9505 | ROC-AUC: 0.9812 || Test Loss: 0.3574 | Acc: 0.9302 | Prec: 0.9817 | Recall: 0.9424 | F1: 0.9616 | ROC-AUC: 0.9339
Max val ROC-AUC: 0.9453
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1512 | Acc: 0.9616 | Prec: 0.9847 | Recall: 0.9317 | F1: 0.9575 | ROC-AUC: 0.9803 || Test Loss: 0.4527 | Acc: 0.8326 | Prec: 0.9910 | Recall: 0.8271 | F1: 0.9016 | ROC-AUC: 0.9333
Max val ROC-AUC: 0.9440
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1304 | Acc: 0.9592 | Prec: 0.9801 | Recall: 0.9310 | F1: 0.9549 | ROC-AUC: 0.9867 || Test Loss: 0.3975 | Acc: 0.8814 | Prec: 0.9888 | Recall: 0.8822 | F1: 0.9325 | ROC-AUC: 0.9502
Max val ROC-AUC: 0.9502
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1336 | Acc: 0.9606 | Prec: 0.9780 | Recall: 0.9361 | F1: 0.9566 | ROC-AUC: 0.9851 || Test Loss: 0.3815 | Acc: 0.9209 | Prec: 0.9841 | Recall: 0.9298 | F1: 0.9562 | ROC-AUC: 0.9407
Max val ROC-AUC: 0.9565
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1105 | Acc: 0.9646 | Prec: 0.9833 | Recall: 0.9397 | F1: 0.9610 | ROC-AUC: 0.9886 || Test Loss: 0.5639 | Acc: 0.8837 | Prec: 0.9781 | Recall: 0.8947 | F1: 0.9346 | ROC-AUC: 0.9166
Max val ROC-AUC: 0.9455
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1662 | Acc: 0.9454 | Prec: 0.9669 | Recall: 0.9136 | F1: 0.9395 | ROC-AUC: 0.9781 || Test Loss: 0.3869 | Acc: 0.8930 | Prec: 0.9889 | Recall: 0.8947 | F1: 0.9395 | ROC-AUC: 0.9043
Max val ROC-AUC: 0.9597
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1341 | Acc: 0.9619 | Prec: 0.9766 | Recall: 0.9405 | F1: 0.9582 | ROC-AUC: 0.9849 || Test Loss: 0.3798 | Acc: 0.8860 | Prec: 0.9808 | Recall: 0.8947 | F1: 0.9358 | ROC-AUC: 0.9608
Max val ROC-AUC: 0.9679
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0414 | Acc: 0.9879 | Prec: 0.9934 | Recall: 0.9804 | F1: 0.9868 | ROC-AUC: 0.9981 || Test Loss: 0.3224 | Acc: 0.9163 | Prec: 0.9840 | Recall: 0.9248 | F1: 0.9535 | ROC-AUC: 0.9316
Max val ROC-AUC: 0.9486
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0375 | Acc: 0.9885 | Prec: 0.9927 | Recall: 0.9826 | F1: 0.9876 | ROC-AUC: 0.9987 || Test Loss: 0.4198 | Acc: 0.9256 | Prec: 0.9791 | Recall: 0.9398 | F1: 0.9591 | ROC-AUC: 0.9471
Max val ROC-AUC: 0.9577
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0359 | Acc: 0.9896 | Prec: 0.9905 | Recall: 0.9869 | F1: 0.9887 | ROC-AUC: 0.9988 || Test Loss: 0.5261 | Acc: 0.9372 | Prec: 0.9794 | Recall: 0.9524 | F1: 0.9657 | ROC-AUC: 0.9086
Max val ROC-AUC: 0.9536
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0437 | Acc: 0.9885 | Prec: 0.9927 | Recall: 0.9826 | F1: 0.9876 | ROC-AUC: 0.9975 || Test Loss: 0.4201 | Acc: 0.9256 | Prec: 0.9742 | Recall: 0.9449 | F1: 0.9593 | ROC-AUC: 0.9221
Max val ROC-AUC: 0.9547
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0341 | Acc: 0.9926 | Prec: 0.9927 | Recall: 0.9913 | F1: 0.9920 | ROC-AUC: 0.9981 || Test Loss: 0.4745 | Acc: 0.9233 | Prec: 0.9791 | Recall: 0.9373 | F1: 0.9577 | ROC-AUC: 0.9230
Max val ROC-AUC: 0.9617
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0239 | Acc: 0.9946 | Prec: 0.9949 | Recall: 0.9935 | F1: 0.9942 | ROC-AUC: 0.9991 || Test Loss: 0.5654 | Acc: 0.8907 | Prec: 0.9783 | Recall: 0.9023 | F1: 0.9387 | ROC-AUC: 0.8905
Max val ROC-AUC: 0.9356
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0214 | Acc: 0.9943 | Prec: 0.9978 | Recall: 0.9898 | F1: 0.9938 | ROC-AUC: 0.9994 || Test Loss: 0.4698 | Acc: 0.9209 | Prec: 0.9790 | Recall: 0.9348 | F1: 0.9564 | ROC-AUC: 0.9455
Max val ROC-AUC: 0.9515
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0305 | Acc: 0.9902 | Prec: 0.9934 | Recall: 0.9855 | F1: 0.9894 | ROC-AUC: 0.9993 || Test Loss: 0.5123 | Acc: 0.9279 | Prec: 0.9792 | Recall: 0.9424 | F1: 0.9604 | ROC-AUC: 0.9318
Max val ROC-AUC: 0.9558
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0144 | Acc: 0.9963 | Prec: 0.9971 | Recall: 0.9949 | F1: 0.9960 | ROC-AUC: 0.9997 || Test Loss: 0.4901 | Acc: 0.9279 | Prec: 0.9767 | Recall: 0.9449 | F1: 0.9605 | ROC-AUC: 0.9212
Max val ROC-AUC: 0.9445
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0418 | Acc: 0.9896 | Prec: 0.9920 | Recall: 0.9855 | F1: 0.9887 | ROC-AUC: 0.9980 || Test Loss: 0.5853 | Acc: 0.8767 | Prec: 0.9806 | Recall: 0.8847 | F1: 0.9302 | ROC-AUC: 0.9114
Max val ROC-AUC: 0.9537
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6931 | Acc: 0.5217 | Prec: 0.4581 | Recall: 0.1707 | F1: 0.2487 | ROC-AUC: 0.5106 || Test Loss: 0.8629 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6918 | Acc: 0.5342 | Prec: 0.4918 | Recall: 0.1314 | F1: 0.2074 | ROC-AUC: 0.5012 || Test Loss: 0.8455 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6944 | Acc: 0.5153 | Prec: 0.4365 | Recall: 0.1547 | F1: 0.2284 | ROC-AUC: 0.4925 || Test Loss: 0.7183 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6972 | Acc: 0.5248 | Prec: 0.4627 | Recall: 0.1532 | F1: 0.2302 | ROC-AUC: 0.4872 || Test Loss: 0.7468 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.2845
Max val ROC-AUC: 0.9612
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6929 | Acc: 0.5271 | Prec: 0.4802 | Recall: 0.2375 | F1: 0.3178 | ROC-AUC: 0.5105 || Test Loss: 0.6351 | Acc: 0.9279 | Prec: 0.9279 | Recall: 1.0000 | F1: 0.9626 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.9214
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.7211 | Acc: 0.5086 | Prec: 0.3915 | Recall: 0.1075 | F1: 0.1687 | ROC-AUC: 0.4923 || Test Loss: 0.7129 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6947 | Acc: 0.5190 | Prec: 0.4602 | Recall: 0.2142 | F1: 0.2924 | ROC-AUC: 0.4962 || Test Loss: 0.7923 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.9378
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6995 | Acc: 0.5147 | Prec: 0.4604 | Recall: 0.2702 | F1: 0.3405 | ROC-AUC: 0.4800 || Test Loss: 0.8243 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6973 | Acc: 0.5147 | Prec: 0.4459 | Recall: 0.1917 | F1: 0.2682 | ROC-AUC: 0.4789 || Test Loss: 0.7522 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6929 | Acc: 0.5113 | Prec: 0.4662 | Recall: 0.3711 | F1: 0.4133 | ROC-AUC: 0.5210 || Test Loss: 1.0140 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.9152
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1340 | Acc: 0.9613 | Prec: 0.9802 | Recall: 0.9354 | F1: 0.9573 | ROC-AUC: 0.9829 || Test Loss: 0.5160 | Acc: 0.8558 | Prec: 0.9856 | Recall: 0.8571 | F1: 0.9169 | ROC-AUC: 0.9185
Max val ROC-AUC: 0.9533
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1461 | Acc: 0.9582 | Prec: 0.9764 | Recall: 0.9325 | F1: 0.9539 | ROC-AUC: 0.9850 || Test Loss: 0.4183 | Acc: 0.8628 | Prec: 0.9830 | Recall: 0.8672 | F1: 0.9214 | ROC-AUC: 0.9176
Max val ROC-AUC: 0.9379
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1926 | Acc: 0.9340 | Prec: 0.9646 | Recall: 0.8903 | F1: 0.9260 | ROC-AUC: 0.9706 || Test Loss: 0.5107 | Acc: 0.8674 | Prec: 0.9858 | Recall: 0.8697 | F1: 0.9241 | ROC-AUC: 0.9082
Max val ROC-AUC: 0.9420
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2115 | Acc: 0.9279 | Prec: 0.9648 | Recall: 0.8765 | F1: 0.9186 | ROC-AUC: 0.9685 || Test Loss: 0.3209 | Acc: 0.8814 | Prec: 0.9888 | Recall: 0.8822 | F1: 0.9325 | ROC-AUC: 0.9320
Max val ROC-AUC: 0.9566
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1636 | Acc: 0.9586 | Prec: 0.9736 | Recall: 0.9361 | F1: 0.9545 | ROC-AUC: 0.9826 || Test Loss: 0.4104 | Acc: 0.9023 | Prec: 0.9864 | Recall: 0.9073 | F1: 0.9452 | ROC-AUC: 0.9113
Max val ROC-AUC: 0.9330
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1639 | Acc: 0.9572 | Prec: 0.9699 | Recall: 0.9368 | F1: 0.9531 | ROC-AUC: 0.9813 || Test Loss: 0.2817 | Acc: 0.9070 | Prec: 0.9864 | Recall: 0.9123 | F1: 0.9479 | ROC-AUC: 0.9637
Max val ROC-AUC: 0.9637
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1241 | Acc: 0.9653 | Prec: 0.9855 | Recall: 0.9390 | F1: 0.9617 | ROC-AUC: 0.9838 || Test Loss: 0.5383 | Acc: 0.8977 | Prec: 0.9810 | Recall: 0.9073 | F1: 0.9427 | ROC-AUC: 0.9065
Max val ROC-AUC: 0.9673
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1263 | Acc: 0.9653 | Prec: 0.9833 | Recall: 0.9412 | F1: 0.9618 | ROC-AUC: 0.9844 || Test Loss: 0.4896 | Acc: 0.8721 | Prec: 0.9831 | Recall: 0.8772 | F1: 0.9272 | ROC-AUC: 0.9226
Max val ROC-AUC: 0.9415
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1145 | Acc: 0.9656 | Prec: 0.9841 | Recall: 0.9412 | F1: 0.9621 | ROC-AUC: 0.9891 || Test Loss: 0.4639 | Acc: 0.8698 | Prec: 0.9831 | Recall: 0.8747 | F1: 0.9257 | ROC-AUC: 0.9196
Max val ROC-AUC: 0.9486
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1506 | Acc: 0.9559 | Prec: 0.9741 | Recall: 0.9296 | F1: 0.9513 | ROC-AUC: 0.9824 || Test Loss: 0.6201 | Acc: 0.8837 | Prec: 0.9861 | Recall: 0.8872 | F1: 0.9340 | ROC-AUC: 0.9543
Max val ROC-AUC: 0.9543
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0533 | Acc: 0.9821 | Prec: 0.9882 | Recall: 0.9731 | F1: 0.9806 | ROC-AUC: 0.9976 || Test Loss: 0.5268 | Acc: 0.9140 | Prec: 0.9866 | Recall: 0.9198 | F1: 0.9520 | ROC-AUC: 0.9449
Max val ROC-AUC: 0.9609
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0551 | Acc: 0.9842 | Prec: 0.9890 | Recall: 0.9768 | F1: 0.9828 | ROC-AUC: 0.9974 || Test Loss: 0.3759 | Acc: 0.9186 | Prec: 0.9866 | Recall: 0.9248 | F1: 0.9547 | ROC-AUC: 0.9584
Max val ROC-AUC: 0.9584
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0565 | Acc: 0.9842 | Prec: 0.9897 | Recall: 0.9760 | F1: 0.9828 | ROC-AUC: 0.9967 || Test Loss: 0.4499 | Acc: 0.9140 | Prec: 0.9840 | Recall: 0.9223 | F1: 0.9521 | ROC-AUC: 0.9249
Max val ROC-AUC: 0.9609
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0682 | Acc: 0.9808 | Prec: 0.9889 | Recall: 0.9695 | F1: 0.9791 | ROC-AUC: 0.9954 || Test Loss: 0.4079 | Acc: 0.9163 | Prec: 0.9789 | Recall: 0.9298 | F1: 0.9537 | ROC-AUC: 0.9137
Max val ROC-AUC: 0.9484
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0507 | Acc: 0.9832 | Prec: 0.9875 | Recall: 0.9760 | F1: 0.9817 | ROC-AUC: 0.9979 || Test Loss: 0.5117 | Acc: 0.9256 | Prec: 0.9766 | Recall: 0.9424 | F1: 0.9592 | ROC-AUC: 0.9371
Max val ROC-AUC: 0.9389
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0505 | Acc: 0.9801 | Prec: 0.9846 | Recall: 0.9724 | F1: 0.9784 | ROC-AUC: 0.9981 || Test Loss: 0.5942 | Acc: 0.9000 | Prec: 0.9811 | Recall: 0.9098 | F1: 0.9441 | ROC-AUC: 0.9214
Max val ROC-AUC: 0.9521
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0410 | Acc: 0.9882 | Prec: 0.9934 | Recall: 0.9811 | F1: 0.9872 | ROC-AUC: 0.9980 || Test Loss: 0.4639 | Acc: 0.9209 | Prec: 0.9815 | Recall: 0.9323 | F1: 0.9563 | ROC-AUC: 0.9026
Max val ROC-AUC: 0.9477
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0561 | Acc: 0.9855 | Prec: 0.9883 | Recall: 0.9804 | F1: 0.9843 | ROC-AUC: 0.9961 || Test Loss: 0.4010 | Acc: 0.9256 | Prec: 0.9791 | Recall: 0.9398 | F1: 0.9591 | ROC-AUC: 0.9321
Max val ROC-AUC: 0.9541
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0448 | Acc: 0.9875 | Prec: 0.9919 | Recall: 0.9811 | F1: 0.9865 | ROC-AUC: 0.9976 || Test Loss: 0.4474 | Acc: 0.9349 | Prec: 0.9769 | Recall: 0.9524 | F1: 0.9645 | ROC-AUC: 0.9198
Max val ROC-AUC: 0.9578
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0565 | Acc: 0.9842 | Prec: 0.9933 | Recall: 0.9724 | F1: 0.9828 | ROC-AUC: 0.9970 || Test Loss: 0.4531 | Acc: 0.9302 | Prec: 0.9843 | Recall: 0.9398 | F1: 0.9615 | ROC-AUC: 0.9375
Max val ROC-AUC: 0.9375
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1971 | Acc: 0.9350 | Prec: 0.9625 | Recall: 0.8947 | F1: 0.9274 | ROC-AUC: 0.9730 || Test Loss: 0.5678 | Acc: 0.9488 | Prec: 0.9821 | Recall: 0.9624 | F1: 0.9722 | ROC-AUC: 0.9002
Max val ROC-AUC: 0.9821
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1815 | Acc: 0.9481 | Prec: 0.9781 | Recall: 0.9085 | F1: 0.9420 | ROC-AUC: 0.9624 || Test Loss: 0.5640 | Acc: 0.7930 | Prec: 0.9936 | Recall: 0.7820 | F1: 0.8752 | ROC-AUC: 0.8833
Max val ROC-AUC: 0.9522
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2648 | Acc: 0.9128 | Prec: 0.9313 | Recall: 0.8765 | F1: 0.9031 | ROC-AUC: 0.9592 || Test Loss: 0.2867 | Acc: 0.9279 | Prec: 0.9444 | Recall: 0.9799 | F1: 0.9619 | ROC-AUC: 0.9294
Max val ROC-AUC: 0.9570
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1620 | Acc: 0.9444 | Prec: 0.9543 | Recall: 0.9245 | F1: 0.9391 | ROC-AUC: 0.9755 || Test Loss: 0.2935 | Acc: 0.8953 | Prec: 0.9836 | Recall: 0.9023 | F1: 0.9412 | ROC-AUC: 0.8966
Max val ROC-AUC: 0.9460
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1129 | Acc: 0.9619 | Prec: 0.9661 | Recall: 0.9513 | F1: 0.9587 | ROC-AUC: 0.9904 || Test Loss: 0.2891 | Acc: 0.9000 | Prec: 0.9944 | Recall: 0.8972 | F1: 0.9433 | ROC-AUC: 0.9542
Max val ROC-AUC: 0.9705
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6938 | Acc: 0.5268 | Prec: 0.4635 | Recall: 0.1293 | F1: 0.2022 | ROC-AUC: 0.4908 || Test Loss: 0.8544 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.9486
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1396 | Acc: 0.9522 | Prec: 0.9632 | Recall: 0.9325 | F1: 0.9476 | ROC-AUC: 0.9868 || Test Loss: 0.8928 | Acc: 0.6721 | Prec: 1.0000 | Recall: 0.6466 | F1: 0.7854 | ROC-AUC: 0.9673
Max val ROC-AUC: 0.9673
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1495 | Acc: 0.9532 | Prec: 0.9592 | Recall: 0.9390 | F1: 0.9490 | ROC-AUC: 0.9810 || Test Loss: 1.4528 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.9497
Max val ROC-AUC: 0.9623
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.4340 | Acc: 0.8424 | Prec: 0.9759 | Recall: 0.6768 | F1: 0.7993 | ROC-AUC: 0.8551 || Test Loss: 0.5266 | Acc: 0.7930 | Prec: 0.9905 | Recall: 0.7845 | F1: 0.8755 | ROC-AUC: 0.8776
Max val ROC-AUC: 0.9386
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2565 | Acc: 0.9286 | Prec: 0.9605 | Recall: 0.8824 | F1: 0.9198 | ROC-AUC: 0.9457 || Test Loss: 0.2661 | Acc: 0.9233 | Prec: 0.9766 | Recall: 0.9398 | F1: 0.9579 | ROC-AUC: 0.8719
Max val ROC-AUC: 0.9559
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0481 | Acc: 0.9832 | Prec: 0.9804 | Recall: 0.9833 | F1: 0.9819 | ROC-AUC: 0.9981 || Test Loss: 0.3212 | Acc: 0.9256 | Prec: 0.9791 | Recall: 0.9398 | F1: 0.9591 | ROC-AUC: 0.9375
Max val ROC-AUC: 0.9439
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0660 | Acc: 0.9778 | Prec: 0.9859 | Recall: 0.9659 | F1: 0.9758 | ROC-AUC: 0.9958 || Test Loss: 0.2892 | Acc: 0.9140 | Prec: 0.9814 | Recall: 0.9248 | F1: 0.9523 | ROC-AUC: 0.9285
Max val ROC-AUC: 0.9493
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0651 | Acc: 0.9805 | Prec: 0.9824 | Recall: 0.9753 | F1: 0.9789 | ROC-AUC: 0.9949 || Test Loss: 0.2832 | Acc: 0.9163 | Prec: 0.9892 | Recall: 0.9198 | F1: 0.9532 | ROC-AUC: 0.9521
Max val ROC-AUC: 0.9580
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0614 | Acc: 0.9821 | Prec: 0.9860 | Recall: 0.9753 | F1: 0.9806 | ROC-AUC: 0.9962 || Test Loss: 0.2532 | Acc: 0.9140 | Prec: 0.9892 | Recall: 0.9173 | F1: 0.9519 | ROC-AUC: 0.9686
Max val ROC-AUC: 0.9686
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0453 | Acc: 0.9879 | Prec: 0.9898 | Recall: 0.9840 | F1: 0.9869 | ROC-AUC: 0.9979 || Test Loss: 0.4755 | Acc: 0.8651 | Prec: 0.9885 | Recall: 0.8647 | F1: 0.9225 | ROC-AUC: 0.9468
Max val ROC-AUC: 0.9601
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0406 | Acc: 0.9872 | Prec: 0.9862 | Recall: 0.9862 | F1: 0.9862 | ROC-AUC: 0.9985 || Test Loss: 0.2319 | Acc: 0.9419 | Prec: 0.9895 | Recall: 0.9474 | F1: 0.9680 | ROC-AUC: 0.9636
Max val ROC-AUC: 0.9731
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0426 | Acc: 0.9869 | Prec: 0.9876 | Recall: 0.9840 | F1: 0.9858 | ROC-AUC: 0.9984 || Test Loss: 0.2637 | Acc: 0.9326 | Prec: 0.9744 | Recall: 0.9524 | F1: 0.9632 | ROC-AUC: 0.9576
Max val ROC-AUC: 0.9576
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0512 | Acc: 0.9842 | Prec: 0.9840 | Recall: 0.9818 | F1: 0.9829 | ROC-AUC: 0.9976 || Test Loss: 0.1762 | Acc: 0.9512 | Prec: 0.9922 | Recall: 0.9549 | F1: 0.9732 | ROC-AUC: 0.9779
Max val ROC-AUC: 0.9779
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0504 | Acc: 0.9865 | Prec: 0.9883 | Recall: 0.9826 | F1: 0.9854 | ROC-AUC: 0.9973 || Test Loss: 0.2655 | Acc: 0.9372 | Prec: 0.9769 | Recall: 0.9549 | F1: 0.9658 | ROC-AUC: 0.9386
Max val ROC-AUC: 0.9481
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0528 | Acc: 0.9865 | Prec: 0.9890 | Recall: 0.9818 | F1: 0.9854 | ROC-AUC: 0.9968 || Test Loss: 0.3166 | Acc: 0.9419 | Prec: 0.9820 | Recall: 0.9549 | F1: 0.9682 | ROC-AUC: 0.9605
Max val ROC-AUC: 0.9694
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0211 | Acc: 0.9966 | Prec: 0.9964 | Recall: 0.9964 | F1: 0.9964 | ROC-AUC: 0.9991 || Test Loss: 0.4057 | Acc: 0.9256 | Prec: 0.9816 | Recall: 0.9373 | F1: 0.9590 | ROC-AUC: 0.9354
Max val ROC-AUC: 0.9443
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0205 | Acc: 0.9936 | Prec: 0.9942 | Recall: 0.9920 | F1: 0.9931 | ROC-AUC: 0.9995 || Test Loss: 0.3147 | Acc: 0.9395 | Prec: 0.9722 | Recall: 0.9624 | F1: 0.9673 | ROC-AUC: 0.9307
Max val ROC-AUC: 0.9563
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0638 | Acc: 0.9828 | Prec: 0.9749 | Recall: 0.9884 | F1: 0.9816 | ROC-AUC: 0.9961 || Test Loss: 0.5028 | Acc: 0.8628 | Prec: 0.9802 | Recall: 0.8697 | F1: 0.9216 | ROC-AUC: 0.8909
Max val ROC-AUC: 0.9572
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0422 | Acc: 0.9879 | Prec: 0.9891 | Recall: 0.9847 | F1: 0.9869 | ROC-AUC: 0.9982 || Test Loss: 0.3585 | Acc: 0.9116 | Prec: 0.9892 | Recall: 0.9148 | F1: 0.9505 | ROC-AUC: 0.9333
Max val ROC-AUC: 0.9591
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0135 | Acc: 0.9960 | Prec: 0.9964 | Recall: 0.9949 | F1: 0.9956 | ROC-AUC: 0.9998 || Test Loss: 0.3623 | Acc: 0.9326 | Prec: 0.9818 | Recall: 0.9449 | F1: 0.9630 | ROC-AUC: 0.9223
Max val ROC-AUC: 0.9599
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0225 | Acc: 0.9939 | Prec: 0.9913 | Recall: 0.9956 | F1: 0.9935 | ROC-AUC: 0.9992 || Test Loss: 0.4496 | Acc: 0.9070 | Prec: 0.9812 | Recall: 0.9173 | F1: 0.9482 | ROC-AUC: 0.9533
Max val ROC-AUC: 0.9661
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0149 | Acc: 0.9946 | Prec: 0.9942 | Recall: 0.9942 | F1: 0.9942 | ROC-AUC: 0.9997 || Test Loss: 0.5797 | Acc: 0.8930 | Prec: 0.9809 | Recall: 0.9023 | F1: 0.9399 | ROC-AUC: 0.9217
Max val ROC-AUC: 0.9527
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0276 | Acc: 0.9923 | Prec: 0.9927 | Recall: 0.9906 | F1: 0.9916 | ROC-AUC: 0.9988 || Test Loss: 0.3527 | Acc: 0.9209 | Prec: 0.9815 | Recall: 0.9323 | F1: 0.9563 | ROC-AUC: 0.9409
Max val ROC-AUC: 0.9587
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0289 | Acc: 0.9916 | Prec: 0.9913 | Recall: 0.9906 | F1: 0.9909 | ROC-AUC: 0.9994 || Test Loss: 0.5522 | Acc: 0.9023 | Prec: 0.9811 | Recall: 0.9123 | F1: 0.9455 | ROC-AUC: 0.9383
Max val ROC-AUC: 0.9552
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0134 | Acc: 0.9973 | Prec: 0.9971 | Recall: 0.9971 | F1: 0.9971 | ROC-AUC: 0.9996 || Test Loss: 0.4025 | Acc: 0.9279 | Prec: 0.9842 | Recall: 0.9373 | F1: 0.9602 | ROC-AUC: 0.9360
Max val ROC-AUC: 0.9618
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2293 | Acc: 0.9340 | Prec: 0.9631 | Recall: 0.8918 | F1: 0.9261 | ROC-AUC: 0.9648 || Test Loss: 0.4298 | Acc: 0.8209 | Prec: 0.9909 | Recall: 0.8145 | F1: 0.8941 | ROC-AUC: 0.9066
Max val ROC-AUC: 0.9460
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2373 | Acc: 0.9259 | Prec: 0.9753 | Recall: 0.8620 | F1: 0.9152 | ROC-AUC: 0.9465 || Test Loss: 0.5209 | Acc: 0.8163 | Prec: 0.9908 | Recall: 0.8095 | F1: 0.8910 | ROC-AUC: 0.8763
Max val ROC-AUC: 0.9068
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3504 | Acc: 0.8747 | Prec: 0.9491 | Recall: 0.7712 | F1: 0.8510 | ROC-AUC: 0.9121 || Test Loss: 0.4995 | Acc: 0.7023 | Prec: 0.9927 | Recall: 0.6842 | F1: 0.8101 | ROC-AUC: 0.8388
Max val ROC-AUC: 0.9416
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.4048 | Acc: 0.8602 | Prec: 0.8099 | Recall: 0.9129 | F1: 0.8583 | ROC-AUC: 0.9095 || Test Loss: 0.6704 | Acc: 0.9279 | Prec: 0.9842 | Recall: 0.9373 | F1: 0.9602 | ROC-AUC: 0.8799
Max val ROC-AUC: 0.9586
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2619 | Acc: 0.9017 | Prec: 0.9400 | Recall: 0.8417 | F1: 0.8881 | ROC-AUC: 0.9557 || Test Loss: 0.4021 | Acc: 0.8000 | Prec: 0.9937 | Recall: 0.7895 | F1: 0.8799 | ROC-AUC: 0.9163
Max val ROC-AUC: 0.9472
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2530 | Acc: 0.9087 | Prec: 0.8777 | Recall: 0.9332 | F1: 0.9046 | ROC-AUC: 0.9650 || Test Loss: 0.2699 | Acc: 0.9326 | Prec: 0.9768 | Recall: 0.9499 | F1: 0.9632 | ROC-AUC: 0.9275
Max val ROC-AUC: 0.9550
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6958 | Acc: 0.5069 | Prec: 0.4383 | Recall: 0.2244 | F1: 0.2968 | ROC-AUC: 0.4982 || Test Loss: 0.7983 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2525 | Acc: 0.9128 | Prec: 0.9530 | Recall: 0.8540 | F1: 0.9008 | ROC-AUC: 0.9476 || Test Loss: 0.5329 | Acc: 0.7372 | Prec: 0.9965 | Recall: 0.7193 | F1: 0.8355 | ROC-AUC: 0.8448
Max val ROC-AUC: 0.9357
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2278 | Acc: 0.9272 | Prec: 0.9510 | Recall: 0.8889 | F1: 0.9189 | ROC-AUC: 0.9656 || Test Loss: 0.4117 | Acc: 0.7744 | Prec: 0.9903 | Recall: 0.7644 | F1: 0.8628 | ROC-AUC: 0.9213
Max val ROC-AUC: 0.9424
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2770 | Acc: 0.9107 | Prec: 0.9399 | Recall: 0.8627 | F1: 0.8997 | ROC-AUC: 0.9469 || Test Loss: 0.4873 | Acc: 0.7419 | Prec: 0.9932 | Recall: 0.7268 | F1: 0.8394 | ROC-AUC: 0.8737
Max val ROC-AUC: 0.9536
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1405 | Acc: 0.9572 | Prec: 0.9786 | Recall: 0.9281 | F1: 0.9527 | ROC-AUC: 0.9824 || Test Loss: 0.2874 | Acc: 0.9093 | Prec: 0.9839 | Recall: 0.9173 | F1: 0.9494 | ROC-AUC: 0.9327
Max val ROC-AUC: 0.9599
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1621 | Acc: 0.9424 | Prec: 0.9548 | Recall: 0.9194 | F1: 0.9367 | ROC-AUC: 0.9826 || Test Loss: 0.3820 | Acc: 0.8884 | Prec: 0.9808 | Recall: 0.8972 | F1: 0.9372 | ROC-AUC: 0.8820
Max val ROC-AUC: 0.9618
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1349 | Acc: 0.9579 | Prec: 0.9793 | Recall: 0.9288 | F1: 0.9534 | ROC-AUC: 0.9847 || Test Loss: 0.3056 | Acc: 0.8953 | Prec: 0.9863 | Recall: 0.8997 | F1: 0.9410 | ROC-AUC: 0.9076
Max val ROC-AUC: 0.9557
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0811 | Acc: 0.9754 | Prec: 0.9902 | Recall: 0.9564 | F1: 0.9730 | ROC-AUC: 0.9931 || Test Loss: 0.3718 | Acc: 0.9256 | Prec: 0.9816 | Recall: 0.9373 | F1: 0.9590 | ROC-AUC: 0.8911
Max val ROC-AUC: 0.9358
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1125 | Acc: 0.9606 | Prec: 0.9751 | Recall: 0.9390 | F1: 0.9567 | ROC-AUC: 0.9888 || Test Loss: 0.2811 | Acc: 0.8721 | Prec: 0.9914 | Recall: 0.8697 | F1: 0.9266 | ROC-AUC: 0.9450
Max val ROC-AUC: 0.9450
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0946 | Acc: 0.9741 | Prec: 0.9851 | Recall: 0.9586 | F1: 0.9717 | ROC-AUC: 0.9902 || Test Loss: 0.2778 | Acc: 0.9349 | Prec: 0.9869 | Recall: 0.9424 | F1: 0.9641 | ROC-AUC: 0.9610
Max val ROC-AUC: 0.9610
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1308 | Acc: 0.9552 | Prec: 0.9770 | Recall: 0.9252 | F1: 0.9504 | ROC-AUC: 0.9851 || Test Loss: 0.5684 | Acc: 0.6698 | Prec: 0.9923 | Recall: 0.6491 | F1: 0.7848 | ROC-AUC: 0.9268
Max val ROC-AUC: 0.9452
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1421 | Acc: 0.9542 | Prec: 0.9792 | Recall: 0.9208 | F1: 0.9491 | ROC-AUC: 0.9829 || Test Loss: 0.3814 | Acc: 0.9093 | Prec: 0.9865 | Recall: 0.9148 | F1: 0.9493 | ROC-AUC: 0.9238
Max val ROC-AUC: 0.9561
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1717 | Acc: 0.9394 | Prec: 0.9701 | Recall: 0.8969 | F1: 0.9321 | ROC-AUC: 0.9767 || Test Loss: 0.4019 | Acc: 0.8767 | Prec: 0.9887 | Recall: 0.8772 | F1: 0.9296 | ROC-AUC: 0.9333
Max val ROC-AUC: 0.9509
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1086 | Acc: 0.9673 | Prec: 0.9834 | Recall: 0.9455 | F1: 0.9641 | ROC-AUC: 0.9904 || Test Loss: 0.3626 | Acc: 0.9023 | Prec: 0.9837 | Recall: 0.9098 | F1: 0.9453 | ROC-AUC: 0.9517
Max val ROC-AUC: 0.9517
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0552 | Acc: 0.9825 | Prec: 0.9875 | Recall: 0.9746 | F1: 0.9810 | ROC-AUC: 0.9972 || Test Loss: 0.4550 | Acc: 0.8977 | Prec: 0.9784 | Recall: 0.9098 | F1: 0.9429 | ROC-AUC: 0.9234
Max val ROC-AUC: 0.9568
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0358 | Acc: 0.9879 | Prec: 0.9876 | Recall: 0.9862 | F1: 0.9869 | ROC-AUC: 0.9987 || Test Loss: 0.2921 | Acc: 0.9326 | Prec: 0.9818 | Recall: 0.9449 | F1: 0.9630 | ROC-AUC: 0.9112
Max val ROC-AUC: 0.9537
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0187 | Acc: 0.9946 | Prec: 0.9978 | Recall: 0.9906 | F1: 0.9942 | ROC-AUC: 0.9995 || Test Loss: 0.5126 | Acc: 0.9256 | Prec: 0.9766 | Recall: 0.9424 | F1: 0.9592 | ROC-AUC: 0.9319
Max val ROC-AUC: 0.9707
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0085 | Acc: 0.9976 | Prec: 0.9985 | Recall: 0.9964 | F1: 0.9975 | ROC-AUC: 0.9999 || Test Loss: 0.5757 | Acc: 0.9163 | Prec: 0.9789 | Recall: 0.9298 | F1: 0.9537 | ROC-AUC: 0.8991
Max val ROC-AUC: 0.9476
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0407 | Acc: 0.9882 | Prec: 0.9912 | Recall: 0.9833 | F1: 0.9872 | ROC-AUC: 0.9983 || Test Loss: 0.4432 | Acc: 0.8721 | Prec: 0.9859 | Recall: 0.8747 | F1: 0.9270 | ROC-AUC: 0.9374
Max val ROC-AUC: 0.9404
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0253 | Acc: 0.9919 | Prec: 0.9920 | Recall: 0.9906 | F1: 0.9913 | ROC-AUC: 0.9993 || Test Loss: 0.4121 | Acc: 0.9326 | Prec: 0.9843 | Recall: 0.9424 | F1: 0.9629 | ROC-AUC: 0.9382
Max val ROC-AUC: 0.9382
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0287 | Acc: 0.9899 | Prec: 0.9877 | Recall: 0.9906 | F1: 0.9891 | ROC-AUC: 0.9994 || Test Loss: 0.4529 | Acc: 0.9093 | Prec: 0.9813 | Recall: 0.9198 | F1: 0.9495 | ROC-AUC: 0.9264
Max val ROC-AUC: 0.9553
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0283 | Acc: 0.9923 | Prec: 0.9906 | Recall: 0.9927 | F1: 0.9917 | ROC-AUC: 0.9993 || Test Loss: 0.4645 | Acc: 0.9163 | Prec: 0.9840 | Recall: 0.9248 | F1: 0.9535 | ROC-AUC: 0.9449
Max val ROC-AUC: 0.9449
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0521 | Acc: 0.9848 | Prec: 0.9819 | Recall: 0.9855 | F1: 0.9837 | ROC-AUC: 0.9972 || Test Loss: 0.5553 | Acc: 0.8977 | Prec: 0.9863 | Recall: 0.9023 | F1: 0.9424 | ROC-AUC: 0.9262
Max val ROC-AUC: 0.9549
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0218 | Acc: 0.9939 | Prec: 0.9963 | Recall: 0.9906 | F1: 0.9934 | ROC-AUC: 0.9994 || Test Loss: 0.3476 | Acc: 0.9349 | Prec: 0.9720 | Recall: 0.9574 | F1: 0.9646 | ROC-AUC: 0.9332
Max val ROC-AUC: 0.9511
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.4789 | Acc: 0.7868 | Prec: 0.7004 | Recall: 0.9441 | F1: 0.8042 | ROC-AUC: 0.8381 || Test Loss: 0.6883 | Acc: 0.9442 | Prec: 0.9820 | Recall: 0.9574 | F1: 0.9695 | ROC-AUC: 0.8643
Max val ROC-AUC: 0.9324
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.5764 | Acc: 0.7127 | Prec: 0.6454 | Recall: 0.8446 | F1: 0.7317 | ROC-AUC: 0.7723 || Test Loss: 0.6319 | Acc: 0.7326 | Prec: 0.9931 | Recall: 0.7168 | F1: 0.8326 | ROC-AUC: 0.9142
Max val ROC-AUC: 0.9365
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.7017 | Acc: 0.5285 | Prec: 0.4744 | Recall: 0.1547 | F1: 0.2333 | ROC-AUC: 0.5068 || Test Loss: 0.7713 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.7672
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6575 | Acc: 0.6093 | Prec: 0.5752 | Recall: 0.6028 | F1: 0.5887 | ROC-AUC: 0.6561 || Test Loss: 0.4197 | Acc: 0.9186 | Prec: 0.9840 | Recall: 0.9273 | F1: 0.9548 | ROC-AUC: 0.8909
Max val ROC-AUC: 0.9616
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.5136 | Acc: 0.7578 | Prec: 0.8257 | Recall: 0.6057 | F1: 0.6988 | ROC-AUC: 0.8390 || Test Loss: 0.8099 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5002
Max val ROC-AUC: 0.9537
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6979 | Acc: 0.5258 | Prec: 0.4793 | Recall: 0.2600 | F1: 0.3371 | ROC-AUC: 0.4930 || Test Loss: 0.6736 | Acc: 0.9279 | Prec: 0.9279 | Recall: 1.0000 | F1: 0.9626 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.9324
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.4847 | Acc: 0.8107 | Prec: 0.9249 | Recall: 0.6442 | F1: 0.7594 | ROC-AUC: 0.8499 || Test Loss: 0.3572 | Acc: 0.7070 | Prec: 0.9928 | Recall: 0.6892 | F1: 0.8136 | ROC-AUC: 0.9050
Max val ROC-AUC: 0.9485
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.4571 | Acc: 0.8249 | Prec: 0.8219 | Recall: 0.7945 | F1: 0.8080 | ROC-AUC: 0.8808 || Test Loss: 0.9840 | Acc: 0.6860 | Prec: 0.9962 | Recall: 0.6642 | F1: 0.7970 | ROC-AUC: 0.9244
Max val ROC-AUC: 0.9546
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6948 | Acc: 0.5224 | Prec: 0.4489 | Recall: 0.1307 | F1: 0.2025 | ROC-AUC: 0.4945 || Test Loss: 0.8076 | Acc: 0.0837 | Prec: 1.0000 | Recall: 0.0125 | F1: 0.0248 | ROC-AUC: 0.5075
Max val ROC-AUC: 0.7643
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.5516 | Acc: 0.7305 | Prec: 0.6627 | Recall: 0.8533 | F1: 0.7460 | ROC-AUC: 0.7811 || Test Loss: 0.7272 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.7826
Max val ROC-AUC: 0.9325
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1663 | Acc: 0.9498 | Prec: 0.9637 | Recall: 0.9267 | F1: 0.9448 | ROC-AUC: 0.9784 || Test Loss: 0.8269 | Acc: 0.7953 | Prec: 0.9875 | Recall: 0.7895 | F1: 0.8774 | ROC-AUC: 0.8976
Max val ROC-AUC: 0.9514
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1692 | Acc: 0.9464 | Prec: 0.9656 | Recall: 0.9172 | F1: 0.9408 | ROC-AUC: 0.9800 || Test Loss: 0.6767 | Acc: 0.8372 | Prec: 0.9910 | Recall: 0.8321 | F1: 0.9046 | ROC-AUC: 0.8951
Max val ROC-AUC: 0.9648
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2078 | Acc: 0.9326 | Prec: 0.9455 | Recall: 0.9070 | F1: 0.9259 | ROC-AUC: 0.9741 || Test Loss: 0.4386 | Acc: 0.7930 | Prec: 0.9905 | Recall: 0.7845 | F1: 0.8755 | ROC-AUC: 0.9191
Max val ROC-AUC: 0.9241
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1191 | Acc: 0.9673 | Prec: 0.9769 | Recall: 0.9521 | F1: 0.9643 | ROC-AUC: 0.9883 || Test Loss: 0.3409 | Acc: 0.9093 | Prec: 0.9813 | Recall: 0.9198 | F1: 0.9495 | ROC-AUC: 0.9358
Max val ROC-AUC: 0.9466
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1270 | Acc: 0.9572 | Prec: 0.9742 | Recall: 0.9325 | F1: 0.9529 | ROC-AUC: 0.9857 || Test Loss: 0.4631 | Acc: 0.8465 | Prec: 0.9883 | Recall: 0.8446 | F1: 0.9108 | ROC-AUC: 0.9266
Max val ROC-AUC: 0.9500
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1610 | Acc: 0.9502 | Prec: 0.9731 | Recall: 0.9179 | F1: 0.9447 | ROC-AUC: 0.9801 || Test Loss: 0.3519 | Acc: 0.8953 | Prec: 0.9863 | Recall: 0.8997 | F1: 0.9410 | ROC-AUC: 0.9412
Max val ROC-AUC: 0.9530
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1346 | Acc: 0.9552 | Prec: 0.9698 | Recall: 0.9325 | F1: 0.9508 | ROC-AUC: 0.9852 || Test Loss: 0.4376 | Acc: 0.8744 | Prec: 0.9805 | Recall: 0.8822 | F1: 0.9288 | ROC-AUC: 0.8827
Max val ROC-AUC: 0.9377
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1124 | Acc: 0.9667 | Prec: 0.9812 | Recall: 0.9463 | F1: 0.9634 | ROC-AUC: 0.9868 || Test Loss: 0.3896 | Acc: 0.8930 | Prec: 0.9836 | Recall: 0.8997 | F1: 0.9398 | ROC-AUC: 0.9138
Max val ROC-AUC: 0.9416
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1322 | Acc: 0.9562 | Prec: 0.9822 | Recall: 0.9223 | F1: 0.9513 | ROC-AUC: 0.9860 || Test Loss: 0.3372 | Acc: 0.8884 | Prec: 0.9889 | Recall: 0.8897 | F1: 0.9367 | ROC-AUC: 0.9556
Max val ROC-AUC: 0.9556
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1975 | Acc: 0.9293 | Prec: 0.9281 | Recall: 0.9187 | F1: 0.9234 | ROC-AUC: 0.9755 || Test Loss: 0.5688 | Acc: 0.8465 | Prec: 0.9941 | Recall: 0.8396 | F1: 0.9103 | ROC-AUC: 0.9264
Max val ROC-AUC: 0.9610
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0547 | Acc: 0.9848 | Prec: 0.9890 | Recall: 0.9782 | F1: 0.9836 | ROC-AUC: 0.9970 || Test Loss: 0.2271 | Acc: 0.9233 | Prec: 0.9741 | Recall: 0.9424 | F1: 0.9580 | ROC-AUC: 0.9517
Max val ROC-AUC: 0.9678
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0544 | Acc: 0.9838 | Prec: 0.9904 | Recall: 0.9746 | F1: 0.9824 | ROC-AUC: 0.9977 || Test Loss: 0.3840 | Acc: 0.9395 | Prec: 0.9794 | Recall: 0.9549 | F1: 0.9670 | ROC-AUC: 0.9228
Max val ROC-AUC: 0.9568
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0683 | Acc: 0.9784 | Prec: 0.9874 | Recall: 0.9659 | F1: 0.9765 | ROC-AUC: 0.9950 || Test Loss: 0.3499 | Acc: 0.9163 | Prec: 0.9789 | Recall: 0.9298 | F1: 0.9537 | ROC-AUC: 0.9392
Max val ROC-AUC: 0.9545
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0602 | Acc: 0.9821 | Prec: 0.9790 | Recall: 0.9826 | F1: 0.9808 | ROC-AUC: 0.9972 || Test Loss: 0.4620 | Acc: 0.8953 | Prec: 0.9784 | Recall: 0.9073 | F1: 0.9415 | ROC-AUC: 0.8581
Max val ROC-AUC: 0.9404
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0520 | Acc: 0.9852 | Prec: 0.9904 | Recall: 0.9775 | F1: 0.9839 | ROC-AUC: 0.9967 || Test Loss: 0.2862 | Acc: 0.9256 | Prec: 0.9693 | Recall: 0.9499 | F1: 0.9595 | ROC-AUC: 0.9145
Max val ROC-AUC: 0.9414
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0307 | Acc: 0.9902 | Prec: 0.9934 | Recall: 0.9855 | F1: 0.9894 | ROC-AUC: 0.9990 || Test Loss: 0.5119 | Acc: 0.9000 | Prec: 0.9837 | Recall: 0.9073 | F1: 0.9439 | ROC-AUC: 0.9553
Max val ROC-AUC: 0.9553
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0287 | Acc: 0.9926 | Prec: 0.9934 | Recall: 0.9906 | F1: 0.9920 | ROC-AUC: 0.9987 || Test Loss: 0.5382 | Acc: 0.9023 | Prec: 0.9837 | Recall: 0.9098 | F1: 0.9453 | ROC-AUC: 0.9023
Max val ROC-AUC: 0.9677
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0412 | Acc: 0.9889 | Prec: 0.9934 | Recall: 0.9826 | F1: 0.9880 | ROC-AUC: 0.9981 || Test Loss: 0.4646 | Acc: 0.9209 | Prec: 0.9765 | Recall: 0.9373 | F1: 0.9565 | ROC-AUC: 0.9126
Max val ROC-AUC: 0.9348
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0391 | Acc: 0.9885 | Prec: 0.9934 | Recall: 0.9818 | F1: 0.9876 | ROC-AUC: 0.9982 || Test Loss: 0.2688 | Acc: 0.9488 | Prec: 0.9846 | Recall: 0.9599 | F1: 0.9721 | ROC-AUC: 0.9568
Max val ROC-AUC: 0.9568
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0449 | Acc: 0.9875 | Prec: 0.9912 | Recall: 0.9818 | F1: 0.9865 | ROC-AUC: 0.9974 || Test Loss: 0.3914 | Acc: 0.9186 | Prec: 0.9840 | Recall: 0.9273 | F1: 0.9548 | ROC-AUC: 0.9403
Max val ROC-AUC: 0.9605
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6916 | Acc: 0.5453 | Prec: 0.5228 | Recall: 0.2251 | F1: 0.3147 | ROC-AUC: 0.5146 || Test Loss: 0.8173 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.7014 | Acc: 0.5224 | Prec: 0.4529 | Recall: 0.1431 | F1: 0.2174 | ROC-AUC: 0.4892 || Test Loss: 0.7397 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.7237 | Acc: 0.5349 | Prec: 0.4938 | Recall: 0.1147 | F1: 0.1862 | ROC-AUC: 0.5098 || Test Loss: 0.8602 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5002
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.7560 | Acc: 0.5227 | Prec: 0.4623 | Recall: 0.1779 | F1: 0.2569 | ROC-AUC: 0.4916 || Test Loss: 0.8164 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.6638
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6967 | Acc: 0.5217 | Prec: 0.4512 | Recall: 0.1445 | F1: 0.2189 | ROC-AUC: 0.4863 || Test Loss: 0.6970 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6939 | Acc: 0.5234 | Prec: 0.4340 | Recall: 0.0908 | F1: 0.1502 | ROC-AUC: 0.4825 || Test Loss: 0.7979 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.6496
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6976 | Acc: 0.5211 | Prec: 0.4450 | Recall: 0.1322 | F1: 0.2038 | ROC-AUC: 0.5040 || Test Loss: 0.6397 | Acc: 0.9279 | Prec: 0.9279 | Recall: 1.0000 | F1: 0.9626 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6954 | Acc: 0.5254 | Prec: 0.4677 | Recall: 0.1685 | F1: 0.2477 | ROC-AUC: 0.4899 || Test Loss: 0.7558 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5715
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.7120 | Acc: 0.5133 | Prec: 0.4469 | Recall: 0.2077 | F1: 0.2836 | ROC-AUC: 0.4920 || Test Loss: 0.7201 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5013
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.7008 | Acc: 0.5231 | Prec: 0.4657 | Recall: 0.1924 | F1: 0.2724 | ROC-AUC: 0.5023 || Test Loss: 0.6204 | Acc: 0.9279 | Prec: 0.9279 | Recall: 1.0000 | F1: 0.9626 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1849 | Acc: 0.9353 | Prec: 0.9568 | Recall: 0.9012 | F1: 0.9282 | ROC-AUC: 0.9772 || Test Loss: 0.3155 | Acc: 0.9302 | Prec: 0.9843 | Recall: 0.9398 | F1: 0.9615 | ROC-AUC: 0.8889
Max val ROC-AUC: 0.9593
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1418 | Acc: 0.9549 | Prec: 0.9770 | Recall: 0.9245 | F1: 0.9500 | ROC-AUC: 0.9832 || Test Loss: 0.6230 | Acc: 0.8488 | Prec: 0.9855 | Recall: 0.8496 | F1: 0.9125 | ROC-AUC: 0.9291
Max val ROC-AUC: 0.9597
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1712 | Acc: 0.9508 | Prec: 0.9760 | Recall: 0.9165 | F1: 0.9453 | ROC-AUC: 0.9770 || Test Loss: 0.3886 | Acc: 0.8814 | Prec: 0.9860 | Recall: 0.8847 | F1: 0.9326 | ROC-AUC: 0.9100
Max val ROC-AUC: 0.9423
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1571 | Acc: 0.9572 | Prec: 0.9800 | Recall: 0.9267 | F1: 0.9526 | ROC-AUC: 0.9813 || Test Loss: 0.3189 | Acc: 0.9186 | Prec: 0.9892 | Recall: 0.9223 | F1: 0.9546 | ROC-AUC: 0.9434
Max val ROC-AUC: 0.9572
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1623 | Acc: 0.9491 | Prec: 0.9701 | Recall: 0.9187 | F1: 0.9437 | ROC-AUC: 0.9799 || Test Loss: 0.3195 | Acc: 0.8674 | Prec: 0.9914 | Recall: 0.8647 | F1: 0.9237 | ROC-AUC: 0.9580
Max val ROC-AUC: 0.9580
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1533 | Acc: 0.9518 | Prec: 0.9724 | Recall: 0.9223 | F1: 0.9467 | ROC-AUC: 0.9819 || Test Loss: 0.5962 | Acc: 0.8326 | Prec: 0.9910 | Recall: 0.8271 | F1: 0.9016 | ROC-AUC: 0.9473
Max val ROC-AUC: 0.9513
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1490 | Acc: 0.9562 | Prec: 0.9763 | Recall: 0.9281 | F1: 0.9516 | ROC-AUC: 0.9815 || Test Loss: 0.3387 | Acc: 0.8860 | Prec: 0.9888 | Recall: 0.8872 | F1: 0.9353 | ROC-AUC: 0.9635
Max val ROC-AUC: 0.9635
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1725 | Acc: 0.9498 | Prec: 0.9738 | Recall: 0.9165 | F1: 0.9443 | ROC-AUC: 0.9774 || Test Loss: 0.4733 | Acc: 0.8767 | Prec: 0.9832 | Recall: 0.8822 | F1: 0.9300 | ROC-AUC: 0.9239
Max val ROC-AUC: 0.9496
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1562 | Acc: 0.9566 | Prec: 0.9629 | Recall: 0.9426 | F1: 0.9527 | ROC-AUC: 0.9840 || Test Loss: 0.4361 | Acc: 0.9140 | Prec: 0.9866 | Recall: 0.9198 | F1: 0.9520 | ROC-AUC: 0.9273
Max val ROC-AUC: 0.9550
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1669 | Acc: 0.9478 | Prec: 0.9629 | Recall: 0.9230 | F1: 0.9425 | ROC-AUC: 0.9809 || Test Loss: 0.3802 | Acc: 0.8744 | Prec: 0.9943 | Recall: 0.8697 | F1: 0.9278 | ROC-AUC: 0.9481
Max val ROC-AUC: 0.9512
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0746 | Acc: 0.9791 | Prec: 0.9896 | Recall: 0.9651 | F1: 0.9772 | ROC-AUC: 0.9946 || Test Loss: 0.4853 | Acc: 0.9163 | Prec: 0.9840 | Recall: 0.9248 | F1: 0.9535 | ROC-AUC: 0.9346
Max val ROC-AUC: 0.9413
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0655 | Acc: 0.9808 | Prec: 0.9889 | Recall: 0.9695 | F1: 0.9791 | ROC-AUC: 0.9961 || Test Loss: 0.4336 | Acc: 0.9163 | Prec: 0.9764 | Recall: 0.9323 | F1: 0.9538 | ROC-AUC: 0.9018
Max val ROC-AUC: 0.9420
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0766 | Acc: 0.9798 | Prec: 0.9903 | Recall: 0.9659 | F1: 0.9779 | ROC-AUC: 0.9943 || Test Loss: 0.4637 | Acc: 0.9116 | Prec: 0.9738 | Recall: 0.9298 | F1: 0.9513 | ROC-AUC: 0.9146
Max val ROC-AUC: 0.9490
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0634 | Acc: 0.9828 | Prec: 0.9897 | Recall: 0.9731 | F1: 0.9813 | ROC-AUC: 0.9959 || Test Loss: 0.5187 | Acc: 0.9093 | Prec: 0.9787 | Recall: 0.9223 | F1: 0.9497 | ROC-AUC: 0.9179
Max val ROC-AUC: 0.9555
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0679 | Acc: 0.9798 | Prec: 0.9874 | Recall: 0.9688 | F1: 0.9780 | ROC-AUC: 0.9957 || Test Loss: 0.6361 | Acc: 0.8465 | Prec: 0.9854 | Recall: 0.8471 | F1: 0.9111 | ROC-AUC: 0.9369
Max val ROC-AUC: 0.9466
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0917 | Acc: 0.9731 | Prec: 0.9917 | Recall: 0.9499 | F1: 0.9703 | ROC-AUC: 0.9927 || Test Loss: 0.4836 | Acc: 0.9140 | Prec: 0.9814 | Recall: 0.9248 | F1: 0.9523 | ROC-AUC: 0.9428
Max val ROC-AUC: 0.9518
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0857 | Acc: 0.9764 | Prec: 0.9881 | Recall: 0.9608 | F1: 0.9742 | ROC-AUC: 0.9936 || Test Loss: 0.6565 | Acc: 0.8395 | Prec: 0.9797 | Recall: 0.8446 | F1: 0.9071 | ROC-AUC: 0.9112
Max val ROC-AUC: 0.9432
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0666 | Acc: 0.9838 | Prec: 0.9911 | Recall: 0.9739 | F1: 0.9824 | ROC-AUC: 0.9951 || Test Loss: 0.4411 | Acc: 0.9233 | Prec: 0.9841 | Recall: 0.9323 | F1: 0.9575 | ROC-AUC: 0.9366
Max val ROC-AUC: 0.9366
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0537 | Acc: 0.9845 | Prec: 0.9904 | Recall: 0.9760 | F1: 0.9832 | ROC-AUC: 0.9970 || Test Loss: 0.5166 | Acc: 0.9047 | Prec: 0.9812 | Recall: 0.9148 | F1: 0.9468 | ROC-AUC: 0.9429
Max val ROC-AUC: 0.9643
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0656 | Acc: 0.9811 | Prec: 0.9925 | Recall: 0.9666 | F1: 0.9794 | ROC-AUC: 0.9967 || Test Loss: 0.4588 | Acc: 0.9233 | Prec: 0.9791 | Recall: 0.9373 | F1: 0.9577 | ROC-AUC: 0.9242
Max val ROC-AUC: 0.9310
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3048 | Acc: 0.9054 | Prec: 0.9145 | Recall: 0.8780 | F1: 0.8959 | ROC-AUC: 0.9327 || Test Loss: 0.4140 | Acc: 0.8791 | Prec: 0.9833 | Recall: 0.8847 | F1: 0.9314 | ROC-AUC: 0.9075
Max val ROC-AUC: 0.9269
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1583 | Acc: 0.9505 | Prec: 0.9569 | Recall: 0.9354 | F1: 0.9460 | ROC-AUC: 0.9825 || Test Loss: 0.4704 | Acc: 0.8488 | Prec: 0.9883 | Recall: 0.8471 | F1: 0.9123 | ROC-AUC: 0.9306
Max val ROC-AUC: 0.9622
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3745 | Acc: 0.8020 | Prec: 0.7986 | Recall: 0.7662 | F1: 0.7821 | ROC-AUC: 0.9084 || Test Loss: 0.5875 | Acc: 0.7581 | Prec: 1.0000 | Recall: 0.7393 | F1: 0.8501 | ROC-AUC: 0.9029
Max val ROC-AUC: 0.9601
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2361 | Acc: 0.9188 | Prec: 0.9271 | Recall: 0.8954 | F1: 0.9110 | ROC-AUC: 0.9696 || Test Loss: 0.6399 | Acc: 0.7581 | Prec: 0.9900 | Recall: 0.7469 | F1: 0.8514 | ROC-AUC: 0.9202
Max val ROC-AUC: 0.9374
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1211 | Acc: 0.9613 | Prec: 0.9688 | Recall: 0.9470 | F1: 0.9578 | ROC-AUC: 0.9882 || Test Loss: 0.2376 | Acc: 0.9326 | Prec: 0.9868 | Recall: 0.9398 | F1: 0.9628 | ROC-AUC: 0.9566
Max val ROC-AUC: 0.9736
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1505 | Acc: 0.9532 | Prec: 0.9626 | Recall: 0.9354 | F1: 0.9488 | ROC-AUC: 0.9788 || Test Loss: 0.4315 | Acc: 0.8651 | Prec: 0.9885 | Recall: 0.8647 | F1: 0.9225 | ROC-AUC: 0.9268
Max val ROC-AUC: 0.9605
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1987 | Acc: 0.9400 | Prec: 0.9709 | Recall: 0.8976 | F1: 0.9328 | ROC-AUC: 0.9594 || Test Loss: 0.2454 | Acc: 0.9233 | Prec: 0.9766 | Recall: 0.9398 | F1: 0.9579 | ROC-AUC: 0.9224
Max val ROC-AUC: 0.9399
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1552 | Acc: 0.9562 | Prec: 0.9792 | Recall: 0.9252 | F1: 0.9515 | ROC-AUC: 0.9658 || Test Loss: 0.3756 | Acc: 0.8767 | Prec: 0.9806 | Recall: 0.8847 | F1: 0.9302 | ROC-AUC: 0.9053
Max val ROC-AUC: 0.9322
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1795 | Acc: 0.9485 | Prec: 0.9650 | Recall: 0.9223 | F1: 0.9432 | ROC-AUC: 0.9638 || Test Loss: 0.6479 | Acc: 0.7837 | Prec: 0.9904 | Recall: 0.7744 | F1: 0.8692 | ROC-AUC: 0.8959
Max val ROC-AUC: 0.9542
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2857 | Acc: 0.9131 | Prec: 0.9472 | Recall: 0.8606 | F1: 0.9018 | ROC-AUC: 0.9274 || Test Loss: 0.4554 | Acc: 0.8419 | Prec: 0.9853 | Recall: 0.8421 | F1: 0.9081 | ROC-AUC: 0.8432
Max val ROC-AUC: 0.9563
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0511 | Acc: 0.9818 | Prec: 0.9868 | Recall: 0.9739 | F1: 0.9803 | ROC-AUC: 0.9976 || Test Loss: 0.2355 | Acc: 0.9372 | Prec: 0.9769 | Recall: 0.9549 | F1: 0.9658 | ROC-AUC: 0.9497
Max val ROC-AUC: 0.9610
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0570 | Acc: 0.9784 | Prec: 0.9782 | Recall: 0.9753 | F1: 0.9767 | ROC-AUC: 0.9976 || Test Loss: 0.3735 | Acc: 0.9093 | Prec: 0.9891 | Recall: 0.9123 | F1: 0.9492 | ROC-AUC: 0.9735
Max val ROC-AUC: 0.9755
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0698 | Acc: 0.9791 | Prec: 0.9810 | Recall: 0.9739 | F1: 0.9774 | ROC-AUC: 0.9957 || Test Loss: 0.2582 | Acc: 0.9372 | Prec: 0.9921 | Recall: 0.9398 | F1: 0.9653 | ROC-AUC: 0.9725
Max val ROC-AUC: 0.9725
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0730 | Acc: 0.9771 | Prec: 0.9788 | Recall: 0.9717 | F1: 0.9752 | ROC-AUC: 0.9949 || Test Loss: 0.2925 | Acc: 0.9186 | Prec: 0.9973 | Recall: 0.9148 | F1: 0.9542 | ROC-AUC: 0.9736
Max val ROC-AUC: 0.9736
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0548 | Acc: 0.9865 | Prec: 0.9855 | Recall: 0.9855 | F1: 0.9855 | ROC-AUC: 0.9972 || Test Loss: 0.3148 | Acc: 0.9372 | Prec: 0.9794 | Recall: 0.9524 | F1: 0.9657 | ROC-AUC: 0.9467
Max val ROC-AUC: 0.9723
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0417 | Acc: 0.9845 | Prec: 0.9840 | Recall: 0.9826 | F1: 0.9833 | ROC-AUC: 0.9986 || Test Loss: 0.3344 | Acc: 0.9070 | Prec: 0.9918 | Recall: 0.9073 | F1: 0.9476 | ROC-AUC: 0.9626
Max val ROC-AUC: 0.9626
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0494 | Acc: 0.9852 | Prec: 0.9876 | Recall: 0.9804 | F1: 0.9840 | ROC-AUC: 0.9979 || Test Loss: 0.3463 | Acc: 0.9186 | Prec: 0.9919 | Recall: 0.9198 | F1: 0.9545 | ROC-AUC: 0.9643
Max val ROC-AUC: 0.9741
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0774 | Acc: 0.9707 | Prec: 0.9864 | Recall: 0.9499 | F1: 0.9678 | ROC-AUC: 0.9946 || Test Loss: 0.3731 | Acc: 0.8884 | Prec: 0.9756 | Recall: 0.9023 | F1: 0.9375 | ROC-AUC: 0.9215
Max val ROC-AUC: 0.9289
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0589 | Acc: 0.9821 | Prec: 0.9860 | Recall: 0.9753 | F1: 0.9806 | ROC-AUC: 0.9961 || Test Loss: 0.4164 | Acc: 0.9186 | Prec: 0.9740 | Recall: 0.9373 | F1: 0.9553 | ROC-AUC: 0.9164
Max val ROC-AUC: 0.9462
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0606 | Acc: 0.9798 | Prec: 0.9824 | Recall: 0.9739 | F1: 0.9781 | ROC-AUC: 0.9972 || Test Loss: 0.4680 | Acc: 0.9023 | Prec: 0.9890 | Recall: 0.9048 | F1: 0.9450 | ROC-AUC: 0.9497
Max val ROC-AUC: 0.9698
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0366 | Acc: 0.9882 | Prec: 0.9855 | Recall: 0.9891 | F1: 0.9873 | ROC-AUC: 0.9988 || Test Loss: 0.4154 | Acc: 0.9070 | Prec: 0.9864 | Recall: 0.9123 | F1: 0.9479 | ROC-AUC: 0.9341
Max val ROC-AUC: 0.9501
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0581 | Acc: 0.9828 | Prec: 0.9839 | Recall: 0.9789 | F1: 0.9814 | ROC-AUC: 0.9967 || Test Loss: 0.5154 | Acc: 0.8814 | Prec: 0.9833 | Recall: 0.8872 | F1: 0.9328 | ROC-AUC: 0.9140
Max val ROC-AUC: 0.9601
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0065 | Acc: 0.9980 | Prec: 0.9985 | Recall: 0.9971 | F1: 0.9978 | ROC-AUC: 0.9998 || Test Loss: 0.4803 | Acc: 0.9395 | Prec: 0.9794 | Recall: 0.9549 | F1: 0.9670 | ROC-AUC: 0.9001
Max val ROC-AUC: 0.9528
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0072 | Acc: 0.9987 | Prec: 0.9993 | Recall: 0.9978 | F1: 0.9985 | ROC-AUC: 0.9997 || Test Loss: 0.3737 | Acc: 0.9349 | Prec: 0.9793 | Recall: 0.9499 | F1: 0.9644 | ROC-AUC: 0.9468
Max val ROC-AUC: 0.9553
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0092 | Acc: 0.9980 | Prec: 0.9993 | Recall: 0.9964 | F1: 0.9978 | ROC-AUC: 0.9999 || Test Loss: 0.4246 | Acc: 0.9302 | Prec: 0.9767 | Recall: 0.9474 | F1: 0.9618 | ROC-AUC: 0.9311
Max val ROC-AUC: 0.9502
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0359 | Acc: 0.9882 | Prec: 0.9934 | Recall: 0.9811 | F1: 0.9872 | ROC-AUC: 0.9988 || Test Loss: 0.4248 | Acc: 0.9302 | Prec: 0.9843 | Recall: 0.9398 | F1: 0.9615 | ROC-AUC: 0.9506
Max val ROC-AUC: 0.9647
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0227 | Acc: 0.9933 | Prec: 0.9956 | Recall: 0.9898 | F1: 0.9927 | ROC-AUC: 0.9992 || Test Loss: 0.3570 | Acc: 0.9419 | Prec: 0.9795 | Recall: 0.9574 | F1: 0.9683 | ROC-AUC: 0.9143
Max val ROC-AUC: 0.9539
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0129 | Acc: 0.9976 | Prec: 0.9993 | Recall: 0.9956 | F1: 0.9975 | ROC-AUC: 0.9993 || Test Loss: 0.4318 | Acc: 0.9302 | Prec: 0.9743 | Recall: 0.9499 | F1: 0.9619 | ROC-AUC: 0.9445
Max val ROC-AUC: 0.9579
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0406 | Acc: 0.9875 | Prec: 0.9941 | Recall: 0.9789 | F1: 0.9865 | ROC-AUC: 0.9986 || Test Loss: 0.3118 | Acc: 0.9442 | Prec: 0.9845 | Recall: 0.9549 | F1: 0.9695 | ROC-AUC: 0.9480
Max val ROC-AUC: 0.9651
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0176 | Acc: 0.9966 | Prec: 0.9985 | Recall: 0.9942 | F1: 0.9964 | ROC-AUC: 0.9988 || Test Loss: 0.3977 | Acc: 0.9349 | Prec: 0.9818 | Recall: 0.9474 | F1: 0.9643 | ROC-AUC: 0.9249
Max val ROC-AUC: 0.9449
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2554 | Acc: 0.9087 | Prec: 0.9300 | Recall: 0.8686 | F1: 0.8982 | ROC-AUC: 0.9648 || Test Loss: 0.4977 | Acc: 0.8605 | Prec: 0.9857 | Recall: 0.8622 | F1: 0.9198 | ROC-AUC: 0.9265
Max val ROC-AUC: 0.9602
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3159 | Acc: 0.8949 | Prec: 0.9540 | Recall: 0.8126 | F1: 0.8776 | ROC-AUC: 0.9168 || Test Loss: 0.4272 | Acc: 0.7605 | Prec: 0.9966 | Recall: 0.7444 | F1: 0.8522 | ROC-AUC: 0.8651
Max val ROC-AUC: 0.9432
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2229 | Acc: 0.9242 | Prec: 0.9507 | Recall: 0.8824 | F1: 0.9153 | ROC-AUC: 0.9549 || Test Loss: 0.5361 | Acc: 0.7977 | Prec: 0.9815 | Recall: 0.7970 | F1: 0.8797 | ROC-AUC: 0.8549
Max val ROC-AUC: 0.9149
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2966 | Acc: 0.8872 | Prec: 0.8633 | Recall: 0.8991 | F1: 0.8808 | ROC-AUC: 0.9498 || Test Loss: 0.4245 | Acc: 0.8070 | Prec: 0.9907 | Recall: 0.7995 | F1: 0.8849 | ROC-AUC: 0.9184
Max val ROC-AUC: 0.9423
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2806 | Acc: 0.8969 | Prec: 0.9393 | Recall: 0.8315 | F1: 0.8821 | ROC-AUC: 0.9533 || Test Loss: 0.3713 | Acc: 0.8674 | Prec: 0.9914 | Recall: 0.8647 | F1: 0.9237 | ROC-AUC: 0.9203
Max val ROC-AUC: 0.9428
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3891 | Acc: 0.8454 | Prec: 0.8110 | Recall: 0.8693 | F1: 0.8391 | ROC-AUC: 0.9110 || Test Loss: 0.5925 | Acc: 0.7814 | Prec: 0.9781 | Recall: 0.7820 | F1: 0.8691 | ROC-AUC: 0.7818
Max val ROC-AUC: 0.9360
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2837 | Acc: 0.8973 | Prec: 0.9073 | Recall: 0.8671 | F1: 0.8867 | ROC-AUC: 0.9524 || Test Loss: 0.7166 | Acc: 0.8581 | Prec: 0.9884 | Recall: 0.8571 | F1: 0.9181 | ROC-AUC: 0.9329
Max val ROC-AUC: 0.9391
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2414 | Acc: 0.9165 | Prec: 0.9477 | Recall: 0.8678 | F1: 0.9060 | ROC-AUC: 0.9568 || Test Loss: 1.1752 | Acc: 0.8744 | Prec: 0.9832 | Recall: 0.8797 | F1: 0.9286 | ROC-AUC: 0.9171
Max val ROC-AUC: 0.9471
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2854 | Acc: 0.9155 | Prec: 0.9351 | Recall: 0.8787 | F1: 0.9060 | ROC-AUC: 0.9438 || Test Loss: 0.2865 | Acc: 0.9302 | Prec: 0.9792 | Recall: 0.9449 | F1: 0.9617 | ROC-AUC: 0.9453
Max val ROC-AUC: 0.9453
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3230 | Acc: 0.8963 | Prec: 0.8751 | Recall: 0.9056 | F1: 0.8901 | ROC-AUC: 0.9357 || Test Loss: 0.5817 | Acc: 0.9209 | Prec: 0.9867 | Recall: 0.9273 | F1: 0.9561 | ROC-AUC: 0.8959
Max val ROC-AUC: 0.9641
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1431 | Acc: 0.9512 | Prec: 0.9646 | Recall: 0.9288 | F1: 0.9464 | ROC-AUC: 0.9841 || Test Loss: 0.3265 | Acc: 0.9000 | Prec: 0.9863 | Recall: 0.9048 | F1: 0.9438 | ROC-AUC: 0.8940
Max val ROC-AUC: 0.9572
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0612 | Acc: 0.9828 | Prec: 0.9918 | Recall: 0.9710 | F1: 0.9813 | ROC-AUC: 0.9956 || Test Loss: 0.7141 | Acc: 0.8767 | Prec: 0.9860 | Recall: 0.8797 | F1: 0.9298 | ROC-AUC: 0.9474
Max val ROC-AUC: 0.9474
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1392 | Acc: 0.9663 | Prec: 0.9776 | Recall: 0.9492 | F1: 0.9632 | ROC-AUC: 0.9815 || Test Loss: 0.2426 | Acc: 0.9140 | Prec: 0.9918 | Recall: 0.9148 | F1: 0.9518 | ROC-AUC: 0.9535
Max val ROC-AUC: 0.9535
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1071 | Acc: 0.9579 | Prec: 0.9707 | Recall: 0.9375 | F1: 0.9538 | ROC-AUC: 0.9917 || Test Loss: 0.3111 | Acc: 0.9093 | Prec: 0.9787 | Recall: 0.9223 | F1: 0.9497 | ROC-AUC: 0.8869
Max val ROC-AUC: 0.9419
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1455 | Acc: 0.9478 | Prec: 0.9700 | Recall: 0.9158 | F1: 0.9421 | ROC-AUC: 0.9837 || Test Loss: 0.3123 | Acc: 0.8628 | Prec: 0.9942 | Recall: 0.8571 | F1: 0.9206 | ROC-AUC: 0.9584
Max val ROC-AUC: 0.9584
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0895 | Acc: 0.9737 | Prec: 0.9858 | Recall: 0.9572 | F1: 0.9713 | ROC-AUC: 0.9930 || Test Loss: 0.3923 | Acc: 0.8884 | Prec: 0.9861 | Recall: 0.8922 | F1: 0.9368 | ROC-AUC: 0.9332
Max val ROC-AUC: 0.9539
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1223 | Acc: 0.9619 | Prec: 0.9862 | Recall: 0.9310 | F1: 0.9578 | ROC-AUC: 0.9852 || Test Loss: 0.4649 | Acc: 0.8628 | Prec: 0.9802 | Recall: 0.8697 | F1: 0.9216 | ROC-AUC: 0.9091
Max val ROC-AUC: 0.9269
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0705 | Acc: 0.9798 | Prec: 0.9889 | Recall: 0.9673 | F1: 0.9780 | ROC-AUC: 0.9947 || Test Loss: 0.4873 | Acc: 0.9023 | Prec: 0.9890 | Recall: 0.9048 | F1: 0.9450 | ROC-AUC: 0.9479
Max val ROC-AUC: 0.9507
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1000 | Acc: 0.9707 | Prec: 0.9835 | Recall: 0.9528 | F1: 0.9679 | ROC-AUC: 0.9903 || Test Loss: 0.3466 | Acc: 0.8953 | Prec: 0.9836 | Recall: 0.9023 | F1: 0.9412 | ROC-AUC: 0.9138
Max val ROC-AUC: 0.9462
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1086 | Acc: 0.9670 | Prec: 0.9797 | Recall: 0.9484 | F1: 0.9638 | ROC-AUC: 0.9905 || Test Loss: 0.5162 | Acc: 0.9000 | Prec: 0.9837 | Recall: 0.9073 | F1: 0.9439 | ROC-AUC: 0.9248
Max val ROC-AUC: 0.9701
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0092 | Acc: 0.9970 | Prec: 0.9978 | Recall: 0.9956 | F1: 0.9967 | ROC-AUC: 0.9999 || Test Loss: 0.4643 | Acc: 0.9326 | Prec: 0.9818 | Recall: 0.9449 | F1: 0.9630 | ROC-AUC: 0.9452
Max val ROC-AUC: 0.9586
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0221 | Acc: 0.9933 | Prec: 0.9920 | Recall: 0.9935 | F1: 0.9927 | ROC-AUC: 0.9991 || Test Loss: 0.4093 | Acc: 0.9233 | Prec: 0.9741 | Recall: 0.9424 | F1: 0.9580 | ROC-AUC: 0.9304
Max val ROC-AUC: 0.9631
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0344 | Acc: 0.9919 | Prec: 0.9942 | Recall: 0.9884 | F1: 0.9913 | ROC-AUC: 0.9977 || Test Loss: 0.3117 | Acc: 0.9419 | Prec: 0.9870 | Recall: 0.9499 | F1: 0.9681 | ROC-AUC: 0.9594
Max val ROC-AUC: 0.9594
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0175 | Acc: 0.9949 | Prec: 0.9964 | Recall: 0.9927 | F1: 0.9945 | ROC-AUC: 0.9996 || Test Loss: 0.3754 | Acc: 0.9209 | Prec: 0.9841 | Recall: 0.9298 | F1: 0.9562 | ROC-AUC: 0.9149
Max val ROC-AUC: 0.9540
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0253 | Acc: 0.9916 | Prec: 0.9963 | Recall: 0.9855 | F1: 0.9909 | ROC-AUC: 0.9992 || Test Loss: 0.3899 | Acc: 0.9372 | Prec: 0.9869 | Recall: 0.9449 | F1: 0.9654 | ROC-AUC: 0.9643
Max val ROC-AUC: 0.9643
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0275 | Acc: 0.9943 | Prec: 0.9964 | Recall: 0.9913 | F1: 0.9938 | ROC-AUC: 0.9987 || Test Loss: 0.3270 | Acc: 0.9186 | Prec: 0.9815 | Recall: 0.9298 | F1: 0.9550 | ROC-AUC: 0.9654
Max val ROC-AUC: 0.9654
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0203 | Acc: 0.9939 | Prec: 0.9963 | Recall: 0.9906 | F1: 0.9934 | ROC-AUC: 0.9992 || Test Loss: 0.4466 | Acc: 0.9326 | Prec: 0.9843 | Recall: 0.9424 | F1: 0.9629 | ROC-AUC: 0.9318
Max val ROC-AUC: 0.9517
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0527 | Acc: 0.9859 | Prec: 0.9904 | Recall: 0.9789 | F1: 0.9847 | ROC-AUC: 0.9970 || Test Loss: 0.3253 | Acc: 0.9372 | Prec: 0.9819 | Recall: 0.9499 | F1: 0.9656 | ROC-AUC: 0.9217
Max val ROC-AUC: 0.9398
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0513 | Acc: 0.9865 | Prec: 0.9934 | Recall: 0.9775 | F1: 0.9854 | ROC-AUC: 0.9956 || Test Loss: 0.3061 | Acc: 0.9302 | Prec: 0.9894 | Recall: 0.9348 | F1: 0.9613 | ROC-AUC: 0.9458
Max val ROC-AUC: 0.9538
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0370 | Acc: 0.9896 | Prec: 0.9934 | Recall: 0.9840 | F1: 0.9887 | ROC-AUC: 0.9984 || Test Loss: 0.4459 | Acc: 0.8884 | Prec: 0.9861 | Recall: 0.8922 | F1: 0.9368 | ROC-AUC: 0.9490
Max val ROC-AUC: 0.9519
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6973 | Acc: 0.5133 | Prec: 0.4482 | Recall: 0.2135 | F1: 0.2892 | ROC-AUC: 0.4918 || Test Loss: 0.9272 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5078
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.5606 | Acc: 0.7444 | Prec: 0.8705 | Recall: 0.5272 | F1: 0.6567 | ROC-AUC: 0.7826 || Test Loss: 0.3990 | Acc: 0.7605 | Prec: 0.9966 | Recall: 0.7444 | F1: 0.8522 | ROC-AUC: 0.8665
Max val ROC-AUC: 0.9359
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.4421 | Acc: 0.8235 | Prec: 0.7555 | Recall: 0.9158 | F1: 0.8280 | ROC-AUC: 0.8751 || Test Loss: 0.3655 | Acc: 0.9488 | Prec: 0.9724 | Recall: 0.9724 | F1: 0.9724 | ROC-AUC: 0.9165
Max val ROC-AUC: 0.9647
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6471 | Acc: 0.5904 | Prec: 0.5548 | Recall: 0.5919 | F1: 0.5727 | ROC-AUC: 0.6499 || Test Loss: 0.6812 | Acc: 0.6698 | Prec: 0.9886 | Recall: 0.6516 | F1: 0.7855 | ROC-AUC: 0.8897
Max val ROC-AUC: 0.9526
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.5726 | Acc: 0.7460 | Prec: 0.8442 | Recall: 0.5548 | F1: 0.6696 | ROC-AUC: 0.7845 || Test Loss: 0.3760 | Acc: 0.7256 | Prec: 0.9862 | Recall: 0.7143 | F1: 0.8285 | ROC-AUC: 0.8374
Max val ROC-AUC: 0.9507
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6948 | Acc: 0.5244 | Prec: 0.4721 | Recall: 0.2150 | F1: 0.2954 | ROC-AUC: 0.5047 || Test Loss: 0.8155 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.7946
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.4839 | Acc: 0.7949 | Prec: 0.7310 | Recall: 0.8824 | F1: 0.7996 | ROC-AUC: 0.8616 || Test Loss: 0.9900 | Acc: 0.9419 | Prec: 0.9845 | Recall: 0.9524 | F1: 0.9682 | ROC-AUC: 0.8957
Max val ROC-AUC: 0.9525
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6942 | Acc: 0.5147 | Prec: 0.4661 | Recall: 0.3195 | F1: 0.3791 | ROC-AUC: 0.5030 || Test Loss: 0.7902 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6658 | Acc: 0.5810 | Prec: 0.6418 | Recall: 0.2186 | F1: 0.3261 | ROC-AUC: 0.5813 || Test Loss: 0.5803 | Acc: 0.3233 | Prec: 1.0000 | Recall: 0.2707 | F1: 0.4260 | ROC-AUC: 0.6366
Max val ROC-AUC: 0.9671
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.5128 | Acc: 0.7922 | Prec: 0.7207 | Recall: 0.9012 | F1: 0.8009 | ROC-AUC: 0.8203 || Test Loss: 0.5417 | Acc: 0.6628 | Prec: 0.9961 | Recall: 0.6391 | F1: 0.7786 | ROC-AUC: 0.8149
Max val ROC-AUC: 0.9377
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1538 | Acc: 0.9505 | Prec: 0.9753 | Recall: 0.9165 | F1: 0.9450 | ROC-AUC: 0.9823 || Test Loss: 0.4702 | Acc: 0.8837 | Prec: 0.9807 | Recall: 0.8922 | F1: 0.9344 | ROC-AUC: 0.9225
Max val ROC-AUC: 0.9550
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1338 | Acc: 0.9603 | Prec: 0.9809 | Recall: 0.9325 | F1: 0.9561 | ROC-AUC: 0.9798 || Test Loss: 0.5352 | Acc: 0.8558 | Prec: 0.9884 | Recall: 0.8546 | F1: 0.9167 | ROC-AUC: 0.9241
Max val ROC-AUC: 0.9532
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1120 | Acc: 0.9656 | Prec: 0.9819 | Recall: 0.9434 | F1: 0.9622 | ROC-AUC: 0.9898 || Test Loss: 0.3391 | Acc: 0.9093 | Prec: 0.9891 | Recall: 0.9123 | F1: 0.9492 | ROC-AUC: 0.9472
Max val ROC-AUC: 0.9556
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1084 | Acc: 0.9714 | Prec: 0.9807 | Recall: 0.9572 | F1: 0.9688 | ROC-AUC: 0.9880 || Test Loss: 0.2779 | Acc: 0.9326 | Prec: 0.9719 | Recall: 0.9549 | F1: 0.9633 | ROC-AUC: 0.8978
Max val ROC-AUC: 0.9609
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1466 | Acc: 0.9603 | Prec: 0.9680 | Recall: 0.9455 | F1: 0.9566 | ROC-AUC: 0.9837 || Test Loss: 0.3083 | Acc: 0.9279 | Prec: 0.9842 | Recall: 0.9373 | F1: 0.9602 | ROC-AUC: 0.9358
Max val ROC-AUC: 0.9426
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1401 | Acc: 0.9623 | Prec: 0.9781 | Recall: 0.9397 | F1: 0.9585 | ROC-AUC: 0.9827 || Test Loss: 0.5837 | Acc: 0.8674 | Prec: 0.9858 | Recall: 0.8697 | F1: 0.9241 | ROC-AUC: 0.9380
Max val ROC-AUC: 0.9601
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1163 | Acc: 0.9640 | Prec: 0.9833 | Recall: 0.9383 | F1: 0.9602 | ROC-AUC: 0.9885 || Test Loss: 0.4286 | Acc: 0.9000 | Prec: 0.9863 | Recall: 0.9048 | F1: 0.9438 | ROC-AUC: 0.9530
Max val ROC-AUC: 0.9530
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1308 | Acc: 0.9552 | Prec: 0.9763 | Recall: 0.9259 | F1: 0.9504 | ROC-AUC: 0.9861 || Test Loss: 0.3374 | Acc: 0.8744 | Prec: 0.9859 | Recall: 0.8772 | F1: 0.9284 | ROC-AUC: 0.9297
Max val ROC-AUC: 0.9641
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1383 | Acc: 0.9579 | Prec: 0.9815 | Recall: 0.9267 | F1: 0.9533 | ROC-AUC: 0.9842 || Test Loss: 0.3474 | Acc: 0.8977 | Prec: 0.9837 | Recall: 0.9048 | F1: 0.9426 | ROC-AUC: 0.8909
Max val ROC-AUC: 0.9413
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1550 | Acc: 0.9535 | Prec: 0.9620 | Recall: 0.9368 | F1: 0.9492 | ROC-AUC: 0.9822 || Test Loss: 0.6743 | Acc: 0.8395 | Prec: 0.9911 | Recall: 0.8346 | F1: 0.9061 | ROC-AUC: 0.8989
Max val ROC-AUC: 0.9585
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0444 | Acc: 0.9869 | Prec: 0.9905 | Recall: 0.9811 | F1: 0.9858 | ROC-AUC: 0.9980 || Test Loss: 0.4402 | Acc: 0.9093 | Prec: 0.9865 | Recall: 0.9148 | F1: 0.9493 | ROC-AUC: 0.9487
Max val ROC-AUC: 0.9487
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0502 | Acc: 0.9852 | Prec: 0.9904 | Recall: 0.9775 | F1: 0.9839 | ROC-AUC: 0.9971 || Test Loss: 0.3982 | Acc: 0.9209 | Prec: 0.9740 | Recall: 0.9398 | F1: 0.9566 | ROC-AUC: 0.9436
Max val ROC-AUC: 0.9752
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0475 | Acc: 0.9862 | Prec: 0.9890 | Recall: 0.9811 | F1: 0.9851 | ROC-AUC: 0.9976 || Test Loss: 0.3619 | Acc: 0.9233 | Prec: 0.9841 | Recall: 0.9323 | F1: 0.9575 | ROC-AUC: 0.9238
Max val ROC-AUC: 0.9244
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0370 | Acc: 0.9909 | Prec: 0.9920 | Recall: 0.9884 | F1: 0.9902 | ROC-AUC: 0.9981 || Test Loss: 0.3810 | Acc: 0.9326 | Prec: 0.9744 | Recall: 0.9524 | F1: 0.9632 | ROC-AUC: 0.9273
Max val ROC-AUC: 0.9665
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0417 | Acc: 0.9902 | Prec: 0.9956 | Recall: 0.9833 | F1: 0.9894 | ROC-AUC: 0.9970 || Test Loss: 0.3381 | Acc: 0.9233 | Prec: 0.9816 | Recall: 0.9348 | F1: 0.9576 | ROC-AUC: 0.9462
Max val ROC-AUC: 0.9600
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0255 | Acc: 0.9929 | Prec: 0.9963 | Recall: 0.9884 | F1: 0.9923 | ROC-AUC: 0.9992 || Test Loss: 0.5755 | Acc: 0.9116 | Prec: 0.9839 | Recall: 0.9198 | F1: 0.9508 | ROC-AUC: 0.9530
Max val ROC-AUC: 0.9599
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0219 | Acc: 0.9939 | Prec: 0.9949 | Recall: 0.9920 | F1: 0.9935 | ROC-AUC: 0.9995 || Test Loss: 0.4620 | Acc: 0.9279 | Prec: 0.9817 | Recall: 0.9398 | F1: 0.9603 | ROC-AUC: 0.9327
Max val ROC-AUC: 0.9506
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0398 | Acc: 0.9838 | Prec: 0.9926 | Recall: 0.9724 | F1: 0.9824 | ROC-AUC: 0.9989 || Test Loss: 0.4634 | Acc: 0.9140 | Prec: 0.9840 | Recall: 0.9223 | F1: 0.9521 | ROC-AUC: 0.9441
Max val ROC-AUC: 0.9673
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0648 | Acc: 0.9811 | Prec: 0.9839 | Recall: 0.9753 | F1: 0.9796 | ROC-AUC: 0.9963 || Test Loss: 0.3238 | Acc: 0.9279 | Prec: 0.9792 | Recall: 0.9424 | F1: 0.9604 | ROC-AUC: 0.9329
Max val ROC-AUC: 0.9508
Training with d_model=64, nhead=2, num_layers=6, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0458 | Acc: 0.9855 | Prec: 0.9926 | Recall: 0.9760 | F1: 0.9843 | ROC-AUC: 0.9980 || Test Loss: 0.5609 | Acc: 0.9047 | Prec: 0.9864 | Recall: 0.9098 | F1: 0.9465 | ROC-AUC: 0.9521
Max val ROC-AUC: 0.9521
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6951 | Acc: 0.5140 | Prec: 0.4588 | Recall: 0.2665 | F1: 0.3372 | ROC-AUC: 0.4934 || Test Loss: 0.7692 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6975 | Acc: 0.5291 | Prec: 0.4851 | Recall: 0.2491 | F1: 0.3292 | ROC-AUC: 0.5095 || Test Loss: 0.8195 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6957 | Acc: 0.5167 | Prec: 0.4658 | Recall: 0.2869 | F1: 0.3551 | ROC-AUC: 0.4987 || Test Loss: 0.7504 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6931 | Acc: 0.5301 | Prec: 0.4713 | Recall: 0.1075 | F1: 0.1750 | ROC-AUC: 0.4995 || Test Loss: 0.6710 | Acc: 0.9279 | Prec: 0.9279 | Recall: 1.0000 | F1: 0.9626 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6938 | Acc: 0.5241 | Prec: 0.4719 | Recall: 0.2193 | F1: 0.2995 | ROC-AUC: 0.5125 || Test Loss: 0.7379 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.8487
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6924 | Acc: 0.5288 | Prec: 0.3854 | Recall: 0.0269 | F1: 0.0502 | ROC-AUC: 0.4928 || Test Loss: 0.7273 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5002
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.7027 | Acc: 0.5251 | Prec: 0.4688 | Recall: 0.1801 | F1: 0.2602 | ROC-AUC: 0.4985 || Test Loss: 0.7667 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6946 | Acc: 0.5258 | Prec: 0.4750 | Recall: 0.2142 | F1: 0.2953 | ROC-AUC: 0.4996 || Test Loss: 0.7928 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.6210
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6954 | Acc: 0.5355 | Prec: 0.4972 | Recall: 0.1300 | F1: 0.2061 | ROC-AUC: 0.5162 || Test Loss: 0.8865 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5954
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6956 | Acc: 0.5140 | Prec: 0.4461 | Recall: 0.1983 | F1: 0.2745 | ROC-AUC: 0.4812 || Test Loss: 0.8244 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.7133
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1879 | Acc: 0.9417 | Prec: 0.9740 | Recall: 0.8983 | F1: 0.9346 | ROC-AUC: 0.9723 || Test Loss: 0.3451 | Acc: 0.8721 | Prec: 0.9914 | Recall: 0.8697 | F1: 0.9266 | ROC-AUC: 0.9327
Max val ROC-AUC: 0.9533
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1374 | Acc: 0.9643 | Prec: 0.9789 | Recall: 0.9434 | F1: 0.9608 | ROC-AUC: 0.9822 || Test Loss: 0.4606 | Acc: 0.8698 | Prec: 0.9858 | Recall: 0.8722 | F1: 0.9255 | ROC-AUC: 0.9390
Max val ROC-AUC: 0.9603
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2039 | Acc: 0.9316 | Prec: 0.9688 | Recall: 0.8809 | F1: 0.9228 | ROC-AUC: 0.9681 || Test Loss: 0.3360 | Acc: 0.7581 | Prec: 0.9966 | Recall: 0.7419 | F1: 0.8506 | ROC-AUC: 0.9490
Max val ROC-AUC: 0.9561
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1548 | Acc: 0.9562 | Prec: 0.9763 | Recall: 0.9281 | F1: 0.9516 | ROC-AUC: 0.9816 || Test Loss: 0.4049 | Acc: 0.8628 | Prec: 0.9857 | Recall: 0.8647 | F1: 0.9212 | ROC-AUC: 0.9390
Max val ROC-AUC: 0.9485
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1849 | Acc: 0.9431 | Prec: 0.9734 | Recall: 0.9020 | F1: 0.9363 | ROC-AUC: 0.9721 || Test Loss: 0.5733 | Acc: 0.7651 | Prec: 0.9869 | Recall: 0.7569 | F1: 0.8567 | ROC-AUC: 0.9253
Max val ROC-AUC: 0.9580
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1423 | Acc: 0.9552 | Prec: 0.9777 | Recall: 0.9245 | F1: 0.9504 | ROC-AUC: 0.9809 || Test Loss: 0.5355 | Acc: 0.8930 | Prec: 0.9836 | Recall: 0.8997 | F1: 0.9398 | ROC-AUC: 0.9035
Max val ROC-AUC: 0.9573
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1559 | Acc: 0.9569 | Prec: 0.9742 | Recall: 0.9317 | F1: 0.9525 | ROC-AUC: 0.9808 || Test Loss: 0.4233 | Acc: 0.8791 | Prec: 0.9860 | Recall: 0.8822 | F1: 0.9312 | ROC-AUC: 0.9478
Max val ROC-AUC: 0.9499
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1322 | Acc: 0.9623 | Prec: 0.9788 | Recall: 0.9390 | F1: 0.9585 | ROC-AUC: 0.9828 || Test Loss: 0.5218 | Acc: 0.8419 | Prec: 0.9940 | Recall: 0.8346 | F1: 0.9074 | ROC-AUC: 0.9482
Max val ROC-AUC: 0.9487
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1421 | Acc: 0.9586 | Prec: 0.9794 | Recall: 0.9303 | F1: 0.9542 | ROC-AUC: 0.9842 || Test Loss: 0.5798 | Acc: 0.8465 | Prec: 0.9854 | Recall: 0.8471 | F1: 0.9111 | ROC-AUC: 0.9370
Max val ROC-AUC: 0.9524
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1942 | Acc: 0.9400 | Prec: 0.9559 | Recall: 0.9129 | F1: 0.9339 | ROC-AUC: 0.9752 || Test Loss: 0.3860 | Acc: 0.8814 | Prec: 0.9833 | Recall: 0.8872 | F1: 0.9328 | ROC-AUC: 0.9435
Max val ROC-AUC: 0.9500
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0595 | Acc: 0.9855 | Prec: 0.9941 | Recall: 0.9746 | F1: 0.9842 | ROC-AUC: 0.9960 || Test Loss: 0.4363 | Acc: 0.9186 | Prec: 0.9840 | Recall: 0.9273 | F1: 0.9548 | ROC-AUC: 0.9350
Max val ROC-AUC: 0.9512
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0919 | Acc: 0.9720 | Prec: 0.9836 | Recall: 0.9557 | F1: 0.9694 | ROC-AUC: 0.9932 || Test Loss: 0.3818 | Acc: 0.9047 | Prec: 0.9812 | Recall: 0.9148 | F1: 0.9468 | ROC-AUC: 0.9326
Max val ROC-AUC: 0.9357
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0837 | Acc: 0.9741 | Prec: 0.9873 | Recall: 0.9564 | F1: 0.9716 | ROC-AUC: 0.9937 || Test Loss: 0.4234 | Acc: 0.8837 | Prec: 0.9834 | Recall: 0.8897 | F1: 0.9342 | ROC-AUC: 0.9280
Max val ROC-AUC: 0.9563
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0720 | Acc: 0.9801 | Prec: 0.9860 | Recall: 0.9710 | F1: 0.9784 | ROC-AUC: 0.9953 || Test Loss: 0.4222 | Acc: 0.9116 | Prec: 0.9788 | Recall: 0.9248 | F1: 0.9510 | ROC-AUC: 0.9205
Max val ROC-AUC: 0.9565
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0907 | Acc: 0.9754 | Prec: 0.9858 | Recall: 0.9608 | F1: 0.9732 | ROC-AUC: 0.9934 || Test Loss: 0.6236 | Acc: 0.8535 | Prec: 0.9855 | Recall: 0.8546 | F1: 0.9154 | ROC-AUC: 0.9256
Max val ROC-AUC: 0.9584
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0601 | Acc: 0.9845 | Prec: 0.9919 | Recall: 0.9746 | F1: 0.9832 | ROC-AUC: 0.9954 || Test Loss: 0.4003 | Acc: 0.9372 | Prec: 0.9844 | Recall: 0.9474 | F1: 0.9655 | ROC-AUC: 0.9447
Max val ROC-AUC: 0.9613
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0679 | Acc: 0.9781 | Prec: 0.9874 | Recall: 0.9651 | F1: 0.9761 | ROC-AUC: 0.9961 || Test Loss: 0.4509 | Acc: 0.9140 | Prec: 0.9738 | Recall: 0.9323 | F1: 0.9526 | ROC-AUC: 0.8995
Max val ROC-AUC: 0.9487
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0736 | Acc: 0.9781 | Prec: 0.9874 | Recall: 0.9651 | F1: 0.9761 | ROC-AUC: 0.9952 || Test Loss: 0.5435 | Acc: 0.8721 | Prec: 0.9831 | Recall: 0.8772 | F1: 0.9272 | ROC-AUC: 0.9102
Max val ROC-AUC: 0.9483
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0794 | Acc: 0.9754 | Prec: 0.9895 | Recall: 0.9572 | F1: 0.9731 | ROC-AUC: 0.9947 || Test Loss: 0.5642 | Acc: 0.8791 | Prec: 0.9833 | Recall: 0.8847 | F1: 0.9314 | ROC-AUC: 0.9183
Max val ROC-AUC: 0.9344
Training with d_model=64, nhead=2, num_layers=6, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0913 | Acc: 0.9727 | Prec: 0.9865 | Recall: 0.9542 | F1: 0.9701 | ROC-AUC: 0.9924 || Test Loss: 0.3082 | Acc: 0.9279 | Prec: 0.9767 | Recall: 0.9449 | F1: 0.9605 | ROC-AUC: 0.9391
Max val ROC-AUC: 0.9392
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1619 | Acc: 0.9444 | Prec: 0.9536 | Recall: 0.9252 | F1: 0.9392 | ROC-AUC: 0.9769 || Test Loss: 0.1990 | Acc: 0.9326 | Prec: 0.9843 | Recall: 0.9424 | F1: 0.9629 | ROC-AUC: 0.9606
Max val ROC-AUC: 0.9606
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2162 | Acc: 0.9289 | Prec: 0.9212 | Recall: 0.9259 | F1: 0.9236 | ROC-AUC: 0.9718 || Test Loss: 0.4367 | Acc: 0.8767 | Prec: 0.9240 | Recall: 0.9449 | F1: 0.9343 | ROC-AUC: 0.2473
Max val ROC-AUC: 0.9373
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2479 | Acc: 0.9195 | Prec: 0.9619 | Recall: 0.8606 | F1: 0.9084 | ROC-AUC: 0.9735 || Test Loss: 0.6982 | Acc: 0.8116 | Prec: 0.9907 | Recall: 0.8045 | F1: 0.8880 | ROC-AUC: 0.8954
Max val ROC-AUC: 0.9498
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1801 | Acc: 0.9421 | Prec: 0.9527 | Recall: 0.9208 | F1: 0.9365 | ROC-AUC: 0.9778 || Test Loss: 0.2232 | Acc: 0.9023 | Prec: 0.9890 | Recall: 0.9048 | F1: 0.9450 | ROC-AUC: 0.9292
Max val ROC-AUC: 0.9593
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2891 | Acc: 0.9128 | Prec: 0.9582 | Recall: 0.8489 | F1: 0.9003 | ROC-AUC: 0.9101 || Test Loss: 0.2612 | Acc: 0.9070 | Prec: 0.9687 | Recall: 0.9298 | F1: 0.9488 | ROC-AUC: 0.8381
Max val ROC-AUC: 0.9358
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1768 | Acc: 0.9508 | Prec: 0.9438 | Recall: 0.9506 | F1: 0.9472 | ROC-AUC: 0.9752 || Test Loss: 0.2973 | Acc: 0.9023 | Prec: 0.9837 | Recall: 0.9098 | F1: 0.9453 | ROC-AUC: 0.9363
Max val ROC-AUC: 0.9527
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1600 | Acc: 0.9478 | Prec: 0.9650 | Recall: 0.9208 | F1: 0.9424 | ROC-AUC: 0.9824 || Test Loss: 0.1549 | Acc: 0.9395 | Prec: 0.9844 | Recall: 0.9499 | F1: 0.9668 | ROC-AUC: 0.9592
Max val ROC-AUC: 0.9619
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1338 | Acc: 0.9566 | Prec: 0.9615 | Recall: 0.9441 | F1: 0.9527 | ROC-AUC: 0.9881 || Test Loss: 0.2224 | Acc: 0.9326 | Prec: 0.9894 | Recall: 0.9373 | F1: 0.9627 | ROC-AUC: 0.9457
Max val ROC-AUC: 0.9615
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1385 | Acc: 0.9552 | Prec: 0.9663 | Recall: 0.9361 | F1: 0.9509 | ROC-AUC: 0.9849 || Test Loss: 0.2705 | Acc: 0.8860 | Prec: 0.9834 | Recall: 0.8922 | F1: 0.9356 | ROC-AUC: 0.9386
Max val ROC-AUC: 0.9666
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1380 | Acc: 0.9582 | Prec: 0.9523 | Recall: 0.9579 | F1: 0.9551 | ROC-AUC: 0.9865 || Test Loss: 0.4929 | Acc: 0.9186 | Prec: 0.9973 | Recall: 0.9148 | F1: 0.9542 | ROC-AUC: 0.9602
Max val ROC-AUC: 0.9602
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0454 | Acc: 0.9848 | Prec: 0.9868 | Recall: 0.9804 | F1: 0.9836 | ROC-AUC: 0.9979 || Test Loss: 0.2866 | Acc: 0.9302 | Prec: 0.9743 | Recall: 0.9499 | F1: 0.9619 | ROC-AUC: 0.9420
Max val ROC-AUC: 0.9426
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0699 | Acc: 0.9788 | Prec: 0.9881 | Recall: 0.9659 | F1: 0.9769 | ROC-AUC: 0.9956 || Test Loss: 0.3553 | Acc: 0.9093 | Prec: 0.9839 | Recall: 0.9173 | F1: 0.9494 | ROC-AUC: 0.9603
Max val ROC-AUC: 0.9603
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0441 | Acc: 0.9848 | Prec: 0.9868 | Recall: 0.9804 | F1: 0.9836 | ROC-AUC: 0.9982 || Test Loss: 0.4128 | Acc: 0.9256 | Prec: 0.9920 | Recall: 0.9273 | F1: 0.9585 | ROC-AUC: 0.9656
Max val ROC-AUC: 0.9712
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0468 | Acc: 0.9842 | Prec: 0.9833 | Recall: 0.9826 | F1: 0.9829 | ROC-AUC: 0.9980 || Test Loss: 0.3485 | Acc: 0.9186 | Prec: 0.9840 | Recall: 0.9273 | F1: 0.9548 | ROC-AUC: 0.9590
Max val ROC-AUC: 0.9702
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0331 | Acc: 0.9906 | Prec: 0.9905 | Recall: 0.9891 | F1: 0.9898 | ROC-AUC: 0.9989 || Test Loss: 0.3492 | Acc: 0.9233 | Prec: 0.9816 | Recall: 0.9348 | F1: 0.9576 | ROC-AUC: 0.9225
Max val ROC-AUC: 0.9439
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0494 | Acc: 0.9838 | Prec: 0.9861 | Recall: 0.9789 | F1: 0.9825 | ROC-AUC: 0.9980 || Test Loss: 0.6713 | Acc: 0.8349 | Prec: 0.9970 | Recall: 0.8246 | F1: 0.9026 | ROC-AUC: 0.9580
Max val ROC-AUC: 0.9655
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0571 | Acc: 0.9832 | Prec: 0.9818 | Recall: 0.9818 | F1: 0.9818 | ROC-AUC: 0.9968 || Test Loss: 0.3159 | Acc: 0.9140 | Prec: 0.9892 | Recall: 0.9173 | F1: 0.9519 | ROC-AUC: 0.9687
Max val ROC-AUC: 0.9714
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0488 | Acc: 0.9862 | Prec: 0.9883 | Recall: 0.9818 | F1: 0.9851 | ROC-AUC: 0.9974 || Test Loss: 0.4157 | Acc: 0.9000 | Prec: 0.9917 | Recall: 0.8997 | F1: 0.9435 | ROC-AUC: 0.9694
Max val ROC-AUC: 0.9729
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0296 | Acc: 0.9929 | Prec: 0.9949 | Recall: 0.9898 | F1: 0.9924 | ROC-AUC: 0.9985 || Test Loss: 0.2203 | Acc: 0.9372 | Prec: 0.9697 | Recall: 0.9624 | F1: 0.9660 | ROC-AUC: 0.9306
Max val ROC-AUC: 0.9466
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0494 | Acc: 0.9818 | Prec: 0.9846 | Recall: 0.9760 | F1: 0.9803 | ROC-AUC: 0.9980 || Test Loss: 0.3487 | Acc: 0.9326 | Prec: 0.9894 | Recall: 0.9373 | F1: 0.9627 | ROC-AUC: 0.9423
Max val ROC-AUC: 0.9683
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0093 | Acc: 0.9973 | Prec: 0.9978 | Recall: 0.9964 | F1: 0.9971 | ROC-AUC: 1.0000 || Test Loss: 0.4502 | Acc: 0.9256 | Prec: 0.9867 | Recall: 0.9323 | F1: 0.9588 | ROC-AUC: 0.8921
Max val ROC-AUC: 0.9517
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0385 | Acc: 0.9889 | Prec: 0.9898 | Recall: 0.9862 | F1: 0.9880 | ROC-AUC: 0.9983 || Test Loss: 0.4659 | Acc: 0.8953 | Prec: 0.9863 | Recall: 0.8997 | F1: 0.9410 | ROC-AUC: 0.9459
Max val ROC-AUC: 0.9720
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0293 | Acc: 0.9916 | Prec: 0.9942 | Recall: 0.9877 | F1: 0.9909 | ROC-AUC: 0.9984 || Test Loss: 0.3456 | Acc: 0.9209 | Prec: 0.9790 | Recall: 0.9348 | F1: 0.9564 | ROC-AUC: 0.9411
Max val ROC-AUC: 0.9570
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0357 | Acc: 0.9919 | Prec: 0.9949 | Recall: 0.9877 | F1: 0.9913 | ROC-AUC: 0.9983 || Test Loss: 0.3336 | Acc: 0.9256 | Prec: 0.9816 | Recall: 0.9373 | F1: 0.9590 | ROC-AUC: 0.9467
Max val ROC-AUC: 0.9537
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0376 | Acc: 0.9909 | Prec: 0.9913 | Recall: 0.9891 | F1: 0.9902 | ROC-AUC: 0.9981 || Test Loss: 0.3238 | Acc: 0.9233 | Prec: 0.9766 | Recall: 0.9398 | F1: 0.9579 | ROC-AUC: 0.9455
Max val ROC-AUC: 0.9559
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0251 | Acc: 0.9906 | Prec: 0.9927 | Recall: 0.9869 | F1: 0.9898 | ROC-AUC: 0.9995 || Test Loss: 0.3561 | Acc: 0.9326 | Prec: 0.9868 | Recall: 0.9398 | F1: 0.9628 | ROC-AUC: 0.9272
Max val ROC-AUC: 0.9516
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0147 | Acc: 0.9970 | Prec: 0.9985 | Recall: 0.9949 | F1: 0.9967 | ROC-AUC: 0.9994 || Test Loss: 0.3059 | Acc: 0.9465 | Prec: 0.9796 | Recall: 0.9624 | F1: 0.9709 | ROC-AUC: 0.9153
Max val ROC-AUC: 0.9303
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0291 | Acc: 0.9899 | Prec: 0.9905 | Recall: 0.9877 | F1: 0.9891 | ROC-AUC: 0.9990 || Test Loss: 0.3671 | Acc: 0.9326 | Prec: 0.9818 | Recall: 0.9449 | F1: 0.9630 | ROC-AUC: 0.9648
Max val ROC-AUC: 0.9648
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0188 | Acc: 0.9953 | Prec: 0.9921 | Recall: 0.9978 | F1: 0.9949 | ROC-AUC: 0.9998 || Test Loss: 0.3931 | Acc: 0.9326 | Prec: 0.9768 | Recall: 0.9499 | F1: 0.9632 | ROC-AUC: 0.9447
Max val ROC-AUC: 0.9574
Training with d_model=64, nhead=2, num_layers=8, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0069 | Acc: 0.9980 | Prec: 0.9993 | Recall: 0.9964 | F1: 0.9978 | ROC-AUC: 1.0000 || Test Loss: 0.4473 | Acc: 0.9279 | Prec: 0.9767 | Recall: 0.9449 | F1: 0.9605 | ROC-AUC: 0.9120
Max val ROC-AUC: 0.9591
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3314 | Acc: 0.8750 | Prec: 0.9137 | Recall: 0.8068 | F1: 0.8569 | ROC-AUC: 0.9243 || Test Loss: 0.4776 | Acc: 0.7116 | Prec: 0.9893 | Recall: 0.6967 | F1: 0.8176 | ROC-AUC: 0.8740
Max val ROC-AUC: 0.9571
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3074 | Acc: 0.8956 | Prec: 0.9548 | Recall: 0.8134 | F1: 0.8784 | ROC-AUC: 0.9065 || Test Loss: 0.4870 | Acc: 0.8163 | Prec: 0.9908 | Recall: 0.8095 | F1: 0.8910 | ROC-AUC: 0.8784
Max val ROC-AUC: 0.9521
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2597 | Acc: 0.9171 | Prec: 0.9060 | Recall: 0.9165 | F1: 0.9112 | ROC-AUC: 0.9604 || Test Loss: 0.3130 | Acc: 0.9186 | Prec: 0.9866 | Recall: 0.9248 | F1: 0.9547 | ROC-AUC: 0.9117
Max val ROC-AUC: 0.9536
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2954 | Acc: 0.8912 | Prec: 0.8892 | Recall: 0.8744 | F1: 0.8817 | ROC-AUC: 0.9512 || Test Loss: 0.5597 | Acc: 0.8140 | Prec: 0.9908 | Recall: 0.8070 | F1: 0.8895 | ROC-AUC: 0.9310
Max val ROC-AUC: 0.9410
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2496 | Acc: 0.9202 | Prec: 0.9488 | Recall: 0.8751 | F1: 0.9105 | ROC-AUC: 0.9604 || Test Loss: 0.2943 | Acc: 0.9047 | Prec: 0.9864 | Recall: 0.9098 | F1: 0.9465 | ROC-AUC: 0.9327
Max val ROC-AUC: 0.9424
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3016 | Acc: 0.8990 | Prec: 0.9454 | Recall: 0.8301 | F1: 0.8840 | ROC-AUC: 0.9464 || Test Loss: 0.4952 | Acc: 0.7814 | Prec: 0.9872 | Recall: 0.7744 | F1: 0.8680 | ROC-AUC: 0.8849
Max val ROC-AUC: 0.9517
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3381 | Acc: 0.8720 | Prec: 0.8352 | Recall: 0.9020 | F1: 0.8673 | ROC-AUC: 0.9455 || Test Loss: 0.3974 | Acc: 0.9349 | Prec: 0.9696 | Recall: 0.9599 | F1: 0.9647 | ROC-AUC: 0.9032
Max val ROC-AUC: 0.9546
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2624 | Acc: 0.9114 | Prec: 0.9358 | Recall: 0.8686 | F1: 0.9009 | ROC-AUC: 0.9547 || Test Loss: 0.8322 | Acc: 0.7442 | Prec: 0.9898 | Recall: 0.7318 | F1: 0.8415 | ROC-AUC: 0.9299
Max val ROC-AUC: 0.9318
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3254 | Acc: 0.8942 | Prec: 0.8513 | Recall: 0.9354 | F1: 0.8913 | ROC-AUC: 0.9156 || Test Loss: 0.3788 | Acc: 0.9186 | Prec: 0.9815 | Recall: 0.9298 | F1: 0.9550 | ROC-AUC: 0.8686
Max val ROC-AUC: 0.9434
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2576 | Acc: 0.9171 | Prec: 0.9149 | Recall: 0.9056 | F1: 0.9102 | ROC-AUC: 0.9609 || Test Loss: 0.2641 | Acc: 0.9349 | Prec: 0.9769 | Recall: 0.9524 | F1: 0.9645 | ROC-AUC: 0.8947
Max val ROC-AUC: 0.9599
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1324 | Acc: 0.9522 | Prec: 0.9725 | Recall: 0.9230 | F1: 0.9471 | ROC-AUC: 0.9868 || Test Loss: 0.3400 | Acc: 0.8767 | Prec: 0.9779 | Recall: 0.8872 | F1: 0.9304 | ROC-AUC: 0.9310
Max val ROC-AUC: 0.9496
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1215 | Acc: 0.9656 | Prec: 0.9797 | Recall: 0.9455 | F1: 0.9623 | ROC-AUC: 0.9862 || Test Loss: 0.3811 | Acc: 0.9233 | Prec: 0.9841 | Recall: 0.9323 | F1: 0.9575 | ROC-AUC: 0.9463
Max val ROC-AUC: 0.9620
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1023 | Acc: 0.9741 | Prec: 0.9822 | Recall: 0.9615 | F1: 0.9717 | ROC-AUC: 0.9896 || Test Loss: 0.4008 | Acc: 0.8930 | Prec: 0.9809 | Recall: 0.9023 | F1: 0.9399 | ROC-AUC: 0.9443
Max val ROC-AUC: 0.9573
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0954 | Acc: 0.9737 | Prec: 0.9829 | Recall: 0.9601 | F1: 0.9713 | ROC-AUC: 0.9890 || Test Loss: 0.3512 | Acc: 0.9209 | Prec: 0.9867 | Recall: 0.9273 | F1: 0.9561 | ROC-AUC: 0.9367
Max val ROC-AUC: 0.9593
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1048 | Acc: 0.9663 | Prec: 0.9841 | Recall: 0.9426 | F1: 0.9629 | ROC-AUC: 0.9904 || Test Loss: 0.4675 | Acc: 0.8233 | Prec: 0.9850 | Recall: 0.8221 | F1: 0.8962 | ROC-AUC: 0.8841
Max val ROC-AUC: 0.9534
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1546 | Acc: 0.9363 | Prec: 0.9707 | Recall: 0.8896 | F1: 0.9284 | ROC-AUC: 0.9825 || Test Loss: 0.2480 | Acc: 0.9023 | Prec: 0.9864 | Recall: 0.9073 | F1: 0.9452 | ROC-AUC: 0.9237
Max val ROC-AUC: 0.9654
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1640 | Acc: 0.9353 | Prec: 0.9373 | Recall: 0.9223 | F1: 0.9297 | ROC-AUC: 0.9831 || Test Loss: 0.3895 | Acc: 0.9023 | Prec: 0.9864 | Recall: 0.9073 | F1: 0.9452 | ROC-AUC: 0.9076
Max val ROC-AUC: 0.9558
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1457 | Acc: 0.9562 | Prec: 0.9727 | Recall: 0.9317 | F1: 0.9518 | ROC-AUC: 0.9826 || Test Loss: 0.5764 | Acc: 0.7721 | Prec: 0.9902 | Recall: 0.7619 | F1: 0.8612 | ROC-AUC: 0.9150
Max val ROC-AUC: 0.9453
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1205 | Acc: 0.9603 | Prec: 0.9802 | Recall: 0.9332 | F1: 0.9561 | ROC-AUC: 0.9879 || Test Loss: 0.2573 | Acc: 0.8953 | Prec: 0.9890 | Recall: 0.8972 | F1: 0.9409 | ROC-AUC: 0.9024
Max val ROC-AUC: 0.9428
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1176 | Acc: 0.9599 | Prec: 0.9729 | Recall: 0.9397 | F1: 0.9560 | ROC-AUC: 0.9887 || Test Loss: 0.3981 | Acc: 0.7628 | Prec: 0.9967 | Recall: 0.7469 | F1: 0.8539 | ROC-AUC: 0.9527
Max val ROC-AUC: 0.9527
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0249 | Acc: 0.9919 | Prec: 0.9971 | Recall: 0.9855 | F1: 0.9912 | ROC-AUC: 0.9994 || Test Loss: 0.5187 | Acc: 0.9093 | Prec: 0.9839 | Recall: 0.9173 | F1: 0.9494 | ROC-AUC: 0.9404
Max val ROC-AUC: 0.9404
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0208 | Acc: 0.9933 | Prec: 0.9956 | Recall: 0.9898 | F1: 0.9927 | ROC-AUC: 0.9995 || Test Loss: 0.3667 | Acc: 0.9233 | Prec: 0.9816 | Recall: 0.9348 | F1: 0.9576 | ROC-AUC: 0.9493
Max val ROC-AUC: 0.9655
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0351 | Acc: 0.9889 | Prec: 0.9898 | Recall: 0.9862 | F1: 0.9880 | ROC-AUC: 0.9983 || Test Loss: 0.3393 | Acc: 0.9302 | Prec: 0.9792 | Recall: 0.9449 | F1: 0.9617 | ROC-AUC: 0.9304
Max val ROC-AUC: 0.9504
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0306 | Acc: 0.9929 | Prec: 0.9956 | Recall: 0.9891 | F1: 0.9923 | ROC-AUC: 0.9986 || Test Loss: 0.5718 | Acc: 0.8442 | Prec: 0.9882 | Recall: 0.8421 | F1: 0.9093 | ROC-AUC: 0.9409
Max val ROC-AUC: 0.9505
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0428 | Acc: 0.9869 | Prec: 0.9948 | Recall: 0.9768 | F1: 0.9857 | ROC-AUC: 0.9980 || Test Loss: 0.3530 | Acc: 0.9209 | Prec: 0.9841 | Recall: 0.9298 | F1: 0.9562 | ROC-AUC: 0.9633
Max val ROC-AUC: 0.9633
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0163 | Acc: 0.9963 | Prec: 0.9971 | Recall: 0.9949 | F1: 0.9960 | ROC-AUC: 0.9995 || Test Loss: 0.3902 | Acc: 0.9279 | Prec: 0.9742 | Recall: 0.9474 | F1: 0.9606 | ROC-AUC: 0.9097
Max val ROC-AUC: 0.9464
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0283 | Acc: 0.9919 | Prec: 0.9949 | Recall: 0.9877 | F1: 0.9913 | ROC-AUC: 0.9987 || Test Loss: 0.4523 | Acc: 0.9140 | Prec: 0.9840 | Recall: 0.9223 | F1: 0.9521 | ROC-AUC: 0.9462
Max val ROC-AUC: 0.9462
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0351 | Acc: 0.9896 | Prec: 0.9927 | Recall: 0.9847 | F1: 0.9887 | ROC-AUC: 0.9983 || Test Loss: 0.3714 | Acc: 0.9302 | Prec: 0.9817 | Recall: 0.9424 | F1: 0.9616 | ROC-AUC: 0.9353
Max val ROC-AUC: 0.9615
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0421 | Acc: 0.9912 | Prec: 0.9956 | Recall: 0.9855 | F1: 0.9905 | ROC-AUC: 0.9967 || Test Loss: 0.4353 | Acc: 0.9186 | Prec: 0.9764 | Recall: 0.9348 | F1: 0.9552 | ROC-AUC: 0.9007
Max val ROC-AUC: 0.9523
Training with d_model=64, nhead=2, num_layers=8, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0337 | Acc: 0.9892 | Prec: 0.9920 | Recall: 0.9847 | F1: 0.9883 | ROC-AUC: 0.9988 || Test Loss: 0.3545 | Acc: 0.9326 | Prec: 0.9843 | Recall: 0.9424 | F1: 0.9629 | ROC-AUC: 0.9192
Max val ROC-AUC: 0.9504
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6935 | Acc: 0.5221 | Prec: 0.4715 | Recall: 0.2520 | F1: 0.3284 | ROC-AUC: 0.4989 || Test Loss: 0.8245 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.8327
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6938 | Acc: 0.5234 | Prec: 0.4679 | Recall: 0.2012 | F1: 0.2814 | ROC-AUC: 0.4936 || Test Loss: 0.7311 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.6678
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.4684 | Acc: 0.8212 | Prec: 0.9147 | Recall: 0.6776 | F1: 0.7785 | ROC-AUC: 0.8459 || Test Loss: 0.4520 | Acc: 0.8070 | Prec: 0.9788 | Recall: 0.8095 | F1: 0.8861 | ROC-AUC: 0.7820
Max val ROC-AUC: 0.9473
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.4827 | Acc: 0.8107 | Prec: 0.7447 | Recall: 0.9005 | F1: 0.8153 | ROC-AUC: 0.8540 || Test Loss: 0.3437 | Acc: 0.8744 | Prec: 0.9915 | Recall: 0.8722 | F1: 0.9280 | ROC-AUC: 0.8881
Max val ROC-AUC: 0.8902
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.7307 | Acc: 0.5153 | Prec: 0.4555 | Recall: 0.2302 | F1: 0.3058 | ROC-AUC: 0.5021 || Test Loss: 0.7901 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.8016
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6623 | Acc: 0.6100 | Prec: 0.5919 | Recall: 0.5120 | F1: 0.5491 | ROC-AUC: 0.6760 || Test Loss: 0.5814 | Acc: 0.9279 | Prec: 0.9792 | Recall: 0.9424 | F1: 0.9604 | ROC-AUC: 0.8470
Max val ROC-AUC: 0.9540
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.4940 | Acc: 0.7791 | Prec: 0.7052 | Recall: 0.8998 | F1: 0.7907 | ROC-AUC: 0.8324 || Test Loss: 0.5784 | Acc: 0.9349 | Prec: 0.9793 | Recall: 0.9499 | F1: 0.9644 | ROC-AUC: 0.9260
Max val ROC-AUC: 0.9474
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.4360 | Acc: 0.8286 | Prec: 0.7544 | Recall: 0.9346 | F1: 0.8349 | ROC-AUC: 0.8578 || Test Loss: 0.4849 | Acc: 0.8930 | Prec: 0.9862 | Recall: 0.8972 | F1: 0.9396 | ROC-AUC: 0.8722
Max val ROC-AUC: 0.9615
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6972 | Acc: 0.5369 | Prec: 0.5027 | Recall: 0.1329 | F1: 0.2102 | ROC-AUC: 0.4954 || Test Loss: 0.6077 | Acc: 0.9279 | Prec: 0.9279 | Recall: 1.0000 | F1: 0.9626 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.8066
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6959 | Acc: 0.5207 | Prec: 0.4553 | Recall: 0.1699 | F1: 0.2475 | ROC-AUC: 0.4917 || Test Loss: 0.6804 | Acc: 0.9279 | Prec: 0.9279 | Recall: 1.0000 | F1: 0.9626 | ROC-AUC: 0.4975
Max val ROC-AUC: 0.7056
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1239 | Acc: 0.9609 | Prec: 0.9795 | Recall: 0.9354 | F1: 0.9569 | ROC-AUC: 0.9874 || Test Loss: 0.4361 | Acc: 0.8628 | Prec: 0.9913 | Recall: 0.8596 | F1: 0.9208 | ROC-AUC: 0.9529
Max val ROC-AUC: 0.9529
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1263 | Acc: 0.9640 | Prec: 0.9774 | Recall: 0.9441 | F1: 0.9605 | ROC-AUC: 0.9855 || Test Loss: 0.4925 | Acc: 0.9093 | Prec: 0.9865 | Recall: 0.9148 | F1: 0.9493 | ROC-AUC: 0.9348
Max val ROC-AUC: 0.9584
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1252 | Acc: 0.9616 | Prec: 0.9824 | Recall: 0.9339 | F1: 0.9576 | ROC-AUC: 0.9845 || Test Loss: 0.4546 | Acc: 0.8698 | Prec: 0.9831 | Recall: 0.8747 | F1: 0.9257 | ROC-AUC: 0.9243
Max val ROC-AUC: 0.9373
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1196 | Acc: 0.9569 | Prec: 0.9808 | Recall: 0.9252 | F1: 0.9522 | ROC-AUC: 0.9884 || Test Loss: 0.4318 | Acc: 0.8535 | Prec: 0.9773 | Recall: 0.8622 | F1: 0.9161 | ROC-AUC: 0.8555
Max val ROC-AUC: 0.9448
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1534 | Acc: 0.9491 | Prec: 0.9774 | Recall: 0.9114 | F1: 0.9433 | ROC-AUC: 0.9803 || Test Loss: 0.4389 | Acc: 0.8395 | Prec: 0.9853 | Recall: 0.8396 | F1: 0.9066 | ROC-AUC: 0.9388
Max val ROC-AUC: 0.9390
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1678 | Acc: 0.9488 | Prec: 0.9693 | Recall: 0.9187 | F1: 0.9433 | ROC-AUC: 0.9782 || Test Loss: 0.4163 | Acc: 0.8884 | Prec: 0.9835 | Recall: 0.8947 | F1: 0.9370 | ROC-AUC: 0.9356
Max val ROC-AUC: 0.9571
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1526 | Acc: 0.9512 | Prec: 0.9688 | Recall: 0.9245 | F1: 0.9461 | ROC-AUC: 0.9831 || Test Loss: 0.3756 | Acc: 0.9093 | Prec: 0.9813 | Recall: 0.9198 | F1: 0.9495 | ROC-AUC: 0.9044
Max val ROC-AUC: 0.9445
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1447 | Acc: 0.9508 | Prec: 0.9702 | Recall: 0.9223 | F1: 0.9456 | ROC-AUC: 0.9856 || Test Loss: 0.3579 | Acc: 0.8721 | Prec: 0.9831 | Recall: 0.8772 | F1: 0.9272 | ROC-AUC: 0.9270
Max val ROC-AUC: 0.9595
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1629 | Acc: 0.9505 | Prec: 0.9680 | Recall: 0.9237 | F1: 0.9454 | ROC-AUC: 0.9803 || Test Loss: 0.3997 | Acc: 0.8488 | Prec: 0.9883 | Recall: 0.8471 | F1: 0.9123 | ROC-AUC: 0.9500
Max val ROC-AUC: 0.9634
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1015 | Acc: 0.9724 | Prec: 0.9909 | Recall: 0.9492 | F1: 0.9696 | ROC-AUC: 0.9886 || Test Loss: 0.3924 | Acc: 0.9023 | Prec: 0.9786 | Recall: 0.9148 | F1: 0.9456 | ROC-AUC: 0.9048
Max val ROC-AUC: 0.9564
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0455 | Acc: 0.9882 | Prec: 0.9934 | Recall: 0.9811 | F1: 0.9872 | ROC-AUC: 0.9976 || Test Loss: 0.4097 | Acc: 0.9186 | Prec: 0.9866 | Recall: 0.9248 | F1: 0.9547 | ROC-AUC: 0.9535
Max val ROC-AUC: 0.9535
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0451 | Acc: 0.9869 | Prec: 0.9898 | Recall: 0.9818 | F1: 0.9858 | ROC-AUC: 0.9975 || Test Loss: 0.2899 | Acc: 0.9256 | Prec: 0.9766 | Recall: 0.9424 | F1: 0.9592 | ROC-AUC: 0.9228
Max val ROC-AUC: 0.9436
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0482 | Acc: 0.9859 | Prec: 0.9912 | Recall: 0.9782 | F1: 0.9846 | ROC-AUC: 0.9980 || Test Loss: 0.5456 | Acc: 0.8860 | Prec: 0.9834 | Recall: 0.8922 | F1: 0.9356 | ROC-AUC: 0.9212
Max val ROC-AUC: 0.9358
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0429 | Acc: 0.9885 | Prec: 0.9927 | Recall: 0.9826 | F1: 0.9876 | ROC-AUC: 0.9980 || Test Loss: 0.4396 | Acc: 0.9163 | Prec: 0.9789 | Recall: 0.9298 | F1: 0.9537 | ROC-AUC: 0.9252
Max val ROC-AUC: 0.9440
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0638 | Acc: 0.9805 | Prec: 0.9817 | Recall: 0.9760 | F1: 0.9789 | ROC-AUC: 0.9962 || Test Loss: 0.3200 | Acc: 0.9372 | Prec: 0.9794 | Recall: 0.9524 | F1: 0.9657 | ROC-AUC: 0.9419
Max val ROC-AUC: 0.9463
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0393 | Acc: 0.9896 | Prec: 0.9927 | Recall: 0.9847 | F1: 0.9887 | ROC-AUC: 0.9986 || Test Loss: 0.3415 | Acc: 0.9233 | Prec: 0.9791 | Recall: 0.9373 | F1: 0.9577 | ROC-AUC: 0.9339
Max val ROC-AUC: 0.9451
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0417 | Acc: 0.9872 | Prec: 0.9926 | Recall: 0.9797 | F1: 0.9861 | ROC-AUC: 0.9978 || Test Loss: 0.3874 | Acc: 0.9233 | Prec: 0.9816 | Recall: 0.9348 | F1: 0.9576 | ROC-AUC: 0.9316
Max val ROC-AUC: 0.9459
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0498 | Acc: 0.9848 | Prec: 0.9904 | Recall: 0.9768 | F1: 0.9835 | ROC-AUC: 0.9970 || Test Loss: 0.3850 | Acc: 0.9233 | Prec: 0.9791 | Recall: 0.9373 | F1: 0.9577 | ROC-AUC: 0.9429
Max val ROC-AUC: 0.9507
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0484 | Acc: 0.9835 | Prec: 0.9911 | Recall: 0.9731 | F1: 0.9820 | ROC-AUC: 0.9976 || Test Loss: 0.5128 | Acc: 0.8721 | Prec: 0.9859 | Recall: 0.8747 | F1: 0.9270 | ROC-AUC: 0.9192
Max val ROC-AUC: 0.9441
Training with d_model=64, nhead=2, num_layers=8, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0411 | Acc: 0.9882 | Prec: 0.9956 | Recall: 0.9789 | F1: 0.9872 | ROC-AUC: 0.9984 || Test Loss: 0.3798 | Acc: 0.9116 | Prec: 0.9813 | Recall: 0.9223 | F1: 0.9509 | ROC-AUC: 0.8733
Max val ROC-AUC: 0.9453
Training with d_model=64, nhead=2, num_layers=8, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6955 | Acc: 0.5180 | Prec: 0.4504 | Recall: 0.1779 | F1: 0.2551 | ROC-AUC: 0.5016 || Test Loss: 0.8196 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5705
Training with d_model=64, nhead=2, num_layers=8, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.7260 | Acc: 0.5197 | Prec: 0.4583 | Recall: 0.1954 | F1: 0.2739 | ROC-AUC: 0.4946 || Test Loss: 0.8587 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=8, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6925 | Acc: 0.5264 | Prec: 0.4716 | Recall: 0.1750 | F1: 0.2553 | ROC-AUC: 0.5066 || Test Loss: 0.7366 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=8, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.7046 | Acc: 0.5207 | Prec: 0.4706 | Recall: 0.2672 | F1: 0.3409 | ROC-AUC: 0.4995 || Test Loss: 0.8288 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=8, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

In [None]:
# Attention Weights
import torch
import torch.nn as nn

class CustomTransformerEncoderLayer(nn.Module):
    def __init__(self, d_model, nhead, dim_feedforward=256, dropout=0.5):
        super(CustomTransformerEncoderLayer, self).__init__()
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout, batch_first=True)
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.activation = nn.GELU()

    def forward(self, src):
        # Self-attention with weights
        attn_output, attn_weights = self.self_attn(src, src, src, need_weights=True)
        src = src + self.dropout1(attn_output)
        src = self.norm1(src)

        ff_output = self.linear2(self.dropout(self.activation(self.linear1(src))))
        src = src + self.dropout2(ff_output)
        src = self.norm2(src)

        return src, attn_weights  # Return both output and attention
        

class TabTransformerWithAttention(nn.Module):
    def __init__(self, input_dim,
                 d_model=128,
                 nhead=8,
                 num_layers=4,
                 dim_feedforward=256,
                 dropout=0.5):
        super(TabTransformerWithAttention, self).__init__()

        self.embedding = nn.Linear(input_dim, d_model)
        self.norm = nn.LayerNorm(d_model)

        # Stack custom encoder layers
        self.encoder_layers = nn.ModuleList([
            CustomTransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout)
            for _ in range(num_layers)
        ])

        self.classifier = nn.Sequential(
            nn.Linear(d_model, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(64, 1)
        )

    def forward(self, x):
        x = x.unsqueeze(1)  # (batch_size, seq_len=1, input_dim)
        x = self.embedding(x)
        x = self.norm(x)

        all_attn_weights = []
        for layer in self.encoder_layers:
            x, attn_weights = layer(x)
            all_attn_weights.append(attn_weights)  # store attention maps

        x = x.mean(dim=1)
        logits = self.classifier(x)
        return logits, all_attn_weights  # return both predictions and attention maps
