# Import Libraries

In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns

import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from torchvision import datasets, transforms
from typing import Union
import random
from pathlib import Path

## Import Models and Data Loaders

In [4]:
from Return_dataloader import pass_dataloader
from Transformer_model import TabTransformer
from Plot_Accuracies import plot_loss_curves
from engine import train

# Device Agnostics

In [5]:
# Clear Cache
torch.cuda.empty_cache()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Available device is: {device}")

Available device is: cpu


### Data Loader

In [6]:
train_dataloader, test_dataloader = pass_dataloader()

### Parameter Tune Function

In [7]:
from copy import deepcopy

def hyperparameter_tuning(param_grid,
                          train_dataloader,
                          test_dataloader,
                          device,
                          epochs=50):

    best_val_score = -float('inf')
    best_params = None
    best_results = None

    for params in param_grid:
        print(f"Training with d_model={params['d_model']}, nhead={params['nhead']}, "
              f"num_layers={params['num_layers']}, dropout={params['dropout']}, lr={params['lr']}")

        # Create model with given hyperparameters
        model = TabTransformer(input_dim=100,
                               d_model=params['d_model'],
                               nhead=params['nhead'],
                               num_layers=params['num_layers'],
                               dropout=params['dropout']).to(device)

        loss_fn = nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=params['lr'],
                                     weight_decay=1e-4)

        # Train model and get results dictionary
        results = train(model=model,
                        train_dataloader=train_dataloader,
                        test_dataloader=test_dataloader,
                        optimizer=optimizer,
                        loss_fn=loss_fn,
                        device=device,
                        epochs=epochs)

        # Pick best validation score (ROC-AUC here) from all epochs
        max_val_roc_auc = max(results['test_roc_auc'])

        print(f"Max val ROC-AUC: {max_val_roc_auc:.4f}")

        if max_val_roc_auc > best_val_score:
            best_val_score = max_val_roc_auc
            best_params = params
            best_results = deepcopy(results)

    print(f"\nBest params: {best_params}")
    print(f"Best validation ROC-AUC: {best_val_score:.4f}")

    return best_params, best_results


## Hyperparameter Tuning

In [None]:
from itertools import product
NUM_EPOCHS = 10


param_grid = {
    "d_model": [64, 128, 25],
    "nhead": [2, 4, 8, 16],
    "num_layers": [i * 2 for i in range(1,51)],
    "dropout": [0.0, 0.1 , 0.3 , 0.5],
    "lr": [0.1, 0.01, 0.001],
    "weight_decay": [0, 1e-4],
    "batch_size": [4, 8, 16, 32, 64]
}

param_grid = [
    dict(zip(param_grid.keys(), values))
    for values in product(*param_grid.values())
]
best_params, best_results = hyperparameter_tuning(
    param_grid,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    device=device,
    epochs=NUM_EPOCHS
)

Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.1




  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1590 | Acc: 0.9502 | Prec: 0.9652 | Recall: 0.9259 | F1: 0.9451 | ROC-AUC: 0.9762 || Test Loss: 0.3372 | Acc: 0.8884 | Prec: 0.9835 | Recall: 0.8947 | F1: 0.9370 | ROC-AUC: 0.9048
Max val ROC-AUC: 0.9496
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1203 | Acc: 0.9633 | Prec: 0.9789 | Recall: 0.9412 | F1: 0.9596 | ROC-AUC: 0.9807 || Test Loss: 0.3099 | Acc: 0.8860 | Prec: 0.9944 | Recall: 0.8822 | F1: 0.9349 | ROC-AUC: 0.9314
Max val ROC-AUC: 0.9672
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1618 | Acc: 0.9468 | Prec: 0.9614 | Recall: 0.9223 | F1: 0.9414 | ROC-AUC: 0.9799 || Test Loss: 0.3460 | Acc: 0.8930 | Prec: 0.9836 | Recall: 0.8997 | F1: 0.9398 | ROC-AUC: 0.9324
Max val ROC-AUC: 0.9561
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1734 | Acc: 0.9421 | Prec: 0.9596 | Recall: 0.9136 | F1: 0.9360 | ROC-AUC: 0.9762 || Test Loss: 0.9477 | Acc: 0.6860 | Prec: 0.9889 | Recall: 0.6692 | F1: 0.7982 | ROC-AUC: 0.8765
Max val ROC-AUC: 0.9517
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1574 | Acc: 0.9512 | Prec: 0.9738 | Recall: 0.9194 | F1: 0.9458 | ROC-AUC: 0.9767 || Test Loss: 0.4545 | Acc: 0.8535 | Prec: 0.9828 | Recall: 0.8571 | F1: 0.9157 | ROC-AUC: 0.8874
Max val ROC-AUC: 0.9280
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2263 | Acc: 0.9336 | Prec: 0.9638 | Recall: 0.8903 | F1: 0.9256 | ROC-AUC: 0.9548 || Test Loss: 0.1806 | Acc: 0.9465 | Prec: 0.9821 | Recall: 0.9599 | F1: 0.9708 | ROC-AUC: 0.9390
Max val ROC-AUC: 0.9551
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1942 | Acc: 0.9407 | Prec: 0.9616 | Recall: 0.9085 | F1: 0.9343 | ROC-AUC: 0.9706 || Test Loss: 0.2515 | Acc: 0.9047 | Prec: 0.9864 | Recall: 0.9098 | F1: 0.9465 | ROC-AUC: 0.9221
Max val ROC-AUC: 0.9462
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1396 | Acc: 0.9522 | Prec: 0.9682 | Recall: 0.9274 | F1: 0.9473 | ROC-AUC: 0.9811 || Test Loss: 0.1863 | Acc: 0.9256 | Prec: 0.9893 | Recall: 0.9298 | F1: 0.9587 | ROC-AUC: 0.9613
Max val ROC-AUC: 0.9699
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3807 | Acc: 0.8882 | Prec: 0.9072 | Recall: 0.8453 | F1: 0.8752 | ROC-AUC: 0.9266 || Test Loss: 0.3588 | Acc: 0.9000 | Prec: 0.9837 | Recall: 0.9073 | F1: 0.9439 | ROC-AUC: 0.8995
Max val ROC-AUC: 0.9513
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1560 | Acc: 0.9549 | Prec: 0.9755 | Recall: 0.9259 | F1: 0.9501 | ROC-AUC: 0.9724 || Test Loss: 0.3379 | Acc: 0.8814 | Prec: 0.9888 | Recall: 0.8822 | F1: 0.9325 | ROC-AUC: 0.9104
Max val ROC-AUC: 0.9424
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0509 | Acc: 0.9821 | Prec: 0.9839 | Recall: 0.9775 | F1: 0.9807 | ROC-AUC: 0.9976 || Test Loss: 0.4873 | Acc: 0.9023 | Prec: 0.9811 | Recall: 0.9123 | F1: 0.9455 | ROC-AUC: 0.9252
Max val ROC-AUC: 0.9455
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0694 | Acc: 0.9791 | Prec: 0.9810 | Recall: 0.9739 | F1: 0.9774 | ROC-AUC: 0.9955 || Test Loss: 0.3274 | Acc: 0.9209 | Prec: 0.9867 | Recall: 0.9273 | F1: 0.9561 | ROC-AUC: 0.9417
Max val ROC-AUC: 0.9591
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0455 | Acc: 0.9855 | Prec: 0.9854 | Recall: 0.9833 | F1: 0.9844 | ROC-AUC: 0.9984 || Test Loss: 0.3073 | Acc: 0.9279 | Prec: 0.9868 | Recall: 0.9348 | F1: 0.9601 | ROC-AUC: 0.9568
Max val ROC-AUC: 0.9583
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0393 | Acc: 0.9879 | Prec: 0.9862 | Recall: 0.9877 | F1: 0.9869 | ROC-AUC: 0.9985 || Test Loss: 0.4985 | Acc: 0.8977 | Prec: 0.9810 | Recall: 0.9073 | F1: 0.9427 | ROC-AUC: 0.9197
Max val ROC-AUC: 0.9528
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0441 | Acc: 0.9869 | Prec: 0.9869 | Recall: 0.9847 | F1: 0.9858 | ROC-AUC: 0.9976 || Test Loss: 0.2795 | Acc: 0.9372 | Prec: 0.9673 | Recall: 0.9649 | F1: 0.9661 | ROC-AUC: 0.9150
Max val ROC-AUC: 0.9388
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0676 | Acc: 0.9781 | Prec: 0.9809 | Recall: 0.9717 | F1: 0.9763 | ROC-AUC: 0.9960 || Test Loss: 0.1767 | Acc: 0.9442 | Prec: 0.9947 | Recall: 0.9449 | F1: 0.9692 | ROC-AUC: 0.9708
Max val ROC-AUC: 0.9744
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0655 | Acc: 0.9818 | Prec: 0.9832 | Recall: 0.9775 | F1: 0.9803 | ROC-AUC: 0.9955 || Test Loss: 0.3745 | Acc: 0.8884 | Prec: 0.9916 | Recall: 0.8872 | F1: 0.9365 | ROC-AUC: 0.9580
Max val ROC-AUC: 0.9721
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1009 | Acc: 0.9741 | Prec: 0.9710 | Recall: 0.9731 | F1: 0.9721 | ROC-AUC: 0.9933 || Test Loss: 0.4401 | Acc: 0.9070 | Prec: 0.9918 | Recall: 0.9073 | F1: 0.9476 | ROC-AUC: 0.9667
Max val ROC-AUC: 0.9681
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0355 | Acc: 0.9885 | Prec: 0.9862 | Recall: 0.9891 | F1: 0.9877 | ROC-AUC: 0.9990 || Test Loss: 0.4163 | Acc: 0.9070 | Prec: 0.9891 | Recall: 0.9098 | F1: 0.9478 | ROC-AUC: 0.9581
Max val ROC-AUC: 0.9712
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0645 | Acc: 0.9788 | Prec: 0.9888 | Recall: 0.9651 | F1: 0.9768 | ROC-AUC: 0.9964 || Test Loss: 0.2850 | Acc: 0.9302 | Prec: 0.9767 | Recall: 0.9474 | F1: 0.9618 | ROC-AUC: 0.9368
Max val ROC-AUC: 0.9368
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0194 | Acc: 0.9953 | Prec: 0.9956 | Recall: 0.9942 | F1: 0.9949 | ROC-AUC: 0.9994 || Test Loss: 0.2608 | Acc: 0.9442 | Prec: 0.9795 | Recall: 0.9599 | F1: 0.9696 | ROC-AUC: 0.9442
Max val ROC-AUC: 0.9442
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0183 | Acc: 0.9926 | Prec: 0.9956 | Recall: 0.9884 | F1: 0.9920 | ROC-AUC: 0.9997 || Test Loss: 0.4007 | Acc: 0.9279 | Prec: 0.9718 | Recall: 0.9499 | F1: 0.9607 | ROC-AUC: 0.9200
Max val ROC-AUC: 0.9566
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0079 | Acc: 0.9970 | Prec: 0.9971 | Recall: 0.9964 | F1: 0.9967 | ROC-AUC: 1.0000 || Test Loss: 0.4040 | Acc: 0.9279 | Prec: 0.9792 | Recall: 0.9424 | F1: 0.9604 | ROC-AUC: 0.9432
Max val ROC-AUC: 0.9554
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0351 | Acc: 0.9902 | Prec: 0.9920 | Recall: 0.9869 | F1: 0.9894 | ROC-AUC: 0.9986 || Test Loss: 0.3938 | Acc: 0.9140 | Prec: 0.9840 | Recall: 0.9223 | F1: 0.9521 | ROC-AUC: 0.9182
Max val ROC-AUC: 0.9498
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0246 | Acc: 0.9936 | Prec: 0.9942 | Recall: 0.9920 | F1: 0.9931 | ROC-AUC: 0.9995 || Test Loss: 0.2915 | Acc: 0.9163 | Prec: 0.9892 | Recall: 0.9198 | F1: 0.9532 | ROC-AUC: 0.9566
Max val ROC-AUC: 0.9566
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0315 | Acc: 0.9912 | Prec: 0.9956 | Recall: 0.9855 | F1: 0.9905 | ROC-AUC: 0.9986 || Test Loss: 0.3656 | Acc: 0.9070 | Prec: 0.9736 | Recall: 0.9248 | F1: 0.9486 | ROC-AUC: 0.9002
Max val ROC-AUC: 0.9570
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0149 | Acc: 0.9963 | Prec: 0.9949 | Recall: 0.9971 | F1: 0.9960 | ROC-AUC: 0.9992 || Test Loss: 0.3642 | Acc: 0.9372 | Prec: 0.9794 | Recall: 0.9524 | F1: 0.9657 | ROC-AUC: 0.9534
Max val ROC-AUC: 0.9537
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0118 | Acc: 0.9963 | Prec: 0.9978 | Recall: 0.9942 | F1: 0.9960 | ROC-AUC: 0.9999 || Test Loss: 0.4431 | Acc: 0.9279 | Prec: 0.9792 | Recall: 0.9424 | F1: 0.9604 | ROC-AUC: 0.9416
Max val ROC-AUC: 0.9617
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0380 | Acc: 0.9909 | Prec: 0.9898 | Recall: 0.9906 | F1: 0.9902 | ROC-AUC: 0.9975 || Test Loss: 0.3167 | Acc: 0.9233 | Prec: 0.9766 | Recall: 0.9398 | F1: 0.9579 | ROC-AUC: 0.9534
Max val ROC-AUC: 0.9534
Training with d_model=64, nhead=2, num_layers=2, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0321 | Acc: 0.9902 | Prec: 0.9934 | Recall: 0.9855 | F1: 0.9894 | ROC-AUC: 0.9986 || Test Loss: 0.3699 | Acc: 0.9326 | Prec: 0.9793 | Recall: 0.9474 | F1: 0.9631 | ROC-AUC: 0.9439
Max val ROC-AUC: 0.9501
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3402 | Acc: 0.8713 | Prec: 0.8732 | Recall: 0.8453 | F1: 0.8590 | ROC-AUC: 0.9338 || Test Loss: 0.2959 | Acc: 0.8442 | Prec: 0.9882 | Recall: 0.8421 | F1: 0.9093 | ROC-AUC: 0.9347
Max val ROC-AUC: 0.9456
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3320 | Acc: 0.8683 | Prec: 0.8894 | Recall: 0.8177 | F1: 0.8521 | ROC-AUC: 0.9336 || Test Loss: 0.7172 | Acc: 0.8395 | Prec: 0.9911 | Recall: 0.8346 | F1: 0.9061 | ROC-AUC: 0.9008
Max val ROC-AUC: 0.9335
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2707 | Acc: 0.9131 | Prec: 0.9730 | Recall: 0.8359 | F1: 0.8992 | ROC-AUC: 0.9308 || Test Loss: 0.4905 | Acc: 0.8279 | Prec: 0.9880 | Recall: 0.8246 | F1: 0.8989 | ROC-AUC: 0.8632
Max val ROC-AUC: 0.9559
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2915 | Acc: 0.8811 | Prec: 0.9038 | Recall: 0.8322 | F1: 0.8665 | ROC-AUC: 0.9527 || Test Loss: 0.3491 | Acc: 0.8953 | Prec: 0.9784 | Recall: 0.9073 | F1: 0.9415 | ROC-AUC: 0.9412
Max val ROC-AUC: 0.9547
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2454 | Acc: 0.9131 | Prec: 0.9437 | Recall: 0.8642 | F1: 0.9022 | ROC-AUC: 0.9632 || Test Loss: 0.4364 | Acc: 0.7884 | Prec: 0.9904 | Recall: 0.7794 | F1: 0.8724 | ROC-AUC: 0.9060
Max val ROC-AUC: 0.9613
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3231 | Acc: 0.8865 | Prec: 0.8741 | Recall: 0.8824 | F1: 0.8782 | ROC-AUC: 0.9405 || Test Loss: 0.4748 | Acc: 0.8698 | Prec: 0.9858 | Recall: 0.8722 | F1: 0.9255 | ROC-AUC: 0.8919
Max val ROC-AUC: 0.9503
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3378 | Acc: 0.8764 | Prec: 0.9126 | Recall: 0.8112 | F1: 0.8589 | ROC-AUC: 0.9331 || Test Loss: 0.3726 | Acc: 0.7884 | Prec: 0.9936 | Recall: 0.7769 | F1: 0.8720 | ROC-AUC: 0.8976
Max val ROC-AUC: 0.9503
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.4361 | Acc: 0.8387 | Prec: 0.9204 | Recall: 0.7139 | F1: 0.8041 | ROC-AUC: 0.8864 || Test Loss: 0.4050 | Acc: 0.7326 | Prec: 0.9863 | Recall: 0.7218 | F1: 0.8336 | ROC-AUC: 0.8873
Max val ROC-AUC: 0.9452
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2258 | Acc: 0.9262 | Prec: 0.9276 | Recall: 0.9121 | F1: 0.9198 | ROC-AUC: 0.9684 || Test Loss: 1.4595 | Acc: 0.8047 | Prec: 0.9876 | Recall: 0.7995 | F1: 0.8837 | ROC-AUC: 0.8986
Max val ROC-AUC: 0.9402
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3032 | Acc: 0.8915 | Prec: 0.9313 | Recall: 0.8272 | F1: 0.8762 | ROC-AUC: 0.9415 || Test Loss: 0.3827 | Acc: 0.8837 | Prec: 0.9861 | Recall: 0.8872 | F1: 0.9340 | ROC-AUC: 0.9063
Max val ROC-AUC: 0.9482
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1057 | Acc: 0.9710 | Prec: 0.9857 | Recall: 0.9513 | F1: 0.9682 | ROC-AUC: 0.9899 || Test Loss: 0.6727 | Acc: 0.7000 | Prec: 0.9963 | Recall: 0.6792 | F1: 0.8077 | ROC-AUC: 0.9500
Max val ROC-AUC: 0.9691
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1010 | Acc: 0.9673 | Prec: 0.9856 | Recall: 0.9434 | F1: 0.9640 | ROC-AUC: 0.9912 || Test Loss: 0.4802 | Acc: 0.9047 | Prec: 0.9838 | Recall: 0.9123 | F1: 0.9467 | ROC-AUC: 0.9284
Max val ROC-AUC: 0.9438
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1087 | Acc: 0.9630 | Prec: 0.9854 | Recall: 0.9339 | F1: 0.9590 | ROC-AUC: 0.9898 || Test Loss: 0.3723 | Acc: 0.9023 | Prec: 0.9890 | Recall: 0.9048 | F1: 0.9450 | ROC-AUC: 0.9317
Max val ROC-AUC: 0.9484
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1109 | Acc: 0.9670 | Prec: 0.9762 | Recall: 0.9521 | F1: 0.9640 | ROC-AUC: 0.9884 || Test Loss: 0.5523 | Acc: 0.8721 | Prec: 0.9914 | Recall: 0.8697 | F1: 0.9266 | ROC-AUC: 0.9595
Max val ROC-AUC: 0.9595
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0635 | Acc: 0.9808 | Prec: 0.9896 | Recall: 0.9688 | F1: 0.9791 | ROC-AUC: 0.9954 || Test Loss: 0.2904 | Acc: 0.9140 | Prec: 0.9866 | Recall: 0.9198 | F1: 0.9520 | ROC-AUC: 0.9510
Max val ROC-AUC: 0.9715
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0974 | Acc: 0.9656 | Prec: 0.9761 | Recall: 0.9492 | F1: 0.9624 | ROC-AUC: 0.9918 || Test Loss: 0.2786 | Acc: 0.9023 | Prec: 0.9837 | Recall: 0.9098 | F1: 0.9453 | ROC-AUC: 0.9280
Max val ROC-AUC: 0.9412
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1392 | Acc: 0.9549 | Prec: 0.9628 | Recall: 0.9390 | F1: 0.9507 | ROC-AUC: 0.9854 || Test Loss: 0.3109 | Acc: 0.9000 | Prec: 0.9917 | Recall: 0.8997 | F1: 0.9435 | ROC-AUC: 0.9325
Max val ROC-AUC: 0.9374
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1351 | Acc: 0.9579 | Prec: 0.9793 | Recall: 0.9288 | F1: 0.9534 | ROC-AUC: 0.9857 || Test Loss: 0.3230 | Acc: 0.8814 | Prec: 0.9888 | Recall: 0.8822 | F1: 0.9325 | ROC-AUC: 0.9491
Max val ROC-AUC: 0.9491
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1316 | Acc: 0.9555 | Prec: 0.9800 | Recall: 0.9230 | F1: 0.9506 | ROC-AUC: 0.9852 || Test Loss: 0.3230 | Acc: 0.9000 | Prec: 0.9837 | Recall: 0.9073 | F1: 0.9439 | ROC-AUC: 0.9329
Max val ROC-AUC: 0.9550
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1491 | Acc: 0.9485 | Prec: 0.9715 | Recall: 0.9158 | F1: 0.9428 | ROC-AUC: 0.9815 || Test Loss: 0.4934 | Acc: 0.8721 | Prec: 0.9831 | Recall: 0.8772 | F1: 0.9272 | ROC-AUC: 0.9042
Max val ROC-AUC: 0.9482
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0361 | Acc: 0.9885 | Prec: 0.9919 | Recall: 0.9833 | F1: 0.9876 | ROC-AUC: 0.9988 || Test Loss: 0.3632 | Acc: 0.9302 | Prec: 0.9868 | Recall: 0.9373 | F1: 0.9614 | ROC-AUC: 0.9547
Max val ROC-AUC: 0.9588
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0221 | Acc: 0.9933 | Prec: 0.9963 | Recall: 0.9891 | F1: 0.9927 | ROC-AUC: 0.9996 || Test Loss: 0.4074 | Acc: 0.9302 | Prec: 0.9719 | Recall: 0.9524 | F1: 0.9620 | ROC-AUC: 0.9331
Max val ROC-AUC: 0.9527
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0088 | Acc: 0.9980 | Prec: 0.9978 | Recall: 0.9978 | F1: 0.9978 | ROC-AUC: 0.9999 || Test Loss: 0.5241 | Acc: 0.9256 | Prec: 0.9842 | Recall: 0.9348 | F1: 0.9589 | ROC-AUC: 0.9496
Max val ROC-AUC: 0.9496
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0267 | Acc: 0.9923 | Prec: 0.9956 | Recall: 0.9877 | F1: 0.9916 | ROC-AUC: 0.9991 || Test Loss: 0.3514 | Acc: 0.9349 | Prec: 0.9843 | Recall: 0.9449 | F1: 0.9642 | ROC-AUC: 0.9469
Max val ROC-AUC: 0.9469
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0332 | Acc: 0.9936 | Prec: 0.9913 | Recall: 0.9949 | F1: 0.9931 | ROC-AUC: 0.9982 || Test Loss: 0.3110 | Acc: 0.9349 | Prec: 0.9744 | Recall: 0.9549 | F1: 0.9646 | ROC-AUC: 0.9182
Max val ROC-AUC: 0.9534
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0273 | Acc: 0.9939 | Prec: 0.9956 | Recall: 0.9913 | F1: 0.9934 | ROC-AUC: 0.9992 || Test Loss: 0.8456 | Acc: 0.8442 | Prec: 0.9911 | Recall: 0.8396 | F1: 0.9091 | ROC-AUC: 0.9468
Max val ROC-AUC: 0.9480
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0250 | Acc: 0.9929 | Prec: 0.9934 | Recall: 0.9913 | F1: 0.9924 | ROC-AUC: 0.9992 || Test Loss: 0.3635 | Acc: 0.9233 | Prec: 0.9867 | Recall: 0.9298 | F1: 0.9574 | ROC-AUC: 0.9411
Max val ROC-AUC: 0.9513
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0276 | Acc: 0.9923 | Prec: 0.9899 | Recall: 0.9935 | F1: 0.9917 | ROC-AUC: 0.9990 || Test Loss: 0.4170 | Acc: 0.9256 | Prec: 0.9791 | Recall: 0.9398 | F1: 0.9591 | ROC-AUC: 0.9420
Max val ROC-AUC: 0.9505
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0254 | Acc: 0.9936 | Prec: 0.9935 | Recall: 0.9927 | F1: 0.9931 | ROC-AUC: 0.9992 || Test Loss: 0.3657 | Acc: 0.9256 | Prec: 0.9816 | Recall: 0.9373 | F1: 0.9590 | ROC-AUC: 0.9462
Max val ROC-AUC: 0.9462
Training with d_model=64, nhead=2, num_layers=2, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0137 | Acc: 0.9960 | Prec: 0.9956 | Recall: 0.9956 | F1: 0.9956 | ROC-AUC: 0.9999 || Test Loss: 0.4671 | Acc: 0.9163 | Prec: 0.9814 | Recall: 0.9273 | F1: 0.9536 | ROC-AUC: 0.9357
Max val ROC-AUC: 0.9560
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.4518 | Acc: 0.8306 | Prec: 0.8594 | Recall: 0.7589 | F1: 0.8060 | ROC-AUC: 0.8712 || Test Loss: 0.2837 | Acc: 0.8558 | Prec: 0.9913 | Recall: 0.8521 | F1: 0.9164 | ROC-AUC: 0.8895
Max val ROC-AUC: 0.9616
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.4474 | Acc: 0.8286 | Prec: 0.9289 | Recall: 0.6826 | F1: 0.7869 | ROC-AUC: 0.8403 || Test Loss: 0.2742 | Acc: 0.8372 | Prec: 0.9910 | Recall: 0.8321 | F1: 0.9046 | ROC-AUC: 0.8845
Max val ROC-AUC: 0.9304
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.4734 | Acc: 0.7932 | Prec: 0.7622 | Recall: 0.8054 | F1: 0.7832 | ROC-AUC: 0.8738 || Test Loss: 0.4810 | Acc: 0.9256 | Prec: 0.9766 | Recall: 0.9424 | F1: 0.9592 | ROC-AUC: 0.8732
Max val ROC-AUC: 0.9603
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.4219 | Acc: 0.8501 | Prec: 0.8923 | Recall: 0.7698 | F1: 0.8265 | ROC-AUC: 0.8846 || Test Loss: 1.3835 | Acc: 0.2628 | Prec: 1.0000 | Recall: 0.2055 | F1: 0.3410 | ROC-AUC: 0.9211
Max val ROC-AUC: 0.9351
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.4517 | Acc: 0.8373 | Prec: 0.9154 | Recall: 0.7153 | F1: 0.8031 | ROC-AUC: 0.8622 || Test Loss: 0.7312 | Acc: 0.5047 | Prec: 0.9947 | Recall: 0.4687 | F1: 0.6371 | ROC-AUC: 0.7749
Max val ROC-AUC: 0.9438
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.4028 | Acc: 0.8582 | Prec: 0.9178 | Recall: 0.7625 | F1: 0.8330 | ROC-AUC: 0.8911 || Test Loss: 0.4723 | Acc: 0.7605 | Prec: 0.9901 | Recall: 0.7494 | F1: 0.8531 | ROC-AUC: 0.8517
Max val ROC-AUC: 0.9580
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6939 | Acc: 0.5234 | Prec: 0.4087 | Recall: 0.0617 | F1: 0.1073 | ROC-AUC: 0.4840 || Test Loss: 0.7483 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.7638
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.4725 | Acc: 0.7952 | Prec: 0.9605 | Recall: 0.5824 | F1: 0.7251 | ROC-AUC: 0.8226 || Test Loss: 0.4632 | Acc: 0.6698 | Prec: 0.9923 | Recall: 0.6491 | F1: 0.7848 | ROC-AUC: 0.7999
Max val ROC-AUC: 0.9391
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.4867 | Acc: 0.7811 | Prec: 0.8668 | Recall: 0.6238 | F1: 0.7255 | ROC-AUC: 0.8369 || Test Loss: 0.2586 | Acc: 0.8512 | Prec: 0.9912 | Recall: 0.8471 | F1: 0.9135 | ROC-AUC: 0.8982
Max val ROC-AUC: 0.9707
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6953 | Acc: 0.5237 | Prec: 0.4657 | Recall: 0.1823 | F1: 0.2620 | ROC-AUC: 0.5024 || Test Loss: 0.7221 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.9505
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1554 | Acc: 0.9532 | Prec: 0.9747 | Recall: 0.9230 | F1: 0.9482 | ROC-AUC: 0.9798 || Test Loss: 0.3946 | Acc: 0.8581 | Prec: 0.9856 | Recall: 0.8596 | F1: 0.9183 | ROC-AUC: 0.9211
Max val ROC-AUC: 0.9545
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1746 | Acc: 0.9488 | Prec: 0.9658 | Recall: 0.9223 | F1: 0.9435 | ROC-AUC: 0.9766 || Test Loss: 0.4164 | Acc: 0.8907 | Prec: 0.9916 | Recall: 0.8897 | F1: 0.9379 | ROC-AUC: 0.9566
Max val ROC-AUC: 0.9611
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1139 | Acc: 0.9690 | Prec: 0.9856 | Recall: 0.9470 | F1: 0.9659 | ROC-AUC: 0.9874 || Test Loss: 0.3450 | Acc: 0.8814 | Prec: 0.9915 | Recall: 0.8797 | F1: 0.9323 | ROC-AUC: 0.9626
Max val ROC-AUC: 0.9626
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1583 | Acc: 0.9461 | Prec: 0.9551 | Recall: 0.9274 | F1: 0.9410 | ROC-AUC: 0.9834 || Test Loss: 0.3700 | Acc: 0.8953 | Prec: 0.9863 | Recall: 0.8997 | F1: 0.9410 | ROC-AUC: 0.9244
Max val ROC-AUC: 0.9436
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1068 | Acc: 0.9707 | Prec: 0.9806 | Recall: 0.9557 | F1: 0.9680 | ROC-AUC: 0.9888 || Test Loss: 0.4240 | Acc: 0.8721 | Prec: 0.9859 | Recall: 0.8747 | F1: 0.9270 | ROC-AUC: 0.9231
Max val ROC-AUC: 0.9438
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1312 | Acc: 0.9586 | Prec: 0.9845 | Recall: 0.9252 | F1: 0.9539 | ROC-AUC: 0.9861 || Test Loss: 0.4065 | Acc: 0.8372 | Prec: 0.9910 | Recall: 0.8321 | F1: 0.9046 | ROC-AUC: 0.9184
Max val ROC-AUC: 0.9543
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1529 | Acc: 0.9572 | Prec: 0.9721 | Recall: 0.9346 | F1: 0.9530 | ROC-AUC: 0.9813 || Test Loss: 0.3487 | Acc: 0.8837 | Prec: 0.9861 | Recall: 0.8872 | F1: 0.9340 | ROC-AUC: 0.9417
Max val ROC-AUC: 0.9536
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1173 | Acc: 0.9636 | Prec: 0.9789 | Recall: 0.9419 | F1: 0.9600 | ROC-AUC: 0.9887 || Test Loss: 0.4123 | Acc: 0.8628 | Prec: 0.9885 | Recall: 0.8622 | F1: 0.9210 | ROC-AUC: 0.9466
Max val ROC-AUC: 0.9527
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1835 | Acc: 0.9468 | Prec: 0.9728 | Recall: 0.9107 | F1: 0.9407 | ROC-AUC: 0.9715 || Test Loss: 0.6289 | Acc: 0.8349 | Prec: 0.9910 | Recall: 0.8296 | F1: 0.9031 | ROC-AUC: 0.9341
Max val ROC-AUC: 0.9679
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1689 | Acc: 0.9451 | Prec: 0.9742 | Recall: 0.9056 | F1: 0.9387 | ROC-AUC: 0.9763 || Test Loss: 0.3125 | Acc: 0.9209 | Prec: 0.9815 | Recall: 0.9323 | F1: 0.9563 | ROC-AUC: 0.9442
Max val ROC-AUC: 0.9517
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0620 | Acc: 0.9842 | Prec: 0.9897 | Recall: 0.9760 | F1: 0.9828 | ROC-AUC: 0.9958 || Test Loss: 0.2356 | Acc: 0.9419 | Prec: 0.9795 | Recall: 0.9574 | F1: 0.9683 | ROC-AUC: 0.9451
Max val ROC-AUC: 0.9451
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0331 | Acc: 0.9899 | Prec: 0.9905 | Recall: 0.9877 | F1: 0.9891 | ROC-AUC: 0.9992 || Test Loss: 0.4784 | Acc: 0.8814 | Prec: 0.9915 | Recall: 0.8797 | F1: 0.9323 | ROC-AUC: 0.9419
Max val ROC-AUC: 0.9671
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0350 | Acc: 0.9892 | Prec: 0.9912 | Recall: 0.9855 | F1: 0.9883 | ROC-AUC: 0.9989 || Test Loss: 0.4010 | Acc: 0.9233 | Prec: 0.9791 | Recall: 0.9373 | F1: 0.9577 | ROC-AUC: 0.9351
Max val ROC-AUC: 0.9589
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0218 | Acc: 0.9936 | Prec: 0.9935 | Recall: 0.9927 | F1: 0.9931 | ROC-AUC: 0.9993 || Test Loss: 0.4598 | Acc: 0.9209 | Prec: 0.9815 | Recall: 0.9323 | F1: 0.9563 | ROC-AUC: 0.9472
Max val ROC-AUC: 0.9560
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0368 | Acc: 0.9889 | Prec: 0.9912 | Recall: 0.9847 | F1: 0.9880 | ROC-AUC: 0.9988 || Test Loss: 0.6094 | Acc: 0.9116 | Prec: 0.9839 | Recall: 0.9198 | F1: 0.9508 | ROC-AUC: 0.9442
Max val ROC-AUC: 0.9542
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0445 | Acc: 0.9859 | Prec: 0.9912 | Recall: 0.9782 | F1: 0.9846 | ROC-AUC: 0.9984 || Test Loss: 0.3084 | Acc: 0.9256 | Prec: 0.9816 | Recall: 0.9373 | F1: 0.9590 | ROC-AUC: 0.9459
Max val ROC-AUC: 0.9589
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0226 | Acc: 0.9926 | Prec: 0.9927 | Recall: 0.9913 | F1: 0.9920 | ROC-AUC: 0.9996 || Test Loss: 0.4131 | Acc: 0.9233 | Prec: 0.9867 | Recall: 0.9298 | F1: 0.9574 | ROC-AUC: 0.9561
Max val ROC-AUC: 0.9563
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0378 | Acc: 0.9885 | Prec: 0.9905 | Recall: 0.9847 | F1: 0.9876 | ROC-AUC: 0.9989 || Test Loss: 0.4159 | Acc: 0.9186 | Prec: 0.9840 | Recall: 0.9273 | F1: 0.9548 | ROC-AUC: 0.9345
Max val ROC-AUC: 0.9446
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0146 | Acc: 0.9963 | Prec: 0.9971 | Recall: 0.9949 | F1: 0.9960 | ROC-AUC: 0.9998 || Test Loss: 0.4502 | Acc: 0.9326 | Prec: 0.9793 | Recall: 0.9474 | F1: 0.9631 | ROC-AUC: 0.9349
Max val ROC-AUC: 0.9440
Training with d_model=64, nhead=2, num_layers=2, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0314 | Acc: 0.9926 | Prec: 0.9934 | Recall: 0.9906 | F1: 0.9920 | ROC-AUC: 0.9987 || Test Loss: 0.6231 | Acc: 0.8907 | Prec: 0.9862 | Recall: 0.8947 | F1: 0.9382 | ROC-AUC: 0.9324
Max val ROC-AUC: 0.9659
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.7114 | Acc: 0.5298 | Prec: 0.4691 | Recall: 0.1046 | F1: 0.1710 | ROC-AUC: 0.4902 || Test Loss: 0.7292 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.7308 | Acc: 0.5261 | Prec: 0.4705 | Recall: 0.1736 | F1: 0.2536 | ROC-AUC: 0.4930 || Test Loss: 0.7557 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.7902 | Acc: 0.5338 | Prec: 0.4951 | Recall: 0.2578 | F1: 0.3391 | ROC-AUC: 0.5187 || Test Loss: 0.8585 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.9595
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.7003 | Acc: 0.5197 | Prec: 0.4701 | Recall: 0.2796 | F1: 0.3506 | ROC-AUC: 0.5026 || Test Loss: 0.7384 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6944 | Acc: 0.5197 | Prec: 0.4482 | Recall: 0.1540 | F1: 0.2292 | ROC-AUC: 0.4853 || Test Loss: 0.6955 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.7206 | Acc: 0.5298 | Prec: 0.4869 | Recall: 0.2556 | F1: 0.3352 | ROC-AUC: 0.5155 || Test Loss: 0.8701 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5002
Max val ROC-AUC: 0.5002
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6942 | Acc: 0.5315 | Prec: 0.4711 | Recall: 0.0828 | F1: 0.1408 | ROC-AUC: 0.4822 || Test Loss: 0.7911 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6992 | Acc: 0.5153 | Prec: 0.4526 | Recall: 0.2150 | F1: 0.2915 | ROC-AUC: 0.4796 || Test Loss: 0.8260 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.7115 | Acc: 0.5386 | Prec: 0.5069 | Recall: 0.1859 | F1: 0.2721 | ROC-AUC: 0.5122 || Test Loss: 0.7750 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.9000
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.7315 | Acc: 0.5153 | Prec: 0.4460 | Recall: 0.1859 | F1: 0.2624 | ROC-AUC: 0.4871 || Test Loss: 0.7441 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5855
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1927 | Acc: 0.9407 | Prec: 0.9659 | Recall: 0.9041 | F1: 0.9340 | ROC-AUC: 0.9714 || Test Loss: 0.3245 | Acc: 0.8884 | Prec: 0.9889 | Recall: 0.8897 | F1: 0.9367 | ROC-AUC: 0.9211
Max val ROC-AUC: 0.9448
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1468 | Acc: 0.9589 | Prec: 0.9736 | Recall: 0.9368 | F1: 0.9548 | ROC-AUC: 0.9826 || Test Loss: 0.3290 | Acc: 0.9093 | Prec: 0.9918 | Recall: 0.9098 | F1: 0.9490 | ROC-AUC: 0.9590
Max val ROC-AUC: 0.9590
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1281 | Acc: 0.9633 | Prec: 0.9862 | Recall: 0.9339 | F1: 0.9593 | ROC-AUC: 0.9851 || Test Loss: 0.5738 | Acc: 0.8674 | Prec: 0.9803 | Recall: 0.8747 | F1: 0.9245 | ROC-AUC: 0.9359
Max val ROC-AUC: 0.9490
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1332 | Acc: 0.9592 | Prec: 0.9816 | Recall: 0.9296 | F1: 0.9549 | ROC-AUC: 0.9845 || Test Loss: 0.4251 | Acc: 0.9070 | Prec: 0.9891 | Recall: 0.9098 | F1: 0.9478 | ROC-AUC: 0.9006
Max val ROC-AUC: 0.9457
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1522 | Acc: 0.9535 | Prec: 0.9725 | Recall: 0.9259 | F1: 0.9487 | ROC-AUC: 0.9837 || Test Loss: 0.4159 | Acc: 0.8698 | Prec: 0.9914 | Recall: 0.8672 | F1: 0.9251 | ROC-AUC: 0.9500
Max val ROC-AUC: 0.9520
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1241 | Acc: 0.9599 | Prec: 0.9831 | Recall: 0.9296 | F1: 0.9556 | ROC-AUC: 0.9877 || Test Loss: 0.4631 | Acc: 0.9186 | Prec: 0.9815 | Recall: 0.9298 | F1: 0.9550 | ROC-AUC: 0.8741
Max val ROC-AUC: 0.9504
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1676 | Acc: 0.9552 | Prec: 0.9777 | Recall: 0.9245 | F1: 0.9504 | ROC-AUC: 0.9775 || Test Loss: 0.2328 | Acc: 0.9256 | Prec: 0.9867 | Recall: 0.9323 | F1: 0.9588 | ROC-AUC: 0.9555
Max val ROC-AUC: 0.9555
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1099 | Acc: 0.9704 | Prec: 0.9879 | Recall: 0.9477 | F1: 0.9674 | ROC-AUC: 0.9885 || Test Loss: 0.5763 | Acc: 0.8837 | Prec: 0.9861 | Recall: 0.8872 | F1: 0.9340 | ROC-AUC: 0.9318
Max val ROC-AUC: 0.9501
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1538 | Acc: 0.9552 | Prec: 0.9719 | Recall: 0.9303 | F1: 0.9506 | ROC-AUC: 0.9819 || Test Loss: 0.2982 | Acc: 0.8814 | Prec: 0.9860 | Recall: 0.8847 | F1: 0.9326 | ROC-AUC: 0.9288
Max val ROC-AUC: 0.9457
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1419 | Acc: 0.9559 | Prec: 0.9756 | Recall: 0.9281 | F1: 0.9512 | ROC-AUC: 0.9850 || Test Loss: 0.3956 | Acc: 0.8907 | Prec: 0.9862 | Recall: 0.8947 | F1: 0.9382 | ROC-AUC: 0.9062
Max val ROC-AUC: 0.9462
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0548 | Acc: 0.9805 | Prec: 0.9889 | Recall: 0.9688 | F1: 0.9787 | ROC-AUC: 0.9977 || Test Loss: 0.4909 | Acc: 0.9163 | Prec: 0.9789 | Recall: 0.9298 | F1: 0.9537 | ROC-AUC: 0.9121
Max val ROC-AUC: 0.9479
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0759 | Acc: 0.9771 | Prec: 0.9873 | Recall: 0.9630 | F1: 0.9750 | ROC-AUC: 0.9954 || Test Loss: 0.5309 | Acc: 0.8860 | Prec: 0.9861 | Recall: 0.8897 | F1: 0.9354 | ROC-AUC: 0.9181
Max val ROC-AUC: 0.9438
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0632 | Acc: 0.9818 | Prec: 0.9896 | Recall: 0.9710 | F1: 0.9802 | ROC-AUC: 0.9966 || Test Loss: 0.4441 | Acc: 0.9070 | Prec: 0.9864 | Recall: 0.9123 | F1: 0.9479 | ROC-AUC: 0.9341
Max val ROC-AUC: 0.9561
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0451 | Acc: 0.9855 | Prec: 0.9890 | Recall: 0.9797 | F1: 0.9843 | ROC-AUC: 0.9984 || Test Loss: 0.5510 | Acc: 0.9163 | Prec: 0.9814 | Recall: 0.9273 | F1: 0.9536 | ROC-AUC: 0.9086
Max val ROC-AUC: 0.9351
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0445 | Acc: 0.9845 | Prec: 0.9897 | Recall: 0.9768 | F1: 0.9832 | ROC-AUC: 0.9982 || Test Loss: 0.5274 | Acc: 0.8953 | Prec: 0.9784 | Recall: 0.9073 | F1: 0.9415 | ROC-AUC: 0.9010
Max val ROC-AUC: 0.9449
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0471 | Acc: 0.9848 | Prec: 0.9890 | Recall: 0.9782 | F1: 0.9836 | ROC-AUC: 0.9980 || Test Loss: 0.5581 | Acc: 0.9070 | Prec: 0.9838 | Recall: 0.9148 | F1: 0.9481 | ROC-AUC: 0.9143
Max val ROC-AUC: 0.9508
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0535 | Acc: 0.9838 | Prec: 0.9875 | Recall: 0.9775 | F1: 0.9825 | ROC-AUC: 0.9976 || Test Loss: 0.4163 | Acc: 0.9372 | Prec: 0.9769 | Recall: 0.9549 | F1: 0.9658 | ROC-AUC: 0.9215
Max val ROC-AUC: 0.9472
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0563 | Acc: 0.9815 | Prec: 0.9867 | Recall: 0.9731 | F1: 0.9799 | ROC-AUC: 0.9973 || Test Loss: 0.3389 | Acc: 0.9349 | Prec: 0.9818 | Recall: 0.9474 | F1: 0.9643 | ROC-AUC: 0.9441
Max val ROC-AUC: 0.9441
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0696 | Acc: 0.9791 | Prec: 0.9867 | Recall: 0.9680 | F1: 0.9773 | ROC-AUC: 0.9957 || Test Loss: 0.4229 | Acc: 0.9233 | Prec: 0.9841 | Recall: 0.9323 | F1: 0.9575 | ROC-AUC: 0.9322
Max val ROC-AUC: 0.9462
Training with d_model=64, nhead=2, num_layers=2, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0632 | Acc: 0.9798 | Prec: 0.9911 | Recall: 0.9651 | F1: 0.9779 | ROC-AUC: 0.9966 || Test Loss: 0.4467 | Acc: 0.9279 | Prec: 0.9842 | Recall: 0.9373 | F1: 0.9602 | ROC-AUC: 0.9346
Max val ROC-AUC: 0.9356
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2346 | Acc: 0.9212 | Prec: 0.9688 | Recall: 0.8577 | F1: 0.9099 | ROC-AUC: 0.9480 || Test Loss: 0.6498 | Acc: 0.8465 | Prec: 0.9883 | Recall: 0.8446 | F1: 0.9108 | ROC-AUC: 0.8779
Max val ROC-AUC: 0.9425
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1608 | Acc: 0.9528 | Prec: 0.9434 | Recall: 0.9557 | F1: 0.9495 | ROC-AUC: 0.9783 || Test Loss: 0.3037 | Acc: 0.9186 | Prec: 0.9789 | Recall: 0.9323 | F1: 0.9551 | ROC-AUC: 0.8897
Max val ROC-AUC: 0.9581
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1501 | Acc: 0.9559 | Prec: 0.9615 | Recall: 0.9426 | F1: 0.9520 | ROC-AUC: 0.9822 || Test Loss: 0.3981 | Acc: 0.9163 | Prec: 0.9840 | Recall: 0.9248 | F1: 0.9535 | ROC-AUC: 0.9377
Max val ROC-AUC: 0.9681
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2077 | Acc: 0.9336 | Prec: 0.9307 | Recall: 0.9259 | F1: 0.9283 | ROC-AUC: 0.9709 || Test Loss: 0.2073 | Acc: 0.9279 | Prec: 0.9868 | Recall: 0.9348 | F1: 0.9601 | ROC-AUC: 0.9368
Max val ROC-AUC: 0.9548
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2813 | Acc: 0.9121 | Prec: 0.9529 | Recall: 0.8526 | F1: 0.9000 | ROC-AUC: 0.9520 || Test Loss: 0.3590 | Acc: 0.8070 | Prec: 0.9937 | Recall: 0.7970 | F1: 0.8846 | ROC-AUC: 0.9571
Max val ROC-AUC: 0.9580
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1496 | Acc: 0.9542 | Prec: 0.9607 | Recall: 0.9397 | F1: 0.9501 | ROC-AUC: 0.9816 || Test Loss: 0.4287 | Acc: 0.8605 | Prec: 0.9857 | Recall: 0.8622 | F1: 0.9198 | ROC-AUC: 0.9166
Max val ROC-AUC: 0.9483
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1662 | Acc: 0.9444 | Prec: 0.9640 | Recall: 0.9143 | F1: 0.9385 | ROC-AUC: 0.9780 || Test Loss: 0.2224 | Acc: 0.9023 | Prec: 0.9890 | Recall: 0.9048 | F1: 0.9450 | ROC-AUC: 0.9319
Max val ROC-AUC: 0.9375
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1620 | Acc: 0.9491 | Prec: 0.9391 | Recall: 0.9521 | F1: 0.9455 | ROC-AUC: 0.9796 || Test Loss: 0.2087 | Acc: 0.9395 | Prec: 0.9844 | Recall: 0.9499 | F1: 0.9668 | ROC-AUC: 0.8898
Max val ROC-AUC: 0.9428
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1328 | Acc: 0.9599 | Prec: 0.9765 | Recall: 0.9361 | F1: 0.9559 | ROC-AUC: 0.9859 || Test Loss: 0.2770 | Acc: 0.8488 | Prec: 0.9941 | Recall: 0.8421 | F1: 0.9118 | ROC-AUC: 0.9555
Max val ROC-AUC: 0.9619
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2121 | Acc: 0.9350 | Prec: 0.9611 | Recall: 0.8962 | F1: 0.9275 | ROC-AUC: 0.9573 || Test Loss: 0.3125 | Acc: 0.8907 | Prec: 0.9835 | Recall: 0.8972 | F1: 0.9384 | ROC-AUC: 0.9095
Max val ROC-AUC: 0.9473
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0829 | Acc: 0.9764 | Prec: 0.9746 | Recall: 0.9746 | F1: 0.9746 | ROC-AUC: 0.9944 || Test Loss: 0.5044 | Acc: 0.8651 | Prec: 0.9942 | Recall: 0.8596 | F1: 0.9220 | ROC-AUC: 0.9704
Max val ROC-AUC: 0.9709
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0420 | Acc: 0.9848 | Prec: 0.9840 | Recall: 0.9833 | F1: 0.9837 | ROC-AUC: 0.9986 || Test Loss: 0.4851 | Acc: 0.9140 | Prec: 0.9918 | Recall: 0.9148 | F1: 0.9518 | ROC-AUC: 0.9740
Max val ROC-AUC: 0.9740
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1227 | Acc: 0.9693 | Prec: 0.9791 | Recall: 0.9542 | F1: 0.9665 | ROC-AUC: 0.9876 || Test Loss: 0.3449 | Acc: 0.9047 | Prec: 0.9812 | Recall: 0.9148 | F1: 0.9468 | ROC-AUC: 0.9276
Max val ROC-AUC: 0.9571
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0565 | Acc: 0.9855 | Prec: 0.9890 | Recall: 0.9797 | F1: 0.9843 | ROC-AUC: 0.9972 || Test Loss: 0.1532 | Acc: 0.9535 | Prec: 0.9897 | Recall: 0.9599 | F1: 0.9746 | ROC-AUC: 0.9742
Max val ROC-AUC: 0.9742
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0575 | Acc: 0.9821 | Prec: 0.9832 | Recall: 0.9782 | F1: 0.9807 | ROC-AUC: 0.9969 || Test Loss: 0.6792 | Acc: 0.8837 | Prec: 0.9915 | Recall: 0.8822 | F1: 0.9337 | ROC-AUC: 0.9563
Max val ROC-AUC: 0.9643
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1357 | Acc: 0.9491 | Prec: 0.9404 | Recall: 0.9506 | F1: 0.9455 | ROC-AUC: 0.9881 || Test Loss: 0.3352 | Acc: 0.9047 | Prec: 0.9864 | Recall: 0.9098 | F1: 0.9465 | ROC-AUC: 0.8972
Max val ROC-AUC: 0.9320
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0545 | Acc: 0.9848 | Prec: 0.9861 | Recall: 0.9811 | F1: 0.9836 | ROC-AUC: 0.9973 || Test Loss: 0.2376 | Acc: 0.9326 | Prec: 0.9920 | Recall: 0.9348 | F1: 0.9626 | ROC-AUC: 0.9525
Max val ROC-AUC: 0.9657
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0558 | Acc: 0.9842 | Prec: 0.9861 | Recall: 0.9797 | F1: 0.9829 | ROC-AUC: 0.9971 || Test Loss: 0.3177 | Acc: 0.9093 | Prec: 0.9891 | Recall: 0.9123 | F1: 0.9492 | ROC-AUC: 0.9611
Max val ROC-AUC: 0.9611
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0667 | Acc: 0.9784 | Prec: 0.9782 | Recall: 0.9753 | F1: 0.9767 | ROC-AUC: 0.9967 || Test Loss: 0.3585 | Acc: 0.9419 | Prec: 0.9675 | Recall: 0.9699 | F1: 0.9687 | ROC-AUC: 0.9295
Max val ROC-AUC: 0.9426
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0353 | Acc: 0.9906 | Prec: 0.9920 | Recall: 0.9877 | F1: 0.9898 | ROC-AUC: 0.9987 || Test Loss: 0.4191 | Acc: 0.9302 | Prec: 0.9792 | Recall: 0.9449 | F1: 0.9617 | ROC-AUC: 0.8996
Max val ROC-AUC: 0.9426
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0081 | Acc: 0.9987 | Prec: 0.9993 | Recall: 0.9978 | F1: 0.9985 | ROC-AUC: 0.9998 || Test Loss: 0.4426 | Acc: 0.9326 | Prec: 0.9818 | Recall: 0.9449 | F1: 0.9630 | ROC-AUC: 0.9420
Max val ROC-AUC: 0.9604
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0240 | Acc: 0.9949 | Prec: 0.9928 | Recall: 0.9964 | F1: 0.9946 | ROC-AUC: 0.9989 || Test Loss: 0.4944 | Acc: 0.9233 | Prec: 0.9791 | Recall: 0.9373 | F1: 0.9577 | ROC-AUC: 0.9223
Max val ROC-AUC: 0.9466
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0265 | Acc: 0.9929 | Prec: 0.9942 | Recall: 0.9906 | F1: 0.9924 | ROC-AUC: 0.9991 || Test Loss: 0.3236 | Acc: 0.9372 | Prec: 0.9819 | Recall: 0.9499 | F1: 0.9656 | ROC-AUC: 0.9439
Max val ROC-AUC: 0.9550
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0415 | Acc: 0.9889 | Prec: 0.9891 | Recall: 0.9869 | F1: 0.9880 | ROC-AUC: 0.9984 || Test Loss: 0.2400 | Acc: 0.9395 | Prec: 0.9582 | Recall: 0.9774 | F1: 0.9677 | ROC-AUC: 0.8901
Max val ROC-AUC: 0.9572
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0210 | Acc: 0.9939 | Prec: 0.9971 | Recall: 0.9898 | F1: 0.9934 | ROC-AUC: 0.9997 || Test Loss: 0.3534 | Acc: 0.9372 | Prec: 0.9819 | Recall: 0.9499 | F1: 0.9656 | ROC-AUC: 0.9394
Max val ROC-AUC: 0.9457
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0150 | Acc: 0.9963 | Prec: 0.9971 | Recall: 0.9949 | F1: 0.9960 | ROC-AUC: 0.9993 || Test Loss: 0.4596 | Acc: 0.9186 | Prec: 0.9815 | Recall: 0.9298 | F1: 0.9550 | ROC-AUC: 0.9186
Max val ROC-AUC: 0.9570
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0518 | Acc: 0.9872 | Prec: 0.9876 | Recall: 0.9847 | F1: 0.9862 | ROC-AUC: 0.9971 || Test Loss: 0.2613 | Acc: 0.9256 | Prec: 0.9717 | Recall: 0.9474 | F1: 0.9594 | ROC-AUC: 0.9535
Max val ROC-AUC: 0.9591
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0145 | Acc: 0.9946 | Prec: 0.9964 | Recall: 0.9920 | F1: 0.9942 | ROC-AUC: 0.9999 || Test Loss: 0.4097 | Acc: 0.9163 | Prec: 0.9814 | Recall: 0.9273 | F1: 0.9536 | ROC-AUC: 0.9551
Max val ROC-AUC: 0.9575
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0219 | Acc: 0.9936 | Prec: 0.9963 | Recall: 0.9898 | F1: 0.9931 | ROC-AUC: 0.9994 || Test Loss: 0.4014 | Acc: 0.9465 | Prec: 0.9870 | Recall: 0.9549 | F1: 0.9707 | ROC-AUC: 0.9106
Max val ROC-AUC: 0.9409
Training with d_model=64, nhead=2, num_layers=4, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0308 | Acc: 0.9926 | Prec: 0.9956 | Recall: 0.9884 | F1: 0.9920 | ROC-AUC: 0.9983 || Test Loss: 0.3208 | Acc: 0.9302 | Prec: 0.9743 | Recall: 0.9499 | F1: 0.9619 | ROC-AUC: 0.9479
Max val ROC-AUC: 0.9587
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2500 | Acc: 0.9077 | Prec: 0.9332 | Recall: 0.8627 | F1: 0.8966 | ROC-AUC: 0.9636 || Test Loss: 0.3235 | Acc: 0.9070 | Prec: 0.9838 | Recall: 0.9148 | F1: 0.9481 | ROC-AUC: 0.9214
Max val ROC-AUC: 0.9518
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2555 | Acc: 0.8872 | Prec: 0.8929 | Recall: 0.8598 | F1: 0.8761 | ROC-AUC: 0.9592 || Test Loss: 0.3183 | Acc: 0.8674 | Prec: 0.9942 | Recall: 0.8622 | F1: 0.9235 | ROC-AUC: 0.9340
Max val ROC-AUC: 0.9606
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3143 | Acc: 0.8821 | Prec: 0.9092 | Recall: 0.8286 | F1: 0.8670 | ROC-AUC: 0.9213 || Test Loss: 0.3047 | Acc: 0.8814 | Prec: 0.9833 | Recall: 0.8872 | F1: 0.9328 | ROC-AUC: 0.9110
Max val ROC-AUC: 0.9110
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3853 | Acc: 0.8646 | Prec: 0.8587 | Recall: 0.8475 | F1: 0.8531 | ROC-AUC: 0.9006 || Test Loss: 0.4123 | Acc: 0.9233 | Prec: 0.9841 | Recall: 0.9323 | F1: 0.9575 | ROC-AUC: 0.8956
Max val ROC-AUC: 0.9533
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2671 | Acc: 0.9175 | Prec: 0.9572 | Recall: 0.8606 | F1: 0.9063 | ROC-AUC: 0.9408 || Test Loss: 0.3597 | Acc: 0.8791 | Prec: 0.9887 | Recall: 0.8797 | F1: 0.9310 | ROC-AUC: 0.8994
Max val ROC-AUC: 0.9409
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2707 | Acc: 0.9013 | Prec: 0.9532 | Recall: 0.8279 | F1: 0.8861 | ROC-AUC: 0.9539 || Test Loss: 0.3464 | Acc: 0.8349 | Prec: 0.9881 | Recall: 0.8321 | F1: 0.9034 | ROC-AUC: 0.9342
Max val ROC-AUC: 0.9480
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2839 | Acc: 0.8996 | Prec: 0.9147 | Recall: 0.8642 | F1: 0.8887 | ROC-AUC: 0.9540 || Test Loss: 0.4118 | Acc: 0.7581 | Prec: 0.9933 | Recall: 0.7444 | F1: 0.8510 | ROC-AUC: 0.9142
Max val ROC-AUC: 0.9445
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2761 | Acc: 0.8966 | Prec: 0.9301 | Recall: 0.8402 | F1: 0.8829 | ROC-AUC: 0.9382 || Test Loss: 0.4469 | Acc: 0.8186 | Prec: 0.9938 | Recall: 0.8095 | F1: 0.8923 | ROC-AUC: 0.8797
Max val ROC-AUC: 0.9335
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2770 | Acc: 0.9033 | Prec: 0.9445 | Recall: 0.8410 | F1: 0.8897 | ROC-AUC: 0.9519 || Test Loss: 0.3167 | Acc: 0.8442 | Prec: 0.9911 | Recall: 0.8396 | F1: 0.9091 | ROC-AUC: 0.9192
Max val ROC-AUC: 0.9353
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3634 | Acc: 0.8794 | Prec: 0.8239 | Recall: 0.9412 | F1: 0.8786 | ROC-AUC: 0.9063 || Test Loss: 0.3520 | Acc: 0.9465 | Prec: 0.9845 | Recall: 0.9574 | F1: 0.9708 | ROC-AUC: 0.8866
Max val ROC-AUC: 0.9409
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1493 | Acc: 0.9481 | Prec: 0.9581 | Recall: 0.9288 | F1: 0.9432 | ROC-AUC: 0.9842 || Test Loss: 0.3757 | Acc: 0.8256 | Prec: 0.9909 | Recall: 0.8195 | F1: 0.8971 | ROC-AUC: 0.9422
Max val ROC-AUC: 0.9462
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1430 | Acc: 0.9552 | Prec: 0.9829 | Recall: 0.9194 | F1: 0.9501 | ROC-AUC: 0.9828 || Test Loss: 0.3053 | Acc: 0.9186 | Prec: 0.9892 | Recall: 0.9223 | F1: 0.9546 | ROC-AUC: 0.9479
Max val ROC-AUC: 0.9479
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1257 | Acc: 0.9596 | Prec: 0.9823 | Recall: 0.9296 | F1: 0.9552 | ROC-AUC: 0.9863 || Test Loss: 0.4090 | Acc: 0.8093 | Prec: 0.9847 | Recall: 0.8070 | F1: 0.8871 | ROC-AUC: 0.9223
Max val ROC-AUC: 0.9420
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1137 | Acc: 0.9626 | Prec: 0.9745 | Recall: 0.9441 | F1: 0.9591 | ROC-AUC: 0.9893 || Test Loss: 0.5551 | Acc: 0.8326 | Prec: 0.9852 | Recall: 0.8321 | F1: 0.9022 | ROC-AUC: 0.9237
Max val ROC-AUC: 0.9449
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1400 | Acc: 0.9512 | Prec: 0.9702 | Recall: 0.9230 | F1: 0.9460 | ROC-AUC: 0.9851 || Test Loss: 0.6352 | Acc: 0.8140 | Prec: 0.9908 | Recall: 0.8070 | F1: 0.8895 | ROC-AUC: 0.9366
Max val ROC-AUC: 0.9556
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1358 | Acc: 0.9528 | Prec: 0.9640 | Recall: 0.9332 | F1: 0.9483 | ROC-AUC: 0.9861 || Test Loss: 0.2574 | Acc: 0.8907 | Prec: 0.9835 | Recall: 0.8972 | F1: 0.9384 | ROC-AUC: 0.9027
Max val ROC-AUC: 0.9494
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1512 | Acc: 0.9458 | Prec: 0.9484 | Recall: 0.9339 | F1: 0.9411 | ROC-AUC: 0.9851 || Test Loss: 0.4326 | Acc: 0.8698 | Prec: 0.9858 | Recall: 0.8722 | F1: 0.9255 | ROC-AUC: 0.9320
Max val ROC-AUC: 0.9561
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1307 | Acc: 0.9559 | Prec: 0.9829 | Recall: 0.9208 | F1: 0.9509 | ROC-AUC: 0.9864 || Test Loss: 0.6062 | Acc: 0.6605 | Prec: 0.9884 | Recall: 0.6416 | F1: 0.7781 | ROC-AUC: 0.9244
Max val ROC-AUC: 0.9479
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1006 | Acc: 0.9653 | Prec: 0.9840 | Recall: 0.9405 | F1: 0.9618 | ROC-AUC: 0.9891 || Test Loss: 0.5646 | Acc: 0.7791 | Prec: 0.9967 | Recall: 0.7644 | F1: 0.8652 | ROC-AUC: 0.9555
Max val ROC-AUC: 0.9555
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1427 | Acc: 0.9528 | Prec: 0.9725 | Recall: 0.9245 | F1: 0.9479 | ROC-AUC: 0.9847 || Test Loss: 0.4650 | Acc: 0.8837 | Prec: 0.9888 | Recall: 0.8847 | F1: 0.9339 | ROC-AUC: 0.9400
Max val ROC-AUC: 0.9436
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0064 | Acc: 0.9980 | Prec: 0.9985 | Recall: 0.9971 | F1: 0.9978 | ROC-AUC: 1.0000 || Test Loss: 0.4565 | Acc: 0.9326 | Prec: 0.9818 | Recall: 0.9449 | F1: 0.9630 | ROC-AUC: 0.9306
Max val ROC-AUC: 0.9686
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0061 | Acc: 0.9987 | Prec: 1.0000 | Recall: 0.9971 | F1: 0.9985 | ROC-AUC: 1.0000 || Test Loss: 0.3883 | Acc: 0.9442 | Prec: 0.9896 | Recall: 0.9499 | F1: 0.9693 | ROC-AUC: 0.9536
Max val ROC-AUC: 0.9598
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0232 | Acc: 0.9936 | Prec: 0.9956 | Recall: 0.9906 | F1: 0.9931 | ROC-AUC: 0.9993 || Test Loss: 0.3645 | Acc: 0.9279 | Prec: 0.9792 | Recall: 0.9424 | F1: 0.9604 | ROC-AUC: 0.9249
Max val ROC-AUC: 0.9564
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0517 | Acc: 0.9838 | Prec: 0.9875 | Recall: 0.9775 | F1: 0.9825 | ROC-AUC: 0.9975 || Test Loss: 0.3123 | Acc: 0.9279 | Prec: 0.9742 | Recall: 0.9474 | F1: 0.9606 | ROC-AUC: 0.8991
Max val ROC-AUC: 0.9488
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0393 | Acc: 0.9896 | Prec: 0.9956 | Recall: 0.9818 | F1: 0.9887 | ROC-AUC: 0.9981 || Test Loss: 0.3343 | Acc: 0.9233 | Prec: 0.9791 | Recall: 0.9373 | F1: 0.9577 | ROC-AUC: 0.9447
Max val ROC-AUC: 0.9636
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0331 | Acc: 0.9899 | Prec: 0.9905 | Recall: 0.9877 | F1: 0.9891 | ROC-AUC: 0.9986 || Test Loss: 0.4010 | Acc: 0.9070 | Prec: 0.9864 | Recall: 0.9123 | F1: 0.9479 | ROC-AUC: 0.9414
Max val ROC-AUC: 0.9508
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0261 | Acc: 0.9929 | Prec: 0.9934 | Recall: 0.9913 | F1: 0.9924 | ROC-AUC: 0.9990 || Test Loss: 0.4494 | Acc: 0.9163 | Prec: 0.9814 | Recall: 0.9273 | F1: 0.9536 | ROC-AUC: 0.9296
Max val ROC-AUC: 0.9449
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0262 | Acc: 0.9933 | Prec: 0.9971 | Recall: 0.9884 | F1: 0.9927 | ROC-AUC: 0.9988 || Test Loss: 0.3968 | Acc: 0.9326 | Prec: 0.9744 | Recall: 0.9524 | F1: 0.9632 | ROC-AUC: 0.9350
Max val ROC-AUC: 0.9530
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0378 | Acc: 0.9892 | Prec: 0.9905 | Recall: 0.9862 | F1: 0.9884 | ROC-AUC: 0.9984 || Test Loss: 0.3849 | Acc: 0.9140 | Prec: 0.9763 | Recall: 0.9298 | F1: 0.9525 | ROC-AUC: 0.9128
Max val ROC-AUC: 0.9532
Training with d_model=64, nhead=2, num_layers=4, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0426 | Acc: 0.9882 | Prec: 0.9927 | Recall: 0.9818 | F1: 0.9872 | ROC-AUC: 0.9981 || Test Loss: 0.4115 | Acc: 0.8907 | Prec: 0.9862 | Recall: 0.8947 | F1: 0.9382 | ROC-AUC: 0.9335
Max val ROC-AUC: 0.9565
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.5264 | Acc: 0.7767 | Prec: 0.8742 | Recall: 0.6057 | F1: 0.7156 | ROC-AUC: 0.8325 || Test Loss: 0.4164 | Acc: 0.8023 | Prec: 0.9876 | Recall: 0.7970 | F1: 0.8821 | ROC-AUC: 0.8656
Max val ROC-AUC: 0.9456
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6961 | Acc: 0.5173 | Prec: 0.4514 | Recall: 0.1888 | F1: 0.2663 | ROC-AUC: 0.4812 || Test Loss: 0.8609 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.4998
Max val ROC-AUC: 0.5002
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6975 | Acc: 0.5291 | Prec: 0.4875 | Recall: 0.2963 | F1: 0.3686 | ROC-AUC: 0.5167 || Test Loss: 0.7924 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.9319
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.4706 | Acc: 0.8259 | Prec: 0.9064 | Recall: 0.6964 | F1: 0.7877 | ROC-AUC: 0.8794 || Test Loss: 0.6487 | Acc: 0.6140 | Prec: 0.9874 | Recall: 0.5915 | F1: 0.7398 | ROC-AUC: 0.8735
Max val ROC-AUC: 0.9411
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6945 | Acc: 0.5194 | Prec: 0.4349 | Recall: 0.1213 | F1: 0.1897 | ROC-AUC: 0.4868 || Test Loss: 0.7620 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5959
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6939 | Acc: 0.5359 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.4853 || Test Loss: 0.7019 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.6378
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.4635 | Acc: 0.8464 | Prec: 0.7916 | Recall: 0.9078 | F1: 0.8457 | ROC-AUC: 0.8780 || Test Loss: 0.4867 | Acc: 0.8791 | Prec: 0.9833 | Recall: 0.8847 | F1: 0.9314 | ROC-AUC: 0.8466
Max val ROC-AUC: 0.9569
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.4306 | Acc: 0.8259 | Prec: 0.8015 | Recall: 0.8301 | F1: 0.8156 | ROC-AUC: 0.8895 || Test Loss: 0.2410 | Acc: 0.9233 | Prec: 0.9841 | Recall: 0.9323 | F1: 0.9575 | ROC-AUC: 0.9111
Max val ROC-AUC: 0.9540
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6942 | Acc: 0.5200 | Prec: 0.4674 | Recall: 0.2498 | F1: 0.3256 | ROC-AUC: 0.5081 || Test Loss: 0.6803 | Acc: 0.9279 | Prec: 0.9279 | Recall: 1.0000 | F1: 0.9626 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5002
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6954 | Acc: 0.5227 | Prec: 0.4664 | Recall: 0.2019 | F1: 0.2818 | ROC-AUC: 0.4985 || Test Loss: 0.8375 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.6583
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1560 | Acc: 0.9596 | Prec: 0.9831 | Recall: 0.9288 | F1: 0.9552 | ROC-AUC: 0.9769 || Test Loss: 0.2951 | Acc: 0.8953 | Prec: 0.9863 | Recall: 0.8997 | F1: 0.9410 | ROC-AUC: 0.9514
Max val ROC-AUC: 0.9541
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0998 | Acc: 0.9734 | Prec: 0.9843 | Recall: 0.9579 | F1: 0.9709 | ROC-AUC: 0.9880 || Test Loss: 0.4021 | Acc: 0.8977 | Prec: 0.9863 | Recall: 0.9023 | F1: 0.9424 | ROC-AUC: 0.9530
Max val ROC-AUC: 0.9530
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1265 | Acc: 0.9572 | Prec: 0.9742 | Recall: 0.9325 | F1: 0.9529 | ROC-AUC: 0.9869 || Test Loss: 0.3422 | Acc: 0.9140 | Prec: 0.9918 | Recall: 0.9148 | F1: 0.9518 | ROC-AUC: 0.9136
Max val ROC-AUC: 0.9517
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1470 | Acc: 0.9549 | Prec: 0.9726 | Recall: 0.9288 | F1: 0.9502 | ROC-AUC: 0.9837 || Test Loss: 0.4201 | Acc: 0.8721 | Prec: 0.9886 | Recall: 0.8722 | F1: 0.9268 | ROC-AUC: 0.9247
Max val ROC-AUC: 0.9546
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1376 | Acc: 0.9566 | Prec: 0.9749 | Recall: 0.9303 | F1: 0.9521 | ROC-AUC: 0.9831 || Test Loss: 0.3664 | Acc: 0.9116 | Prec: 0.9839 | Recall: 0.9198 | F1: 0.9508 | ROC-AUC: 0.9099
Max val ROC-AUC: 0.9500
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1394 | Acc: 0.9535 | Prec: 0.9806 | Recall: 0.9179 | F1: 0.9482 | ROC-AUC: 0.9829 || Test Loss: 0.3764 | Acc: 0.8930 | Prec: 0.9862 | Recall: 0.8972 | F1: 0.9396 | ROC-AUC: 0.9426
Max val ROC-AUC: 0.9452
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1649 | Acc: 0.9495 | Prec: 0.9609 | Recall: 0.9288 | F1: 0.9446 | ROC-AUC: 0.9813 || Test Loss: 0.2267 | Acc: 0.9163 | Prec: 0.9946 | Recall: 0.9148 | F1: 0.9530 | ROC-AUC: 0.9624
Max val ROC-AUC: 0.9624
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1349 | Acc: 0.9572 | Prec: 0.9756 | Recall: 0.9310 | F1: 0.9528 | ROC-AUC: 0.9839 || Test Loss: 0.4657 | Acc: 0.8814 | Prec: 0.9888 | Recall: 0.8822 | F1: 0.9325 | ROC-AUC: 0.9328
Max val ROC-AUC: 0.9622
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1665 | Acc: 0.9579 | Prec: 0.9793 | Recall: 0.9288 | F1: 0.9534 | ROC-AUC: 0.9776 || Test Loss: 0.2432 | Acc: 0.8837 | Prec: 0.9861 | Recall: 0.8872 | F1: 0.9340 | ROC-AUC: 0.9394
Max val ROC-AUC: 0.9539
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1590 | Acc: 0.9512 | Prec: 0.9717 | Recall: 0.9216 | F1: 0.9460 | ROC-AUC: 0.9778 || Test Loss: 0.3022 | Acc: 0.9163 | Prec: 0.9840 | Recall: 0.9248 | F1: 0.9535 | ROC-AUC: 0.9273
Max val ROC-AUC: 0.9608
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0485 | Acc: 0.9879 | Prec: 0.9912 | Recall: 0.9826 | F1: 0.9869 | ROC-AUC: 0.9972 || Test Loss: 0.5365 | Acc: 0.9047 | Prec: 0.9812 | Recall: 0.9148 | F1: 0.9468 | ROC-AUC: 0.9423
Max val ROC-AUC: 0.9459
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0587 | Acc: 0.9865 | Prec: 0.9934 | Recall: 0.9775 | F1: 0.9854 | ROC-AUC: 0.9950 || Test Loss: 0.4624 | Acc: 0.9209 | Prec: 0.9841 | Recall: 0.9298 | F1: 0.9562 | ROC-AUC: 0.9254
Max val ROC-AUC: 0.9361
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0379 | Acc: 0.9919 | Prec: 0.9942 | Recall: 0.9884 | F1: 0.9913 | ROC-AUC: 0.9974 || Test Loss: 0.4411 | Acc: 0.9209 | Prec: 0.9841 | Recall: 0.9298 | F1: 0.9562 | ROC-AUC: 0.9120
Max val ROC-AUC: 0.9580
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0415 | Acc: 0.9862 | Prec: 0.9883 | Recall: 0.9818 | F1: 0.9851 | ROC-AUC: 0.9980 || Test Loss: 0.5093 | Acc: 0.9140 | Prec: 0.9788 | Recall: 0.9273 | F1: 0.9524 | ROC-AUC: 0.9388
Max val ROC-AUC: 0.9558
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0291 | Acc: 0.9906 | Prec: 0.9949 | Recall: 0.9847 | F1: 0.9898 | ROC-AUC: 0.9991 || Test Loss: 0.5049 | Acc: 0.9279 | Prec: 0.9742 | Recall: 0.9474 | F1: 0.9606 | ROC-AUC: 0.9244
Max val ROC-AUC: 0.9512
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0414 | Acc: 0.9879 | Prec: 0.9919 | Recall: 0.9818 | F1: 0.9869 | ROC-AUC: 0.9981 || Test Loss: 0.4499 | Acc: 0.9326 | Prec: 0.9868 | Recall: 0.9398 | F1: 0.9628 | ROC-AUC: 0.9482
Max val ROC-AUC: 0.9528
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0508 | Acc: 0.9859 | Prec: 0.9919 | Recall: 0.9775 | F1: 0.9846 | ROC-AUC: 0.9975 || Test Loss: 0.4740 | Acc: 0.8930 | Prec: 0.9809 | Recall: 0.9023 | F1: 0.9399 | ROC-AUC: 0.9374
Max val ROC-AUC: 0.9507
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0489 | Acc: 0.9848 | Prec: 0.9890 | Recall: 0.9782 | F1: 0.9836 | ROC-AUC: 0.9979 || Test Loss: 0.5057 | Acc: 0.9093 | Prec: 0.9865 | Recall: 0.9148 | F1: 0.9493 | ROC-AUC: 0.9382
Max val ROC-AUC: 0.9616
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0445 | Acc: 0.9902 | Prec: 0.9941 | Recall: 0.9847 | F1: 0.9894 | ROC-AUC: 0.9971 || Test Loss: 0.3330 | Acc: 0.9256 | Prec: 0.9842 | Recall: 0.9348 | F1: 0.9589 | ROC-AUC: 0.9385
Max val ROC-AUC: 0.9704
Training with d_model=64, nhead=2, num_layers=4, dropout=0.3, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0257 | Acc: 0.9926 | Prec: 0.9956 | Recall: 0.9884 | F1: 0.9920 | ROC-AUC: 0.9994 || Test Loss: 0.3338 | Acc: 0.9279 | Prec: 0.9767 | Recall: 0.9449 | F1: 0.9605 | ROC-AUC: 0.9215
Max val ROC-AUC: 0.9572
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6925 | Acc: 0.5281 | Prec: 0.4790 | Recall: 0.1990 | F1: 0.2812 | ROC-AUC: 0.5155 || Test Loss: 0.7245 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6945 | Acc: 0.5153 | Prec: 0.4579 | Recall: 0.2447 | F1: 0.3190 | ROC-AUC: 0.4954 || Test Loss: 0.8182 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.7277 | Acc: 0.5231 | Prec: 0.4774 | Recall: 0.2992 | F1: 0.3679 | ROC-AUC: 0.5013 || Test Loss: 0.8052 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5155
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6962 | Acc: 0.5285 | Prec: 0.4739 | Recall: 0.1518 | F1: 0.2299 | ROC-AUC: 0.4870 || Test Loss: 0.6750 | Acc: 0.9279 | Prec: 0.9279 | Recall: 1.0000 | F1: 0.9626 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5794
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.7130 | Acc: 0.5217 | Prec: 0.4404 | Recall: 0.1155 | F1: 0.1830 | ROC-AUC: 0.5025 || Test Loss: 0.6701 | Acc: 0.9279 | Prec: 0.9279 | Recall: 1.0000 | F1: 0.9626 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5245
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.7126 | Acc: 0.5083 | Prec: 0.4459 | Recall: 0.2484 | F1: 0.3190 | ROC-AUC: 0.4780 || Test Loss: 0.6876 | Acc: 0.9279 | Prec: 0.9279 | Recall: 1.0000 | F1: 0.9626 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6939 | Acc: 0.5227 | Prec: 0.4695 | Recall: 0.2237 | F1: 0.3030 | ROC-AUC: 0.5097 || Test Loss: 0.9156 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.5000
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.7070 | Acc: 0.5184 | Prec: 0.4430 | Recall: 0.1496 | F1: 0.2237 | ROC-AUC: 0.4911 || Test Loss: 0.6570 | Acc: 0.9279 | Prec: 0.9279 | Recall: 1.0000 | F1: 0.9626 | ROC-AUC: 0.5070
Max val ROC-AUC: 0.5070
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.7312 | Acc: 0.5291 | Prec: 0.4769 | Recall: 0.1576 | F1: 0.2369 | ROC-AUC: 0.5057 || Test Loss: 0.9040 | Acc: 0.0721 | Prec: 0.0000 | Recall: 0.0000 | F1: 0.0000 | ROC-AUC: 0.5000
Max val ROC-AUC: 0.6779
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.6935 | Acc: 0.5241 | Prec: 0.4735 | Recall: 0.2331 | F1: 0.3124 | ROC-AUC: 0.5009 || Test Loss: 0.3211 | Acc: 0.9279 | Prec: 0.9279 | Recall: 1.0000 | F1: 0.9626 | ROC-AUC: 0.8028
Max val ROC-AUC: 0.9660
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1546 | Acc: 0.9515 | Prec: 0.9790 | Recall: 0.9150 | F1: 0.9459 | ROC-AUC: 0.9794 || Test Loss: 0.5153 | Acc: 0.7721 | Prec: 0.9902 | Recall: 0.7619 | F1: 0.8612 | ROC-AUC: 0.9254
Max val ROC-AUC: 0.9471
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1617 | Acc: 0.9498 | Prec: 0.9782 | Recall: 0.9121 | F1: 0.9440 | ROC-AUC: 0.9802 || Test Loss: 0.4649 | Acc: 0.8442 | Prec: 0.9882 | Recall: 0.8421 | F1: 0.9093 | ROC-AUC: 0.9253
Max val ROC-AUC: 0.9533
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1200 | Acc: 0.9680 | Prec: 0.9849 | Recall: 0.9455 | F1: 0.9648 | ROC-AUC: 0.9861 || Test Loss: 0.3722 | Acc: 0.9000 | Prec: 0.9863 | Recall: 0.9048 | F1: 0.9438 | ROC-AUC: 0.9496
Max val ROC-AUC: 0.9565
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1980 | Acc: 0.9404 | Prec: 0.9580 | Recall: 0.9114 | F1: 0.9341 | ROC-AUC: 0.9748 || Test Loss: 0.3806 | Acc: 0.8977 | Prec: 0.9863 | Recall: 0.9023 | F1: 0.9424 | ROC-AUC: 0.9259
Max val ROC-AUC: 0.9462
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1449 | Acc: 0.9626 | Prec: 0.9862 | Recall: 0.9325 | F1: 0.9586 | ROC-AUC: 0.9828 || Test Loss: 0.4575 | Acc: 0.8884 | Prec: 0.9861 | Recall: 0.8922 | F1: 0.9368 | ROC-AUC: 0.9095
Max val ROC-AUC: 0.9449
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1771 | Acc: 0.9438 | Prec: 0.9802 | Recall: 0.8969 | F1: 0.9367 | ROC-AUC: 0.9732 || Test Loss: 0.4610 | Acc: 0.8977 | Prec: 0.9863 | Recall: 0.9023 | F1: 0.9424 | ROC-AUC: 0.9407
Max val ROC-AUC: 0.9658
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1529 | Acc: 0.9562 | Prec: 0.9807 | Recall: 0.9237 | F1: 0.9514 | ROC-AUC: 0.9799 || Test Loss: 0.7389 | Acc: 0.7209 | Prec: 0.9929 | Recall: 0.7043 | F1: 0.8240 | ROC-AUC: 0.9431
Max val ROC-AUC: 0.9431
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1545 | Acc: 0.9522 | Prec: 0.9717 | Recall: 0.9237 | F1: 0.9471 | ROC-AUC: 0.9809 || Test Loss: 0.4307 | Acc: 0.8907 | Prec: 0.9862 | Recall: 0.8947 | F1: 0.9382 | ROC-AUC: 0.9043
Max val ROC-AUC: 0.9452
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1367 | Acc: 0.9656 | Prec: 0.9826 | Recall: 0.9426 | F1: 0.9622 | ROC-AUC: 0.9861 || Test Loss: 0.3503 | Acc: 0.9395 | Prec: 0.9869 | Recall: 0.9474 | F1: 0.9668 | ROC-AUC: 0.9621
Max val ROC-AUC: 0.9667
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1749 | Acc: 0.9461 | Prec: 0.9684 | Recall: 0.9136 | F1: 0.9402 | ROC-AUC: 0.9765 || Test Loss: 0.3476 | Acc: 0.9023 | Prec: 0.9837 | Recall: 0.9098 | F1: 0.9453 | ROC-AUC: 0.9394
Max val ROC-AUC: 0.9504
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0768 | Acc: 0.9788 | Prec: 0.9918 | Recall: 0.9622 | F1: 0.9768 | ROC-AUC: 0.9946 || Test Loss: 0.4353 | Acc: 0.9163 | Prec: 0.9814 | Recall: 0.9273 | F1: 0.9536 | ROC-AUC: 0.9147
Max val ROC-AUC: 0.9382
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0776 | Acc: 0.9734 | Prec: 0.9829 | Recall: 0.9593 | F1: 0.9710 | ROC-AUC: 0.9953 || Test Loss: 0.6299 | Acc: 0.8628 | Prec: 0.9857 | Recall: 0.8647 | F1: 0.9212 | ROC-AUC: 0.9118
Max val ROC-AUC: 0.9424
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0643 | Acc: 0.9825 | Prec: 0.9889 | Recall: 0.9731 | F1: 0.9810 | ROC-AUC: 0.9961 || Test Loss: 0.4488 | Acc: 0.9023 | Prec: 0.9811 | Recall: 0.9123 | F1: 0.9455 | ROC-AUC: 0.9350
Max val ROC-AUC: 0.9616
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0825 | Acc: 0.9764 | Prec: 0.9888 | Recall: 0.9601 | F1: 0.9742 | ROC-AUC: 0.9940 || Test Loss: 0.4914 | Acc: 0.9093 | Prec: 0.9813 | Recall: 0.9198 | F1: 0.9495 | ROC-AUC: 0.9017
Max val ROC-AUC: 0.9579
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0544 | Acc: 0.9869 | Prec: 0.9934 | Recall: 0.9782 | F1: 0.9857 | ROC-AUC: 0.9957 || Test Loss: 0.4728 | Acc: 0.9140 | Prec: 0.9788 | Recall: 0.9273 | F1: 0.9524 | ROC-AUC: 0.9322
Max val ROC-AUC: 0.9429
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0659 | Acc: 0.9821 | Prec: 0.9896 | Recall: 0.9717 | F1: 0.9806 | ROC-AUC: 0.9960 || Test Loss: 0.5057 | Acc: 0.9209 | Prec: 0.9815 | Recall: 0.9323 | F1: 0.9563 | ROC-AUC: 0.8997
Max val ROC-AUC: 0.9501
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0912 | Acc: 0.9761 | Prec: 0.9873 | Recall: 0.9608 | F1: 0.9739 | ROC-AUC: 0.9935 || Test Loss: 0.4129 | Acc: 0.9023 | Prec: 0.9837 | Recall: 0.9098 | F1: 0.9453 | ROC-AUC: 0.9142
Max val ROC-AUC: 0.9400
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0621 | Acc: 0.9811 | Prec: 0.9903 | Recall: 0.9688 | F1: 0.9794 | ROC-AUC: 0.9964 || Test Loss: 0.5766 | Acc: 0.8884 | Prec: 0.9861 | Recall: 0.8922 | F1: 0.9368 | ROC-AUC: 0.9404
Max val ROC-AUC: 0.9463
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0674 | Acc: 0.9805 | Prec: 0.9874 | Recall: 0.9702 | F1: 0.9788 | ROC-AUC: 0.9958 || Test Loss: 0.4325 | Acc: 0.9070 | Prec: 0.9812 | Recall: 0.9173 | F1: 0.9482 | ROC-AUC: 0.9177
Max val ROC-AUC: 0.9473
Training with d_model=64, nhead=2, num_layers=4, dropout=0.5, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0748 | Acc: 0.9764 | Prec: 0.9881 | Recall: 0.9608 | F1: 0.9742 | ROC-AUC: 0.9950 || Test Loss: 0.4836 | Acc: 0.8884 | Prec: 0.9808 | Recall: 0.8972 | F1: 0.9372 | ROC-AUC: 0.9247
Max val ROC-AUC: 0.9363
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1858 | Acc: 0.9485 | Prec: 0.9588 | Recall: 0.9288 | F1: 0.9436 | ROC-AUC: 0.9720 || Test Loss: 0.4187 | Acc: 0.8326 | Prec: 0.9910 | Recall: 0.8271 | F1: 0.9016 | ROC-AUC: 0.9121
Max val ROC-AUC: 0.9386
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1328 | Acc: 0.9515 | Prec: 0.9570 | Recall: 0.9375 | F1: 0.9472 | ROC-AUC: 0.9888 || Test Loss: 0.2863 | Acc: 0.9140 | Prec: 0.9866 | Recall: 0.9198 | F1: 0.9520 | ROC-AUC: 0.9584
Max val ROC-AUC: 0.9635
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1418 | Acc: 0.9518 | Prec: 0.9591 | Recall: 0.9361 | F1: 0.9474 | ROC-AUC: 0.9853 || Test Loss: 0.3230 | Acc: 0.9326 | Prec: 0.9744 | Recall: 0.9524 | F1: 0.9632 | ROC-AUC: 0.9489
Max val ROC-AUC: 0.9707
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2134 | Acc: 0.9390 | Prec: 0.9523 | Recall: 0.9143 | F1: 0.9329 | ROC-AUC: 0.9662 || Test Loss: 0.4275 | Acc: 0.8163 | Prec: 0.9908 | Recall: 0.8095 | F1: 0.8910 | ROC-AUC: 0.9246
Max val ROC-AUC: 0.9601
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2188 | Acc: 0.9330 | Prec: 0.9510 | Recall: 0.9020 | F1: 0.9258 | ROC-AUC: 0.9479 || Test Loss: 0.2809 | Acc: 0.9000 | Prec: 0.9811 | Recall: 0.9098 | F1: 0.9441 | ROC-AUC: 0.9052
Max val ROC-AUC: 0.9305
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1491 | Acc: 0.9528 | Prec: 0.9675 | Recall: 0.9296 | F1: 0.9481 | ROC-AUC: 0.9816 || Test Loss: 0.5151 | Acc: 0.7884 | Prec: 0.9904 | Recall: 0.7794 | F1: 0.8724 | ROC-AUC: 0.9210
Max val ROC-AUC: 0.9635
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.4082 | Acc: 0.8673 | Prec: 0.9212 | Recall: 0.7807 | F1: 0.8451 | ROC-AUC: 0.9174 || Test Loss: 0.7674 | Acc: 0.5837 | Prec: 0.9911 | Recall: 0.5564 | F1: 0.7127 | ROC-AUC: 0.7503
Max val ROC-AUC: 0.9621
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1505 | Acc: 0.9522 | Prec: 0.9598 | Recall: 0.9361 | F1: 0.9478 | ROC-AUC: 0.9785 || Test Loss: 0.2584 | Acc: 0.9116 | Prec: 0.9865 | Recall: 0.9173 | F1: 0.9506 | ROC-AUC: 0.9394
Max val ROC-AUC: 0.9573
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2146 | Acc: 0.9323 | Prec: 0.9727 | Recall: 0.8787 | F1: 0.9233 | ROC-AUC: 0.9582 || Test Loss: 1.0904 | Acc: 0.8233 | Prec: 0.9879 | Recall: 0.8195 | F1: 0.8959 | ROC-AUC: 0.8721
Max val ROC-AUC: 0.9506
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1257 | Acc: 0.9579 | Prec: 0.9596 | Recall: 0.9492 | F1: 0.9544 | ROC-AUC: 0.9887 || Test Loss: 0.2680 | Acc: 0.9209 | Prec: 0.9893 | Recall: 0.9248 | F1: 0.9560 | ROC-AUC: 0.9508
Max val ROC-AUC: 0.9575
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0614 | Acc: 0.9808 | Prec: 0.9818 | Recall: 0.9768 | F1: 0.9793 | ROC-AUC: 0.9969 || Test Loss: 0.3565 | Acc: 0.9233 | Prec: 0.9766 | Recall: 0.9398 | F1: 0.9579 | ROC-AUC: 0.9314
Max val ROC-AUC: 0.9516
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0996 | Acc: 0.9690 | Prec: 0.9842 | Recall: 0.9484 | F1: 0.9660 | ROC-AUC: 0.9909 || Test Loss: 0.5065 | Acc: 0.8605 | Prec: 0.9885 | Recall: 0.8596 | F1: 0.9196 | ROC-AUC: 0.9493
Max val ROC-AUC: 0.9493
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0449 | Acc: 0.9875 | Prec: 0.9891 | Recall: 0.9840 | F1: 0.9865 | ROC-AUC: 0.9984 || Test Loss: 0.4305 | Acc: 0.9000 | Prec: 0.9917 | Recall: 0.8997 | F1: 0.9435 | ROC-AUC: 0.9562
Max val ROC-AUC: 0.9565
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0397 | Acc: 0.9872 | Prec: 0.9876 | Recall: 0.9847 | F1: 0.9862 | ROC-AUC: 0.9989 || Test Loss: 0.2320 | Acc: 0.9465 | Prec: 0.9896 | Recall: 0.9524 | F1: 0.9706 | ROC-AUC: 0.9726
Max val ROC-AUC: 0.9726
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0482 | Acc: 0.9848 | Prec: 0.9876 | Recall: 0.9797 | F1: 0.9836 | ROC-AUC: 0.9983 || Test Loss: 0.3293 | Acc: 0.9419 | Prec: 0.9895 | Recall: 0.9474 | F1: 0.9680 | ROC-AUC: 0.9522
Max val ROC-AUC: 0.9581
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0323 | Acc: 0.9892 | Prec: 0.9891 | Recall: 0.9877 | F1: 0.9884 | ROC-AUC: 0.9991 || Test Loss: 0.2657 | Acc: 0.9395 | Prec: 0.9746 | Recall: 0.9599 | F1: 0.9672 | ROC-AUC: 0.9506
Max val ROC-AUC: 0.9576
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0625 | Acc: 0.9795 | Prec: 0.9741 | Recall: 0.9818 | F1: 0.9779 | ROC-AUC: 0.9968 || Test Loss: 0.3301 | Acc: 0.9209 | Prec: 0.9841 | Recall: 0.9298 | F1: 0.9562 | ROC-AUC: 0.9232
Max val ROC-AUC: 0.9357
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0389 | Acc: 0.9835 | Prec: 0.9812 | Recall: 0.9833 | F1: 0.9822 | ROC-AUC: 0.9991 || Test Loss: 0.2445 | Acc: 0.9395 | Prec: 0.9819 | Recall: 0.9524 | F1: 0.9669 | ROC-AUC: 0.9664
Max val ROC-AUC: 0.9664
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0443 | Acc: 0.9872 | Prec: 0.9890 | Recall: 0.9833 | F1: 0.9862 | ROC-AUC: 0.9975 || Test Loss: 0.2019 | Acc: 0.9419 | Prec: 0.9770 | Recall: 0.9599 | F1: 0.9684 | ROC-AUC: 0.9521
Max val ROC-AUC: 0.9521
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0585 | Acc: 0.9808 | Prec: 0.9783 | Recall: 0.9804 | F1: 0.9793 | ROC-AUC: 0.9965 || Test Loss: 0.3430 | Acc: 0.9186 | Prec: 0.9789 | Recall: 0.9323 | F1: 0.9551 | ROC-AUC: 0.9276
Max val ROC-AUC: 0.9440
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0344 | Acc: 0.9872 | Prec: 0.9848 | Recall: 0.9877 | F1: 0.9862 | ROC-AUC: 0.9990 || Test Loss: 0.3316 | Acc: 0.9349 | Prec: 0.9818 | Recall: 0.9474 | F1: 0.9643 | ROC-AUC: 0.9362
Max val ROC-AUC: 0.9362
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0303 | Acc: 0.9923 | Prec: 0.9956 | Recall: 0.9877 | F1: 0.9916 | ROC-AUC: 0.9988 || Test Loss: 0.3588 | Acc: 0.9302 | Prec: 0.9792 | Recall: 0.9449 | F1: 0.9617 | ROC-AUC: 0.9229
Max val ROC-AUC: 0.9391
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0416 | Acc: 0.9872 | Prec: 0.9869 | Recall: 0.9855 | F1: 0.9862 | ROC-AUC: 0.9978 || Test Loss: 0.4050 | Acc: 0.9256 | Prec: 0.9791 | Recall: 0.9398 | F1: 0.9591 | ROC-AUC: 0.9379
Max val ROC-AUC: 0.9632
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0308 | Acc: 0.9912 | Prec: 0.9898 | Recall: 0.9913 | F1: 0.9906 | ROC-AUC: 0.9988 || Test Loss: 0.5642 | Acc: 0.8953 | Prec: 0.9836 | Recall: 0.9023 | F1: 0.9412 | ROC-AUC: 0.9446
Max val ROC-AUC: 0.9639
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0247 | Acc: 0.9909 | Prec: 0.9898 | Recall: 0.9906 | F1: 0.9902 | ROC-AUC: 0.9996 || Test Loss: 0.3358 | Acc: 0.9395 | Prec: 0.9746 | Recall: 0.9599 | F1: 0.9672 | ROC-AUC: 0.9250
Max val ROC-AUC: 0.9419
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0119 | Acc: 0.9966 | Prec: 0.9978 | Recall: 0.9949 | F1: 0.9964 | ROC-AUC: 0.9997 || Test Loss: 0.4507 | Acc: 0.9209 | Prec: 0.9740 | Recall: 0.9398 | F1: 0.9566 | ROC-AUC: 0.9451
Max val ROC-AUC: 0.9526
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0122 | Acc: 0.9963 | Prec: 0.9964 | Recall: 0.9956 | F1: 0.9960 | ROC-AUC: 0.9999 || Test Loss: 0.4276 | Acc: 0.9372 | Prec: 0.9869 | Recall: 0.9449 | F1: 0.9654 | ROC-AUC: 0.9211
Max val ROC-AUC: 0.9714
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0166 | Acc: 0.9933 | Prec: 0.9935 | Recall: 0.9920 | F1: 0.9927 | ROC-AUC: 0.9998 || Test Loss: 0.3728 | Acc: 0.9302 | Prec: 0.9792 | Recall: 0.9449 | F1: 0.9617 | ROC-AUC: 0.9524
Max val ROC-AUC: 0.9655
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0422 | Acc: 0.9872 | Prec: 0.9890 | Recall: 0.9833 | F1: 0.9862 | ROC-AUC: 0.9978 || Test Loss: 0.3791 | Acc: 0.9256 | Prec: 0.9816 | Recall: 0.9373 | F1: 0.9590 | ROC-AUC: 0.9248
Max val ROC-AUC: 0.9491
Training with d_model=64, nhead=2, num_layers=6, dropout=0.0, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0039 | Acc: 0.9993 | Prec: 0.9993 | Recall: 0.9993 | F1: 0.9993 | ROC-AUC: 0.9999 || Test Loss: 0.4845 | Acc: 0.9256 | Prec: 0.9717 | Recall: 0.9474 | F1: 0.9594 | ROC-AUC: 0.9180
Max val ROC-AUC: 0.9665
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2407 | Acc: 0.9020 | Prec: 0.9393 | Recall: 0.8431 | F1: 0.8886 | ROC-AUC: 0.9678 || Test Loss: 0.5144 | Acc: 0.4977 | Prec: 1.0000 | Recall: 0.4586 | F1: 0.6289 | ROC-AUC: 0.9430
Max val ROC-AUC: 0.9634
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3446 | Acc: 0.8659 | Prec: 0.9166 | Recall: 0.7821 | F1: 0.8440 | ROC-AUC: 0.9150 || Test Loss: 0.3021 | Acc: 0.8558 | Prec: 0.9941 | Recall: 0.8496 | F1: 0.9162 | ROC-AUC: 0.9147
Max val ROC-AUC: 0.9529
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3069 | Acc: 0.8845 | Prec: 0.8641 | Recall: 0.8911 | F1: 0.8774 | ROC-AUC: 0.9443 || Test Loss: 0.3656 | Acc: 0.9372 | Prec: 0.9844 | Recall: 0.9474 | F1: 0.9655 | ROC-AUC: 0.8971
Max val ROC-AUC: 0.9563
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2964 | Acc: 0.9043 | Prec: 0.8857 | Recall: 0.9114 | F1: 0.8984 | ROC-AUC: 0.9482 || Test Loss: 0.4109 | Acc: 0.9233 | Prec: 0.9867 | Recall: 0.9298 | F1: 0.9574 | ROC-AUC: 0.9553
Max val ROC-AUC: 0.9553
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2484 | Acc: 0.9212 | Prec: 0.9420 | Recall: 0.8845 | F1: 0.9124 | ROC-AUC: 0.9558 || Test Loss: 0.2730 | Acc: 0.9023 | Prec: 0.9917 | Recall: 0.9023 | F1: 0.9449 | ROC-AUC: 0.9400
Max val ROC-AUC: 0.9736
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3059 | Acc: 0.8959 | Prec: 0.9619 | Recall: 0.8076 | F1: 0.8780 | ROC-AUC: 0.9247 || Test Loss: 0.6361 | Acc: 0.6233 | Prec: 0.9958 | Recall: 0.5965 | F1: 0.7461 | ROC-AUC: 0.9132
Max val ROC-AUC: 0.9449
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2730 | Acc: 0.9047 | Prec: 0.9506 | Recall: 0.8381 | F1: 0.8908 | ROC-AUC: 0.9417 || Test Loss: 1.0358 | Acc: 0.7860 | Prec: 0.9873 | Recall: 0.7794 | F1: 0.8711 | ROC-AUC: 0.8302
Max val ROC-AUC: 0.9360
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3029 | Acc: 0.8936 | Prec: 0.8915 | Recall: 0.8773 | F1: 0.8843 | ROC-AUC: 0.9528 || Test Loss: 0.6575 | Acc: 0.9256 | Prec: 0.9766 | Recall: 0.9424 | F1: 0.9592 | ROC-AUC: 0.8971
Max val ROC-AUC: 0.9293
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.3605 | Acc: 0.8717 | Prec: 0.9228 | Recall: 0.7894 | F1: 0.8509 | ROC-AUC: 0.9214 || Test Loss: 0.3159 | Acc: 0.8698 | Prec: 0.9831 | Recall: 0.8747 | F1: 0.9257 | ROC-AUC: 0.9120
Max val ROC-AUC: 0.9607
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.1


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.2558 | Acc: 0.9064 | Prec: 0.9428 | Recall: 0.8497 | F1: 0.8938 | ROC-AUC: 0.9587 || Test Loss: 0.4906 | Acc: 0.8047 | Prec: 0.9876 | Recall: 0.7995 | F1: 0.8837 | ROC-AUC: 0.9000
Max val ROC-AUC: 0.9495
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0884 | Acc: 0.9737 | Prec: 0.9800 | Recall: 0.9630 | F1: 0.9714 | ROC-AUC: 0.9933 || Test Loss: 0.4171 | Acc: 0.9000 | Prec: 0.9837 | Recall: 0.9073 | F1: 0.9439 | ROC-AUC: 0.9047
Max val ROC-AUC: 0.9360
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1296 | Acc: 0.9532 | Prec: 0.9704 | Recall: 0.9274 | F1: 0.9484 | ROC-AUC: 0.9884 || Test Loss: 0.2855 | Acc: 0.9047 | Prec: 0.9891 | Recall: 0.9073 | F1: 0.9464 | ROC-AUC: 0.9183
Max val ROC-AUC: 0.9521
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1061 | Acc: 0.9687 | Prec: 0.9805 | Recall: 0.9513 | F1: 0.9657 | ROC-AUC: 0.9899 || Test Loss: 0.3625 | Acc: 0.9140 | Prec: 0.9892 | Recall: 0.9173 | F1: 0.9519 | ROC-AUC: 0.9635
Max val ROC-AUC: 0.9635
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1247 | Acc: 0.9603 | Prec: 0.9809 | Recall: 0.9325 | F1: 0.9561 | ROC-AUC: 0.9844 || Test Loss: 0.2616 | Acc: 0.8977 | Prec: 0.9890 | Recall: 0.8997 | F1: 0.9423 | ROC-AUC: 0.9444
Max val ROC-AUC: 0.9592
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0842 | Acc: 0.9771 | Prec: 0.9881 | Recall: 0.9622 | F1: 0.9750 | ROC-AUC: 0.9920 || Test Loss: 0.3869 | Acc: 0.9000 | Prec: 0.9863 | Recall: 0.9048 | F1: 0.9438 | ROC-AUC: 0.9332
Max val ROC-AUC: 0.9562
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1087 | Acc: 0.9626 | Prec: 0.9795 | Recall: 0.9390 | F1: 0.9588 | ROC-AUC: 0.9895 || Test Loss: 0.3857 | Acc: 0.8884 | Prec: 0.9861 | Recall: 0.8922 | F1: 0.9368 | ROC-AUC: 0.9292
Max val ROC-AUC: 0.9385
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1114 | Acc: 0.9643 | Prec: 0.9818 | Recall: 0.9405 | F1: 0.9607 | ROC-AUC: 0.9896 || Test Loss: 0.4261 | Acc: 0.8884 | Prec: 0.9835 | Recall: 0.8947 | F1: 0.9370 | ROC-AUC: 0.9241
Max val ROC-AUC: 0.9431
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1194 | Acc: 0.9633 | Prec: 0.9738 | Recall: 0.9463 | F1: 0.9599 | ROC-AUC: 0.9898 || Test Loss: 0.5471 | Acc: 0.7372 | Prec: 0.9965 | Recall: 0.7193 | F1: 0.8355 | ROC-AUC: 0.9552
Max val ROC-AUC: 0.9609
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0657 | Acc: 0.9832 | Prec: 0.9897 | Recall: 0.9739 | F1: 0.9817 | ROC-AUC: 0.9953 || Test Loss: 0.3099 | Acc: 0.9163 | Prec: 0.9840 | Recall: 0.9248 | F1: 0.9535 | ROC-AUC: 0.9424
Max val ROC-AUC: 0.9642
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.01


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.1289 | Acc: 0.9586 | Prec: 0.9816 | Recall: 0.9281 | F1: 0.9541 | ROC-AUC: 0.9860 || Test Loss: 0.2633 | Acc: 0.8837 | Prec: 0.9915 | Recall: 0.8822 | F1: 0.9337 | ROC-AUC: 0.9449
Max val ROC-AUC: 0.9629
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 10 | Train Loss: 0.0328 | Acc: 0.9906 | Prec: 0.9898 | Recall: 0.9898 | F1: 0.9898 | ROC-AUC: 0.9986 || Test Loss: 0.4336 | Acc: 0.9279 | Prec: 0.9767 | Recall: 0.9449 | F1: 0.9605 | ROC-AUC: 0.9007
Max val ROC-AUC: 0.9474
Training with d_model=64, nhead=2, num_layers=6, dropout=0.1, lr=0.001


  0%|          | 0/10 [00:00<?, ?it/s]

In [None]:
# Attention Weights
import torch
import torch.nn as nn

class CustomTransformerEncoderLayer(nn.Module):
    def __init__(self, d_model, nhead, dim_feedforward=256, dropout=0.5):
        super(CustomTransformerEncoderLayer, self).__init__()
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout, batch_first=True)
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.activation = nn.GELU()

    def forward(self, src):
        # Self-attention with weights
        attn_output, attn_weights = self.self_attn(src, src, src, need_weights=True)
        src = src + self.dropout1(attn_output)
        src = self.norm1(src)

        ff_output = self.linear2(self.dropout(self.activation(self.linear1(src))))
        src = src + self.dropout2(ff_output)
        src = self.norm2(src)

        return src, attn_weights  # Return both output and attention
        

class TabTransformerWithAttention(nn.Module):
    def __init__(self, input_dim,
                 d_model=128,
                 nhead=8,
                 num_layers=4,
                 dim_feedforward=256,
                 dropout=0.5):
        super(TabTransformerWithAttention, self).__init__()

        self.embedding = nn.Linear(input_dim, d_model)
        self.norm = nn.LayerNorm(d_model)

        # Stack custom encoder layers
        self.encoder_layers = nn.ModuleList([
            CustomTransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout)
            for _ in range(num_layers)
        ])

        self.classifier = nn.Sequential(
            nn.Linear(d_model, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(64, 1)
        )

    def forward(self, x):
        x = x.unsqueeze(1)  # (batch_size, seq_len=1, input_dim)
        x = self.embedding(x)
        x = self.norm(x)

        all_attn_weights = []
        for layer in self.encoder_layers:
            x, attn_weights = layer(x)
            all_attn_weights.append(attn_weights)  # store attention maps

        x = x.mean(dim=1)
        logits = self.classifier(x)
        return logits, all_attn_weights  # return both predictions and attention maps
