In [7]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler


url = "https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data"
df = pd.read_csv(url)

# Drop the non-predictive ID column and separate features/target
X = df.drop(columns=['name', 'status'])
y = df['status']

# Standardize features: TabNet can work without, but scaling often helps convergence
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [8]:
from sklearn.model_selection import train_test_split


X_train, X_temp, y_train, y_temp = train_test_split(
    X_scaled, y, test_size=0.30, random_state=42, stratify=y
)
X_valid, X_test, y_valid, y_test = train_test_split(
    X_temp, y_temp, test_size=0.50, random_state=42, stratify=y_temp
)

print(f"Train size: {X_train.shape[0]}, Valid size: {X_valid.shape[0]}, Test size: {X_test.shape[0]}")


Train size: 136, Valid size: 29, Test size: 30


In [9]:
from pytorch_tabnet.tab_model import TabNetClassifier
import torch

clf = TabNetClassifier()


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")


Using device: cpu




In [10]:
import optuna
from sklearn.metrics import accuracy_score
from pytorch_tabnet.tab_model import TabNetClassifier  

def objective(trial):
    params = {
        'n_d': trial.suggest_categorical('n_d', [8, 16, 32]),
        'n_a': trial.suggest_categorical('n_a', [8, 16, 32]),
        'n_steps': trial.suggest_int('n_steps', 3, 5),
        'gamma': trial.suggest_float('gamma', 1.0, 2.0),
        'lambda_sparse': trial.suggest_float('lambda_sparse', 1e-5, 1e-3, log=True),  # Updated
        'optimizer_fn': torch.optim.Adam,
        'optimizer_params': dict(lr=trial.suggest_float('lr', 1e-3, 1e-1, log=True))  # Updated
    }
    model = TabNetClassifier(**params)
    model.fit(
        X_train, y_train,
        eval_set=[(X_valid, y_valid)],
        eval_metric=['accuracy'],
        max_epochs=50,
        patience=10,
        batch_size=32,
        virtual_batch_size=16,
    )
    preds = model.predict(X_valid)
    acc = accuracy_score(y_valid, preds)
    return acc

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)
print("Best trial:", study.best_trial.params)



[I 2025-05-09 05:07:02,723] A new study created in memory with name: no-name-2f5f0d54-1cbb-47c9-a3d5-63ad0d83454d


epoch 0  | loss: 1.38674 | val_0_accuracy: 0.68966 |  0:00:00s
epoch 1  | loss: 0.62443 | val_0_accuracy: 0.75862 |  0:00:00s
epoch 2  | loss: 0.50818 | val_0_accuracy: 0.75862 |  0:00:00s
epoch 3  | loss: 0.45771 | val_0_accuracy: 0.68966 |  0:00:00s
epoch 4  | loss: 0.38803 | val_0_accuracy: 0.68966 |  0:00:00s
epoch 5  | loss: 0.47349 | val_0_accuracy: 0.72414 |  0:00:00s
epoch 6  | loss: 0.38094 | val_0_accuracy: 0.75862 |  0:00:00s
epoch 7  | loss: 0.39202 | val_0_accuracy: 0.75862 |  0:00:00s
epoch 8  | loss: 0.40343 | val_0_accuracy: 0.82759 |  0:00:00s
epoch 9  | loss: 0.37735 | val_0_accuracy: 0.7931  |  0:00:01s
epoch 10 | loss: 0.37723 | val_0_accuracy: 0.75862 |  0:00:01s
epoch 11 | loss: 0.34428 | val_0_accuracy: 0.7931  |  0:00:01s
epoch 12 | loss: 0.31854 | val_0_accuracy: 0.72414 |  0:00:01s
epoch 13 | loss: 0.35084 | val_0_accuracy: 0.75862 |  0:00:01s
epoch 14 | loss: 0.31227 | val_0_accuracy: 0.86207 |  0:00:01s
epoch 15 | loss: 0.30635 | val_0_accuracy: 0.93103 |  0

[I 2025-05-09 05:07:05,552] Trial 0 finished with value: 0.9310344827586207 and parameters: {'n_d': 32, 'n_a': 8, 'n_steps': 3, 'gamma': 1.1739585789048637, 'lambda_sparse': 0.0002412938365734335, 'lr': 0.013228654467939298}. Best is trial 0 with value: 0.9310344827586207.


epoch 25 | loss: 0.22771 | val_0_accuracy: 0.82759 |  0:00:02s

Early stopping occurred at epoch 25 with best_epoch = 15 and best_val_0_accuracy = 0.93103
epoch 0  | loss: 0.7003  | val_0_accuracy: 0.62069 |  0:00:00s




epoch 1  | loss: 0.75554 | val_0_accuracy: 0.58621 |  0:00:00s
epoch 2  | loss: 0.55584 | val_0_accuracy: 0.7931  |  0:00:00s
epoch 3  | loss: 0.45558 | val_0_accuracy: 0.86207 |  0:00:00s
epoch 4  | loss: 0.4221  | val_0_accuracy: 0.7931  |  0:00:00s
epoch 5  | loss: 0.46848 | val_0_accuracy: 0.65517 |  0:00:00s
epoch 6  | loss: 0.40138 | val_0_accuracy: 0.75862 |  0:00:00s
epoch 7  | loss: 0.41739 | val_0_accuracy: 0.7931  |  0:00:00s
epoch 8  | loss: 0.44273 | val_0_accuracy: 0.75862 |  0:00:00s
epoch 9  | loss: 0.45757 | val_0_accuracy: 0.82759 |  0:00:01s
epoch 10 | loss: 0.45654 | val_0_accuracy: 0.86207 |  0:00:01s
epoch 11 | loss: 0.40682 | val_0_accuracy: 0.82759 |  0:00:01s


[I 2025-05-09 05:07:07,103] Trial 1 finished with value: 0.8620689655172413 and parameters: {'n_d': 16, 'n_a': 8, 'n_steps': 3, 'gamma': 1.6646996680646604, 'lambda_sparse': 0.0006791689975067122, 'lr': 0.01922046897316321}. Best is trial 0 with value: 0.9310344827586207.


epoch 12 | loss: 0.30821 | val_0_accuracy: 0.82759 |  0:00:01s
epoch 13 | loss: 0.38011 | val_0_accuracy: 0.7931  |  0:00:01s

Early stopping occurred at epoch 13 with best_epoch = 3 and best_val_0_accuracy = 0.86207




epoch 0  | loss: 0.63804 | val_0_accuracy: 0.68966 |  0:00:00s
epoch 1  | loss: 0.4836  | val_0_accuracy: 0.7931  |  0:00:00s
epoch 2  | loss: 0.49203 | val_0_accuracy: 0.82759 |  0:00:00s
epoch 3  | loss: 0.39408 | val_0_accuracy: 0.72414 |  0:00:00s
epoch 4  | loss: 0.42891 | val_0_accuracy: 0.65517 |  0:00:00s
epoch 5  | loss: 0.39966 | val_0_accuracy: 0.96552 |  0:00:00s
epoch 6  | loss: 0.33281 | val_0_accuracy: 0.72414 |  0:00:00s
epoch 7  | loss: 0.39947 | val_0_accuracy: 0.96552 |  0:00:00s
epoch 8  | loss: 0.36837 | val_0_accuracy: 0.86207 |  0:00:00s
epoch 9  | loss: 0.39062 | val_0_accuracy: 0.7931  |  0:00:01s
epoch 10 | loss: 0.40742 | val_0_accuracy: 0.82759 |  0:00:01s
epoch 11 | loss: 0.38926 | val_0_accuracy: 0.93103 |  0:00:01s
epoch 12 | loss: 0.34989 | val_0_accuracy: 0.93103 |  0:00:01s
epoch 13 | loss: 0.40683 | val_0_accuracy: 0.86207 |  0:00:01s
epoch 14 | loss: 0.32707 | val_0_accuracy: 0.7931  |  0:00:01s
epoch 15 | loss: 0.31127 | val_0_accuracy: 0.82759 |  0

[I 2025-05-09 05:07:08,734] Trial 2 finished with value: 0.9655172413793104 and parameters: {'n_d': 8, 'n_a': 8, 'n_steps': 3, 'gamma': 1.9168452073142168, 'lambda_sparse': 1.3648047351037876e-05, 'lr': 0.06466701589324549}. Best is trial 2 with value: 0.9655172413793104.


epoch 0  | loss: 1.74209 | val_0_accuracy: 0.72414 |  0:00:00s
epoch 1  | loss: 0.77309 | val_0_accuracy: 0.72414 |  0:00:00s
epoch 2  | loss: 0.64    | val_0_accuracy: 0.86207 |  0:00:00s
epoch 3  | loss: 0.65067 | val_0_accuracy: 0.86207 |  0:00:00s
epoch 4  | loss: 0.59303 | val_0_accuracy: 0.86207 |  0:00:00s
epoch 5  | loss: 0.56994 | val_0_accuracy: 0.82759 |  0:00:00s
epoch 6  | loss: 0.46909 | val_0_accuracy: 0.82759 |  0:00:01s
epoch 7  | loss: 0.51414 | val_0_accuracy: 0.82759 |  0:00:01s
epoch 8  | loss: 0.45651 | val_0_accuracy: 0.86207 |  0:00:01s
epoch 9  | loss: 0.39196 | val_0_accuracy: 0.89655 |  0:00:01s
epoch 10 | loss: 0.4715  | val_0_accuracy: 0.72414 |  0:00:01s
epoch 11 | loss: 0.47205 | val_0_accuracy: 0.72414 |  0:00:01s
epoch 12 | loss: 0.48238 | val_0_accuracy: 0.72414 |  0:00:02s
epoch 13 | loss: 0.34926 | val_0_accuracy: 0.75862 |  0:00:02s
epoch 14 | loss: 0.39104 | val_0_accuracy: 0.82759 |  0:00:02s
epoch 15 | loss: 0.40113 | val_0_accuracy: 0.72414 |  0

[I 2025-05-09 05:07:12,026] Trial 3 finished with value: 0.896551724137931 and parameters: {'n_d': 32, 'n_a': 32, 'n_steps': 5, 'gamma': 1.349696847388448, 'lambda_sparse': 0.0001425708043969091, 'lr': 0.006816860629054308}. Best is trial 2 with value: 0.9655172413793104.


epoch 0  | loss: 1.38515 | val_0_accuracy: 0.62069 |  0:00:00s
epoch 1  | loss: 0.66944 | val_0_accuracy: 0.75862 |  0:00:00s
epoch 2  | loss: 0.57529 | val_0_accuracy: 0.75862 |  0:00:00s
epoch 3  | loss: 0.50657 | val_0_accuracy: 0.75862 |  0:00:00s
epoch 4  | loss: 0.57722 | val_0_accuracy: 0.72414 |  0:00:00s
epoch 5  | loss: 0.42976 | val_0_accuracy: 0.82759 |  0:00:00s
epoch 6  | loss: 0.41138 | val_0_accuracy: 0.82759 |  0:00:01s
epoch 7  | loss: 0.44052 | val_0_accuracy: 0.7931  |  0:00:01s
epoch 8  | loss: 0.47949 | val_0_accuracy: 0.62069 |  0:00:01s
epoch 9  | loss: 0.38759 | val_0_accuracy: 0.86207 |  0:00:01s
epoch 10 | loss: 0.41889 | val_0_accuracy: 0.7931  |  0:00:01s
epoch 11 | loss: 0.31672 | val_0_accuracy: 0.72414 |  0:00:01s
epoch 12 | loss: 0.40726 | val_0_accuracy: 0.75862 |  0:00:01s
epoch 13 | loss: 0.60389 | val_0_accuracy: 0.7931  |  0:00:01s
epoch 14 | loss: 0.36219 | val_0_accuracy: 0.72414 |  0:00:02s
epoch 15 | loss: 0.41726 | val_0_accuracy: 0.72414 |  0

[I 2025-05-09 05:07:14,867] Trial 4 finished with value: 0.8620689655172413 and parameters: {'n_d': 8, 'n_a': 8, 'n_steps': 5, 'gamma': 1.6861764706196862, 'lambda_sparse': 1.2977097773344425e-05, 'lr': 0.043528925802748204}. Best is trial 2 with value: 0.9655172413793104.


epoch 18 | loss: 0.38837 | val_0_accuracy: 0.68966 |  0:00:02s
epoch 19 | loss: 0.37888 | val_0_accuracy: 0.7931  |  0:00:02s

Early stopping occurred at epoch 19 with best_epoch = 9 and best_val_0_accuracy = 0.86207




epoch 0  | loss: 1.29416 | val_0_accuracy: 0.7931  |  0:00:00s
epoch 1  | loss: 0.52948 | val_0_accuracy: 0.68966 |  0:00:00s
epoch 2  | loss: 0.53897 | val_0_accuracy: 0.93103 |  0:00:00s
epoch 3  | loss: 0.64524 | val_0_accuracy: 0.89655 |  0:00:00s
epoch 4  | loss: 0.50503 | val_0_accuracy: 0.93103 |  0:00:00s
epoch 5  | loss: 0.36017 | val_0_accuracy: 0.93103 |  0:00:00s
epoch 6  | loss: 0.53877 | val_0_accuracy: 0.7931  |  0:00:01s
epoch 7  | loss: 0.45662 | val_0_accuracy: 0.7931  |  0:00:01s
epoch 8  | loss: 0.36205 | val_0_accuracy: 0.75862 |  0:00:01s
epoch 9  | loss: 0.55714 | val_0_accuracy: 0.86207 |  0:00:01s
epoch 10 | loss: 0.37677 | val_0_accuracy: 0.86207 |  0:00:01s
epoch 11 | loss: 0.2945  | val_0_accuracy: 0.75862 |  0:00:01s


[I 2025-05-09 05:07:16,972] Trial 5 finished with value: 0.9310344827586207 and parameters: {'n_d': 32, 'n_a': 16, 'n_steps': 5, 'gamma': 1.0337306836363722, 'lambda_sparse': 2.579219158635761e-05, 'lr': 0.036120281093349235}. Best is trial 2 with value: 0.9655172413793104.


epoch 12 | loss: 0.30909 | val_0_accuracy: 0.7931  |  0:00:01s

Early stopping occurred at epoch 12 with best_epoch = 2 and best_val_0_accuracy = 0.93103




epoch 0  | loss: 1.5055  | val_0_accuracy: 0.86207 |  0:00:00s
epoch 1  | loss: 0.72897 | val_0_accuracy: 0.75862 |  0:00:00s
epoch 2  | loss: 0.76881 | val_0_accuracy: 0.82759 |  0:00:00s
epoch 3  | loss: 0.62333 | val_0_accuracy: 0.65517 |  0:00:00s
epoch 4  | loss: 0.47677 | val_0_accuracy: 0.72414 |  0:00:00s
epoch 5  | loss: 0.43223 | val_0_accuracy: 0.58621 |  0:00:00s
epoch 6  | loss: 0.66293 | val_0_accuracy: 0.44828 |  0:00:01s
epoch 7  | loss: 0.38897 | val_0_accuracy: 0.7931  |  0:00:01s
epoch 8  | loss: 0.61359 | val_0_accuracy: 0.7931  |  0:00:01s
epoch 9  | loss: 0.49168 | val_0_accuracy: 0.65517 |  0:00:01s


[I 2025-05-09 05:07:18,769] Trial 6 finished with value: 0.8620689655172413 and parameters: {'n_d': 32, 'n_a': 16, 'n_steps': 5, 'gamma': 1.3974110944327265, 'lambda_sparse': 0.00037972998939911253, 'lr': 0.05345846292998866}. Best is trial 2 with value: 0.9655172413793104.


epoch 10 | loss: 0.38092 | val_0_accuracy: 0.82759 |  0:00:01s

Early stopping occurred at epoch 10 with best_epoch = 0 and best_val_0_accuracy = 0.86207
epoch 0  | loss: 0.82095 | val_0_accuracy: 0.62069 |  0:00:00s




epoch 1  | loss: 0.73412 | val_0_accuracy: 0.62069 |  0:00:00s
epoch 2  | loss: 0.66849 | val_0_accuracy: 0.68966 |  0:00:00s
epoch 3  | loss: 0.559   | val_0_accuracy: 0.7931  |  0:00:00s
epoch 4  | loss: 0.57399 | val_0_accuracy: 0.82759 |  0:00:00s
epoch 5  | loss: 0.50474 | val_0_accuracy: 0.86207 |  0:00:00s
epoch 6  | loss: 0.41869 | val_0_accuracy: 0.82759 |  0:00:00s
epoch 7  | loss: 0.4469  | val_0_accuracy: 0.86207 |  0:00:00s
epoch 8  | loss: 0.38722 | val_0_accuracy: 0.89655 |  0:00:00s
epoch 9  | loss: 0.43204 | val_0_accuracy: 0.86207 |  0:00:00s
epoch 10 | loss: 0.32717 | val_0_accuracy: 0.93103 |  0:00:01s
epoch 11 | loss: 0.40819 | val_0_accuracy: 0.89655 |  0:00:01s
epoch 12 | loss: 0.3715  | val_0_accuracy: 0.93103 |  0:00:01s
epoch 13 | loss: 0.39461 | val_0_accuracy: 0.82759 |  0:00:01s
epoch 14 | loss: 0.33713 | val_0_accuracy: 0.82759 |  0:00:01s
epoch 15 | loss: 0.3509  | val_0_accuracy: 0.82759 |  0:00:01s
epoch 16 | loss: 0.36732 | val_0_accuracy: 0.86207 |  0

[I 2025-05-09 05:07:20,903] Trial 7 finished with value: 0.9310344827586207 and parameters: {'n_d': 16, 'n_a': 32, 'n_steps': 3, 'gamma': 1.5245910319071672, 'lambda_sparse': 8.675224011336264e-05, 'lr': 0.006290500764357661}. Best is trial 2 with value: 0.9655172413793104.


epoch 20 | loss: 0.31954 | val_0_accuracy: 0.86207 |  0:00:02s

Early stopping occurred at epoch 20 with best_epoch = 10 and best_val_0_accuracy = 0.93103
epoch 0  | loss: 1.06254 | val_0_accuracy: 0.55172 |  0:00:00s




epoch 1  | loss: 0.88547 | val_0_accuracy: 0.65517 |  0:00:00s
epoch 2  | loss: 0.53997 | val_0_accuracy: 0.72414 |  0:00:00s
epoch 3  | loss: 0.78406 | val_0_accuracy: 0.68966 |  0:00:00s
epoch 4  | loss: 0.70492 | val_0_accuracy: 0.75862 |  0:00:00s
epoch 5  | loss: 0.57272 | val_0_accuracy: 0.7931  |  0:00:00s
epoch 6  | loss: 0.54036 | val_0_accuracy: 0.93103 |  0:00:00s
epoch 7  | loss: 0.57119 | val_0_accuracy: 0.82759 |  0:00:01s
epoch 8  | loss: 0.41191 | val_0_accuracy: 0.82759 |  0:00:01s
epoch 9  | loss: 0.41572 | val_0_accuracy: 0.82759 |  0:00:01s
epoch 10 | loss: 0.32928 | val_0_accuracy: 0.82759 |  0:00:01s
epoch 11 | loss: 0.36836 | val_0_accuracy: 0.89655 |  0:00:01s
epoch 12 | loss: 0.30695 | val_0_accuracy: 0.82759 |  0:00:01s
epoch 13 | loss: 0.38718 | val_0_accuracy: 0.82759 |  0:00:01s
epoch 14 | loss: 0.36383 | val_0_accuracy: 0.75862 |  0:00:02s
epoch 15 | loss: 0.26341 | val_0_accuracy: 0.86207 |  0:00:02s
epoch 16 | loss: 0.31205 | val_0_accuracy: 0.93103 |  0

[I 2025-05-09 05:07:23,311] Trial 8 finished with value: 0.9310344827586207 and parameters: {'n_d': 32, 'n_a': 8, 'n_steps': 4, 'gamma': 1.4975492456644277, 'lambda_sparse': 0.00032173919686413933, 'lr': 0.027994675237410102}. Best is trial 2 with value: 0.9655172413793104.


epoch 0  | loss: 1.18001 | val_0_accuracy: 0.68966 |  0:00:00s
epoch 1  | loss: 0.51687 | val_0_accuracy: 0.65517 |  0:00:00s
epoch 2  | loss: 0.67651 | val_0_accuracy: 0.68966 |  0:00:00s
epoch 3  | loss: 0.43051 | val_0_accuracy: 0.72414 |  0:00:00s
epoch 4  | loss: 0.53836 | val_0_accuracy: 0.86207 |  0:00:00s
epoch 5  | loss: 0.46133 | val_0_accuracy: 0.72414 |  0:00:00s
epoch 6  | loss: 0.39836 | val_0_accuracy: 0.75862 |  0:00:00s
epoch 7  | loss: 0.43657 | val_0_accuracy: 0.68966 |  0:00:00s
epoch 8  | loss: 0.31654 | val_0_accuracy: 0.65517 |  0:00:00s
epoch 9  | loss: 0.36993 | val_0_accuracy: 0.82759 |  0:00:01s
epoch 10 | loss: 0.41655 | val_0_accuracy: 0.93103 |  0:00:01s
epoch 11 | loss: 0.33606 | val_0_accuracy: 0.82759 |  0:00:01s
epoch 12 | loss: 0.35788 | val_0_accuracy: 0.72414 |  0:00:01s
epoch 13 | loss: 0.27007 | val_0_accuracy: 0.86207 |  0:00:01s
epoch 14 | loss: 0.35603 | val_0_accuracy: 0.7931  |  0:00:01s
epoch 15 | loss: 0.36213 | val_0_accuracy: 0.7931  |  0

[I 2025-05-09 05:07:25,501] Trial 9 finished with value: 0.9310344827586207 and parameters: {'n_d': 32, 'n_a': 16, 'n_steps': 3, 'gamma': 1.5354284199553443, 'lambda_sparse': 2.0158549144375598e-05, 'lr': 0.07830302539432067}. Best is trial 2 with value: 0.9655172413793104.


epoch 20 | loss: 0.3272  | val_0_accuracy: 0.72414 |  0:00:02s

Early stopping occurred at epoch 20 with best_epoch = 10 and best_val_0_accuracy = 0.93103




epoch 0  | loss: 1.54749 | val_0_accuracy: 0.34483 |  0:00:00s
epoch 1  | loss: 1.44643 | val_0_accuracy: 0.48276 |  0:00:00s
epoch 2  | loss: 1.32776 | val_0_accuracy: 0.44828 |  0:00:00s
epoch 3  | loss: 1.17089 | val_0_accuracy: 0.48276 |  0:00:00s
epoch 4  | loss: 1.09693 | val_0_accuracy: 0.51724 |  0:00:00s
epoch 5  | loss: 1.08099 | val_0_accuracy: 0.55172 |  0:00:00s
epoch 6  | loss: 0.90159 | val_0_accuracy: 0.55172 |  0:00:00s
epoch 7  | loss: 0.89444 | val_0_accuracy: 0.58621 |  0:00:01s
epoch 8  | loss: 0.7935  | val_0_accuracy: 0.62069 |  0:00:01s
epoch 9  | loss: 0.76765 | val_0_accuracy: 0.62069 |  0:00:01s
epoch 10 | loss: 0.87332 | val_0_accuracy: 0.75862 |  0:00:01s
epoch 11 | loss: 0.87751 | val_0_accuracy: 0.75862 |  0:00:01s
epoch 12 | loss: 0.70532 | val_0_accuracy: 0.72414 |  0:00:01s
epoch 13 | loss: 0.65947 | val_0_accuracy: 0.75862 |  0:00:01s
epoch 14 | loss: 0.65712 | val_0_accuracy: 0.72414 |  0:00:01s
epoch 15 | loss: 0.68951 | val_0_accuracy: 0.58621 |  0

[I 2025-05-09 05:07:29,625] Trial 10 finished with value: 0.896551724137931 and parameters: {'n_d': 8, 'n_a': 8, 'n_steps': 4, 'gamma': 1.9889748736492285, 'lambda_sparse': 4.6943287507478173e-05, 'lr': 0.0019060160811898842}. Best is trial 2 with value: 0.9655172413793104.


epoch 30 | loss: 0.51172 | val_0_accuracy: 0.86207 |  0:00:03s
epoch 31 | loss: 0.45838 | val_0_accuracy: 0.82759 |  0:00:04s

Early stopping occurred at epoch 31 with best_epoch = 21 and best_val_0_accuracy = 0.89655




epoch 0  | loss: 0.98935 | val_0_accuracy: 0.58621 |  0:00:00s
epoch 1  | loss: 0.66509 | val_0_accuracy: 0.72414 |  0:00:00s
epoch 2  | loss: 0.62087 | val_0_accuracy: 0.75862 |  0:00:00s
epoch 3  | loss: 0.54624 | val_0_accuracy: 0.72414 |  0:00:00s
epoch 4  | loss: 0.48818 | val_0_accuracy: 0.55172 |  0:00:00s
epoch 5  | loss: 0.45216 | val_0_accuracy: 0.62069 |  0:00:00s
epoch 6  | loss: 0.39273 | val_0_accuracy: 0.62069 |  0:00:00s
epoch 7  | loss: 0.46011 | val_0_accuracy: 0.65517 |  0:00:00s
epoch 8  | loss: 0.37273 | val_0_accuracy: 0.68966 |  0:00:01s
epoch 9  | loss: 0.4154  | val_0_accuracy: 0.7931  |  0:00:01s
epoch 10 | loss: 0.42865 | val_0_accuracy: 0.82759 |  0:00:01s
epoch 11 | loss: 0.39458 | val_0_accuracy: 0.86207 |  0:00:01s
epoch 12 | loss: 0.37796 | val_0_accuracy: 0.86207 |  0:00:01s
epoch 13 | loss: 0.38274 | val_0_accuracy: 0.89655 |  0:00:01s
epoch 14 | loss: 0.39251 | val_0_accuracy: 0.93103 |  0:00:01s
epoch 15 | loss: 0.35377 | val_0_accuracy: 0.89655 |  0

[I 2025-05-09 05:07:32,250] Trial 11 finished with value: 0.9310344827586207 and parameters: {'n_d': 8, 'n_a': 8, 'n_steps': 3, 'gamma': 1.0703030111920224, 'lambda_sparse': 0.00016410481548947373, 'lr': 0.012107320868417116}. Best is trial 2 with value: 0.9655172413793104.


epoch 23 | loss: 0.34051 | val_0_accuracy: 0.86207 |  0:00:02s
epoch 24 | loss: 0.29574 | val_0_accuracy: 0.86207 |  0:00:02s

Early stopping occurred at epoch 24 with best_epoch = 14 and best_val_0_accuracy = 0.93103




epoch 0  | loss: 1.56978 | val_0_accuracy: 0.37931 |  0:00:00s
epoch 1  | loss: 1.37386 | val_0_accuracy: 0.55172 |  0:00:00s
epoch 2  | loss: 1.09701 | val_0_accuracy: 0.62069 |  0:00:00s
epoch 3  | loss: 0.99576 | val_0_accuracy: 0.62069 |  0:00:00s
epoch 4  | loss: 1.11348 | val_0_accuracy: 0.72414 |  0:00:00s
epoch 5  | loss: 0.92712 | val_0_accuracy: 0.72414 |  0:00:00s
epoch 6  | loss: 0.67664 | val_0_accuracy: 0.7931  |  0:00:00s
epoch 7  | loss: 0.68066 | val_0_accuracy: 0.7931  |  0:00:00s
epoch 8  | loss: 0.63533 | val_0_accuracy: 0.7931  |  0:00:01s
epoch 9  | loss: 0.67374 | val_0_accuracy: 0.86207 |  0:00:01s
epoch 10 | loss: 0.64433 | val_0_accuracy: 0.75862 |  0:00:01s
epoch 11 | loss: 0.55512 | val_0_accuracy: 0.72414 |  0:00:01s
epoch 12 | loss: 0.51186 | val_0_accuracy: 0.72414 |  0:00:01s
epoch 13 | loss: 0.54002 | val_0_accuracy: 0.75862 |  0:00:01s
epoch 14 | loss: 0.46314 | val_0_accuracy: 0.7931  |  0:00:01s
epoch 15 | loss: 0.49795 | val_0_accuracy: 0.7931  |  0

[I 2025-05-09 05:07:34,694] Trial 12 finished with value: 0.8620689655172413 and parameters: {'n_d': 8, 'n_a': 8, 'n_steps': 4, 'gamma': 1.9038813658681235, 'lambda_sparse': 6.241912562527029e-05, 'lr': 0.0026393023294690667}. Best is trial 2 with value: 0.9655172413793104.


epoch 18 | loss: 0.48457 | val_0_accuracy: 0.75862 |  0:00:02s
epoch 19 | loss: 0.54257 | val_0_accuracy: 0.82759 |  0:00:02s

Early stopping occurred at epoch 19 with best_epoch = 9 and best_val_0_accuracy = 0.86207




epoch 0  | loss: 0.76662 | val_0_accuracy: 0.58621 |  0:00:00s
epoch 1  | loss: 0.45498 | val_0_accuracy: 0.75862 |  0:00:00s
epoch 2  | loss: 0.61909 | val_0_accuracy: 0.75862 |  0:00:00s
epoch 3  | loss: 0.50644 | val_0_accuracy: 0.68966 |  0:00:00s
epoch 4  | loss: 0.4943  | val_0_accuracy: 0.72414 |  0:00:00s
epoch 5  | loss: 0.42508 | val_0_accuracy: 0.75862 |  0:00:00s
epoch 6  | loss: 0.4369  | val_0_accuracy: 0.82759 |  0:00:00s
epoch 7  | loss: 0.40374 | val_0_accuracy: 0.75862 |  0:00:00s
epoch 8  | loss: 0.34047 | val_0_accuracy: 0.65517 |  0:00:01s
epoch 9  | loss: 0.46314 | val_0_accuracy: 0.75862 |  0:00:01s
epoch 10 | loss: 0.31833 | val_0_accuracy: 0.72414 |  0:00:01s
epoch 11 | loss: 0.33214 | val_0_accuracy: 0.82759 |  0:00:01s
epoch 12 | loss: 0.30332 | val_0_accuracy: 0.89655 |  0:00:01s
epoch 13 | loss: 0.36253 | val_0_accuracy: 0.82759 |  0:00:01s
epoch 14 | loss: 0.26939 | val_0_accuracy: 0.82759 |  0:00:01s
epoch 15 | loss: 0.35741 | val_0_accuracy: 0.7931  |  0

[I 2025-05-09 05:07:37,099] Trial 13 finished with value: 0.896551724137931 and parameters: {'n_d': 8, 'n_a': 8, 'n_steps': 3, 'gamma': 1.2470999109490295, 'lambda_sparse': 0.0009218507192842421, 'lr': 0.0951006853421955}. Best is trial 2 with value: 0.9655172413793104.


epoch 22 | loss: 0.39481 | val_0_accuracy: 0.7931  |  0:00:02s

Early stopping occurred at epoch 22 with best_epoch = 12 and best_val_0_accuracy = 0.89655
epoch 0  | loss: 0.88625 | val_0_accuracy: 0.68966 |  0:00:00s




epoch 1  | loss: 0.84107 | val_0_accuracy: 0.65517 |  0:00:00s
epoch 2  | loss: 0.6826  | val_0_accuracy: 0.48276 |  0:00:00s
epoch 3  | loss: 0.61208 | val_0_accuracy: 0.62069 |  0:00:00s
epoch 4  | loss: 0.57891 | val_0_accuracy: 0.62069 |  0:00:00s
epoch 5  | loss: 0.59997 | val_0_accuracy: 0.75862 |  0:00:00s
epoch 6  | loss: 0.44554 | val_0_accuracy: 0.75862 |  0:00:00s
epoch 7  | loss: 0.57024 | val_0_accuracy: 0.75862 |  0:00:00s
epoch 8  | loss: 0.46769 | val_0_accuracy: 0.72414 |  0:00:00s
epoch 9  | loss: 0.46353 | val_0_accuracy: 0.75862 |  0:00:00s
epoch 10 | loss: 0.59945 | val_0_accuracy: 0.82759 |  0:00:01s
epoch 11 | loss: 0.52603 | val_0_accuracy: 0.7931  |  0:00:01s
epoch 12 | loss: 0.53312 | val_0_accuracy: 0.82759 |  0:00:01s
epoch 13 | loss: 0.55845 | val_0_accuracy: 0.7931  |  0:00:01s
epoch 14 | loss: 0.48377 | val_0_accuracy: 0.7931  |  0:00:01s
epoch 15 | loss: 0.50298 | val_0_accuracy: 0.7931  |  0:00:01s
epoch 16 | loss: 0.48    | val_0_accuracy: 0.7931  |  0

[I 2025-05-09 05:07:41,106] Trial 14 finished with value: 0.9310344827586207 and parameters: {'n_d': 16, 'n_a': 8, 'n_steps': 3, 'gamma': 1.1844306958549562, 'lambda_sparse': 0.000235293538378797, 'lr': 0.003709435875028911}. Best is trial 2 with value: 0.9655172413793104.


epoch 39 | loss: 0.28953 | val_0_accuracy: 0.86207 |  0:00:03s
epoch 40 | loss: 0.33472 | val_0_accuracy: 0.89655 |  0:00:03s

Early stopping occurred at epoch 40 with best_epoch = 30 and best_val_0_accuracy = 0.93103




epoch 0  | loss: 2.43297 | val_0_accuracy: 0.24138 |  0:00:00s
epoch 1  | loss: 2.45245 | val_0_accuracy: 0.10345 |  0:00:00s
epoch 2  | loss: 2.24214 | val_0_accuracy: 0.2069  |  0:00:00s
epoch 3  | loss: 1.83511 | val_0_accuracy: 0.17241 |  0:00:00s
epoch 4  | loss: 1.85505 | val_0_accuracy: 0.31034 |  0:00:00s
epoch 5  | loss: 1.61906 | val_0_accuracy: 0.41379 |  0:00:00s
epoch 6  | loss: 1.65126 | val_0_accuracy: 0.58621 |  0:00:00s
epoch 7  | loss: 1.3399  | val_0_accuracy: 0.51724 |  0:00:01s
epoch 8  | loss: 1.38931 | val_0_accuracy: 0.55172 |  0:00:01s
epoch 9  | loss: 0.99252 | val_0_accuracy: 0.68966 |  0:00:01s
epoch 10 | loss: 0.9454  | val_0_accuracy: 0.75862 |  0:00:01s
epoch 11 | loss: 0.8985  | val_0_accuracy: 0.72414 |  0:00:01s
epoch 12 | loss: 0.71889 | val_0_accuracy: 0.68966 |  0:00:01s
epoch 13 | loss: 0.70502 | val_0_accuracy: 0.82759 |  0:00:01s
epoch 14 | loss: 0.6766  | val_0_accuracy: 0.7931  |  0:00:02s
epoch 15 | loss: 0.56948 | val_0_accuracy: 0.75862 |  0

[I 2025-05-09 05:07:44,867] Trial 15 finished with value: 0.8275862068965517 and parameters: {'n_d': 32, 'n_a': 32, 'n_steps': 4, 'gamma': 1.8195142842080845, 'lambda_sparse': 1.1775136220918548e-05, 'lr': 0.0010066014100261207}. Best is trial 2 with value: 0.9655172413793104.


epoch 23 | loss: 0.67797 | val_0_accuracy: 0.75862 |  0:00:03s

Early stopping occurred at epoch 23 with best_epoch = 13 and best_val_0_accuracy = 0.82759




epoch 0  | loss: 0.84995 | val_0_accuracy: 0.65517 |  0:00:00s
epoch 1  | loss: 0.53766 | val_0_accuracy: 0.7931  |  0:00:00s
epoch 2  | loss: 0.52149 | val_0_accuracy: 0.75862 |  0:00:00s
epoch 3  | loss: 0.48998 | val_0_accuracy: 0.75862 |  0:00:00s
epoch 4  | loss: 0.54959 | val_0_accuracy: 0.7931  |  0:00:00s
epoch 5  | loss: 0.46411 | val_0_accuracy: 0.82759 |  0:00:00s
epoch 6  | loss: 0.45096 | val_0_accuracy: 0.68966 |  0:00:00s
epoch 7  | loss: 0.42036 | val_0_accuracy: 0.65517 |  0:00:01s
epoch 8  | loss: 0.39092 | val_0_accuracy: 0.82759 |  0:00:01s
epoch 9  | loss: 0.45098 | val_0_accuracy: 0.75862 |  0:00:01s
epoch 10 | loss: 0.37038 | val_0_accuracy: 0.7931  |  0:00:01s
epoch 11 | loss: 0.38717 | val_0_accuracy: 0.86207 |  0:00:01s
epoch 12 | loss: 0.36695 | val_0_accuracy: 0.89655 |  0:00:01s
epoch 13 | loss: 0.40592 | val_0_accuracy: 0.89655 |  0:00:01s
epoch 14 | loss: 0.42738 | val_0_accuracy: 0.89655 |  0:00:01s
epoch 15 | loss: 0.41761 | val_0_accuracy: 0.7931  |  0

[I 2025-05-09 05:07:47,801] Trial 16 finished with value: 0.896551724137931 and parameters: {'n_d': 8, 'n_a': 8, 'n_steps': 3, 'gamma': 1.786959982551877, 'lambda_sparse': 5.042399752700395e-05, 'lr': 0.016584086094057184}. Best is trial 2 with value: 0.9655172413793104.


epoch 0  | loss: 1.24809 | val_0_accuracy: 0.62069 |  0:00:00s
epoch 1  | loss: 0.81016 | val_0_accuracy: 0.75862 |  0:00:00s
epoch 2  | loss: 0.65311 | val_0_accuracy: 0.75862 |  0:00:00s
epoch 3  | loss: 0.60865 | val_0_accuracy: 0.7931  |  0:00:00s
epoch 4  | loss: 0.57532 | val_0_accuracy: 0.7931  |  0:00:00s
epoch 5  | loss: 0.60642 | val_0_accuracy: 0.7931  |  0:00:00s
epoch 6  | loss: 0.59108 | val_0_accuracy: 0.7931  |  0:00:01s
epoch 7  | loss: 0.50693 | val_0_accuracy: 0.75862 |  0:00:01s
epoch 8  | loss: 0.52443 | val_0_accuracy: 0.75862 |  0:00:01s
epoch 9  | loss: 0.39416 | val_0_accuracy: 0.7931  |  0:00:01s
epoch 10 | loss: 0.44121 | val_0_accuracy: 0.7931  |  0:00:01s
epoch 11 | loss: 0.4614  | val_0_accuracy: 0.86207 |  0:00:01s
epoch 12 | loss: 0.46942 | val_0_accuracy: 0.86207 |  0:00:01s
epoch 13 | loss: 0.44511 | val_0_accuracy: 0.82759 |  0:00:02s
epoch 14 | loss: 0.45822 | val_0_accuracy: 0.7931  |  0:00:02s
epoch 15 | loss: 0.36911 | val_0_accuracy: 0.75862 |  0

[I 2025-05-09 05:07:51,243] Trial 17 finished with value: 0.8620689655172413 and parameters: {'n_d': 8, 'n_a': 8, 'n_steps': 4, 'gamma': 1.1933023017810964, 'lambda_sparse': 3.1967539389922275e-05, 'lr': 0.00855685311818897}. Best is trial 2 with value: 0.9655172413793104.


epoch 0  | loss: 0.86521 | val_0_accuracy: 0.82759 |  0:00:00s
epoch 1  | loss: 0.52983 | val_0_accuracy: 0.72414 |  0:00:00s
epoch 2  | loss: 0.46232 | val_0_accuracy: 0.7931  |  0:00:00s
epoch 3  | loss: 0.40146 | val_0_accuracy: 0.7931  |  0:00:00s
epoch 4  | loss: 0.47966 | val_0_accuracy: 0.68966 |  0:00:00s
epoch 5  | loss: 0.33989 | val_0_accuracy: 0.68966 |  0:00:00s
epoch 6  | loss: 0.3594  | val_0_accuracy: 0.75862 |  0:00:00s
epoch 7  | loss: 0.35677 | val_0_accuracy: 0.68966 |  0:00:00s
epoch 8  | loss: 0.32838 | val_0_accuracy: 0.68966 |  0:00:01s
epoch 9  | loss: 0.37127 | val_0_accuracy: 0.68966 |  0:00:01s


[I 2025-05-09 05:07:52,715] Trial 18 finished with value: 0.8275862068965517 and parameters: {'n_d': 32, 'n_a': 32, 'n_steps': 3, 'gamma': 1.6684548778913892, 'lambda_sparse': 0.0005023739401706747, 'lr': 0.02187930236773479}. Best is trial 2 with value: 0.9655172413793104.


epoch 10 | loss: 0.43816 | val_0_accuracy: 0.75862 |  0:00:01s

Early stopping occurred at epoch 10 with best_epoch = 0 and best_val_0_accuracy = 0.82759




epoch 0  | loss: 0.63127 | val_0_accuracy: 0.7931  |  0:00:00s
epoch 1  | loss: 0.74196 | val_0_accuracy: 0.68966 |  0:00:00s
epoch 2  | loss: 0.48808 | val_0_accuracy: 0.72414 |  0:00:00s
epoch 3  | loss: 0.49748 | val_0_accuracy: 0.75862 |  0:00:00s
epoch 4  | loss: 0.43976 | val_0_accuracy: 0.68966 |  0:00:00s
epoch 5  | loss: 0.39148 | val_0_accuracy: 0.72414 |  0:00:00s
epoch 6  | loss: 0.42729 | val_0_accuracy: 0.7931  |  0:00:00s
epoch 7  | loss: 0.43495 | val_0_accuracy: 0.82759 |  0:00:01s
epoch 8  | loss: 0.36267 | val_0_accuracy: 0.75862 |  0:00:01s
epoch 9  | loss: 0.3925  | val_0_accuracy: 0.82759 |  0:00:01s
epoch 10 | loss: 0.3367  | val_0_accuracy: 0.75862 |  0:00:01s
epoch 11 | loss: 0.35376 | val_0_accuracy: 0.68966 |  0:00:01s
epoch 12 | loss: 0.39369 | val_0_accuracy: 0.7931  |  0:00:01s
epoch 13 | loss: 0.3432  | val_0_accuracy: 0.68966 |  0:00:01s
epoch 14 | loss: 0.32886 | val_0_accuracy: 0.75862 |  0:00:01s
epoch 15 | loss: 0.33595 | val_0_accuracy: 0.75862 |  0

[I 2025-05-09 05:07:57,328] Trial 19 finished with value: 0.9655172413793104 and parameters: {'n_d': 16, 'n_a': 16, 'n_steps': 3, 'gamma': 1.3358767750235476, 'lambda_sparse': 0.00012079651778824189, 'lr': 0.05879131688961723}. Best is trial 2 with value: 0.9655172413793104.


epoch 35 | loss: 0.27276 | val_0_accuracy: 0.96552 |  0:00:04s

Early stopping occurred at epoch 35 with best_epoch = 25 and best_val_0_accuracy = 0.96552
Best trial: {'n_d': 8, 'n_a': 8, 'n_steps': 3, 'gamma': 1.9168452073142168, 'lambda_sparse': 1.3648047351037876e-05, 'lr': 0.06466701589324549}
