In [1]:
from toolbox import *
import warnings
import argparse
import random
import os
import pandas as pd
import numpy as np
import cv2
import librosa
import time
import re
from timeout_decorator import timeout
import json

import matplotlib.pyplot as plt
import numpy as np
from ConfigSpace import (
    Categorical,
    Configuration,
    ConfigurationSpace,
    EqualsCondition,
    Float,
    InCondition,
    Integer,
)

import torchvision.models as models
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from sklearn.datasets import load_digits
from sklearn.model_selection import StratifiedKFold
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split,GridSearchCV,cross_val_score
from sklearn.metrics import cohen_kappa_score, accuracy_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder, LabelEncoder
import itertools
import xgboost as xgb

from sklearn.preprocessing import scale
from sklearn.base import BaseEstimator
from sklearn.preprocessing import StandardScaler
from skopt.callbacks import DeadlineStopper

from smac import MultiFidelityFacade as MFFacade
from smac import Scenario
from smac.facade import AbstractFacade
from smac.intensifier.hyperband import Hyperband
from smac.intensifier.successive_halving import SuccessiveHalving

import torch
import torch.nn as nn
import torchvision.models as models
import torch.nn.functional as F
from torch.utils.data import Dataset
import torch.optim as optim
import torchaudio
import torchaudio.transforms as trans
import re

from line_profiler import LineProfiler
from pathlib import Path


warnings.filterwarnings("ignore", category=RuntimeWarning)

# Load data

In [2]:
nodes_combination = [20, 100, 180, 260, 340, 400]
dataset_indices_max = 7
max_shape_to_run = 10000
alpha_range_nn = [0.001, 0.01, 0.1]
subsample = [0.5, 0.8, 1.0]

In [3]:
dataset_indices = list(range(dataset_indices_max))
dict_data_indices = {dataset_ind: {} for dataset_ind in dataset_indices}
encode_cnt = 0

X_data_list = []
y_data_list = []
dataset_names = []
def import_datasets():
    SUITE_ID = [334]
    for i in SUITE_ID:
        benchmark_suite = openml.study.get_suite(i)
        for task_id in benchmark_suite.tasks:  # iterate over all tasks
            task = openml.tasks.get_task(task_id)  # download the OpenML task
            dataset = task.get_dataset()
            X, y, categorical_indicator, attribute_names = dataset.get_data(
                dataset_format="dataframe", target=dataset.default_target_attribute
            )   

            # ### Covert labels to numerical values
            # le = LabelEncoder()
            # y_encoded = le.fit_transform(y)
            # y = pd.DataFrame(y_encoded)

            X_data_list.append(X)
            y_data_list.append(y)
            dataset_names.append(dataset.name)

            # print(" ")
            # print(" SUITE_ID:", i)
            # print("X_data_list length:", len(X_data_list))
            # print(" ")
    
import_datasets()

train_x_list = X_data_list.copy()
train_y_list = y_data_list.copy()

for dataset_index, dataset in enumerate(dataset_indices):
    print("\n\nCurrent Dataset: ", dataset)

    X = X_data_list[dataset]
    y = y_data_list[dataset]

    if X.shape[0] > max_shape_to_run:
        X, y = sample_large_datasets(X, y)
    
    np.random.seed(dataset_index)
    dict_data_indices = find_indices_train_val_test(
        X.shape[0], dict_data_indices=dict_data_indices, dataset_ind=dataset_index
    )
    train_indices = dict_data_indices[dataset_index]["train"]
    val_indices = dict_data_indices[dataset_index]["val"]

    ### Covert labels to numerical values
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)
    # y = pd.DataFrame(y_encoded)
    y = y_encoded

    if isinstance(X, np.ndarray):
        X = pd.DataFrame(X)
    # print(X.dtypes)

    ### Convert categories features to numerical features
    print("X shape: ", X.shape)
    categorical_columns = X.select_dtypes(include=['object']).columns
    numeric_columns = X.select_dtypes(include=['number']).columns

    encoder = OneHotEncoder(sparse_output=False)
    if len(categorical_columns) > 0:
        X_encoded_strings = encoder.fit_transform(X[categorical_columns])

        X = np.hstack((X[numeric_columns].values, X_encoded_strings))
        print("Encoded", len(categorical_columns), " columns")
        encode_cnt += 1
    else:
        print("No string columns to encode")
    
    print("X_encoded shape: ", X.shape)
    scaler = StandardScaler()
    scaler.fit(X)
    X = scaler.transform(X)
    # X = pd.DataFrame(X)
    train_x_list[dataset] = X
    train_y_list[dataset] = y
    print("X scalered")




Current Dataset:  0
X shape:  (10000, 8)
No string columns to encode
X_encoded shape:  (10000, 8)
X scalered


Current Dataset:  1
X shape:  (7608, 23)
No string columns to encode
X_encoded shape:  (7608, 23)
X scalered


Current Dataset:  2
X shape:  (10000, 54)
No string columns to encode
X_encoded shape:  (10000, 54)
X scalered


Current Dataset:  3
X shape:  (10000, 31)
No string columns to encode
X_encoded shape:  (10000, 31)
X scalered


Current Dataset:  4
X shape:  (10000, 21)
No string columns to encode
X_encoded shape:  (10000, 21)
X scalered


Current Dataset:  5
X shape:  (10000, 32)
No string columns to encode
X_encoded shape:  (10000, 32)
X scalered


Current Dataset:  6
X shape:  (4966, 11)
No string columns to encode
X_encoded shape:  (4966, 11)
X scalered


In [4]:
train_x = train_x_list[0]
train_y = train_y_list[0]
num_class = len(np.unique(train_y))
print(np.unique(train_y))
# print(train_x.shape)
# print(train_y.shape)
# print(train_y.flatten().shape)

# categorical_columns = train_x.select_dtypes(include=['category']).columns

# if not categorical_columns.empty:
#     print("There are categorical columns:", categorical_columns)
# else:
#     print("No categorical columns found.")

# print(train_x.head(5))
# print(train_x['day'].cat.categories.is_numeric())

[0 1]


# Models

In [5]:
RF = RandomForestClassifier(n_estimators=100, random_state=317)
XGBT = xgb.XGBClassifier(n_estimators=100, random_state=317)
TabNet = TabNetClassifier(n_d=64, n_a=64, n_steps=5, gamma=1.3, n_independent=2, n_shared=2, seed=317, optimizer_fn=torch.optim.Adam, optimizer_params=dict(lr=1e-2), scheduler_params={"step_size":50, "gamma":0.9}, scheduler_fn=torch.optim.lr_scheduler.StepLR, mask_type='entmax', verbose=0)

In [6]:
class XGBTWrapper(BaseEstimator):
    def __init__(self, n_estimators=100, max_depth=2, seed= 317, min_child_weight=1, gamma=0.1, subsample=0.8, colsample_bytree=0.5, learning_rate=0.1, objective='binary:logistic', colsample_bylevel=0.5, colsample_bynode=0.5):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.seed= seed
        self.min_child_weight = min_child_weight
        self.gamma = gamma
        self.subsample = subsample
        self.colsample_bytree = colsample_bytree
        self.colsample_bylevel = colsample_bylevel
        self.colsample_bynode = colsample_bynode
        self.learning_rate = learning_rate
        self.objective = objective
        # self.num_class = num_class
        self.model = xgb.XGBClassifier(n_estimators=self.n_estimators, max_depth=self.max_depth, seed=self.seed, min_child_weight=self.min_child_weight, gamma=self.gamma, subsample=self.subsample, colsample_bytree=self.colsample_bytree, colsample_bylevel=self.colsample_bylevel, colsample_bynode=self.colsample_bynode ,learning_rate=self.learning_rate, objective=self.objective)

    @property
    def configspace(self) -> ConfigurationSpace:
        cs = ConfigurationSpace()
        n_estimators = Integer("n_estimators", (100, 1200), default=100)
        max_depth = Integer("max_depth", (2, 21), default=2)
        min_child_weight = Integer("min_child_weight", (1, 10), default=1)
        gamma = Float("gamma", (0.1, 1.0), default=0.1)
        subsample = Float("subsample", (0.5, 1.0), default=0.8)
        colsample_bytree = Float("colsample_bytree", (0.3, 1.0), default=0.6)
        colsample_bylevel = Float("colsample_bylevel", (0.3, 1.0), default=0.6)
        colsample_bynode = Float("colsample_bynode", (0.3, 1.0), default=0.6)
        learning_rate = Float("learning_rate", (0.001, 0.3), default=0.1)
        cs.add_hyperparameters([n_estimators, max_depth, min_child_weight, gamma, subsample, colsample_bytree, colsample_bylevel, colsample_bynode, learning_rate])
        return cs
    
    def fit(self, config: Configuration, seed: int = 0, budget: int = 250) -> float: 
        config = dict(config)  
        self.model.set_params(**config)
        X = train_x
        y = train_y
        # print("X shape: ", X.shape)
        # print("y shape: ", y.shape)
        self.model.fit(X, y)
        preds = self.model.predict(X)
        scores = accuracy_score(y, preds)
        
        return 1 - scores
    

In [90]:
@timeout(900)
def main():
    GBT = XGBTWrapper()

    facades: list[AbstractFacade] = []
    for intensifier_object in [Hyperband]:

        scenario = Scenario(
            GBT.configspace,
            walltime_limit=600,
            output_directory=Path("smac_hyperband_output_budget_10mins_XGBT"),
            n_trials=10000,
            min_budget=100,
            max_budget=1000,
            n_workers=8,

        )

        initial_design = MFFacade.get_initial_design(scenario, n_configs=5)
        intensifier = intensifier_object(scenario, incumbent_selection="highest_budget")

        smac = MFFacade(
            scenario,
            GBT.fit,
            initial_design=initial_design,
            intensifier=intensifier,
            overwrite=True,
        )

        print("optimiizing")
        print(type(smac), "|", smac)
        incumbent = smac.optimize()
        print("incumbent:", incumbent)
        default_cost = smac.validate(GBT.configspace.get_default_configuration())
        print(f"Default cost ({intensifier.__class__.__name__}): {default_cost}")
        incumbent_cost = smac.validate(incumbent)
        print(f"Incumbent cost ({intensifier.__class__.__name__}): {incumbent_cost}")

        facades.append(smac)
        for arrt in dir(smac):
            if not arrt.startswith("_"):
                print(arrt, getattr(smac, arrt))

    print("facades:", facades)



if __name__ == "__main__":
    # with open('smac_results_2h.txt', "w") as f:
    #     pass
    profiler = LineProfiler()
    profiler.add_function(main)
    profiler.enable()

    main()

    profiler.disable()
    profiler.print_stats()

[INFO][abstract_initial_design.py:82] Using `n_configs` and ignoring `n_configs_per_hyperparameter`.
epoch 91 | loss: 0.38851 | val_0_auc: 0.87132 |  0:36:22s
[INFO][abstract_initial_design.py:147] Using 5 initial design configurations and 0 additional configurations.
optimiizing
<class 'smac.facade.multi_fidelity_facade.MultiFidelityFacade'> | <smac.facade.multi_fidelity_facade.MultiFidelityFacade object at 0x7fe0ee789eb0>
[INFO][successive_halving.py:164] Successive Halving uses budget type BUDGETS with eta 3, min budget 100, and max budget 1000.
[INFO][successive_halving.py:323] Number of configs in stage:
[INFO][successive_halving.py:325] --- Bracket 0: [9, 3, 1]
[INFO][successive_halving.py:325] --- Bracket 1: [5, 1]
[INFO][successive_halving.py:325] --- Bracket 2: [3]
[INFO][successive_halving.py:327] Budgets in stage:
[INFO][successive_halving.py:329] --- Bracket 0: [111.1111111111111, 333.3333333333333, 1000.0]
[INFO][successive_halving.py:329] --- Bracket 1: [333.3333333333333



epoch 99 | loss: 0.37577 | val_0_auc: 0.87015 |  0:38:52s
Stop training because you reached max_epochs = 100 with best_epoch = 90 and best_val_0_auc = 0.87289
epoch 72 | loss: 0.40954 | val_0_auc: 0.8787  |  0:12:51s
epoch 30 | loss: 0.44854 | val_0_auc: 0.86423 |  0:10:23s
epoch 98 | loss: 0.38909 | val_0_auc: 0.87462 |  0:39:05s
epoch 73 | loss: 0.41608 | val_0_auc: 0.87425 |  0:13:01s
epoch 74 | loss: 0.4123  | val_0_auc: 0.87552 |  0:13:12s
epoch 31 | loss: 0.44613 | val_0_auc: 0.86355 |  0:10:42s
epoch 99 | loss: 0.38644 | val_0_auc: 0.87838 |  0:39:28s
Stop training because you reached max_epochs = 100 with best_epoch = 82 and best_val_0_auc = 0.8867




epoch 75 | loss: 0.41274 | val_0_auc: 0.86772 |  0:13:22s
epoch 76 | loss: 0.41153 | val_0_auc: 0.87715 |  0:13:33s
epoch 32 | loss: 0.44449 | val_0_auc: 0.85953 |  0:11:02s
epoch 77 | loss: 0.41436 | val_0_auc: 0.87629 |  0:13:43s
epoch 78 | loss: 0.41906 | val_0_auc: 0.87544 |  0:13:53s
epoch 33 | loss: 0.43989 | val_0_auc: 0.86762 |  0:11:22s
[INFO][smbo.py:319] Finished 250 trials.
[INFO][smbo.py:319] Finished 250 trials.
[INFO][smbo.py:319] Finished 250 trials.
[INFO][smbo.py:319] Finished 250 trials.
epoch 79 | loss: 0.40211 | val_0_auc: 0.87451 |  0:14:04s
epoch 34 | loss: 0.44505 | val_0_auc: 0.8697  |  0:11:42s
epoch 80 | loss: 0.4173  | val_0_auc: 0.87874 |  0:14:14s
epoch 81 | loss: 0.41133 | val_0_auc: 0.8805  |  0:14:25s
epoch 35 | loss: 0.44057 | val_0_auc: 0.8607  |  0:12:02s
epoch 82 | loss: 0.40873 | val_0_auc: 0.87674 |  0:14:36s
epoch 83 | loss: 0.4031  | val_0_auc: 0.87859 |  0:14:46s
epoch 36 | loss: 0.4414  | val_0_auc: 0.86087 |  0:12:22s
[INFO][smbo.py:319] Fini



epoch 45 | loss: 0.44172 | val_0_auc: 0.86165 |  0:15:20s
[INFO][smbo.py:319] Finished 450 trials.
epoch 46 | loss: 0.44357 | val_0_auc: 0.86777 |  0:15:41s
epoch 47 | loss: 0.43356 | val_0_auc: 0.86652 |  0:16:01s
epoch 48 | loss: 0.4349  | val_0_auc: 0.86701 |  0:16:21s
epoch 49 | loss: 0.43213 | val_0_auc: 0.86612 |  0:16:41s
epoch 50 | loss: 0.43017 | val_0_auc: 0.86201 |  0:17:01s
epoch 51 | loss: 0.42606 | val_0_auc: 0.87211 |  0:17:21s
[INFO][smbo.py:319] Finished 550 trials.
[INFO][smbo.py:327] Configuration budget is exhausted:
[INFO][smbo.py:328] --- Remaining wallclock time: -0.4297318458557129
[INFO][smbo.py:329] --- Remaining cpu time: inf
[INFO][smbo.py:330] --- Remaining trials: 9439
epoch 52 | loss: 0.41854 | val_0_auc: 0.87367 |  0:17:42s
incumbent: Configuration(values={
  'colsample_bylevel': 0.9932103869366515,
  'colsample_bynode': 0.6917578508788402,
  'colsample_bytree': 0.951941640794665,
  'gamma': 0.11292898427928434,
  'learning_rate': 0.29590233778911207,
  

In [8]:
params_dict_XGBT = {}
for i in range(len(train_x_list)):
    train_x = train_x_list[i]
    train_y = train_y_list[i]
    
    class XGBTWrapper(BaseEstimator):
        def __init__(self, n_estimators=100, max_depth=2, seed= 317, min_child_weight=1, gamma=0.1, subsample=0.8, colsample_bytree=0.5, learning_rate=0.1, objective='binary:logistic', colsample_bylevel=0.5, colsample_bynode=0.5):
            self.n_estimators = n_estimators
            self.max_depth = max_depth
            self.seed= seed
            self.min_child_weight = min_child_weight
            self.gamma = gamma
            self.subsample = subsample
            self.colsample_bytree = colsample_bytree
            self.colsample_bylevel = colsample_bylevel
            self.colsample_bynode = colsample_bynode
            self.learning_rate = learning_rate
            self.objective = objective
            # self.num_class = num_class
            self.model = xgb.XGBClassifier(n_estimators=self.n_estimators, max_depth=self.max_depth, seed=self.seed, min_child_weight=self.min_child_weight, gamma=self.gamma, subsample=self.subsample, colsample_bytree=self.colsample_bytree, colsample_bylevel=self.colsample_bylevel, colsample_bynode=self.colsample_bynode ,learning_rate=self.learning_rate, objective=self.objective)

        @property
        def configspace(self) -> ConfigurationSpace:
            cs = ConfigurationSpace()
            n_estimators = Integer("n_estimators", (100, 1200), default=100)
            max_depth = Integer("max_depth", (2, 21), default=2)
            min_child_weight = Integer("min_child_weight", (1, 10), default=1)
            gamma = Float("gamma", (0.1, 1.0), default=0.1)
            subsample = Float("subsample", (0.5, 1.0), default=0.8)
            colsample_bytree = Float("colsample_bytree", (0.3, 1.0), default=0.6)
            colsample_bylevel = Float("colsample_bylevel", (0.3, 1.0), default=0.6)
            colsample_bynode = Float("colsample_bynode", (0.3, 1.0), default=0.6)
            learning_rate = Float("learning_rate", (0.001, 0.3), default=0.1)
            cs.add_hyperparameters([n_estimators, max_depth, min_child_weight, gamma, subsample, colsample_bytree, colsample_bylevel, colsample_bynode, learning_rate])
            return cs
        
        def fit(self, config: Configuration, seed: int = 0, budget: int = 250) -> float: 
            config = dict(config)  
            self.model.set_params(**config)
            X = train_x
            y = train_y
            # print("X shape: ", X.shape)
            # print("y shape: ", y.shape)
            self.model.fit(X, y)
            preds = self.model.predict(X)
            scores = accuracy_score(y, preds)
            
            return 1 - scores

    # @timeout(90)
    def main():
        GBT = XGBTWrapper()

        facades: list[AbstractFacade] = []
        for intensifier_object in [Hyperband]:

            scenario = Scenario(
                GBT.configspace,
                walltime_limit=60,
                output_directory=Path("smac_hyperband_output_budget_1mins_XGBT/" + dataset_names[i]),
                n_trials=10000,
                min_budget=100,
                max_budget=1000,
                n_workers=8,

            )

            initial_design = MFFacade.get_initial_design(scenario, n_configs=5)
            intensifier = intensifier_object(scenario, incumbent_selection="highest_budget")

            smac = MFFacade(
                scenario,
                GBT.fit,
                initial_design=initial_design,
                intensifier=intensifier,
                overwrite=True,
            )

            print("optimiizing")
            # print(type(smac), "|", smac)
            incumbent = smac.optimize()
            # print("incumbent:", incumbent)

            # Store the best configuration as dictionary
            best_params = incumbent.get_dictionary()
            params_dict_XGBT[dataset_names[i]] = best_params

            run_history = smac.get_runhistory()
            incumbent_cost = incumbent_run.cost
            incumbent_run_id = incumbent_run.config_id
            print(f"Cost: {incumbent_cost} | Config ID: {incumbent_run_id}")

            default_cost = smac.validate(GBT.configspace.get_default_configuration())
            # print(f"Default cost ({intensifier.__class__.__name__}): {default_cost}")
            incumbent_cost = smac.validate(incumbent)
            # print(f"Incumbent cost ({intensifier.__class__.__name__}): {incumbent_cost}")

            facades.append(smac)
        #     for arrt in dir(smac):
        #         if not arrt.startswith("_"):
        #             print(arrt, getattr(smac, arrt))

        # print("facades:", facades)



    if __name__ == "__main__":
        # with open('smac_results_2h.txt', "w") as f:
        #     pass
        # profiler = LineProfiler()
        # profiler.add_function(main)
        # profiler.enable()

        main()

        # profiler.disable()
        # profiler.print_stats()
    

[INFO][abstract_initial_design.py:147] Using 5 initial design configurations and 0 additional configurations.
optimiizing
<class 'smac.facade.multi_fidelity_facade.MultiFidelityFacade'> | <smac.facade.multi_fidelity_facade.MultiFidelityFacade object at 0x7fac99056a90>
[INFO][successive_halving.py:164] Successive Halving uses budget type BUDGETS with eta 3, min budget 100, and max budget 1000.
[INFO][successive_halving.py:323] Number of configs in stage:
[INFO][successive_halving.py:325] --- Bracket 0: [9, 3, 1]
[INFO][successive_halving.py:325] --- Bracket 1: [5, 1]
[INFO][successive_halving.py:325] --- Bracket 2: [3]
[INFO][successive_halving.py:327] Budgets in stage:
[INFO][successive_halving.py:329] --- Bracket 0: [111.1111111111111, 333.3333333333333, 1000.0]
[INFO][successive_halving.py:329] --- Bracket 1: [333.3333333333333, 1000.0]
[INFO][successive_halving.py:329] --- Bracket 2: [1000.0]
[INFO][smbo.py:319] Finished 0 trials.
[INFO][smbo.py:319] Finished 0 trials.
[INFO][smbo.p

2024-08-12 18:09:41,401 - distributed.worker - ERROR - Failed to communicate with scheduler during heartbeat.
Traceback (most recent call last):
  File "/home/ziyan/miniconda3/envs/NeuroData/lib/python3.9/site-packages/distributed/comm/tcp.py", line 225, in read
    frames_nosplit_nbytes_bin = await stream.read_bytes(fmt_size)
tornado.iostream.StreamClosedError: Stream is closed

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/home/ziyan/miniconda3/envs/NeuroData/lib/python3.9/site-packages/distributed/worker.py", line 1252, in heartbeat
    response = await retry_operation(
  File "/home/ziyan/miniconda3/envs/NeuroData/lib/python3.9/site-packages/distributed/utils_comm.py", line 452, in retry_operation
    return await retry(
  File "/home/ziyan/miniconda3/envs/NeuroData/lib/python3.9/site-packages/distributed/utils_comm.py", line 431, in retry
    return await coro()
  File "/home/ziyan/miniconda3/envs/NeuroData/lib/py

[INFO][abstract_intensifier.py:515] Added config ed1799 as new incumbent because there are no incumbents yet.
[INFO][abstract_intensifier.py:594] Added config af1315 and rejected config ed1799 as incumbent because it is not better than the incumbents on 1 instances:
[INFO][smbo.py:319] Finished 50 trials.
[INFO][abstract_intensifier.py:594] Added config 68c154 and rejected config af1315 as incumbent because it is not better than the incumbents on 1 instances:
[INFO][abstract_intensifier.py:594] Added config c59acd and rejected config 68c154 as incumbent because it is not better than the incumbents on 1 instances:
[INFO][abstract_intensifier.py:594] Added config dfeeb1 and rejected config c59acd as incumbent because it is not better than the incumbents on 1 instances:
[INFO][smbo.py:327] Configuration budget is exhausted:
[INFO][smbo.py:328] --- Remaining wallclock time: -0.9683279991149902
[INFO][smbo.py:329] --- Remaining cpu time: inf
[INFO][smbo.py:330] --- Remaining trials: 9869


In [14]:
# for dataset, params in params_dict_XGBT.items():
#         run_history = smac.get_runhistory()
#         config_id = run_history.get_id_for_config(smac.solver.incumbent)
#         cost = run_history.get_cost(smac.solver.incumbent)
        
#         print(f"Dataset: {dataset}")
#         print(f"Best Parameters: {params}")
#         print(f"Config ID: {config_id}")
#         print(f"Cost: {cost}")
for dataset, params in params_dict_XGBT.items():
    print(f"Dataset: {dataset}, Best Parameters: {params}")
    # print(f"Best Parameters: {params}")

Dataset: electricity, Best Parameters: {'colsample_bylevel': 0.6841694527491273, 'colsample_bynode': 0.7521258791466592, 'colsample_bytree': 0.8542075266578653, 'gamma': 0.17841636973138664, 'learning_rate': 0.2936068843275964, 'max_depth': 14, 'min_child_weight': 3, 'n_estimators': 780, 'subsample': 0.679753950286893}
Dataset: eye_movements, Best Parameters: {'colsample_bylevel': 0.6841694527491273, 'colsample_bynode': 0.8150860310502579, 'colsample_bytree': 0.8655054784519463, 'gamma': 0.2573783935536148, 'learning_rate': 0.28815400057691976, 'max_depth': 13, 'min_child_weight': 3, 'n_estimators': 780, 'subsample': 0.679753950286893}
Dataset: covertype, Best Parameters: {'colsample_bylevel': 0.6841694527491273, 'colsample_bynode': 0.8150860310502579, 'colsample_bytree': 0.8655054784519463, 'gamma': 0.2573783935536148, 'learning_rate': 0.28815400057691976, 'max_depth': 13, 'min_child_weight': 3, 'n_estimators': 780, 'subsample': 0.679753950286893}
Dataset: albert, Best Parameters: {'c

# RF

In [92]:
class RFWrapper(BaseEstimator):
    def __init__(self, n_estimators=100, max_depth=2, random_state=317, min_samples_split=2, min_samples_leaf=1, max_features=None, criterion="gini", max_samples = 0.5):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.random_state = random_state
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.max_features = max_features   
        self.criterion = "gini"
        self.max_samples = max_samples
        self.model = RandomForestClassifier(n_estimators=self.n_estimators, max_depth=self.max_depth, random_state=self.random_state, min_samples_split=self.min_samples_split, min_samples_leaf=self.min_samples_leaf, criterion=self.criterion, max_features=self.max_features, max_samples=self.max_samples)

    @property
    def configspace(self) -> ConfigurationSpace:
        cs = ConfigurationSpace()
        n_estimators = Integer("n_estimators", (100, 1200), default=100)
        max_depth = Integer("max_depth", (2,21), default=2)
        min_samples_split = Integer("min_samples_split", (2, 20), default=2)
        min_samples_leaf = Integer("min_samples_leaf", (1, 20), default=1)
        criterion = Categorical("criterion", ["gini", "entropy", "log_loss"], default="gini")
        max_features = Categorical("max_features", ["sqrt", "log2", "None"], default="None")
        max_samples = Float("max_samples", (0.1, 0.99), log=True)
        cs.add_hyperparameters([n_estimators, max_depth, min_samples_split, min_samples_leaf, criterion, max_features, max_samples])
        return cs
    
    def fit(self, config: Configuration, seed: int = 0, budget: int = 250) -> float: 
        config = dict(config)  
        if config['max_features'] == 'None':
            config['max_features'] = None
        self.model.set_params(**config)
        X = train_x
        y = train_y
        self.model.fit(X, y)
        preds = self.model.predict(X)
        scores = accuracy_score(y, preds)

        return 1 - scores

In [93]:
@timeout(900)
def main():
    RF = RFWrapper()

    facades: list[AbstractFacade] = []
    for intensifier_object in [Hyperband]:

        scenario = Scenario(
            RF.configspace,
            walltime_limit=600,
            output_directory=Path("smac_hyperband_output_budget_10mins_RF"),
            n_trials=10000,
            min_budget=100,
            max_budget=1000,
            n_workers=8,

        )

        initial_design = MFFacade.get_initial_design(scenario, n_configs=5)
        intensifier = intensifier_object(scenario, incumbent_selection="highest_budget")

        smac = MFFacade(
            scenario,
            RF.fit,
            initial_design=initial_design,
            intensifier=intensifier,
            overwrite=True,
        )

        print("optimiizing")
        print(type(smac), "|", smac)
        incumbent = smac.optimize()
        print("incumbent:", incumbent)
        default_cost = smac.validate(RF.configspace.get_default_configuration())
        print(f"Default cost ({intensifier.__class__.__name__}): {default_cost}")
        incumbent_cost = smac.validate(incumbent)
        print(f"Incumbent cost ({intensifier.__class__.__name__}): {incumbent_cost}")

        facades.append(smac)
        for arrt in dir(smac):
            if not arrt.startswith("_"):
                print(arrt, getattr(smac, arrt))

    print("facades:", facades)



if __name__ == "__main__":
    # with open('smac_results_2h.txt', "w") as f:
    #     pass
    profiler = LineProfiler()
    profiler.add_function(main)
    profiler.enable()

    main()

    profiler.disable()
    profiler.print_stats()

    

[INFO][abstract_initial_design.py:82] Using `n_configs` and ignoring `n_configs_per_hyperparameter`.
[INFO][abstract_initial_design.py:147] Using 5 initial design configurations and 0 additional configurations.
optimiizing
<class 'smac.facade.multi_fidelity_facade.MultiFidelityFacade'> | <smac.facade.multi_fidelity_facade.MultiFidelityFacade object at 0x7fde0fc88880>
[INFO][successive_halving.py:164] Successive Halving uses budget type BUDGETS with eta 3, min budget 100, and max budget 1000.
[INFO][successive_halving.py:323] Number of configs in stage:
[INFO][successive_halving.py:325] --- Bracket 0: [9, 3, 1]
[INFO][successive_halving.py:325] --- Bracket 1: [5, 1]
[INFO][successive_halving.py:325] --- Bracket 2: [3]
[INFO][successive_halving.py:327] Budgets in stage:
[INFO][successive_halving.py:329] --- Bracket 0: [111.1111111111111, 333.3333333333333, 1000.0]
[INFO][successive_halving.py:329] --- Bracket 1: [333.3333333333333, 1000.0]
[INFO][successive_halving.py:329] --- Bracket 2:



incumbent: Configuration(values={
  'criterion': 'gini',
  'max_depth': 20,
  'max_features': 'sqrt',
  'max_samples': 0.9064575310082272,
  'min_samples_leaf': 1,
  'min_samples_split': 2,
  'n_estimators': 224,
})
Default cost (Hyperband): 0.2724
Incumbent cost (Hyperband): 0.0
ask <bound method AbstractFacade.ask of <smac.facade.multi_fidelity_facade.MultiFidelityFacade object at 0x7fde0fc88880>>
get_acquisition_function <function HyperparameterOptimizationFacade.get_acquisition_function at 0x7fe1b4961ca0>
get_acquisition_maximizer <function HyperparameterOptimizationFacade.get_acquisition_maximizer at 0x7fe1b4961d30>
get_config_selector <function AbstractFacade.get_config_selector at 0x7fe1b49ff5e0>
get_initial_design <function MultiFidelityFacade.get_initial_design at 0x7fe1b496a280>
get_intensifier <function MultiFidelityFacade.get_intensifier at 0x7fe1b496a1f0>
get_model <function HyperparameterOptimizationFacade.get_model at 0x7fe1b4961c10>
get_multi_objective_algorithm <functi

In [20]:
params_dict_RF = {}
for i in range(len(train_x_list)):
    train_x = train_x_list[i]
    train_y = train_y_list[i]
    class RFWrapper(BaseEstimator):
        def __init__(self, n_estimators=100, max_depth=2, random_state=317, min_samples_split=2, min_samples_leaf=1, max_features=None, criterion="gini", max_samples = 0.5):
            self.n_estimators = n_estimators
            self.max_depth = max_depth
            self.random_state = random_state
            self.min_samples_split = min_samples_split
            self.min_samples_leaf = min_samples_leaf
            self.max_features = max_features   
            self.criterion = "gini"
            self.max_samples = max_samples
            self.model = RandomForestClassifier(n_estimators=self.n_estimators, max_depth=self.max_depth, random_state=self.random_state, min_samples_split=self.min_samples_split, min_samples_leaf=self.min_samples_leaf, criterion=self.criterion, max_features=self.max_features, max_samples=self.max_samples)

        @property
        def configspace(self) -> ConfigurationSpace:
            cs = ConfigurationSpace()
            n_estimators = Integer("n_estimators", (100, 1200), default=100)
            max_depth = Integer("max_depth", (2,21), default=2)
            min_samples_split = Integer("min_samples_split", (2, 20), default=2)
            min_samples_leaf = Integer("min_samples_leaf", (1, 20), default=1)
            criterion = Categorical("criterion", ["gini", "entropy", "log_loss"], default="gini")
            max_features = Categorical("max_features", ["sqrt", "log2", "None"], default="None")
            max_samples = Float("max_samples", (0.1, 0.99), log=True)
            cs.add_hyperparameters([n_estimators, max_depth, min_samples_split, min_samples_leaf, criterion, max_features, max_samples])
            return cs
        
        def fit(self, config: Configuration, seed: int = 0, budget: int = 250) -> float: 
            config = dict(config)  
            if config['max_features'] == 'None':
                config['max_features'] = None
            self.model.set_params(**config)
            X = train_x
            y = train_y
            self.model.fit(X, y)
            preds = self.model.predict(X)
            scores = accuracy_score(y, preds)

            return 1 - scores

    # @timeout(90)
    def main():
        RF = RFWrapper()

        facades: list[AbstractFacade] = []
        for intensifier_object in [Hyperband]:

            scenario = Scenario(
                RF.configspace,
                walltime_limit=60,
                output_directory=Path("smac_hyperband_output_budget_1mins_RF/" + dataset_names[i]),
                n_trials=10000,
                min_budget=100,
                max_budget=1000,
                n_workers=8,

            )

            initial_design = MFFacade.get_initial_design(scenario, n_configs=5)
            intensifier = intensifier_object(scenario, incumbent_selection="highest_budget")

            smac = MFFacade(
                scenario,
                RF.fit,
                initial_design=initial_design,
                intensifier=intensifier,
                overwrite=True,
            )

            print("optimiizing")
            # print(type(smac), "|", smac)
            incumbent = smac.optimize()
            best_params = incumbent.get_dictionary()
            params_dict_RF[dataset_names[i]] = best_params

            incumbent_cost = smac.runhistory.get_cost(incumbent)
            incumbent_run_id = incumbent.config_id

            print(f"Parameters: {best_params}")
            print(f"Cost: {incumbent_cost} | Config ID: {incumbent_run_id}")

            default_cost = smac.validate(RF.configspace.get_default_configuration())
            # print(f"Default cost ({intensifier.__class__.__name__}): {default_cost}")
            incumbent_cost = smac.validate(incumbent)
            # print(f"Incumbent cost ({intensifier.__class__.__name__}): {incumbent_cost}")

            facades.append(smac)
        #     for arrt in dir(smac):
        #         if not arrt.startswith("_"):
        #             print(arrt, getattr(smac, arrt))

        # print("facades:", facades)



    if __name__ == "__main__":
        # with open('smac_results_2h.txt', "w") as f:
        #     pass
        # profiler = LineProfiler()
        # profiler.add_function(main)
        # profiler.enable()

        main()

        # profiler.disable()
        # profiler.print_stats()

    
    

[INFO][abstract_initial_design.py:82] Using `n_configs` and ignoring `n_configs_per_hyperparameter`.
[INFO][abstract_initial_design.py:147] Using 5 initial design configurations and 0 additional configurations.
optimiizing
[INFO][successive_halving.py:164] Successive Halving uses budget type BUDGETS with eta 3, min budget 100, and max budget 1000.
[INFO][successive_halving.py:323] Number of configs in stage:
[INFO][successive_halving.py:325] --- Bracket 0: [9, 3, 1]
[INFO][successive_halving.py:325] --- Bracket 1: [5, 1]
[INFO][successive_halving.py:325] --- Bracket 2: [3]
[INFO][successive_halving.py:327] Budgets in stage:
[INFO][successive_halving.py:329] --- Bracket 0: [111.1111111111111, 333.3333333333333, 1000.0]
[INFO][successive_halving.py:329] --- Bracket 1: [333.3333333333333, 1000.0]
[INFO][successive_halving.py:329] --- Bracket 2: [1000.0]
[INFO][smbo.py:319] Finished 0 trials.
[INFO][smbo.py:319] Finished 0 trials.
[INFO][smbo.py:319] Finished 0 trials.
[INFO][smbo.py:319] 

2024-08-12 19:11:44,053 - distributed.worker - ERROR - Failed to communicate with scheduler during heartbeat.
Traceback (most recent call last):
  File "/home/ziyan/miniconda3/envs/NeuroData/lib/python3.9/site-packages/distributed/comm/tcp.py", line 225, in read
    frames_nosplit_nbytes_bin = await stream.read_bytes(fmt_size)
tornado.iostream.StreamClosedError: Stream is closed

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/home/ziyan/miniconda3/envs/NeuroData/lib/python3.9/site-packages/distributed/worker.py", line 1252, in heartbeat
    response = await retry_operation(
  File "/home/ziyan/miniconda3/envs/NeuroData/lib/python3.9/site-packages/distributed/utils_comm.py", line 452, in retry_operation
    return await retry(
  File "/home/ziyan/miniconda3/envs/NeuroData/lib/python3.9/site-packages/distributed/utils_comm.py", line 431, in retry
    return await coro()
  File "/home/ziyan/miniconda3/envs/NeuroData/lib/py

[INFO][smbo.py:327] Configuration budget is exhausted:
[INFO][smbo.py:328] --- Remaining wallclock time: -1.470794677734375
[INFO][smbo.py:329] --- Remaining cpu time: inf
[INFO][smbo.py:330] --- Remaining trials: 9967
Parameters: {'criterion': 'gini', 'max_depth': 15, 'max_features': 'log2', 'max_samples': 0.9510983452704872, 'min_samples_leaf': 5, 'min_samples_split': 20, 'n_estimators': 175}
Cost: 0.09550000000000003 | Config ID: 17
[INFO][abstract_initial_design.py:82] Using `n_configs` and ignoring `n_configs_per_hyperparameter`.
[INFO][abstract_initial_design.py:147] Using 5 initial design configurations and 0 additional configurations.
optimiizing
[INFO][successive_halving.py:164] Successive Halving uses budget type BUDGETS with eta 3, min budget 100, and max budget 1000.
[INFO][successive_halving.py:323] Number of configs in stage:
[INFO][successive_halving.py:325] --- Bracket 0: [9, 3, 1]
[INFO][successive_halving.py:325] --- Bracket 1: [5, 1]
[INFO][successive_halving.py:325]

2024-08-12 19:18:22,654 - distributed.utils_perf - INFO - full garbage collection released 37.85 MiB from 4858 reference cycles (threshold: 9.54 MiB)


[INFO][abstract_intensifier.py:515] Added config 20baa5 as new incumbent because there are no incumbents yet.
[INFO][abstract_intensifier.py:594] Added config e7fe46 and rejected config 20baa5 as incumbent because it is not better than the incumbents on 1 instances:
[INFO][smbo.py:327] Configuration budget is exhausted:
[INFO][smbo.py:328] --- Remaining wallclock time: -1.6591498851776123
[INFO][smbo.py:329] --- Remaining cpu time: inf
[INFO][smbo.py:330] --- Remaining trials: 9965
Parameters: {'criterion': 'gini', 'max_depth': 15, 'max_features': 'log2', 'max_samples': 0.9510983452704872, 'min_samples_leaf': 5, 'min_samples_split': 20, 'n_estimators': 175}
Cost: 0.1652 | Config ID: 17
[INFO][abstract_initial_design.py:82] Using `n_configs` and ignoring `n_configs_per_hyperparameter`.
[INFO][abstract_initial_design.py:147] Using 5 initial design configurations and 0 additional configurations.
optimiizing
[INFO][successive_halving.py:164] Successive Halving uses budget type BUDGETS with

In [21]:
for dataset_name, params in params_dict_RF.items():
    print(dataset_name, ":", params)

electricity : {'criterion': 'gini', 'max_depth': 15, 'max_features': 'log2', 'max_samples': 0.9510983452704872, 'min_samples_leaf': 5, 'min_samples_split': 20, 'n_estimators': 175}
eye_movements : {'criterion': 'gini', 'max_depth': 15, 'max_features': 'log2', 'max_samples': 0.9510983452704872, 'min_samples_leaf': 5, 'min_samples_split': 20, 'n_estimators': 175}
covertype : {'criterion': 'log_loss', 'max_depth': 13, 'max_features': 'None', 'max_samples': 0.275725107753872, 'min_samples_leaf': 11, 'min_samples_split': 20, 'n_estimators': 898}
albert : {'criterion': 'gini', 'max_depth': 15, 'max_features': 'log2', 'max_samples': 0.9510983452704872, 'min_samples_leaf': 5, 'min_samples_split': 20, 'n_estimators': 175}
default-of-credit-card-clients : {'criterion': 'gini', 'max_depth': 15, 'max_features': 'log2', 'max_samples': 0.9510983452704872, 'min_samples_leaf': 5, 'min_samples_split': 20, 'n_estimators': 175}
road-safety : {'criterion': 'gini', 'max_depth': 15, 'max_features': 'log2', 

# TabNet

In [94]:
class TabWrapper(BaseEstimator):
    def __init__(self, n_d=64, n_a=64, n_steps=5, gamma=1.3, n_independent=2, n_shared=2, seed=317, optimizer_fn=torch.optim.Adam, optimizer_params=dict(lr=1e-2), scheduler_params={"step_size":50, "gamma":0.9}, scheduler_fn=torch.optim.lr_scheduler.StepLR, mask_type='entmax', verbose=0):
        self.n_d = n_d
        self.n_a = n_a
        self.n_steps = n_steps
        self.gamma = gamma
        self.n_independent = n_independent
        self.n_shared = n_shared
        self.seed = seed
        self.optimizer_fn = optimizer_fn
        self.optimizer_params = optimizer_params
        self.scheduler_params = scheduler_params
        self.scheduler_fn = scheduler_fn
        self.mask_type = mask_type
        self.model = TabNetClassifier(n_d=self.n_d, n_a=self.n_a, n_steps=self.n_steps, gamma=self.gamma, n_independent=self.n_independent, n_shared=self.n_shared, seed=self.seed, optimizer_fn=self.optimizer_fn, optimizer_params=self.optimizer_params, scheduler_params=self.scheduler_params, scheduler_fn=self.scheduler_fn, mask_type=self.mask_type)

    @property
    def configspace(self) -> ConfigurationSpace:
        cs = ConfigurationSpace()
        n_d = Integer("n_d", (4, 256), default=64)
        n_a = Integer("n_a", (4, 256), default=64)
        # n_steps = Integer("n_steps", (3, 10), default=5)
        # gamma = Float("gamma", (0.9, 2.0), default=1.3)
        # n_independent = Integer("n_independent", (1, 10), default=2)
        # n_shared = Integer("n_shared", (1, 10), default=2)
        # seed = Integer("seed", (0, 1000), default=317)
        # optimizer_fn = Categorical("optimizer_fn", [torch.optim.Adam, torch.optim.AdamW], default=torch.optim.Adam)
        # scheduler_fn = Categorical("scheduler_fn", [torch.optim.lr_scheduler.StepLR, torch.optim.lr_scheduler.MultiStepLR], default=torch.optim.lr_scheduler.StepLR)
        # mask_type = Categorical("mask_type", ['sparsemax', 'entmax'], default='entmax')
        cs.add_hyperparameters([n_d, n_a])
        # cs.add_hyperparameters([n_d, n_a, n_steps, gamma, n_independent, n_shared, seed, optimizer_fn, scheduler_fn, mask_type])
        return cs

    def fit(self, config: Configuration, seed: int = 0, budget: int = 250) -> float:
        config = dict(config)
        self.model.set_params(**config)
        X = train_x
        y = train_y
        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
        self.model.fit(X_train, y_train, eval_set=[(X_val, y_val)], patience=50)
        preds = self.model.predict(X_val)
        score = accuracy_score(y_val, preds)
        return 1 - score

In [95]:
@timeout(900)
def main():
    Tab = TabWrapper()

    facades: list[AbstractFacade] = []
    for intensifier_object in [Hyperband]:

        scenario = Scenario(
            Tab.configspace,
            walltime_limit=600,
            output_directory=Path("smac_hyperband_output_budget_10mins_Tab"),
            n_trials=10000,
            min_budget=100,
            max_budget=1000,
            n_workers=8,

        )

        initial_design = MFFacade.get_initial_design(scenario, n_configs=5)
        intensifier = intensifier_object(scenario, incumbent_selection="highest_budget")

        smac = MFFacade(
            scenario,
            Tab.fit,
            initial_design=initial_design,
            intensifier=intensifier,
            overwrite=True,
        )

        print("optimiizing")
        print(type(smac), "|", smac)
        incumbent = smac.optimize()
        print("incumbent:", incumbent)
        default_cost = smac.validate(Tab.configspace.get_default_configuration())
        print(f"Default cost ({intensifier.__class__.__name__}): {default_cost}")
        incumbent_cost = smac.validate(incumbent)
        print(f"Incumbent cost ({intensifier.__class__.__name__}): {incumbent_cost}")

        facades.append(smac)
        for arrt in dir(smac):
            if not arrt.startswith("_"):
                print(arrt, getattr(smac, arrt))

    print("facades:", facades)

if __name__ == "__main__":
    # with open('smac_results_2h.txt', "w") as f:
    #     pass
    profiler = LineProfiler()
    profiler.add_function(main)
    profiler.enable()

    main()

    profiler.disable()
    profiler.print_stats()

[INFO][abstract_initial_design.py:82] Using `n_configs` and ignoring `n_configs_per_hyperparameter`.
[INFO][abstract_initial_design.py:147] Using 5 initial design configurations and 0 additional configurations.
optimiizing
<class 'smac.facade.multi_fidelity_facade.MultiFidelityFacade'> | <smac.facade.multi_fidelity_facade.MultiFidelityFacade object at 0x7fde7c3c2ca0>
[INFO][successive_halving.py:164] Successive Halving uses budget type BUDGETS with eta 3, min budget 100, and max budget 1000.
[INFO][successive_halving.py:323] Number of configs in stage:
[INFO][successive_halving.py:325] --- Bracket 0: [9, 3, 1]
[INFO][successive_halving.py:325] --- Bracket 1: [5, 1]
[INFO][successive_halving.py:325] --- Bracket 2: [3]
[INFO][successive_halving.py:327] Budgets in stage:
[INFO][successive_halving.py:329] --- Bracket 0: [111.1111111111111, 333.3333333333333, 1000.0]
[INFO][successive_halving.py:329] --- Bracket 1: [333.3333333333333, 1000.0]
[INFO][successive_halving.py:329] --- Bracket 2:

  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count


epoch 0  | loss: 0.83758 | val_0_auc: 0.76598 |  0:00:02s
epoch 1  | loss: 0.57687 | val_0_auc: 0.79049 |  0:00:04s
epoch 2  | loss: 0.5471  | val_0_auc: 0.82639 |  0:00:07s
epoch 0  | loss: 1.3403  | val_0_auc: 0.74762 |  0:00:09s
epoch 3  | loss: 0.52554 | val_0_auc: 0.83619 |  0:00:09s
epoch 4  | loss: 0.50996 | val_0_auc: 0.84552 |  0:00:11s
epoch 0  | loss: 1.71805 | val_0_auc: 0.70575 |  0:00:15s
epoch 0  | loss: 1.33967 | val_0_auc: 0.69962 |  0:00:15s
epoch 5  | loss: 0.50691 | val_0_auc: 0.84556 |  0:00:14s
epoch 6  | loss: 0.5032  | val_0_auc: 0.84244 |  0:00:16s
epoch 1  | loss: 0.75264 | val_0_auc: 0.77813 |  0:00:19s
epoch 0  | loss: 1.89356 | val_0_auc: 0.71991 |  0:00:20s
epoch 7  | loss: 0.49526 | val_0_auc: 0.84954 |  0:00:18s
epoch 0  | loss: 1.9091  | val_0_auc: 0.67222 |  0:00:20s
epoch 0  | loss: 2.24179 | val_0_auc: 0.73574 |  0:00:21s
epoch 0  | loss: 2.04471 | val_0_auc: 0.74623 |  0:00:21s
epoch 8  | loss: 0.49497 | val_0_auc: 0.85019 |  0:00:20s
epoch 9  | los



epoch 14 | loss: 0.45711 | val_0_auc: 0.85881 |  0:03:47s
epoch 10 | loss: 0.4652  | val_0_auc: 0.85768 |  0:03:49s
epoch 10 | loss: 0.46852 | val_0_auc: 0.85563 |  0:03:52s
epoch 11 | loss: 0.48205 | val_0_auc: 0.8523  |  0:03:51s
epoch 22 | loss: 0.45716 | val_0_auc: 0.86406 |  0:03:53s
epoch 10 | loss: 0.49021 | val_0_auc: 0.85147 |  0:03:52s
epoch 14 | loss: 0.48953 | val_0_auc: 0.84692 |  0:03:56s
epoch 0  | loss: 1.56151 | val_0_auc: 0.69811 |  0:00:10s
epoch 15 | loss: 0.45542 | val_0_auc: 0.86173 |  0:04:02s
epoch 23 | loss: 0.44974 | val_0_auc: 0.86415 |  0:04:03s
epoch 1  | loss: 0.95974 | val_0_auc: 0.77292 |  0:00:21s
epoch 11 | loss: 0.46651 | val_0_auc: 0.85883 |  0:04:10s
epoch 15 | loss: 0.46676 | val_0_auc: 0.84766 |  0:04:12s
epoch 12 | loss: 0.4815  | val_0_auc: 0.85255 |  0:04:10s
epoch 11 | loss: 0.47557 | val_0_auc: 0.85704 |  0:04:13s
epoch 24 | loss: 0.45    | val_0_auc: 0.86593 |  0:04:13s
epoch 11 | loss: 0.47532 | val_0_auc: 0.85481 |  0:04:14s
epoch 16 | los

TimeoutError: 'Timed Out'

epoch 55 | loss: 0.425   | val_0_auc: 0.87356 |  0:14:50s
epoch 41 | loss: 0.43985 | val_0_auc: 0.86968 |  0:14:53s
epoch 41 | loss: 0.42583 | val_0_auc: 0.868   |  0:14:52s
epoch 87 | loss: 0.39997 | val_0_auc: 0.8774  |  0:14:55s
epoch 61 | loss: 0.41461 | val_0_auc: 0.86091 |  0:11:09s
epoch 58 | loss: 0.41145 | val_0_auc: 0.86838 |  0:14:59s


2024-08-08 16:21:26,339 - distributed.core - INFO - Event loop was unresponsive in Nanny for 3.59s.  This is often caused by long-running GIL-holding functions or moving large chunks of data. This can cause timeouts and instability.
2024-08-08 16:21:26,343 - distributed.core - INFO - Event loop was unresponsive in Nanny for 3.59s.  This is often caused by long-running GIL-holding functions or moving large chunks of data. This can cause timeouts and instability.
2024-08-08 16:21:26,347 - distributed.core - INFO - Event loop was unresponsive in Scheduler for 3.59s.  This is often caused by long-running GIL-holding functions or moving large chunks of data. This can cause timeouts and instability.
2024-08-08 16:21:26,348 - distributed.core - INFO - Event loop was unresponsive in Nanny for 3.60s.  This is often caused by long-running GIL-holding functions or moving large chunks of data. This can cause timeouts and instability.
2024-08-08 16:21:26,393 - distributed.core - INFO - Event loop w

epoch 42 | loss: 0.4233  | val_0_auc: 0.86757 |  0:15:05s
epoch 46 | loss: 0.44357 | val_0_auc: 0.86777 |  0:15:04s
epoch 88 | loss: 0.40064 | val_0_auc: 0.883   |  0:15:06s
epoch 56 | loss: 0.42195 | val_0_auc: 0.87193 |  0:15:07s
epoch 62 | loss: 0.41485 | val_0_auc: 0.8628  |  0:11:20s
epoch 42 | loss: 0.43201 | val_0_auc: 0.86954 |  0:15:14s
epoch 59 | loss: 0.40129 | val_0_auc: 0.86736 |  0:15:15s
epoch 42 | loss: 0.42259 | val_0_auc: 0.87454 |  0:15:14s
epoch 89 | loss: 0.3982  | val_0_auc: 0.87739 |  0:15:16s
epoch 63 | loss: 0.41838 | val_0_auc: 0.86553 |  0:11:31s
epoch 57 | loss: 0.41706 | val_0_auc: 0.87332 |  0:15:23s
epoch 47 | loss: 0.43356 | val_0_auc: 0.86652 |  0:15:23s
epoch 43 | loss: 0.42554 | val_0_auc: 0.86861 |  0:15:26s
epoch 90 | loss: 0.39733 | val_0_auc: 0.87967 |  0:15:27s
epoch 60 | loss: 0.40623 | val_0_auc: 0.87181 |  0:15:30s
epoch 64 | loss: 0.42461 | val_0_auc: 0.86451 |  0:11:42s
epoch 43 | loss: 0.42892 | val_0_auc: 0.87135 |  0:15:36s
epoch 43 | los

In [12]:
params_dict_Tab = {}
for i in range(len(train_x_list)):
    train_x = train_x_list[i]
    train_y = train_y_list[i]
    class TabWrapper(BaseEstimator):
        def __init__(self, n_d=64, n_a=64, n_steps=5, gamma=1.3, n_independent=2, n_shared=2, seed=317, optimizer_fn=torch.optim.Adam, optimizer_params=dict(lr=1e-2), scheduler_params={"step_size":50, "gamma":0.9}, scheduler_fn=torch.optim.lr_scheduler.StepLR, mask_type='entmax', verbose=0):
            self.n_d = n_d
            self.n_a = n_a
            self.n_steps = n_steps
            self.gamma = gamma
            self.n_independent = n_independent
            self.n_shared = n_shared
            self.seed = seed
            self.optimizer_fn = optimizer_fn
            self.optimizer_params = optimizer_params
            self.scheduler_params = scheduler_params
            self.scheduler_fn = scheduler_fn
            self.mask_type = mask_type
            self.verbose = 0
            self.model = TabNetClassifier(n_d=self.n_d, n_a=self.n_a, n_steps=self.n_steps, gamma=self.gamma, n_independent=self.n_independent, n_shared=self.n_shared, seed=self.seed, optimizer_fn=self.optimizer_fn, optimizer_params=self.optimizer_params, scheduler_params=self.scheduler_params, scheduler_fn=self.scheduler_fn, mask_type=self.mask_type, verbose=self.verbose)

        @property
        def configspace(self) -> ConfigurationSpace:
            cs = ConfigurationSpace()
            n_d = Integer("n_d", (4, 256), default=64)
            n_a = Integer("n_a", (4, 256), default=64)
            # n_steps = Integer("n_steps", (3, 10), default=5)
            # gamma = Float("gamma", (0.9, 2.0), default=1.3)
            # n_independent = Integer("n_independent", (1, 10), default=2)
            # n_shared = Integer("n_shared", (1, 10), default=2)
            # seed = Integer("seed", (0, 1000), default=317)
            # optimizer_fn = Categorical("optimizer_fn", [torch.optim.Adam, torch.optim.AdamW], default=torch.optim.Adam)
            # scheduler_fn = Categorical("scheduler_fn", [torch.optim.lr_scheduler.StepLR, torch.optim.lr_scheduler.MultiStepLR], default=torch.optim.lr_scheduler.StepLR)
            # mask_type = Categorical("mask_type", ['sparsemax', 'entmax'], default='entmax')
            verbose = Categorical("verbose", [0], default=0)
            cs.add_hyperparameters([n_d, n_a, verbose])
            # cs.add_hyperparameters([n_d, n_a, n_steps, gamma, n_independent, n_shared, seed, optimizer_fn, scheduler_fn, mask_type])
            return cs

        def fit(self, config: Configuration, seed: int = 0, budget: int = 250) -> float:
            config = dict(config)
            self.model.set_params(**config)
            X = train_x
            y = train_y
            X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
            self.model.fit(X_train, y_train, eval_set=[(X_val, y_val)], patience=10)
            preds = self.model.predict(X_val)
            score = accuracy_score(y_val, preds)
            return 1 - score    

    @timeout(3900)
    def main():
        start_time = time.time()
        print("Here 1")
        Tab = TabWrapper()

        facades: list[AbstractFacade] = []
        for intensifier_object in [Hyperband]:

            scenario = Scenario(
                Tab.configspace,
                walltime_limit=1800,
                output_directory=Path("smac_hyperband_output_budget_30mins_Tab/" + dataset_names[i]),
                n_trials=10000,
                min_budget=100,
                max_budget=1000,
                n_workers=8,

            )
            

            initial_design = MFFacade.get_initial_design(scenario, n_configs=5)
            intensifier = intensifier_object(scenario, incumbent_selection="highest_budget")

            smac = MFFacade(
                scenario,
                Tab.fit,
                initial_design=initial_design,
                intensifier=intensifier,
                overwrite=True,
            )
            print("Here 2")

            print("optimizing")
            # print(type(smac), "|", smac)
            incumbent = smac.optimize()
            best_params = incumbent.get_dictionary()
            params_dict_Tab[dataset_names[i]] = best_params

            print("Here 3")
            incumbent_cost = smac.runhistory.get_cost(incumbent)
            incumbent_run_id = incumbent.config_id

            print(f"Parameters: {best_params}")
            print(f"Cost: {incumbent_cost} | Config ID: {incumbent_run_id}")

            # if time.time() - start_time > 60:
            #     break

            default_cost = smac.validate(Tab.configspace.get_default_configuration())
            # print(f"Default cost ({intensifier.__class__.__name__}): {default_cost}")
            incumbent_cost = smac.validate(incumbent)
            # print(f"Incumbent cost ({intensifier.__class__.__name__}): {incumbent_cost}")

            facades.append(smac)
        #     for arrt in dir(smac):
        #         if not arrt.startswith("_"):
        #             print(arrt, getattr(smac, arrt))

        # print("facades:", facades)



    if __name__ == "__main__":
        # with open('smac_results_2h.txt', "w") as f:
        #     pass
        # profiler = LineProfiler()
        # profiler.add_function(main)
        # profiler.enable()

        main()
        with open("SmacResults/TabNet_results.json", "w") as f:
            for dataset_name, params in params_dict_Tab.items():
                f.write(f"{dataset_name}: {params}\n")

        # profiler.disable()

Here 1
[INFO][abstract_initial_design.py:82] Using `n_configs` and ignoring `n_configs_per_hyperparameter`.
[INFO][abstract_initial_design.py:147] Using 5 initial design configurations and 0 additional configurations.
Here 2
optimizing
[INFO][successive_halving.py:164] Successive Halving uses budget type BUDGETS with eta 3, min budget 100, and max budget 1000.
[INFO][successive_halving.py:323] Number of configs in stage:
[INFO][successive_halving.py:325] --- Bracket 0: [9, 3, 1]
[INFO][successive_halving.py:325] --- Bracket 1: [5, 1]
[INFO][successive_halving.py:325] --- Bracket 2: [3]
[INFO][successive_halving.py:327] Budgets in stage:
[INFO][successive_halving.py:329] --- Bracket 0: [111.1111111111111, 333.3333333333333, 1000.0]
[INFO][successive_halving.py:329] --- Bracket 1: [333.3333333333333, 1000.0]
[INFO][successive_halving.py:329] --- Bracket 2: [1000.0]
[INFO][smbo.py:319] Finished 0 trials.
[INFO][smbo.py:319] Finished 0 trials.
[INFO][smbo.py:319] Finished 0 trials.
[INFO][

  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count



Early stopping occurred at epoch 11 with best_epoch = 6 and best_val_0_auc = 0.86199

Early stopping occurred at epoch 27 with best_epoch = 22 and best_val_0_auc = 0.86813





Early stopping occurred at epoch 19 with best_epoch = 14 and best_val_0_auc = 0.87585




Stop training because you reached max_epochs = 30 with best_epoch = 25 and best_val_0_auc = 0.87534





Early stopping occurred at epoch 29 with best_epoch = 24 and best_val_0_auc = 0.86792


2024-08-13 13:07:16,192 - distributed.worker - ERROR - Failed to communicate with scheduler during heartbeat.
Traceback (most recent call last):
  File "/home/ziyan/miniconda3/envs/NeuroData/lib/python3.9/site-packages/distributed/comm/tcp.py", line 225, in read
    frames_nosplit_nbytes_bin = await stream.read_bytes(fmt_size)
tornado.iostream.StreamClosedError: Stream is closed

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/home/ziyan/miniconda3/envs/NeuroData/lib/python3.9/site-packages/distributed/worker.py", line 1252, in heartbeat
    response = await retry_operation(
  File "/home/ziyan/miniconda3/envs/NeuroData/lib/python3.9/site-packages/distributed/utils_comm.py", line 452, in retry_operation
    return await retry(
  File "/home/ziyan/miniconda3/envs/NeuroData/lib/python3.9/site-packages/distributed/utils_comm.py", line 431, in retry
    return await coro()
  File "/home/ziyan/miniconda3/envs/NeuroData/lib/py


Early stopping occurred at epoch 11 with best_epoch = 6 and best_val_0_auc = 0.86199





Early stopping occurred at epoch 15 with best_epoch = 10 and best_val_0_auc = 0.873





Early stopping occurred at epoch 27 with best_epoch = 22 and best_val_0_auc = 0.86813





Early stopping occurred at epoch 15 with best_epoch = 10 and best_val_0_auc = 0.873





Early stopping occurred at epoch 15 with best_epoch = 10 and best_val_0_auc = 0.86277





Early stopping occurred at epoch 27 with best_epoch = 22 and best_val_0_auc = 0.86813





Early stopping occurred at epoch 19 with best_epoch = 14 and best_val_0_auc = 0.87585
Stop training because you reached max_epochs = 30 with best_epoch = 25 and best_val_0_auc = 0.87534
Stop training because you reached max_epochs = 30 with best_epoch = 27 and best_val_0_auc = 0.87449





Early stopping occurred at epoch 29 with best_epoch = 24 and best_val_0_auc = 0.8727
Stop training because you reached max_epochs = 30 with best_epoch = 25 and best_val_0_auc = 0.87534





Early stopping occurred at epoch 19 with best_epoch = 14 and best_val_0_auc = 0.87585





Early stopping occurred at epoch 27 with best_epoch = 22 and best_val_0_auc = 0.86902





Early stopping occurred at epoch 28 with best_epoch = 23 and best_val_0_auc = 0.87967





Early stopping occurred at epoch 20 with best_epoch = 15 and best_val_0_auc = 0.86357




Stop training because you reached max_epochs = 30 with best_epoch = 27 and best_val_0_auc = 0.87449





Early stopping occurred at epoch 23 with best_epoch = 18 and best_val_0_auc = 0.86733





Early stopping occurred at epoch 17 with best_epoch = 12 and best_val_0_auc = 0.86422





Early stopping occurred at epoch 27 with best_epoch = 22 and best_val_0_auc = 0.86902

Early stopping occurred at epoch 23 with best_epoch = 18 and best_val_0_auc = 0.87156




[INFO][abstract_intensifier.py:515] Added config 546335 as new incumbent because there are no incumbents yet.

Early stopping occurred at epoch 19 with best_epoch = 14 and best_val_0_auc = 0.86241





Early stopping occurred at epoch 28 with best_epoch = 23 and best_val_0_auc = 0.87967





Early stopping occurred at epoch 10 with best_epoch = 5 and best_val_0_auc = 0.85015





Early stopping occurred at epoch 19 with best_epoch = 14 and best_val_0_auc = 0.86577





Early stopping occurred at epoch 17 with best_epoch = 12 and best_val_0_auc = 0.86711





Early stopping occurred at epoch 17 with best_epoch = 12 and best_val_0_auc = 0.86933

Early stopping occurred at epoch 29 with best_epoch = 24 and best_val_0_auc = 0.86991





Early stopping occurred at epoch 19 with best_epoch = 14 and best_val_0_auc = 0.86782





Early stopping occurred at epoch 16 with best_epoch = 11 and best_val_0_auc = 0.86551

Early stopping occurred at epoch 10 with best_epoch = 5 and best_val_0_auc = 0.85664





Early stopping occurred at epoch 21 with best_epoch = 16 and best_val_0_auc = 0.86739

Early stopping occurred at epoch 21 with best_epoch = 16 and best_val_0_auc = 0.86814





Early stopping occurred at epoch 25 with best_epoch = 20 and best_val_0_auc = 0.87221





Early stopping occurred at epoch 20 with best_epoch = 15 and best_val_0_auc = 0.87055





Early stopping occurred at epoch 16 with best_epoch = 11 and best_val_0_auc = 0.86698

Early stopping occurred at epoch 13 with best_epoch = 8 and best_val_0_auc = 0.86298





Early stopping occurred at epoch 20 with best_epoch = 15 and best_val_0_auc = 0.87474





Early stopping occurred at epoch 15 with best_epoch = 10 and best_val_0_auc = 0.86008





Early stopping occurred at epoch 19 with best_epoch = 14 and best_val_0_auc = 0.8729





Early stopping occurred at epoch 21 with best_epoch = 16 and best_val_0_auc = 0.86814




[INFO][abstract_intensifier.py:594] Added config d31eb5 and rejected config 546335 as incumbent because it is not better than the incumbents on 1 instances:

Early stopping occurred at epoch 29 with best_epoch = 24 and best_val_0_auc = 0.8727




[INFO][abstract_intensifier.py:594] Added config 7fcf93 and rejected config d31eb5 as incumbent because it is not better than the incumbents on 1 instances:

Early stopping occurred at epoch 12 with best_epoch = 7 and best_val_0_auc = 0.85674





Early stopping occurred at epoch 12 with best_epoch = 7 and best_val_0_auc = 0.85732





Early stopping occurred at epoch 22 with best_epoch = 17 and best_val_0_auc = 0.86956
Stop training because you reached max_epochs = 30 with best_epoch = 25 and best_val_0_auc = 0.87294




Stop training because you reached max_epochs = 30 with best_epoch = 26 and best_val_0_auc = 0.87477





Early stopping occurred at epoch 19 with best_epoch = 14 and best_val_0_auc = 0.86556





Early stopping occurred at epoch 19 with best_epoch = 14 and best_val_0_auc = 0.87126





Early stopping occurred at epoch 19 with best_epoch = 14 and best_val_0_auc = 0.87585




Stop training because you reached max_epochs = 30 with best_epoch = 27 and best_val_0_auc = 0.87449





Early stopping occurred at epoch 25 with best_epoch = 20 and best_val_0_auc = 0.87221





Early stopping occurred at epoch 16 with best_epoch = 11 and best_val_0_auc = 0.86698





Early stopping occurred at epoch 24 with best_epoch = 19 and best_val_0_auc = 0.87282





Early stopping occurred at epoch 28 with best_epoch = 23 and best_val_0_auc = 0.87967




Stop training because you reached max_epochs = 30 with best_epoch = 28 and best_val_0_auc = 0.87378





Early stopping occurred at epoch 18 with best_epoch = 13 and best_val_0_auc = 0.85801




[INFO][smbo.py:319] Finished 50 trials.
Stop training because you reached max_epochs = 30 with best_epoch = 28 and best_val_0_auc = 0.87441




Stop training because you reached max_epochs = 30 with best_epoch = 25 and best_val_0_auc = 0.87294




Stop training because you reached max_epochs = 30 with best_epoch = 25 and best_val_0_auc = 0.87578





Early stopping occurred at epoch 14 with best_epoch = 9 and best_val_0_auc = 0.86313





Early stopping occurred at epoch 24 with best_epoch = 19 and best_val_0_auc = 0.87272




Stop training because you reached max_epochs = 30 with best_epoch = 28 and best_val_0_auc = 0.87592





Early stopping occurred at epoch 27 with best_epoch = 22 and best_val_0_auc = 0.87188





Early stopping occurred at epoch 16 with best_epoch = 11 and best_val_0_auc = 0.8615





Early stopping occurred at epoch 28 with best_epoch = 23 and best_val_0_auc = 0.87081




[INFO][smbo.py:327] Configuration budget is exhausted:
[INFO][smbo.py:328] --- Remaining wallclock time: -9.492891550064087
[INFO][smbo.py:329] --- Remaining cpu time: inf
[INFO][smbo.py:330] --- Remaining trials: 9933

Early stopping occurred at epoch 24 with best_epoch = 19 and best_val_0_auc = 0.87107





Early stopping occurred at epoch 8 with best_epoch = 3 and best_val_0_auc = 0.85076





Early stopping occurred at epoch 25 with best_epoch = 20 and best_val_0_auc = 0.87221




[INFO][abstract_intensifier.py:594] Added config 571c93 and rejected config 7fcf93 as incumbent because it is not better than the incumbents on 1 instances:

Early stopping occurred at epoch 24 with best_epoch = 19 and best_val_0_auc = 0.87282




Stop training because you reached max_epochs = 30 with best_epoch = 26 and best_val_0_auc = 0.87477




Stop training because you reached max_epochs = 30 with best_epoch = 27 and best_val_0_auc = 0.87449





Early stopping occurred at epoch 21 with best_epoch = 16 and best_val_0_auc = 0.8671




Stop training because you reached max_epochs = 30 with best_epoch = 28 and best_val_0_auc = 0.87441
Parameters: {'n_a': 21, 'n_d': 119, 'verbose': 0}
Cost: 0.1985 | Config ID: 26
Here 3
Here 1
[INFO][abstract_initial_design.py:82] Using `n_configs` and ignoring `n_configs_per_hyperparameter`.
[INFO][abstract_initial_design.py:147] Using 5 initial design configurations and 0 additional configurations.
Here 2
optimizing
[INFO][successive_halving.py:164] Successive Halving uses budget type BUDGETS with eta 3, min budget 100, and max budget 1000.
[INFO][successive_halving.py:323] Number of configs in stage:
[INFO][successive_halving.py:325] --- Bracket 0: [9, 3, 1]
[INFO][successive_halving.py:325] --- Bracket 1: [5, 1]
[INFO][successive_halving.py:325] --- Bracket 2: [3]
[INFO][successive_halving.py:327] Budgets in stage:
[INFO][successive_halving.py:329] --- Bracket 0: [111.1111111111111, 333.3333333333333, 1000.0]
[INFO][successive_halving.py:329] --- Bracket 1: [333.3333333333333, 1000

  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count



Early stopping occurred at epoch 22 with best_epoch = 17 and best_val_0_auc = 0.57786


2024-08-13 13:43:14,646 - distributed.worker - ERROR - Failed to communicate with scheduler during heartbeat.
Traceback (most recent call last):
  File "/home/ziyan/miniconda3/envs/NeuroData/lib/python3.9/site-packages/distributed/comm/tcp.py", line 225, in read
    frames_nosplit_nbytes_bin = await stream.read_bytes(fmt_size)
tornado.iostream.StreamClosedError: Stream is closed

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/home/ziyan/miniconda3/envs/NeuroData/lib/python3.9/site-packages/distributed/worker.py", line 1252, in heartbeat
    response = await retry_operation(
  File "/home/ziyan/miniconda3/envs/NeuroData/lib/python3.9/site-packages/distributed/utils_comm.py", line 452, in retry_operation
    return await retry(
  File "/home/ziyan/miniconda3/envs/NeuroData/lib/python3.9/site-packages/distributed/utils_comm.py", line 431, in retry
    return await coro()
  File "/home/ziyan/miniconda3/envs/NeuroData/lib/py


Early stopping occurred at epoch 5 with best_epoch = 0 and best_val_0_auc = 0.52945





Early stopping occurred at epoch 11 with best_epoch = 6 and best_val_0_auc = 0.59027




Stop training because you reached max_epochs = 30 with best_epoch = 28 and best_val_0_auc = 0.62517





Early stopping occurred at epoch 29 with best_epoch = 24 and best_val_0_auc = 0.60774





Early stopping occurred at epoch 18 with best_epoch = 13 and best_val_0_auc = 0.6281





Early stopping occurred at epoch 21 with best_epoch = 16 and best_val_0_auc = 0.61088





Early stopping occurred at epoch 8 with best_epoch = 3 and best_val_0_auc = 0.56136





Early stopping occurred at epoch 15 with best_epoch = 10 and best_val_0_auc = 0.58836





Early stopping occurred at epoch 19 with best_epoch = 14 and best_val_0_auc = 0.59615





Early stopping occurred at epoch 9 with best_epoch = 4 and best_val_0_auc = 0.57254





Early stopping occurred at epoch 20 with best_epoch = 15 and best_val_0_auc = 0.58993





Early stopping occurred at epoch 14 with best_epoch = 9 and best_val_0_auc = 0.57645




[INFO][abstract_intensifier.py:515] Added config 546335 as new incumbent because there are no incumbents yet.
Stop training because you reached max_epochs = 30 with best_epoch = 29 and best_val_0_auc = 0.61897





Early stopping occurred at epoch 9 with best_epoch = 4 and best_val_0_auc = 0.55482

Early stopping occurred at epoch 15 with best_epoch = 10 and best_val_0_auc = 0.5854




[INFO][abstract_intensifier.py:594] Added config 037a0b and rejected config 546335 as incumbent because it is not better than the incumbents on 1 instances:




Stop training because you reached max_epochs = 30 with best_epoch = 29 and best_val_0_auc = 0.61773

Early stopping occurred at epoch 17 with best_epoch = 12 and best_val_0_auc = 0.59813





Early stopping occurred at epoch 23 with best_epoch = 18 and best_val_0_auc = 0.60627




[INFO][abstract_intensifier.py:594] Added config 3587c7 and rejected config 037a0b as incumbent because it is not better than the incumbents on 1 instances:





Early stopping occurred at epoch 6 with best_epoch = 1 and best_val_0_auc = 0.55046

Early stopping occurred at epoch 9 with best_epoch = 4 and best_val_0_auc = 0.57919





Early stopping occurred at epoch 17 with best_epoch = 12 and best_val_0_auc = 0.60284





Early stopping occurred at epoch 16 with best_epoch = 11 and best_val_0_auc = 0.58467





Early stopping occurred at epoch 19 with best_epoch = 14 and best_val_0_auc = 0.59336





Early stopping occurred at epoch 25 with best_epoch = 20 and best_val_0_auc = 0.60882





Early stopping occurred at epoch 19 with best_epoch = 14 and best_val_0_auc = 0.58383





Early stopping occurred at epoch 9 with best_epoch = 4 and best_val_0_auc = 0.56356





Early stopping occurred at epoch 18 with best_epoch = 13 and best_val_0_auc = 0.6281
Stop training because you reached max_epochs = 30 with best_epoch = 29 and best_val_0_auc = 0.61773





Early stopping occurred at epoch 7 with best_epoch = 2 and best_val_0_auc = 0.54745




Stop training because you reached max_epochs = 30 with best_epoch = 28 and best_val_0_auc = 0.62517





Early stopping occurred at epoch 15 with best_epoch = 10 and best_val_0_auc = 0.58901





Early stopping occurred at epoch 18 with best_epoch = 13 and best_val_0_auc = 0.61004

Early stopping occurred at epoch 17 with best_epoch = 12 and best_val_0_auc = 0.60284





Early stopping occurred at epoch 5 with best_epoch = 0 and best_val_0_auc = 0.55027




Stop training because you reached max_epochs = 30 with best_epoch = 29 and best_val_0_auc = 0.61897





Early stopping occurred at epoch 8 with best_epoch = 3 and best_val_0_auc = 0.55717





Early stopping occurred at epoch 7 with best_epoch = 2 and best_val_0_auc = 0.54934





Early stopping occurred at epoch 5 with best_epoch = 0 and best_val_0_auc = 0.57092

Early stopping occurred at epoch 23 with best_epoch = 18 and best_val_0_auc = 0.61439





Early stopping occurred at epoch 13 with best_epoch = 8 and best_val_0_auc = 0.5922





Early stopping occurred at epoch 13 with best_epoch = 8 and best_val_0_auc = 0.58922





Early stopping occurred at epoch 25 with best_epoch = 20 and best_val_0_auc = 0.60882





Early stopping occurred at epoch 19 with best_epoch = 14 and best_val_0_auc = 0.59336





Early stopping occurred at epoch 8 with best_epoch = 3 and best_val_0_auc = 0.56143





Early stopping occurred at epoch 22 with best_epoch = 17 and best_val_0_auc = 0.6006




Stop training because you reached max_epochs = 30 with best_epoch = 28 and best_val_0_auc = 0.63184

Early stopping occurred at epoch 23 with best_epoch = 18 and best_val_0_auc = 0.62135





Early stopping occurred at epoch 22 with best_epoch = 17 and best_val_0_auc = 0.59056





Early stopping occurred at epoch 5 with best_epoch = 0 and best_val_0_auc = 0.54534





Early stopping occurred at epoch 28 with best_epoch = 23 and best_val_0_auc = 0.61842




[INFO][smbo.py:319] Finished 50 trials.

Early stopping occurred at epoch 5 with best_epoch = 0 and best_val_0_auc = 0.55149





Early stopping occurred at epoch 6 with best_epoch = 1 and best_val_0_auc = 0.55434

Early stopping occurred at epoch 5 with best_epoch = 0 and best_val_0_auc = 0.56399





Early stopping occurred at epoch 23 with best_epoch = 18 and best_val_0_auc = 0.61439




[INFO][abstract_intensifier.py:594] Added config d17be8 and rejected config 3587c7 as incumbent because it is not better than the incumbents on 1 instances:

Early stopping occurred at epoch 12 with best_epoch = 7 and best_val_0_auc = 0.57186





Early stopping occurred at epoch 16 with best_epoch = 11 and best_val_0_auc = 0.57509




Stop training because you reached max_epochs = 30 with best_epoch = 29 and best_val_0_auc = 0.61897




[INFO][abstract_intensifier.py:594] Added config 548676 and rejected config d17be8 as incumbent because it is not better than the incumbents on 1 instances:

Early stopping occurred at epoch 11 with best_epoch = 6 and best_val_0_auc = 0.57662





Early stopping occurred at epoch 25 with best_epoch = 20 and best_val_0_auc = 0.60882





Early stopping occurred at epoch 5 with best_epoch = 0 and best_val_0_auc = 0.55149





Early stopping occurred at epoch 9 with best_epoch = 4 and best_val_0_auc = 0.56498





Early stopping occurred at epoch 23 with best_epoch = 18 and best_val_0_auc = 0.62135




Stop training because you reached max_epochs = 30 with best_epoch = 28 and best_val_0_auc = 0.63184





Early stopping occurred at epoch 28 with best_epoch = 23 and best_val_0_auc = 0.61842




Stop training because you reached max_epochs = 30 with best_epoch = 27 and best_val_0_auc = 0.60839





Early stopping occurred at epoch 29 with best_epoch = 24 and best_val_0_auc = 0.59716





Early stopping occurred at epoch 16 with best_epoch = 11 and best_val_0_auc = 0.59849




[INFO][smbo.py:327] Configuration budget is exhausted:
[INFO][smbo.py:328] --- Remaining wallclock time: -17.96246647834778
[INFO][smbo.py:329] --- Remaining cpu time: inf
[INFO][smbo.py:330] --- Remaining trials: 9924

Early stopping occurred at epoch 15 with best_epoch = 10 and best_val_0_auc = 0.57745





Early stopping occurred at epoch 26 with best_epoch = 21 and best_val_0_auc = 0.60801





Early stopping occurred at epoch 16 with best_epoch = 11 and best_val_0_auc = 0.60311




Stop training because you reached max_epochs = 30 with best_epoch = 25 and best_val_0_auc = 0.62591





Early stopping occurred at epoch 18 with best_epoch = 13 and best_val_0_auc = 0.60156




Stop training because you reached max_epochs = 30 with best_epoch = 28 and best_val_0_auc = 0.63184




[INFO][abstract_intensifier.py:594] Added config 007b97 and rejected config 548676 as incumbent because it is not better than the incumbents on 1 instances:
Stop training because you reached max_epochs = 30 with best_epoch = 26 and best_val_0_auc = 0.63319




Stop training because you reached max_epochs = 30 with best_epoch = 27 and best_val_0_auc = 0.61603




Parameters: {'n_a': 144, 'n_d': 145, 'verbose': 0}
Cost: 0.397503285151117 | Config ID: 37
Here 3
Here 1
[INFO][abstract_initial_design.py:82] Using `n_configs` and ignoring `n_configs_per_hyperparameter`.
[INFO][abstract_initial_design.py:147] Using 5 initial design configurations and 0 additional configurations.
Here 2
optimizing
[INFO][successive_halving.py:164] Successive Halving uses budget type BUDGETS with eta 3, min budget 100, and max budget 1000.
[INFO][successive_halving.py:323] Number of configs in stage:
[INFO][successive_halving.py:325] --- Bracket 0: [9, 3, 1]
[INFO][successive_halving.py:325] --- Bracket 1: [5, 1]
[INFO][successive_halving.py:325] --- Bracket 2: [3]
[INFO][successive_halving.py:327] Budgets in stage:
[INFO][successive_halving.py:329] --- Bracket 0: [111.1111111111111, 333.3333333333333, 1000.0]
[INFO][successive_halving.py:329] --- Bracket 1: [333.3333333333333, 1000.0]
[INFO][successive_halving.py:329] --- Bracket 2: [1000.0]
[INFO][smbo.py:319] Finish

  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count



Early stopping occurred at epoch 21 with best_epoch = 16 and best_val_0_auc = 0.83557


2024-08-13 14:19:48,540 - distributed.worker - ERROR - Failed to communicate with scheduler during heartbeat.
Traceback (most recent call last):
  File "/home/ziyan/miniconda3/envs/NeuroData/lib/python3.9/site-packages/distributed/comm/tcp.py", line 225, in read
    frames_nosplit_nbytes_bin = await stream.read_bytes(fmt_size)
tornado.iostream.StreamClosedError: Stream is closed

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/home/ziyan/miniconda3/envs/NeuroData/lib/python3.9/site-packages/distributed/worker.py", line 1252, in heartbeat
    response = await retry_operation(
  File "/home/ziyan/miniconda3/envs/NeuroData/lib/python3.9/site-packages/distributed/utils_comm.py", line 452, in retry_operation
    return await retry(
  File "/home/ziyan/miniconda3/envs/NeuroData/lib/python3.9/site-packages/distributed/utils_comm.py", line 431, in retry
    return await coro()
  File "/home/ziyan/miniconda3/envs/NeuroData/lib/py

In [10]:
for dataset_name, params in params_dict_Tab.items():
    print(dataset_name, ":", params)

# with open("SmacResults/TabNet_results.json", "w") as f:
#     # for dataset_name, params in params_dict_Tab.items():
#     #     f.write(f"{dataset_name}: {params}\n")
#     f.write("Well done!")