In [1]:
import pandas as pd
import numpy as np
import ast

pd.set_option("display.max_rows", None)

In [2]:
df = pd.read_csv(r"../output/sanity_check.csv")

In [3]:
# List of metric names
metric_names = [
    "f1",
    "mse",
    "accuracy",
    "recall",
    "precision",
    "auc",
    "area_under_pr",
    "r2_score",
    "rmse",
]
dataset = df.copy()
# Create columns based on metric names
# Convert string representations to dictionaries
dataset["output_metrics"] = dataset["output_metrics"].apply(ast.literal_eval)
# Create columns based on metric names
for metric in metric_names:
    dataset[metric] = dataset["output_metrics"].apply(
        lambda x: x.get(metric) if isinstance(x, dict) else np.nan
    )

In [4]:
# Specify the metric you want to maximize
metric_to_maximize = "accuracy"
tomax = {
    "f1": True,
    "mse": False,
    "accuracy": True,
    "recall": True,
    "precision": True,
    "auc": True,
    "area_under_pr": True,
    "rmse": False,
    "r2_score": True,
}


# Filter out rows with NaN values in the specified metric column
filtered_df = dataset.dropna(subset=[metric_to_maximize]).loc[
    dataset[metric_to_maximize] != "nan"
]

# Find the rows that maximize the specified metric for each dataset
if tomax[metric_to_maximize]:
    max_rows = filtered_df.sort_values(["dataset", metric_to_maximize], ascending=False)
else:
    max_rows = filtered_df.sort_values(["dataset", metric_to_maximize], ascending=True)

sel_cols = [
    "dataset",
    "model",
    "run_time",
    "eval_metric",
    "best_params",
    "output_metrics",
    "debug_preds",
    "f1",
    "mse",
    "accuracy",
    "recall",
    "precision",
    "auc",
    "area_under_pr",
    "r2_score",
    "rmse",
]

dataset_name = "heloc"
max_rows[sel_cols].head(50)

Unnamed: 0,dataset,model,run_time,eval_metric,best_params,output_metrics,debug_preds,f1,mse,accuracy,recall,precision,auc,area_under_pr,r2_score,rmse
17,covertype,s1dcnn,946.213729,accuracy,{'AdamW_learning_rate': 0.00034240266638236504...,"{'accuracy': 0.9437188904876319, 'f1': 0.94465...","[0, 1, 1, 0, 1, 0, 4, 5, 1, 0]",0.944653,,0.943719,,,,,,
19,covertype,gate,6604.046805,accuracy,"{'AdamW_weight_decay': 3.5344065438471894e-05,...","{'accuracy': 0.9240910686870495, 'f1': 0.92559...","[0, 1, 1, 0, 1, 0, 2, 5, 1, 0]",0.925594,,0.924091,,,,,,
18,covertype,tabnet,2262.562,accuracy,"{'AdamW_weight_decay': 7.470161009325206e-05, ...","{'accuracy': 0.7596882680564256, 'f1': 0.77015...","[1, 1, 0, 0, 0, 0, 4, 5, 1, 1]",0.770152,,0.759688,,,,,,
8,adult,s1dcnn,57.331957,roc_auc,"{'AdamW_learning_rate': 0.000170180255851598, ...","{'recall': 0.7331632653061224, 'precision': 0....","[0, 1, 1, 0, 1, 0, 1, 0, 0, 0]",0.68331,,0.836384,0.733163,0.639804,0.897894,0.533324,,
15,adult,autoint,308.941073,roc_auc,"{'AdamW_weight_decay': 1.2328658154235702e-05,...","{'recall': 0.8183673469387756, 'precision': 0....","[0, 1, 1, 0, 1, 0, 1, 0, 0, 0]",0.6843,,0.818204,0.818367,0.587977,0.905202,0.52491,,
10,adult,gate,124.146162,roc_auc,"{'AdamW_weight_decay': 3.5344065438471894e-05,...","{'recall': 0.860204081632653, 'precision': 0.5...","[0, 1, 1, 0, 1, 0, 1, 0, 0, 0]",0.687322,,0.811571,0.860204,0.572301,0.910607,0.525953,,
13,adult,gandalf,73.296452,roc_auc,"{'AdamW_weight_decay': 8.836094935673395e-05, ...","{'recall': 0.8698979591836735, 'precision': 0....","[0, 1, 1, 0, 1, 0, 1, 0, 0, 1]",0.689028,,0.810957,0.869898,0.570425,0.911527,0.527534,,
14,adult,node,51.348804,roc_auc,"{'AdamW_weight_decay': 3.5344065438471894e-05,...","{'recall': 0.8494897959183674, 'precision': 0....","[0, 1, 1, 0, 1, 0, 1, 0, 0, 0]",0.680286,,0.807763,0.84949,0.567291,0.907241,0.518145,,
11,adult,fttransformer,209.113578,roc_auc,"{'AdamW_weight_decay': 2.8964661623682113e-05,...","{'recall': 0.8724489795918368, 'precision': 0....","[0, 1, 1, 0, 1, 0, 1, 0, 0, 0]",0.681682,,0.803832,0.872449,0.559372,0.911619,0.518732,,
16,adult,tabtransformer,423.943587,roc_auc,"{'AdamW_weight_decay': 6.894924309234459e-05, ...","{'recall': 0.8469387755102041, 'precision': 0....","[0, 1, 1, 0, 1, 0, 1, 0, 0, 0]",0.671793,,0.800762,0.846939,0.556673,0.902053,0.508319,,


In [18]:
max_rows.loc[max_rows["model"] == "fttransformer"]["best_params"].iloc[0]

"{'AdamW_weight_decay': 0.009146661940655405, 'Adam_weight_decay': 0.0026893683659541257, 'ExponentialLR_gamma': 0.9813824711975316, 'ReduceLROnPlateau_factor': 0.11738557751606361, 'ReduceLROnPlateau_patience': 6, 'StepLR_gamma': 0.9851456952449655, 'StepLR_step_size': 29, 'add_norm_dropout': 0.0992106504569584, 'attn_dropout': 0.20163724049581883, 'batch_size': 512, 'embedding_dropout': 0.056162111868724864, 'embedding_initialization': 'kaiming_uniform', 'ff_dropout': 0.09808690887243214, 'ff_hidden_multiplier': 4, 'learning_rate': 0.0007179902919953625, 'num_attn_blocks': 6, 'optimizer_fn': <class 'torch.optim.adamw.AdamW'>, 'scheduler_fn': <class 'torch.optim.lr_scheduler.ReduceLROnPlateau'>, 'shared_embedding_fraction': 0.4529901637185145, 'transformer_activation': 'ReGLU', 'outer_params': {'hyperopt_evals': 10, 'auto_lr_find': False, 'max_epochs': 1000, 'val_size': 0.15, 'early_stopping_patience': 6}}"

In [15]:
d = {
    "AdamW_weight_decay": 0.009146661940655405,
    "Adam_weight_decay": 0.0026893683659541257,
    "ExponentialLR_gamma": 0.9813824711975316,
    "ReduceLROnPlateau_factor": 0.11738557751606361,
    "ReduceLROnPlateau_patience": 6,
    "StepLR_gamma": 0.9851456952449655,
    "StepLR_step_size": 29,
    "add_norm_dropout": 0.0992106504569584,
    "attn_dropout": 0.20163724049581883,
    "batch_size": 2496,
    "embedding_dropout": 0.056162111868724864,
    "embedding_initialization": "kaiming_uniform",
    "ff_dropout": 0.09808690887243214,
    "ff_hidden_multiplier": 4,
    "learning_rate": 0.0007179902919953625,
    "num_attn_blocks": 6,
    "shared_embedding_fraction": 0.4529901637185145,
    "transformer_activation": "ReGLU",
    "outer_params": {
        "hyperopt_evals": 10,
        "auto_lr_find": False,
        "max_epochs": 1000,
        "val_size": 0.15,
        "early_stopping_patience": 6,
    },
}

In [16]:
d

{'AdamW_weight_decay': 0.009146661940655405,
 'Adam_weight_decay': 0.0026893683659541257,
 'ExponentialLR_gamma': 0.9813824711975316,
 'ReduceLROnPlateau_factor': 0.11738557751606361,
 'ReduceLROnPlateau_patience': 6,
 'StepLR_gamma': 0.9851456952449655,
 'StepLR_step_size': 29,
 'add_norm_dropout': 0.0992106504569584,
 'attn_dropout': 0.20163724049581883,
 'batch_size': 2496,
 'embedding_dropout': 0.056162111868724864,
 'embedding_initialization': 'kaiming_uniform',
 'ff_dropout': 0.09808690887243214,
 'ff_hidden_multiplier': 4,
 'learning_rate': 0.0007179902919953625,
 'num_attn_blocks': 6,
 'shared_embedding_fraction': 0.4529901637185145,
 'transformer_activation': 'ReGLU',
 'outer_params': {'hyperopt_evals': 10,
  'auto_lr_find': False,
  'max_epochs': 1000,
  'val_size': 0.15,
  'early_stopping_patience': 6}}

In [17]:
maximized_rows["best_params"].iloc[1]

"{'alpha': 1, 'colsample_bytree': 0.7701468147287692, 'gamma': 4, 'lambda': 5, 'learning_rate': 0.019628488779340834, 'max_bin': 156, 'max_depth': 8, 'min_child_weight': 8, 'n_estimators': 255, 'subsample': 0.9280869759290384, 'tree_method': 'auto'}"