In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from pathlib import Path
import optuna
import joblib
from collections import namedtuple

In [2]:
# Record describing one benchmark dataset: task type, dataset name,
# folder on disk, and the config dict loaded from that folder.
Data = namedtuple("Data", "type name path config")

In [3]:
# Working directories, relative to where the notebook is run:
# dataset folders, exported study CSVs, and Optuna study databases.
DATA_PATH, OUTPUT_PATH, STUDY_PATH = map(Path, ("data", "output", "study"))

In [4]:
# Identifier embedded in the file name of the exported "pol" study CSV
# (study_pol_<id>.csv); get_study uses it to pick that file for the "pol" dataset.
POL_CLS_UUID = "66dc899c4c4f4e4f822d02bdddb18f80"

In [5]:
# Experiments to compile, one "<task type>|<dataset name>" string each.
# The dataset name is also the name of the dataset's folder under DATA_PATH.
experiments = [
 'classification|default-of-credit-card-clients',
 'classification|heloc',
 'classification|eye_movements',
 'classification|Higgs',
 'classification|pol',
 'classification|albert',
 'classification|road-safety',
 'classification|MiniBooNE',
 'classification|covertype',
 'classification|jannis',
 'classification|Bioresponse',
 'regression|cpu_act',
 'regression|Ailerons',
 'regression|yprop_4_1',
 'regression|superconduct',
 'regression|Allstate_Claims_Severity',
 'regression|topo_2_1',
 'regression|Mercedes_Benz_Greener_Manufacturing',
]

In [6]:
# Build one Data record per experiment from the config files found on disk.
datasets = []
for experiment in experiments:
    typ, name = experiment.split("|")
    folder = DATA_PATH/name
    # Sorted so the config that gets loaded does not depend on filesystem order.
    config_files = sorted(folder.glob("*config*"))
    if not config_files:
        raise FileNotFoundError(
            f"No '*config*' file found in {folder} for dataset {name!r}"
        )
    # NOTE(security): allow_pickle=True unpickles arbitrary objects -- only
    # load config files that come from a trusted source.
    config = np.load(config_files[0], allow_pickle=True).item()
    # Number of config files in the folder; format_study divides trial duration
    # by it (presumably one config file per iteration -- TODO confirm).
    config["n_iter"] = len(config_files)
    datasets.append(Data(typ, name, folder, config))

In [7]:
# Index the Data records by dataset name for direct lookup.
dataset_dict = dict((record.name, record) for record in datasets)

In [8]:
# Dataset names, in the dict's insertion order.
dataset_names = [*dataset_dict]
dataset_names

['default-of-credit-card-clients',
 'heloc',
 'eye_movements',
 'Higgs',
 'pol',
 'albert',
 'road-safety',
 'MiniBooNE',
 'covertype',
 'jannis',
 'Bioresponse',
 'cpu_act',
 'Ailerons',
 'yprop_4_1',
 'superconduct',
 'Allstate_Claims_Severity',
 'topo_2_1',
 'Mercedes_Benz_Greener_Manufacturing']

In [9]:
# Columns of the compiled results table. The empty experiment_results frame is
# created with these columns, and format_study keeps only those of them that a
# given study actually contains (plus its own "params" column).
sel_columns = [
 'Unnamed: 0',
 'benchmark',
 'data__keyword',
 '_timestamp',
 'model_name',
 'model_type',
 'one_hot_encoder',

 'mean_time',
 'std_time',

 'mean_test_score',

 'max_test_score',
 'min_test_score',
 'std_test_score',

 'test_scores',
 'times',
 ]

In [10]:
# Empty frame with the target columns; the formatted per-dataset studies are
# concatenated onto it further down.
experiment_results = pd.DataFrame(columns=sel_columns)

In [11]:
def get_study(dataset_name):
    """Load the trial table of the tuning study for ``dataset_name``.

    Lookup order:

    1. ``"pol"`` always reads ``study_pol_{POL_CLS_UUID}.csv`` from OUTPUT_PATH.
    2. Otherwise, if exactly one ``study*`` file in OUTPUT_PATH has the dataset
       name in its file name, that CSV is read.
    3. Otherwise (no export, or several candidates -- e.g. cancelled or
       unfinished runs) the Optuna SQLite database in STUDY_PATH is opened and
       its trials are returned via ``trials_dataframe()``.

    Parameters
    ----------
    dataset_name : str
        Dataset name as it appears in the study file names.

    Returns
    -------
    pandas.DataFrame
        One row per trial.

    Raises
    ------
    FileNotFoundError
        If the database fallback is needed but no matching ``.db`` file exists.
    """
    if dataset_name == "pol":
        return pd.read_csv(OUTPUT_PATH/f"study_pol_{POL_CLS_UUID}.csv")

    # Substring match on the file name; only an unambiguous single hit is used.
    outputs = [f for f in OUTPUT_PATH.glob("study*") if dataset_name in f.name]
    if len(outputs) == 1:
        return pd.read_csv(outputs[0])

    # For cancelled and unfinished trials: fall back to the Optuna database.
    # Sorted so the chosen file does not depend on filesystem order.
    candidates = sorted(
        f for f in STUDY_PATH.glob(f"*{dataset_name}*.db") if "full" not in f.name
    )
    if not candidates:
        raise FileNotFoundError(
            f"No study CSV in {OUTPUT_PATH} and no study database in "
            f"{STUDY_PATH} for dataset {dataset_name!r}"
        )
    study_path = candidates[0]
    # The study name is the file name up to its first dot.
    study_name = study_path.name.split(".")[0]
    study = optuna.load_study(study_name=study_name, storage=f"sqlite:///{study_path}")
    return study.trials_dataframe()
    
def format_study(study, dataset, columns=None):
    """Convert a raw trial table into the layout of the compiled results.

    The input frame is left untouched; all work happens on a copy.

    Parameters
    ----------
    study : pandas.DataFrame
        Trial table with at least the columns ``duration``, ``state``,
        ``datetime_start`` and ``value``, plus any number of ``params_*`` columns.
    dataset : Data
        Record for the dataset; its ``type``, ``name`` and ``config`` (keys
        ``data__categorical`` and ``n_iter``) are read.
    columns : sequence of str, optional
        Result columns to keep, in this order. Defaults to ``sel_columns``.

    Returns
    -------
    pandas.DataFrame
        Only trials whose state is ``"COMPLETE"``, restricted to the requested
        columns that exist in the study, followed by a ``params`` column that
        holds each trial's ``params_*`` values as a dict.
    """
    if columns is None:
        columns = sel_columns

    config = dataset.config
    # The benchmark label encodes feature kind and task type.
    prefix = "categorical" if config["data__categorical"] == 1.0 else "numerical"

    formatted = study.copy()
    formatted["benchmark"] = f"{prefix}_{dataset.type}_medium"
    formatted["data__keyword"] = dataset.name

    # Durations may arrive as strings (CSV export); unparseable values become NaT.
    formatted["duration"] = pd.to_timedelta(formatted["duration"], errors="coerce")
    # Trial duration in seconds divided by the dataset's n_iter.
    formatted["mean_time"] = formatted["duration"].dt.total_seconds() / config["n_iter"]

    formatted = formatted.rename(
        columns={"datetime_start": "_timestamp", "value": "mean_test_score"}
    )
    formatted["model_name"] = "GFLU"
    formatted["model_type"] = "PyTorchTabular"

    # Collapse the per-parameter columns into one dict per trial.
    param_cols = [c for c in formatted.columns if c.startswith("params_")]
    formatted["params"] = formatted[param_cols].apply(dict, axis=1)

    formatted = formatted.loc[formatted["state"] == "COMPLETE"]
    # Follow the order of `columns` so the output layout is deterministic.
    kept_cols = [c for c in columns if c in formatted.columns]
    return formatted.loc[:, kept_cols + ["params"]]
            

In [12]:
# Load and format every dataset's study, then combine them with one concat.
# Seeding with an empty sel_columns frame fixes the column order and rebuilds
# the table from scratch, so re-running this cell does not append duplicates.
study_frames = []
for dataset in datasets:
    print(dataset.name)
    study_frames.append(format_study(get_study(dataset.name), dataset))
experiment_results = pd.concat(
    [pd.DataFrame(columns=sel_columns), *study_frames], ignore_index=True
)

default-of-credit-card-clients
heloc
eye_movements
Higgs
pol
albert
road-safety
MiniBooNE
covertype
jannis
Bioresponse
cpu_act
Ailerons
yprop_4_1
superconduct
Allstate_Claims_Severity
topo_2_1
Mercedes_Benz_Greener_Manufacturing


In [13]:
# Inspect the combined per-trial results of all datasets.
experiment_results

Unnamed: 0.1,Unnamed: 0,benchmark,data__keyword,_timestamp,model_name,model_type,one_hot_encoder,mean_time,std_time,mean_test_score,max_test_score,min_test_score,std_test_score,test_scores,times,params
0,,categorical_classification_medium,default-of-credit-card-clients,2023-06-09 13:41:46.516483,GFLU,PyTorchTabular,,14.715429,,0.503480,,,,,,"{'params_gflu_dropout': 0.4753571532049581, 'p..."
1,,categorical_classification_medium,default-of-credit-card-clients,2023-06-09 13:42:30.691539,GFLU,PyTorchTabular,,50.556374,,0.682418,,,,,,"{'params_gflu_dropout': 0.15212112147976886, '..."
2,,categorical_classification_medium,default-of-credit-card-clients,2023-06-09 13:45:02.387713,GFLU,PyTorchTabular,,30.633439,,0.721277,,,,,,"{'params_gflu_dropout': 0.08526206184364576, '..."
3,,categorical_classification_medium,default-of-credit-card-clients,2023-06-09 13:46:34.307562,GFLU,PyTorchTabular,,95.580769,,0.662469,,,,,,"{'params_gflu_dropout': 0.15585553804470548, '..."
4,,categorical_classification_medium,default-of-credit-card-clients,2023-06-09 13:51:21.070666,GFLU,PyTorchTabular,,45.122091,,0.704700,,,,,,"{'params_gflu_dropout': 0.13567451588694796, '..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1626,,categorical_regression_medium,Mercedes_Benz_Greener_Manufacturing,2023-06-06 04:21:03.243799,GFLU,PyTorchTabular,,50.869310,,0.592073,,,,,,"{'params_gflu_dropout': 0.25374588206967824, '..."
1627,,categorical_regression_medium,Mercedes_Benz_Greener_Manufacturing,2023-06-06 04:25:17.620285,GFLU,PyTorchTabular,,36.404743,,0.594517,,,,,,"{'params_gflu_dropout': 0.1803395985027783, 'p..."
1628,,categorical_regression_medium,Mercedes_Benz_Greener_Manufacturing,2023-06-06 04:28:19.667644,GFLU,PyTorchTabular,,88.567702,,0.584889,,,,,,"{'params_gflu_dropout': 0.045087026473713876, ..."
1629,,categorical_regression_medium,Mercedes_Benz_Greener_Manufacturing,2023-06-06 04:35:42.535367,GFLU,PyTorchTabular,,154.126833,,0.593898,,,,,,"{'params_gflu_dropout': 0.10281880884483054, '..."


In [14]:
# Parse the trial start times into datetimes.
experiment_results = experiment_results.assign(
    _timestamp=pd.to_datetime(experiment_results["_timestamp"])
)

In [15]:
# Persist the per-trial results; the path is built from DATA_PATH instead of
# repeating the "data" folder name.
experiment_results.to_parquet(DATA_PATH/"experiment_results.parquet")

## Results Compilation

In [16]:
def calc_best_row(df):
    """Return the row of ``df`` with the highest ``mean_test_score``.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of one group; must have a ``mean_test_score`` column.

    Returns
    -------
    pandas.Series or pandas.DataFrame
        The winning row as a Series with an extra ``"best"`` entry set to True.
        An empty frame is returned unchanged.
    """
    if df.shape[0] == 0:
        return df
    best_idx = df["mean_test_score"].idxmax()
    # Copy before tagging so the assignment never writes into (or warns about)
    # the frame the row was taken from.
    best_row = df.loc[best_idx].copy()
    best_row["best"] = True
    return best_row
# Keep only the best-scoring trial per (benchmark, dataset, model).
experiment_results = (
    experiment_results
    .groupby(by=["benchmark", "data__keyword", "model_name"])
    .apply(calc_best_row)
)

In [17]:
# Narrow the best rows down to the columns that are compared below.
experiment_results = experiment_results[
    ["mean_time", "mean_test_score", "test_scores", "times", "params"]
]

In [18]:
# Inspect the best row per (benchmark, dataset, model).
experiment_results

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mean_time,mean_test_score,test_scores,times,params
benchmark,data__keyword,model_name,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
categorical_classification_medium,albert,GFLU,57.64022,0.6853,,,"{'params_gflu_dropout': 0.014136779525448506, ..."
categorical_classification_medium,covertype,GFLU,411.81373,0.9322,,,"{'params_gflu_dropout': 0.0566273509882361, 'p..."
categorical_classification_medium,default-of-credit-card-clients,GFLU,35.262454,0.728023,,,"{'params_gflu_dropout': 0.021064072533635847, ..."
categorical_classification_medium,eye_movements,GFLU,8.093059,0.678936,,,"{'params_gflu_dropout': 0.0111620233049417, 'p..."
categorical_classification_medium,road-safety,GFLU,86.645416,0.8088,,,"{'params_gflu_dropout': 0.06349670783974011, '..."
categorical_regression_medium,Allstate_Claims_Severity,GFLU,172.616248,0.591058,,,"{'params_gflu_dropout': 0.10009198107534531, '..."
categorical_regression_medium,Mercedes_Benz_Greener_Manufacturing,GFLU,166.83606,0.598374,,,"{'params_gflu_dropout': 0.003283795844666519, ..."
categorical_regression_medium,topo_2_1,GFLU,15.962654,0.048455,,,"{'params_gflu_dropout': 0.0573293514747146, 'p..."
numerical_classification_medium,Bioresponse,GFLU,6.701376,0.882231,,,"{'params_gflu_dropout': 0.0008077371137664, 'p..."
numerical_classification_medium,Higgs,GFLU,65.648203,0.7523,,,"{'params_gflu_dropout': 0.0870580844734934, 'p..."


In [19]:
# Best rows of the reference models; the path is built from DATA_PATH instead
# of repeating the "data" folder name.
benchmark_results = pd.read_parquet(DATA_PATH/"benchmark_best_rows.parquet")

In [20]:
# Stack our best rows with the reference rows and order them by
# benchmark, dataset and model.
experiment_results = (
    pd.concat([experiment_results, benchmark_results])
    .sort_values(by=["benchmark", "data__keyword", "model_name"])
)

In [21]:
# Preview the combined table after merging in the reference rows.
experiment_results.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mean_time,mean_test_score,test_scores,times,params
benchmark,data__keyword,model_name,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
categorical_classification_medium,albert,FT Transformer,94.898633,0.656275,,,
categorical_classification_medium,albert,GFLU,57.64022,0.6853,,,"{'params_gflu_dropout': 0.014136779525448506, ..."
categorical_classification_medium,albert,GradientBoostingTree,3.410833,0.657637,,,
categorical_classification_medium,albert,HistGradientBoostingTree,2.274791,0.657814,,,
categorical_classification_medium,albert,MLP,16.65587,0.653226,,,


In [22]:
# Persist the combined best rows; the path is built from DATA_PATH instead of
# repeating the "data" folder name.
experiment_results.to_parquet(DATA_PATH/"experiment_results_best_rows.parquet")

In [23]:
# One row per (benchmark, dataset), one column per model, cells = test score.
test_scores = experiment_results.reset_index().pivot_table(
    values="mean_test_score",
    index=["benchmark", "data__keyword"],
    columns="model_name",
)
# Highlight the highest score in each row.
test_scores.style.highlight_max(axis=1, color='lightgreen')

Unnamed: 0_level_0,model_name,FT Transformer,GFLU,GradientBoostingTree,HistGradientBoostingTree,MLP,RandomForest,Resnet,SAINT,XGBoost
benchmark,data__keyword,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
categorical_classification_medium,albert,0.656275,0.6853,0.657637,0.657814,0.653226,0.655328,0.652308,0.65518,0.657015
categorical_classification_medium,covertype,0.8593,0.9322,0.85432,0.8498,0.83324,0.85864,0.83962,0.85328,0.86582
categorical_classification_medium,default-of-credit-card-clients,0.719034,0.728023,0.720947,0.71999,0.714132,0.721306,0.714012,0.719034,0.720827
categorical_classification_medium,eye_movements,0.599958,0.678936,0.646654,0.642276,0.605378,0.66208,0.59975,0.605587,0.668543
categorical_classification_medium,road-safety,0.7709,0.8088,0.76308,0.76422,0.7559,0.76126,0.76086,0.76688,0.7689
categorical_regression_medium,Allstate_Claims_Severity,0.520075,0.591058,0.530441,0.527422,0.515831,0.496973,0.514102,0.52602,0.536524
categorical_regression_medium,Mercedes_Benz_Greener_Manufacturing,0.566295,0.598374,0.577604,0.578943,0.559142,0.578128,0.572859,0.564533,0.578725
categorical_regression_medium,topo_2_1,0.053281,0.048455,0.053421,0.07309,0.041507,0.073649,0.050602,0.060504,0.069356
numerical_classification_medium,Bioresponse,0.758172,0.882231,0.785873,,0.767036,0.798615,0.770637,0.768698,0.793075
numerical_classification_medium,Higgs,0.70608,0.7523,0.71044,,0.6897,0.7093,0.69478,0.70822,0.71366


In [24]:
# One row per (benchmark, dataset), one column per model, cells = mean time.
mean_times = experiment_results.reset_index().pivot_table(
    values="mean_time",
    index=["benchmark", "data__keyword"],
    columns="model_name",
)
# Shade the cells with the reversed red-amber-green colour map.
mean_times.style.background_gradient(cmap='RdYlGn_r')

Unnamed: 0_level_0,model_name,FT Transformer,GFLU,GradientBoostingTree,HistGradientBoostingTree,MLP,RandomForest,Resnet,SAINT,XGBoost
benchmark,data__keyword,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
categorical_classification_medium,albert,94.898633,57.64022,3.410833,2.274791,16.65587,4.253269,13.789507,37.515511,25.653255
categorical_classification_medium,covertype,386.784938,411.81373,12.805335,8.320776,68.779024,20.238751,106.134382,221.959233,33.731095
categorical_classification_medium,default-of-credit-card-clients,91.293522,35.262454,2.231475,0.301504,11.068138,3.368987,71.250359,49.074334,21.543752
categorical_classification_medium,eye_movements,15.353444,8.093059,24.199983,2.077334,7.96365,12.325054,4.670044,25.859672,26.155533
categorical_classification_medium,road-safety,39.684858,86.645416,20.735888,3.274366,15.134659,33.492952,23.724616,345.791362,57.066262
categorical_regression_medium,Allstate_Claims_Severity,1008.986509,172.616248,78.04967,24.772782,250.906168,693.146111,51.009762,154.72934,44.353991
categorical_regression_medium,Mercedes_Benz_Greener_Manufacturing,483.600141,166.83606,19.304454,0.385985,96.789895,5.575388,115.415073,580.619575,29.222906
categorical_regression_medium,topo_2_1,1655.656162,15.962654,4.82059,3.113014,8.172072,158.55744,223.990097,1521.665207,55.305754
numerical_classification_medium,Bioresponse,147.998933,6.701376,20.883669,,4.236628,3.458842,27.532449,425.207276,16.934164
numerical_classification_medium,Higgs,46.087328,65.648203,21.281016,,11.306024,26.721154,30.591507,144.051194,25.226039
