In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from pathlib import Path
import optuna
import joblib
from collections import namedtuple

In [2]:
# Record describing one dataset; the loading loop fills the fields with the
# experiment type, the dataset name, its folder and its loaded config.
Data = namedtuple("Data", "type name path config")

In [3]:
# Root data folder (relative path); each dataset lives in the sub-folder
# DATA_PATH/<dataset name>.
DATA_PATH = Path("data")

In [4]:
# Dataset names grouped by experiment type. Every experiment id has the form
# "<type>|<dataset name>" and is split on "|" when the datasets are loaded.
experiments_by_type = {
    "classification": [
        "default-of-credit-card-clients",
        "heloc",
        "eye_movements",
        "Higgs",
        "pol",
        "albert",
        "road-safety",
        "MiniBooNE",
        "covertype",
        "jannis",
        "Bioresponse",
    ],
    "regression": [
        "cpu_act",
        "Ailerons",
        "yprop_4_1",
        "superconduct",
        "Allstate_Claims_Severity",
        "topo_2_1",
        "Mercedes_Benz_Greener_Manufacturing",
    ],
}

# Flatten the mapping into the ordered list of experiment ids.
experiments = [
    f"{experiment_type}|{dataset_name}"
    for experiment_type, names_of_type in experiments_by_type.items()
    for dataset_name in names_of_type
]

In [5]:
# Build one `Data` record per experiment by locating and loading the config
# file stored inside the dataset's folder.
datasets = []
for experiment in experiments:
    typ, name = experiment.split("|")
    folder = DATA_PATH / name
    # Sort the matches: glob order is filesystem-dependent, so taking the first
    # unsorted hit could select a different file from one run to the next.
    config_files = sorted(folder.glob("*config*"))
    if not config_files:
        # Fail with an actionable message instead of a bare IndexError.
        raise FileNotFoundError(
            f"No '*config*' file found in {folder} for experiment {experiment!r}"
        )
    # NOTE(review): allow_pickle=True unpickles arbitrary Python objects; only
    # load config files that come from a trusted source.
    config = np.load(config_files[0], allow_pickle=True).item()
    datasets.append(Data(typ, name, folder, config))

In [6]:
# Names of the loaded datasets, in loading order; each `Data` record unpacks
# as (type, name, path, config). The bare last line displays the list.
dataset_names = [name_field for _, name_field, _, _ in datasets]
dataset_names

['default-of-credit-card-clients',
 'heloc',
 'eye_movements',
 'Higgs',
 'pol',
 'albert',
 'road-safety',
 'MiniBooNE',
 'covertype',
 'jannis',
 'Bioresponse',
 'cpu_act',
 'Ailerons',
 'yprop_4_1',
 'superconduct',
 'Allstate_Claims_Severity',
 'topo_2_1',
 'Mercedes_Benz_Greener_Manufacturing']

In [7]:
# Load the raw benchmark results. The path is derived from DATA_PATH so the
# data location is configured in a single place.
# NOTE(review): the recorded output of this cell looks like the tail of a
# pandas warning (possibly DtypeWarning for mixed-type columns) -- confirm and
# consider passing explicit `dtype=` for the affected columns.
benchmark = pd.read_csv(DATA_PATH / "benchmark_total.csv")

  benchmark = pd.read_csv('data/benchmark_total.csv')


In [8]:
# Preview the first five rows of the raw table.
benchmark.head(n=5)

Unnamed: 0.1,...1,Unnamed: 0,_runtime,_step,_timestamp,_wandb,data__categorical,data__keyword,data__method_name,data__regression,...,...195,...196,...197,...198,...199,...200,...201,...142,...146,...148
0,10.0,10.0,11.961134,0.0,1672953000.0,{'runtime': 11},0.0,jannis,openml_no_transform,0.0,...,,,,,,,,,,
1,12.0,12.0,20.292929,0.0,1672953000.0,{'runtime': 19},0.0,electricity,openml_no_transform,0.0,...,,,,,,,,,,
2,13.0,13.0,10.445445,0.0,1672953000.0,{'runtime': 9},0.0,heloc,openml_no_transform,0.0,...,,,,,,,,,,
3,17.0,17.0,10.317346,0.0,1672953000.0,{'runtime': 9},0.0,california,openml_no_transform,0.0,...,,,,,,,,,,
4,18.0,18.0,24.260395,0.0,1672953000.0,{'runtime': 23},0.0,jannis,openml_no_transform,0.0,...,,,,,,,,,,


In [9]:
# Selecting only openml_no_transform
benchmark  = benchmark .loc[benchmark.data__method_name == "openml_no_transform"]
benchmark .drop(columns=['data__method_name'], inplace=True)
# Selecting only transformed_target == 0
benchmark  = benchmark .loc[benchmark.transformed_target == 0]
benchmark .drop(columns=['transformed_target'], inplace=True)
# selecting only hp="random"
benchmark  = benchmark .loc[benchmark.hp == "random"]
benchmark .drop(columns=['hp'], inplace=True)

In [10]:
# Columns retained for the analysis, assembled group by group.
sel_columns = (
    # run / model identification
    [
        "Unnamed: 0",
        "benchmark",
        "data__keyword",
        "_timestamp",
        "model_name",
        "model_type",
        "one_hot_encoder",
    ]
    # timing summary
    + ["mean_time", "std_time"]
    # test-score summary
    + ["mean_test_score", "max_test_score", "min_test_score", "std_test_score"]
    # underlying score / time columns
    + ["test_scores", "times"]
)

In [11]:
# Restrict the table to the selected columns (raises KeyError if one is missing).
benchmark = benchmark[sel_columns]

In [12]:
# For every dataset keep only the rows whose keyword equals the dataset name
# and whose benchmark label contains both the dataset's type and the feature
# kind ("categorical" / "numerical") selected by its config flag.
benchmark_l = []
for d in datasets:
    feature_kind = "categorical" if d.config["data__categorical"] else "numerical"
    label = benchmark.benchmark
    keep = (
        (benchmark.data__keyword == d.name)
        & label.str.contains(d.type)
        & label.str.contains(feature_kind)
    )
    benchmark_l.append(benchmark.loc[keep])


In [13]:
# Stack the per-dataset slices back into one table (original row labels kept).
benchmark = pd.concat(objs=benchmark_l, axis=0)

In [14]:
# Sanity check: which (benchmark, dataset) pairs survived the filtering.
(
    benchmark[["benchmark", "data__keyword"]]
    .drop_duplicates()
    .sort_values(by="benchmark")
)

Unnamed: 0,benchmark,data__keyword
96572,categorical_classification_medium,default-of-credit-card-clients
96551,categorical_classification_medium,road-safety
96552,categorical_classification_medium,albert
96554,categorical_classification_medium,covertype
96586,categorical_classification_medium,eye_movements
249133,categorical_regression_medium,topo_2_1
249189,categorical_regression_medium,Allstate_Claims_Severity
249209,categorical_regression_medium,Mercedes_Benz_Greener_Manufacturing
11833,numerical_classification_medium,pol
33,numerical_classification_medium,heloc


In [18]:
# Force the score column to numeric; values that cannot be parsed become NaN.
benchmark = benchmark.assign(
    mean_test_score=pd.to_numeric(benchmark["mean_test_score"], errors="coerce")
)


In [19]:
# Persist the filtered table. The target is derived from DATA_PATH so the data
# location is configured in a single place.
benchmark.to_parquet(DATA_PATH / "benchmark_total.parquet")

## Results Compilation

In [21]:
# NOTE(review): presumably the identifier of a run for the `pol` classification
# dataset -- not referenced in the visible cells; confirm what it is used for.
POL_CLS_UUID = "66dc899c4c4f4e4f822d02bdddb18f80"

In [26]:
def calc_best_row(df):
    """Return the row of ``df`` with the highest ``mean_test_score``.

    Parameters
    ----------
    df : pandas.DataFrame
        Candidate rows; must contain a numeric ``mean_test_score`` column.

    Returns
    -------
    pandas.Series or pandas.DataFrame
        A copy of the winning row with an extra ``best`` entry set to ``True``
        (NaN scores are ignored; ties resolve to the first occurrence).
        If every score is NaN, a copy of the first row with ``best`` set to
        ``False`` is returned so the group is still represented.
        An empty ``df`` is returned unchanged.
    """
    if df.shape[0] == 0:
        return df
    scores = df["mean_test_score"]
    if scores.notna().any():
        # Select by position: a label lookup (``idxmax`` + ``.loc``) returns
        # several rows when the index contains duplicate labels.
        best_row = df.iloc[int(scores.argmax())].copy()
        best_row["best"] = True
    else:
        # No valid score at all: there is no maximum to pick.
        best_row = df.iloc[0].copy()
        best_row["best"] = False
    # ``.copy()`` above keeps the new entry from being written to a slice of ``df``.
    return best_row
# Reduce each (benchmark, dataset, model) group to its best-scoring row.
best_row_keys = ["benchmark", "data__keyword", "model_name"]
benchmark = benchmark.groupby(by=best_row_keys).apply(calc_best_row)

In [28]:
# Keep only the timing and score columns of the best rows.
result_columns = ["mean_time", "mean_test_score", "test_scores", "times"]
benchmark = benchmark[result_columns]

In [29]:
# Display the best-row table, indexed by (benchmark, data__keyword, model_name).
benchmark

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mean_time,mean_test_score,test_scores,times
benchmark,data__keyword,model_name,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
categorical_classification_medium,albert,FT Transformer,94.898633,0.656275,,
categorical_classification_medium,albert,GradientBoostingTree,3.410833,0.657637,,
categorical_classification_medium,albert,HistGradientBoostingTree,2.274791,0.657814,,
categorical_classification_medium,albert,MLP,16.655870,0.653226,,
categorical_classification_medium,albert,RandomForest,4.253269,0.655328,,
...,...,...,...,...,...,...
numerical_regression_medium,yprop_4_1,MLP,9.533991,0.022334,"[0.0271595548838377, 0.028096886351704597, 0.0...","[11.402910470962524, 8.255438089370728, 8.9436..."
numerical_regression_medium,yprop_4_1,RandomForest,4.994533,0.093881,"[0.026174030891650664, 0.027302874929199757, 0...","[6.466503143310547, 4.327132701873779, 4.18996..."
numerical_regression_medium,yprop_4_1,Resnet,45.061062,0.042830,"[0.02674437314271927, 0.028033148497343063, 0....","[47.17852449417114, 36.85503840446472, 51.1496..."
numerical_regression_medium,yprop_4_1,SAINT,180.976444,0.059454,"[0.026659198554938267, 0.0277189787766971, 0.0...","[207.14980340003967, 146.79630780220032, 188.9..."


In [30]:
# Persist the best-row table. The target is derived from DATA_PATH so the data
# location is configured in a single place.
benchmark.to_parquet(DATA_PATH / "benchmark_best_rows.parquet")