In [2]:
import glob
import os

import pandas as pd

In [3]:
# Dataset keys; each one is the leading component of a result file name.
classification_datasets = [
    'diabetes',
    'banana',
    'breast_cancer',
    'breast',
]

# Model keys; each one is the second component of a result file name.
classification_models = [
    'LogisticRegression',
    'LogisticGAM',
    'ExplainableBoostingClassifierNo',
    'ExplainableBoostingClassifier',
    'XGBClassifier',
    'SymbolicClassifier',
]


In [4]:
# Summarise every (dataset, model) result file into one record, then pivot the
# records into a model x dataset table whose cells read "mean (std)".

data_list = []
for dataset in classification_datasets:
    for model in classification_models:
        # Result files are named <dataset>_<model>_<suffix>.csv.
        # glob returns matches in arbitrary order; sort so the printed list
        # and any failure message below are stable between runs.
        files = sorted(glob.glob(f'../results/{dataset}_{model}_*.csv'))
        print(files)
        # Several matches would make the choice of file ambiguous, so stop
        # and report exactly which files collided.
        assert len(files) <= 1, (
            f"expected at most one result file for dataset={dataset!r}, "
            f"model={model!r}; found {len(files)}: {files}"
        )
        if not files:
            # No result for this pair: it is left out of the summary and
            # shows up as a missing cell in the pivoted table.
            continue
        file = files[0]
        data = pd.read_csv(file)
        data_list.append({
            'dataset': dataset,
            'model': model,
            'score_mean': data['score'].mean(),
            'score_std': data['score'].std(),
            'time': data['time'].sum(),
        })

df = pd.DataFrame(data_list, columns=['dataset', 'model', 'score_mean', 'score_std', 'time'])

# Build the "mean (std)" label column-wise instead of with a row-wise apply;
# both numbers are rendered with three decimals.
df['mean_std'] = (
    df['score_mean'].map('{:.3f}'.format)
    + ' ('
    + df['score_std'].map('{:.3f}'.format)
    + ')'
)

# One row per model, one column per dataset.
pivot_df = df.pivot(index='model', columns='dataset', values='mean_std')

print(pivot_df)




['../results\\diabetes_LogisticRegression_20240307-151422.csv']
['../results\\diabetes_LogisticGAM_20240307-163451.csv']
['../results\\diabetes_ExplainableBoostingClassifierNo_20240307-152346.csv']
['../results\\diabetes_ExplainableBoostingClassifier_20240307-151437.csv']
['../results\\diabetes_XGBClassifier_20240307-151424.csv']
['../results\\diabetes_SymbolicClassifier_20231203-003154.csv']
['../results\\banana_LogisticRegression_20240307-152359.csv']
['../results\\banana_LogisticGAM_20240307-185936.csv']
['../results\\banana_ExplainableBoostingClassifierNo_20240307-185945.csv']
['../results\\banana_ExplainableBoostingClassifier_20240307-152412.csv']
['../results\\banana_XGBClassifier_20240307-152400.csv']
['../results\\banana_SymbolicClassifier_20231203-170915.csv']
['../results\\breast_cancer_LogisticRegression_20240307-163847.csv']
['../results\\breast_cancer_LogisticGAM_20240307-163754.csv']
['../results\\breast_cancer_ExplainableBoostingClassifierNo_20240307-163934.csv']
['../re

In [5]:
# Bare expression so the notebook renders the pivoted model x dataset table.
pivot_df

dataset,banana,breast,breast_cancer,diabetes
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ExplainableBoostingClassifier,0.957 (0.001),0.997 (0.001),0.651 (0.007),0.847 (0.003)
ExplainableBoostingClassifierNo,0.800 (0.001),0.995 (0.001),0.645 (0.014),0.857 (0.004)
LogisticGAM,0.804 (0.000),0.992 (0.000),0.650 (0.000),0.862 (0.000)
LogisticRegression,0.555 (0.000),0.997 (0.000),0.595 (0.000),0.850 (0.000)
SymbolicClassifier,0.915 (0.000),0.998 (0.001),0.668 (0.032),0.846 (0.010)
XGBClassifier,0.800 (0.002),0.995 (0.002),0.652 (0.028),0.847 (0.002)


In [6]:
# Turn the pivoted summary into the publication table: select and order the
# dataset columns and model rows, replace the internal keys with short display
# labels, then print the LaTeX and save the same text to disk.

# Dataset keys in display order, and the header shown for each (same order).
columns_in_order = ['banana', 'breast_cancer', 'breast', 'diabetes']
column_names = ['banana', 'cancer', 'breast', 'diabetes']

# Model keys in display order, and the row label shown for each (same order).
models_in_order = ['LogisticRegression', 'LogisticGAM', 'ExplainableBoostingClassifierNo', 'ExplainableBoostingClassifier', 'XGBClassifier', 'SymbolicClassifier']
model_names = ['Linear', 'GAM-S', 'EBM-1', 'EBM-2', 'XGBoost', 'SHARE']

# set_axis relabels positionally and returns a new frame, so pivot_df itself
# is left untouched and this cell can be re-run safely.
ready_df = (
    pivot_df[columns_in_order]
    .set_axis(column_names, axis=1)
    .reindex(models_in_order)
    .set_axis(model_names, axis=0)
)

# Remove the index names so no extra header row is rendered
ready_df.index.name = None
ready_df.columns.name = None

# Render once and reuse the string for both the printout and the file, so the
# two can never differ and to_latex() runs (and warns) only one time.
latex_table = ready_df.to_latex()
print(latex_table)

# Save to tex file; create the output directory first so a fresh checkout
# without tables/ does not fail with FileNotFoundError.
output_path = 'tables/Table_13.tex'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, 'w', encoding='utf-8') as f:
    f.write(latex_table)

\begin{tabular}{lllll}
\toprule
{} &         banana &         cancer &         breast &       diabetes \\
\midrule
Linear  &  0.555 (0.000) &  0.595 (0.000) &  0.997 (0.000) &  0.850 (0.000) \\
GAM-S   &  0.804 (0.000) &  0.650 (0.000) &  0.992 (0.000) &  0.862 (0.000) \\
EBM-1   &  0.800 (0.001) &  0.645 (0.014) &  0.995 (0.001) &  0.857 (0.004) \\
EBM-2   &  0.957 (0.001) &  0.651 (0.007) &  0.997 (0.001) &  0.847 (0.003) \\
XGBoost &  0.800 (0.002) &  0.652 (0.028) &  0.995 (0.002) &  0.847 (0.002) \\
SHARE   &  0.915 (0.000) &  0.668 (0.032) &  0.998 (0.001) &  0.846 (0.010) \\
\bottomrule
\end{tabular}



  print(ready_df.to_latex())
  f.write(ready_df.to_latex())
