In [2]:
import pandas as pd
import glob

classification_datasets = ['diabetes','banana','breast_cancer','breast']
classification_models = ['LogisticRegression','LogisticGAM','ExplainableBoostingClassifierNo','ExplainableBoostingClassifier','XGBClassifier','SymbolicClassifier']


In [7]:
# Iterate over the classification datasets and models, read the data, and add a row to the results dataframe

data_list = []
for dataset in classification_datasets:
    for model in classification_models:
        files = glob.glob(f'../results/{dataset}_{model}_*.csv')
        print(files)
        assert len(files) <= 1
        if len(files) == 0:
            continue
        file = files[0]
        data = pd.read_csv(file)
        data_dict = {'dataset': dataset, 'model': model, 'score_mean': data['score'].mean(), 'score_std': data['score'].std(), 'time': f"{data['time'].mean():.2f}"}
        data_list.append(data_dict)

df = pd.DataFrame(data_list, columns=['dataset','model','score_mean','score_std','time'])

# Now pivot the DataFrame
pivot_df = df.pivot(index='model', columns='dataset', values='time')

print(pivot_df)




['../results\\diabetes_LogisticRegression_20240307-151422.csv']
['../results\\diabetes_LogisticGAM_20240307-163451.csv']
['../results\\diabetes_ExplainableBoostingClassifierNo_20240307-152346.csv']
['../results\\diabetes_ExplainableBoostingClassifier_20240307-151437.csv']
['../results\\diabetes_XGBClassifier_20240307-151424.csv']
['../results\\diabetes_SymbolicClassifier_20231203-003154.csv']
['../results\\banana_LogisticRegression_20240307-152359.csv']
['../results\\banana_LogisticGAM_20240307-185936.csv']
['../results\\banana_ExplainableBoostingClassifierNo_20240307-185945.csv']
['../results\\banana_ExplainableBoostingClassifier_20240307-152412.csv']
['../results\\banana_XGBClassifier_20240307-152400.csv']
['../results\\banana_SymbolicClassifier_20231203-170915.csv']
['../results\\breast_cancer_LogisticRegression_20240307-163847.csv']
['../results\\breast_cancer_LogisticGAM_20240307-163754.csv']
['../results\\breast_cancer_ExplainableBoostingClassifierNo_20240307-163934.csv']
['../re

In [8]:
columns_in_order = ['banana', 'breast_cancer', 'breast', 'diabetes']
column_names = ['banana', 'cancer', 'breast', 'diabetes']

ready_df = pivot_df[columns_in_order]
ready_df.columns = column_names

models_in_order = ['LogisticRegression', 'LogisticGAM', 'ExplainableBoostingClassifierNo', 'ExplainableBoostingClassifier', 'XGBClassifier', 'SymbolicClassifier']
ready_df = ready_df.reindex(models_in_order)
model_names = ['Linear', 'GAM-S', 'EBM-1', 'EBM-2', 'XGBoost', 'SHARE']
ready_df.index = model_names

# Remove the index names
ready_df.index.name = None
ready_df.columns.name = None


# Format to latex
print(ready_df.to_latex())

# Save to tex file
with open('tables/Table_14.tex', 'w') as f:
    f.write(ready_df.to_latex())

\begin{tabular}{lllll}
\toprule
{} &  banana &   cancer &   breast & diabetes \\
\midrule
Linear  &    0.01 &     0.00 &     0.00 &     0.01 \\
GAM-S   &    0.09 &     0.47 &     0.17 &     0.09 \\
EBM-1   &    0.57 &     0.04 &     0.08 &     0.08 \\
EBM-2   &    6.66 &     0.20 &     0.53 &     1.17 \\
XGBoost &    0.09 &     0.01 &     0.02 &     0.05 \\
SHARE   &  874.04 &  2321.60 &  7535.68 &  6133.11 \\
\bottomrule
\end{tabular}



  print(ready_df.to_latex())
  f.write(ready_df.to_latex())


In [9]:
all_time = 0
data_list = []
for dataset in classification_datasets:
    for model in classification_models:
        files = glob.glob(f'../results/{dataset}_{model}_*.csv')
        print(files)
        assert len(files) <= 1
        if len(files) == 0:
            continue
        file = files[0]
        data = pd.read_csv(file)
        all_time = data['time'].sum()

# Read the time from the files
file_names = ["../results/Figure_4_time.txt", "../results/Figure_5_time.txt", "../results/Figure_6_time.txt", "../results/Figure_7_time.txt","../results/Table_1_time.txt","../results/Table_3_time.txt"]
for file_name in file_names:
    with open(file_name, 'r') as file:
        time = float(file.read())
        all_time += time


['../results\\diabetes_LogisticRegression_20240307-151422.csv']
['../results\\diabetes_LogisticGAM_20240307-163451.csv']
['../results\\diabetes_ExplainableBoostingClassifierNo_20240307-152346.csv']
['../results\\diabetes_ExplainableBoostingClassifier_20240307-151437.csv']
['../results\\diabetes_XGBClassifier_20240307-151424.csv']
['../results\\diabetes_SymbolicClassifier_20231203-003154.csv']
['../results\\banana_LogisticRegression_20240307-152359.csv']
['../results\\banana_LogisticGAM_20240307-185936.csv']
['../results\\banana_ExplainableBoostingClassifierNo_20240307-185945.csv']
['../results\\banana_ExplainableBoostingClassifier_20240307-152412.csv']
['../results\\banana_XGBClassifier_20240307-152400.csv']
['../results\\banana_SymbolicClassifier_20231203-170915.csv']
['../results\\breast_cancer_LogisticRegression_20240307-163847.csv']
['../results\\breast_cancer_LogisticGAM_20240307-163754.csv']
['../results\\breast_cancer_ExplainableBoostingClassifierNo_20240307-163934.csv']
['../re

In [11]:
print(f"Total time: {all_time:.2f}s")
print(f"Total time: {all_time/3600:.2f}h")

Total time: 52918.82s
Total time: 14.70h
