In [1]:
import os
import pandas as pd

In [2]:
# Load every per-method results CSV from the results directory and stack them
# into one long DataFrame, tagging each row with the method it came from.
results_dir = "../results/"
frames = []
# os.listdir returns entries in arbitrary order; sorting keeps the row order of
# the concatenated frame reproducible across runs and machines.
for file_name in sorted(os.listdir(results_dir)):
    if not file_name.endswith(".csv"):
        continue
    df = pd.read_csv(os.path.join(results_dir, file_name))
    # The method label is the part of the file name before the first ".".
    df["method"] = file_name.split(".")[0]
    frames.append(df)

if not frames:
    # pd.concat([]) would raise a ValueError with an opaque message; say what is wrong.
    raise ValueError(f"No .csv result files found in {results_dir!r}")

df_all = pd.concat(frames)

In [3]:
# Maps raw result-CSV column names to the short names used in the report table.
# Values are raw strings because they carry LaTeX escapes (``\_``); in a normal
# string literal those are invalid escape sequences and trigger warnings on
# recent Python versions. The resulting string values are unchanged, except for
# the jaccard entry, whose escape was misplaced (``cat_\jacc``) and is now
# consistent with its siblings (``cat\_jacc``).
# NOTE(review): the 'manhatan' key is presumably the spelling used in the
# results files -- confirm against the CSV headers before "fixing" it.
col_map = {
    'parameters/disc_model/_target_': "clf",
    'parameters/dataset/_target_': "dataset",
    'method': 'method',
    'metrics/cf/model_returned_smth': 'coverage',
    'metrics/cf/valid_cf_disc': 'validity',
    'metrics/cf/dissimilarity_proximity_continuous_euclidean': r'proximity_{cont\_eucl}',
    'metrics/cf/dissimilarity_proximity_continuous_mad': r'proximity_{cont\_mad}',
    'metrics/cf/dissimilarity_proximity_continuous_manhatan': r'proximity_{cont\_manh}',
    'metrics/cf/dissimilarity_proximity_categorical_hamming': r'proximity_{cat\_hamm}',
    'metrics/cf/dissimilarity_proximity_categorical_jaccard': r'proximity_{cat\_jacc}',
    'metrics/cf/kde_log_density': r'kde\_log\_dens',
    'metrics/cf/plausibility': 'plausibility',
    'metrics/cf/sparsity': 'sparsity',
    'metrics/cf/distance_l2_jaccard': r'distance_{l2\_jaccard}',
    'metrics/cf/distance_mad_hamming': r'distance_{mad\_hamming}',
}

# Abbreviations for classifier class names; names not listed here are kept as-is
# by the lookup that consumes this mapping.
clf_map = dict(
    LogisticRegression="LR",
    MLPClassifier="MLP",
)

In [4]:
# Reduce the raw results to the report columns and normalise the label columns.
# Written as one non-mutating chain: the previous version selected a column
# subset and then modified that slice in place, which is the pattern that
# triggers pandas' SettingWithCopyWarning, and it also renamed the loaded frame
# in place.
df_all = (
    df_all.rename(columns=col_map)
    # Keep only the mapped columns, in the order col_map declares them.
    .loc[:, list(col_map.values())]
    # Rows without a classifier or dataset cannot be attributed to an experiment.
    .dropna(subset=["clf", "dataset"])
    .assign(
        # Dotted target path -> last component -> short label when one is defined.
        clf=lambda frame: frame["clf"]
        .map(lambda path: path.split(".")[-1])
        .map(lambda name: clf_map.get(name, name)),
        # Dotted target path -> last component without the "Dataset" suffix.
        dataset=lambda frame: frame["dataset"].map(
            lambda path: path.split(".")[-1].removesuffix("Dataset")
        ),
    )
)

In [5]:
def pick_best_cf(df):
    """Return the row of ``df`` with the highest ``validity``.

    Rows are ordered by ``validity`` in descending order; missing values are
    placed last by ``sort_values``, so a NaN row is only returned when every
    row is NaN. A stable sort is used so that, when several rows tie on the top
    validity, the one that appears first in ``df`` is chosen deterministically.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``validity`` column and at least one row.

    Returns
    -------
    pandas.Series
        The selected row.

    Raises
    ------
    IndexError
        If ``df`` has no rows.
    """
    ranked = df.sort_values("validity", ascending=False, kind="mergesort")
    return ranked.iloc[0]
    
# Keep one row per (dataset, clf, method): the run with the highest validity,
# i.e. what pick_best_cf selects for each group. Done with sort + drop_duplicates
# instead of groupby(...).apply(...), because applying a function to groups that
# still contain their grouping columns is deprecated in recent pandas, and the
# row-wise apply upcasts every row to object dtype.
group_keys = ["dataset", "clf", "method"]
df_all = (
    # groupby drops rows whose keys are missing; mirror that explicitly.
    df_all.dropna(subset=group_keys)
    # Stable descending sort: NaN validity goes last, ties keep their original order.
    .sort_values("validity", ascending=False, kind="mergesort")
    .drop_duplicates(subset=group_keys, keep="first")
    # Same row order and fresh 0..n-1 index that the grouped result had.
    .sort_values(group_keys)
    .reset_index(drop=True)
)

In [6]:
# Round to three decimals, then wrap the frame in a Styler that also renders
# numbers with three-digit precision. After this cell df_all is a Styler, not a
# DataFrame, so the cell cannot simply be re-run on its own output.
df_all = df_all.round(3).style.format(precision=3)

In [7]:
# Export the styled table as LaTeX source; print() is used on purpose so the
# output is plain text that can be copied into a document.
latex_table = df_all.to_latex()
print(latex_table)

\begin{tabular}{llllrrrrrrrrrrrr}
 & clf & dataset & method & coverage & validity & proximity_{cont\_eucl} & proximity_{cont\_mad} & proximity_{cont\_manh} & proximity_{cat\_hamm} & proximity_{cat_\jacc} & kde\_log\_dens & plausibility & sparsity & distance_{l2\_jaccard} & distance_{mad\_hamming} \\
0 & LR & Adult & artelth & nan & nan & nan & nan & nan & nan & nan & nan & nan & nan & nan & nan \\
1 & LR & Adult & cbce & 1.000 & 1.000 & 0.240 & 3.269 & 0.288 & 0.193 & 0.569 & 33.362 & 0.406 & 0.242 & nan & nan \\
2 & LR & Adult & gs23 & nan & 0.650 & 0.203 & 3.200 & 0.257 & 0.882 & 0.882 & -17.397 & 1.058 & 0.882 & 0.832 & 1.054 \\
3 & MLP & Adult & cegp & 1.000 & 1.000 & 0.228 & 3.835 & 0.284 & 0.135 & 0.351 & 20.221 & 0.449 & 0.190 & nan & nan \\
4 & MLP & Adult & cem & 1.000 & 1.000 & 0.334 & 5.612 & 0.396 & 0.079 & 0.211 & 15.678 & 0.540 & 0.124 & nan & nan \\
5 & MLP & Adult & gs23 & 1.000 & 0.739 & 0.235 & 3.572 & 0.295 & 1.000 & 1.000 & -15.705 & 1.186 & 1.000 & 0.943 & 1.191 \\

In [8]:
# Show the styled results table as this cell's rich output.
df_all

Unnamed: 0,clf,dataset,method,coverage,validity,proximity_{cont\_eucl},proximity_{cont\_mad},proximity_{cont\_manh},proximity_{cat\_hamm},proximity_{cat_\jacc},kde\_log\_dens,plausibility,sparsity,distance_{l2\_jaccard},distance_{mad\_hamming}
0,LR,Adult,artelth,,,,,,,,,,,,
1,LR,Adult,cbce,1.0,1.0,0.24,3.269,0.288,0.193,0.569,33.362,0.406,0.242,,
2,LR,Adult,gs23,,0.65,0.203,3.2,0.257,0.882,0.882,-17.397,1.058,0.882,0.832,1.054
3,MLP,Adult,cegp,1.0,1.0,0.228,3.835,0.284,0.135,0.351,20.221,0.449,0.19,,
4,MLP,Adult,cem,1.0,1.0,0.334,5.612,0.396,0.079,0.211,15.678,0.54,0.124,,
5,MLP,Adult,gs23,1.0,0.739,0.235,3.572,0.295,1.0,1.0,-15.705,1.186,1.0,0.943,1.191
6,LR,Compas,artelth,0.461,1.0,0.219,4.164,0.219,1.0,1.0,12.115,3.866,1.0,,
7,LR,Compas,cbce,1.0,1.0,1.231,29.247,2.001,0.29,0.596,17.039,13.534,0.451,,
8,LR,Compas,gs23,1.0,0.843,0.706,74.103,1.422,1.0,1.0,-0.162,34.885,1.0,0.863,35.115
9,MLP,Compas,cegp,1.0,1.0,0.15,43.907,0.232,0.024,0.053,13.037,20.708,0.207,,
