In [1]:
import os
import numpy as np
import pandas as pd

In [2]:
# Shared configuration read by the aggregation cells below.
dataset_name = "MoonsDataset"
experiment_output_folder = "../models"
METHOD = "ppcef"
disc_model = "MultinomialLogisticRegression"  # or: MultilayerPerceptron

# (metric key looked up in each metrics CSV, column header used in the report),
# listed in the order the report columns should appear.
columns = dict(
    [
        ("dataset", "dataset"),
        ("method", "method"),
        ("model_returned_smth", "Coverage"),
        ("valid_cf_disc", "validity"),
        ("flow_prob_condition_acc", "Prob. Plaus."),
        ("lof_scores_cfs", "LOF"),
        ("isolation_forest_scores_cfs", "IsoForest"),
        ("flow_log_density_cfs", "Log Dens."),
        ("dissimilarity_proximity_continuous_manhatan", "L1"),
        ("dissimilarity_proximity_continuous_euclidean", "L2"),
        ("time", "Time"),
    ]
)

In [3]:
# First batch: one row per (dataset, method), built from the FIRST row of each
# metrics CSV (no aggregation across rows, hence no spread is reported).
datasets = [
    "MoonsDataset",
    "LawDataset",
    "AuditDataset",
    "HelocDataset",
    "BlobsDataset",
    "DigitsDataset",
    "WineDataset",
]
methods = [
    "cbce",
    "CEGP",
    "CEM",
    "wach",
    "ppcef",
]  # "artelth20" is left out of this batch

# Collect plain dict records and build the frame once at the end; growing a
# DataFrame with pd.concat inside the loop copies all previous rows each time.
records = []
for dataset_name in datasets:
    for method in methods:
        # Only read here: do not create directories on a read path, otherwise a
        # missing result leaves an empty folder behind before read_csv fails.
        metrics_path = os.path.join(
            experiment_output_folder,
            dataset_name,
            method,
            f"metrics_{disc_model}_cv.csv",
        )
        first_row = pd.read_csv(metrics_path).iloc[0]

        record = {
            "dataset": dataset_name.removesuffix("Dataset"),
            "method": method,
        }
        for key, label in columns.items():
            if label in record:
                # Identifier columns are filled above, not read from the CSV.
                continue
            value = first_row.get(key, np.nan)  # missing metric -> "nan"
            if label == "IsoForest":
                spec = ".3f"
            elif label == "LOF" and not value < 10:
                # Large (or missing/NaN) LOF scores use scientific notation.
                spec = ".2e"
            else:
                spec = ".2f"
            record[label] = f"{value:{spec}}"
        records.append(record)

# Column order follows the `columns` mapping.
df_results = pd.DataFrame(records, columns=list(columns.values()))

In [4]:
# Display the first-batch summary table built in the previous cell.
df_results

Unnamed: 0,dataset,method,Coverage,validity,Prob. Plaus.,LOF,IsoForest,Log Dens.,L1,L2,Time
0,Moons,cbce,1.0,1.0,0.0,6130000.0,0.026,-7.29,0.62,0.48,0.08
1,Moons,CEGP,1.0,0.36,0.0,1.06,0.03,-10.35,0.39,0.3,0.0
2,Moons,CEM,1.0,0.79,0.18,1.07,-0.054,-6.92,0.7,0.61,0.0
3,Moons,wach,1.0,1.0,0.11,1.07,0.002,-8.85,0.52,0.37,0.0
4,Moons,ppcef,1.0,1.0,1.0,1.12,0.037,1.69,0.48,0.38,1.37
5,Law,cbce,1.0,1.0,1.0,2920000.0,0.065,1.97,0.59,0.39,0.22
6,Law,CEGP,1.0,1.0,0.48,1.06,0.043,1.01,0.21,0.18,0.0
7,Law,CEM,1.0,0.86,0.28,1.06,-0.017,-0.26,0.45,0.37,0.0
8,Law,wach,1.0,1.0,0.48,1.06,0.003,-0.04,0.48,0.35,0.0
9,Law,ppcef,1.0,1.0,1.0,1.06,0.061,2.06,0.38,0.23,7.56


In [5]:
# Emit the current results table as LaTeX source (row index omitted).
print(df_results.to_latex(index=False))

\begin{tabular}{lllllllllll}
\toprule
dataset & method & Coverage & validity & Prob. Plaus. & LOF & IsoForest & Log Dens. & L1 & L2 & Time \\
\midrule
Moons & cbce & 1.00 & 1.00 & 0.00 & 6.13e+06 & 0.026 & -7.29 & 0.62 & 0.48 & 0.08 \\
Moons & CEGP & 1.00 & 0.36 & 0.00 & 1.06 & 0.030 & -10.35 & 0.39 & 0.30 & 0.00 \\
Moons & CEM & 1.00 & 0.79 & 0.18 & 1.07 & -0.054 & -6.92 & 0.70 & 0.61 & 0.00 \\
Moons & wach & 1.00 & 1.00 & 0.11 & 1.07 & 0.002 & -8.85 & 0.52 & 0.37 & 0.00 \\
Moons & ppcef & 1.00 & 1.00 & 1.00 & 1.12 & 0.037 & 1.69 & 0.48 & 0.38 & 1.37 \\
Law & cbce & 1.00 & 1.00 & 1.00 & 2.92e+06 & 0.065 & 1.97 & 0.59 & 0.39 & 0.22 \\
Law & CEGP & 1.00 & 1.00 & 0.48 & 1.06 & 0.043 & 1.01 & 0.21 & 0.18 & 0.00 \\
Law & CEM & 1.00 & 0.86 & 0.28 & 1.06 & -0.017 & -0.26 & 0.45 & 0.37 & 0.00 \\
Law & wach & 1.00 & 1.00 & 0.48 & 1.06 & 0.003 & -0.04 & 0.48 & 0.35 & 0.00 \\
Law & ppcef & 1.00 & 1.00 & 1.00 & 1.06 & 0.061 & 2.06 & 0.38 & 0.23 & 7.56 \\
Audit & cbce & 1.00 & 0.90 & 0.02 & 4.02e+

In [6]:
# All batches: one row per (dataset, method), each metric reported as
# "mean$\pm$std" computed over all rows of the corresponding metrics CSV.
datasets = [
    "MoonsDataset",
    "LawDataset",
    "AuditDataset",
    "HelocDataset",
    "BlobsDataset",
    "DigitsDataset",
    "WineDataset",
]
methods = ["cbce", "CEGP", "CEM", "wach", "artelth20", "ppcef"]

# Collect plain dict records and build the frame once at the end; growing a
# DataFrame with pd.concat inside the loop copies all previous rows each time.
records = []
for dataset_name in datasets:
    for method in methods:
        # Only read here: do not create directories on a read path, otherwise a
        # missing result leaves an empty folder behind before read_csv fails.
        metrics_path = os.path.join(
            experiment_output_folder,
            dataset_name,
            method,
            f"metrics_{disc_model}_cv.csv",
        )
        fold_metrics = pd.read_csv(metrics_path)

        # numeric_only=True keeps this working if a CSV carries a text column.
        # Values are NOT pre-rounded: the format spec below does the only
        # rounding, which avoids double-rounding artefacts in the last digit.
        means = fold_metrics.mean(numeric_only=True)
        # NOTE: pandas' default sample std (ddof=1) is NaN for a one-row file,
        # so such entries render as "...$\pm$nan".
        stds = fold_metrics.std(numeric_only=True)

        record = {
            "dataset": dataset_name.removesuffix("Dataset"),
            "method": method,
        }
        for key, label in columns.items():
            if label in record:
                # Identifier columns are filled above, not read from the CSV.
                continue
            mean = means.get(key, np.nan)  # missing metric -> "nan"
            std = stds.get(key, np.nan)
            if label == "IsoForest":
                spec = ".3f"
            elif label == "LOF" and not mean < 10:
                # Large (or missing/NaN) LOF scores use scientific notation.
                spec = ".2e"
            else:
                spec = ".2f"
            # "\\pm" spells the LaTeX macro without relying on the invalid
            # "\p" escape sequence (a SyntaxWarning on recent Python versions).
            record[label] = f"{mean:{spec}}$\\pm${std:{spec}}"
        records.append(record)

# Column order follows the `columns` mapping.
df_results = pd.DataFrame(records, columns=list(columns.values()))

In [7]:
# Display the all-batches (mean$\pm$std) summary table built in the previous cell.
df_results

Unnamed: 0,dataset,method,Coverage,validity,Prob. Plaus.,LOF,IsoForest,Log Dens.,L1,L2,Time
0,Moons,cbce,1.00$\pm$0.00,1.00$\pm$0.00,0.10$\pm$0.23,5.82e+06$\pm$3.60e+05,0.028$\pm$0.003,-5.78$\pm$3.69,0.62$\pm$0.07,0.48$\pm$0.05,0.06$\pm$0.01
1,Moons,CEGP,1.00$\pm$nan,0.36$\pm$nan,0.00$\pm$nan,1.06$\pm$nan,0.030$\pm$nan,-10.35$\pm$nan,0.39$\pm$nan,0.30$\pm$nan,0.00$\pm$nan
2,Moons,CEM,1.00$\pm$0.00,0.88$\pm$0.16,0.10$\pm$0.07,1.07$\pm$0.01,-0.085$\pm$0.020,-8.50$\pm$3.53,0.63$\pm$0.08,0.57$\pm$0.06,0.00$\pm$0.00
3,Moons,wach,1.00$\pm$0.00,0.96$\pm$0.07,0.10$\pm$0.03,1.07$\pm$0.00,0.003$\pm$0.005,-5.91$\pm$1.93,0.50$\pm$0.03,0.35$\pm$0.02,0.00$\pm$0.00
4,Moons,artelth20,1.00$\pm$0.00,1.00$\pm$0.00,0.08$\pm$0.05,1.07$\pm$0.00,-0.031$\pm$0.008,-10.96$\pm$6.52,0.34$\pm$0.04,0.34$\pm$0.04,4.07$\pm$0.43
5,Moons,ppcef,1.00$\pm$0.00,1.00$\pm$0.00,1.00$\pm$0.00,1.15$\pm$0.04,0.034$\pm$0.004,1.71$\pm$0.10,0.48$\pm$0.04,0.38$\pm$0.03,1.34$\pm$0.16
6,Law,cbce,1.00$\pm$0.00,1.00$\pm$0.00,0.50$\pm$0.35,4.22e+06$\pm$8.81e+05,0.045$\pm$0.021,1.29$\pm$0.41,0.61$\pm$0.03,0.40$\pm$0.02,0.22$\pm$0.00
7,Law,CEGP,1.00$\pm$nan,1.00$\pm$nan,0.47$\pm$nan,1.06$\pm$nan,0.043$\pm$nan,1.01$\pm$nan,0.21$\pm$nan,0.18$\pm$nan,0.00$\pm$nan
8,Law,CEM,1.00$\pm$nan,0.86$\pm$nan,0.28$\pm$nan,1.06$\pm$nan,-0.017$\pm$nan,-0.26$\pm$nan,0.45$\pm$nan,0.37$\pm$nan,0.00$\pm$nan
9,Law,wach,1.00$\pm$0.00,1.00$\pm$0.00,0.46$\pm$0.05,1.05$\pm$0.00,-0.001$\pm$0.009,-0.17$\pm$0.25,0.47$\pm$0.01,0.35$\pm$0.01,0.00$\pm$0.00


In [8]:
# Emit the current results table as LaTeX source (row index omitted).
print(df_results.to_latex(index=False))

\begin{tabular}{lllllllllll}
\toprule
dataset & method & Coverage & validity & Prob. Plaus. & LOF & IsoForest & Log Dens. & L1 & L2 & Time \\
\midrule
Moons & cbce & 1.00$\pm$0.00 & 1.00$\pm$0.00 & 0.10$\pm$0.23 & 5.82e+06$\pm$3.60e+05 & 0.028$\pm$0.003 & -5.78$\pm$3.69 & 0.62$\pm$0.07 & 0.48$\pm$0.05 & 0.06$\pm$0.01 \\
Moons & CEGP & 1.00$\pm$nan & 0.36$\pm$nan & 0.00$\pm$nan & 1.06$\pm$nan & 0.030$\pm$nan & -10.35$\pm$nan & 0.39$\pm$nan & 0.30$\pm$nan & 0.00$\pm$nan \\
Moons & CEM & 1.00$\pm$0.00 & 0.88$\pm$0.16 & 0.10$\pm$0.07 & 1.07$\pm$0.01 & -0.085$\pm$0.020 & -8.50$\pm$3.53 & 0.63$\pm$0.08 & 0.57$\pm$0.06 & 0.00$\pm$0.00 \\
Moons & wach & 1.00$\pm$0.00 & 0.96$\pm$0.07 & 0.10$\pm$0.03 & 1.07$\pm$0.00 & 0.003$\pm$0.005 & -5.91$\pm$1.93 & 0.50$\pm$0.03 & 0.35$\pm$0.02 & 0.00$\pm$0.00 \\
Moons & artelth20 & 1.00$\pm$0.00 & 1.00$\pm$0.00 & 0.08$\pm$0.05 & 1.07$\pm$0.00 & -0.031$\pm$0.008 & -10.96$\pm$6.52 & 0.34$\pm$0.04 & 0.34$\pm$0.04 & 4.07$\pm$0.43 \\
Moons & ppcef & 1.00$\pm$0.0

In [9]:
# Two column subsets of the results table. Only `add_cols` (the LOF and
# IsoForest scores) is printed in this cell; `cols` is defined but not used here.
cols = [
    "dataset", "method",
    "Coverage", "validity", "Prob. Plaus.", "Log Dens.",
    "L1", "L2", "Time",
]
add_cols = ["dataset", "method", "LOF", "IsoForest"]

# LaTeX source for the LOF / IsoForest table, row index omitted.
print(df_results.loc[:, add_cols].to_latex(index=False))

\begin{tabular}{llll}
\toprule
dataset & method & LOF & IsoForest \\
\midrule
Moons & cbce & 5.82e+06$\pm$3.60e+05 & 0.028$\pm$0.003 \\
Moons & CEGP & 1.06$\pm$nan & 0.030$\pm$nan \\
Moons & CEM & 1.07$\pm$0.01 & -0.085$\pm$0.020 \\
Moons & wach & 1.07$\pm$0.00 & 0.003$\pm$0.005 \\
Moons & artelth20 & 1.07$\pm$0.00 & -0.031$\pm$0.008 \\
Moons & ppcef & 1.15$\pm$0.04 & 0.034$\pm$0.004 \\
Law & cbce & 4.22e+06$\pm$8.81e+05 & 0.045$\pm$0.021 \\
Law & CEGP & 1.06$\pm$nan & 0.043$\pm$nan \\
Law & CEM & 1.06$\pm$nan & -0.017$\pm$nan \\
Law & wach & 1.05$\pm$0.00 & -0.001$\pm$0.009 \\
Law & artelth20 & 1.06$\pm$0.00 & 0.028$\pm$0.004 \\
Law & ppcef & 1.06$\pm$0.00 & 0.073$\pm$0.008 \\
Audit & cbce & 3.38e+07$\pm$3.82e+06 & 0.140$\pm$0.003 \\
Audit & CEGP & 3.26e+04$\pm$nan & 0.015$\pm$nan \\
Audit & CEM & 5.55e+05$\pm$1.15e+06 & -0.058$\pm$0.028 \\
Audit & wach & 1.95e+07$\pm$1.09e+07 & 0.050$\pm$0.013 \\
Audit & artelth20 & 5.00e+05$\pm$1.04e+06 & 0.086$\pm$0.029 \\
Audit & ppcef & 4.26e+06$

In [10]:
# Inspect the raw counterfactuals saved for "wach" on MoonsDataset, file index 0
# (presumably the first CV fold - confirm against the experiment script).
# The path is derived from the shared config so it follows `disc_model` and
# `experiment_output_folder` instead of silently pointing at a stale file.
pd.read_csv(
    os.path.join(
        experiment_output_folder,
        "MoonsDataset",
        "wach",
        f"counterfactuals_{disc_model}_0.csv",
    )
).to_numpy()

array([[0.6037769 , 0.39476472],
       [0.3380288 , 0.45301062],
       [0.27897477, 0.45700952],
       [0.34161198, 0.45468643],
       [0.5842367 , 0.50993437],
       [0.31472054, 0.47327065],
       [0.7285197 , 0.5969248 ],
       [0.69648844, 0.72365004],
       [0.6020125 , 0.46415234],
       [0.25773907, 0.4378627 ],
       [0.31112075, 0.37534606],
       [0.51963717, 0.4753429 ],
       [0.696515  , 0.63260484],
       [0.5422234 , 0.6202796 ],
       [0.30290002, 0.3904327 ],
       [0.7260681 , 0.61969805],
       [0.5596188 , 0.5706929 ],
       [0.5395216 , 0.6228145 ],
       [0.5883356 , 0.589842  ],
       [0.60168695, 0.59603584],
       [0.66232514, 0.4828552 ],
       [0.33911616, 0.49356955],
       [0.498944  , 0.4434896 ],
       [0.67624366, 0.55685   ],
       [0.6804062 , 0.46451193],
       [0.29795712, 0.22716865],
       [0.62685424, 0.45817727],
       [0.6975589 , 0.57642955],
       [0.74126446, 0.87041456],
       [0.45936424, 0.43977904],
       [0.