In [4]:
import os
import numpy as np
import pandas as pd

In [11]:
# --- Experiment selection -------------------------------------------------
# Root folder holding one sub-folder per dataset, each with one sub-folder
# per counterfactual method.
experiment_output_folder = "../models"
dataset_name = "MoonsDataset"
METHOD = "ppcef"
# Discriminative model whose metrics file is read. Alternatives:
# MultilayerPerceptron, MultinomialLogisticRegression, LogisticRegression, NODE
disc_model = "MultilayerPerceptron"

# --- Metric column -> table header ----------------------------------------
# Insertion order is the column order of the final table.
# The "_xs" variants of the LOF / isolation-forest scores
# ("lof_scores_xs" -> "LOF_x", "isolation_forest_scores_xs" -> "IsoForest_x")
# are intentionally left out.
columns = dict(
    dataset="dataset",
    method="method",
    model_returned_smth="Coverage",
    valid_cf_disc="validity",
    flow_prob_condition_acc="Prob. Plaus.",
    lof_scores_cfs="LOF",
    isolation_forest_scores_cfs="IsoForest",
    flow_log_density_cfs="Log Dens.",
    dissimilarity_proximity_continuous_manhatan="L1",
    dissimilarity_proximity_continuous_euclidean="L2",
    time="Time",
)

In [12]:
# First batch
datasets = [
    "MoonsDataset",
    "LawDataset",
    "AuditDataset",
    "HelocDataset",
    "BlobsDataset",
    "DigitsDataset",
    "WineDataset",
]
methods = [
    "cbce",
    "CEGP",
    "CEM",
    "wach",
    "ppcef",
    # "artelth20"
]


def _format_first_row_metric(display_name, value):
    """Format a single metric value as text for the results table.

    Args:
        display_name: Table header of the metric (a value of ``columns``).
        value: Numeric metric value; ``np.nan`` when the metric is missing.

    Returns:
        ``value`` with 3 decimals for "IsoForest", with 2 decimals otherwise,
        except that "LOF" values that are not below 10 use scientific notation.
    """
    if display_name == "IsoForest":
        return f"{value:.3f}"
    # `not value < 10` (instead of `value >= 10`) keeps NaN on the scientific
    # branch, exactly like the `< 10 ... else` test it replaces.
    if display_name == "LOF" and not value < 10:
        return f"{value:.2e}"
    return f"{value:.2f}"


def collect_first_row_results(dataset_names, method_names):
    """Build the results table from the first row of every metrics CSV.

    Reads ``<experiment_output_folder>/<dataset>/<method>/metrics_<disc_model>_cv.csv``
    for every dataset/method pair and keeps only its first row
    (presumably the first cross-validation fold -- TODO confirm).

    The work lives in a function so that loop variables do not overwrite
    notebook-level names such as ``dataset_name``. Nothing is created on disk:
    a missing metrics file raises ``FileNotFoundError`` from ``pd.read_csv``
    without leaving empty result folders behind.

    Args:
        dataset_names: Dataset folder names, e.g. "MoonsDataset".
        method_names: Method folder names, e.g. "ppcef".

    Returns:
        A DataFrame of strings with one row per dataset/method pair and the
        headers of ``columns`` (in that order) as columns.
    """
    header = list(columns.values())
    records = []
    for dataset in dataset_names:
        for method_name in method_names:
            metrics_path = os.path.join(
                experiment_output_folder,
                dataset,
                method_name,
                f"metrics_{disc_model}_cv.csv",
            )
            first_row = pd.read_csv(metrics_path).iloc[0]
            record = {
                display_name: _format_first_row_metric(
                    display_name, first_row.get(raw_name, np.nan)
                )
                for raw_name, display_name in columns.items()
                # Identifier columns are filled in below, not read from the CSV.
                if display_name not in ("dataset", "method")
            }
            record["dataset"] = dataset.removesuffix("Dataset")
            record["method"] = method_name
            records.append(record)
    # Build the frame once instead of concatenating inside the loop.
    return pd.DataFrame(records, columns=header)


df_results = collect_first_row_results(datasets, methods)

In [13]:
# Display the assembled results table (one row per dataset/method pair).
df_results

Unnamed: 0,dataset,method,Coverage,validity,Prob. Plaus.,LOF,IsoForest,Log Dens.,L1,L2,Time
0,Moons,cbce,1.0,1.0,0.5,1.03,0.012,1.2,0.59,0.46,0.08
1,Moons,CEGP,1.0,0.52,0.02,1.5,-0.008,-3.42,0.28,0.24,1711.53
2,Moons,CEM,0.96,0.5,0.0,2.54,-0.092,-21.63,0.54,0.53,729.37
3,Moons,wach,0.99,0.66,0.01,1.41,0.002,-2.04,0.22,0.18,1584.36
4,Moons,ppcef,1.0,1.0,1.0,0.98,0.04,1.57,0.44,0.35,22.74
5,Law,cbce,1.0,0.85,0.97,1.03,0.066,1.96,0.62,0.41,0.27
6,Law,CEGP,0.93,0.26,0.57,1.06,0.043,1.34,0.23,0.16,3025.87
7,Law,CEM,1.0,0.61,0.27,1.26,-0.016,-0.14,0.29,0.28,1507.87
8,Law,wach,1.0,0.75,0.42,1.15,0.012,0.55,0.37,0.28,2523.31
9,Law,ppcef,1.0,0.94,1.0,1.03,0.072,2.01,0.39,0.24,21.09


In [14]:
# Emit the table as LaTeX source; index=False drops the row-number column.
print(df_results.to_latex(index=False))

\begin{tabular}{lllllllllll}
\toprule
dataset & method & Coverage & validity & Prob. Plaus. & LOF & IsoForest & Log Dens. & L1 & L2 & Time \\
\midrule
Moons & cbce & 1.00 & 1.00 & 0.50 & 1.03 & 0.012 & 1.20 & 0.59 & 0.46 & 0.08 \\
Moons & CEGP & 1.00 & 0.52 & 0.02 & 1.50 & -0.008 & -3.42 & 0.28 & 0.24 & 1711.53 \\
Moons & CEM & 0.96 & 0.50 & 0.00 & 2.54 & -0.092 & -21.63 & 0.54 & 0.53 & 729.37 \\
Moons & wach & 0.99 & 0.66 & 0.01 & 1.41 & 0.002 & -2.04 & 0.22 & 0.18 & 1584.36 \\
Moons & ppcef & 1.00 & 1.00 & 1.00 & 0.98 & 0.040 & 1.57 & 0.44 & 0.35 & 22.74 \\
Law & cbce & 1.00 & 0.85 & 0.97 & 1.03 & 0.066 & 1.96 & 0.62 & 0.41 & 0.27 \\
Law & CEGP & 0.93 & 0.26 & 0.57 & 1.06 & 0.043 & 1.34 & 0.23 & 0.16 & 3025.87 \\
Law & CEM & 1.00 & 0.61 & 0.27 & 1.26 & -0.016 & -0.14 & 0.29 & 0.28 & 1507.87 \\
Law & wach & 1.00 & 0.75 & 0.42 & 1.15 & 0.012 & 0.55 & 0.37 & 0.28 & 2523.31 \\
Law & ppcef & 1.00 & 0.94 & 1.00 & 1.03 & 0.072 & 2.01 & 0.39 & 0.24 & 21.09 \\
Audit & cbce & 1.00 & 0.90 & 0.4

In [18]:
# All batches
datasets = [
    "MoonsDataset",
    "LawDataset",
    "AuditDataset",
    "HelocDataset",
    "BlobsDataset",
    "DigitsDataset",
    "WineDataset",
]
methods = ["cbce", "CEM", "CEGP", "wach", "ppcef"]  # "artelth20"

# LaTeX plus/minus separator. Kept as a raw string: "\p" is not a valid escape
# sequence, so the non-raw spelling triggers a warning on recent Pythons.
PLUS_MINUS_TEX = r"$\pm$"


def _format_mean_std(display_name, mean, std):
    """Format ``mean`` and ``std`` as ``"<mean>$\\pm$<std>"`` for the table.

    Args:
        display_name: Table header of the metric (a value of ``columns``).
        mean: Mean of the metric; ``np.nan`` when the metric is missing.
        std: Standard deviation of the metric; ``np.nan`` when missing.

    Returns:
        Both numbers rendered with the same format: 3 decimals for
        "IsoForest", 2 decimals otherwise, except that "LOF" uses scientific
        notation when its mean is not below 10.
    """
    if display_name == "IsoForest":
        spec = ".3f"
    # `not mean < 10` keeps NaN on the scientific branch, like the original
    # `< 10 ... else` test.
    elif display_name == "LOF" and not mean < 10:
        spec = ".2e"
    else:
        spec = ".2f"
    return f"{mean:{spec}}{PLUS_MINUS_TEX}{std:{spec}}"


def collect_cv_summary(dataset_names, method_names):
    """Build the mean/std results table over all rows of every metrics CSV.

    Reads ``<experiment_output_folder>/<dataset>/<method>/metrics_<disc_model>_cv.csv``
    for every dataset/method pair and summarises each metric column by its
    mean and standard deviation.

    The work lives in a function so that loop variables do not overwrite
    notebook-level names such as ``dataset_name``. Nothing is created on disk:
    a missing metrics file raises ``FileNotFoundError`` from ``pd.read_csv``
    without leaving empty result folders behind.

    Args:
        dataset_names: Dataset folder names, e.g. "MoonsDataset".
        method_names: Method folder names, e.g. "ppcef".

    Returns:
        A DataFrame of strings with one row per dataset/method pair and the
        headers of ``columns`` (in that order) as columns.
    """
    header = list(columns.values())
    records = []
    for dataset in dataset_names:
        for method_name in method_names:
            metrics_path = os.path.join(
                experiment_output_folder,
                dataset,
                method_name,
                f"metrics_{disc_model}_cv.csv",
            )
            metrics = pd.read_csv(metrics_path)
            # numeric_only=True: a stray text column must not break aggregation.
            # NOTE(review): the 3-decimal pre-rounding is kept so the table
            # reproduces earlier numbers; it double-rounds 2-decimal columns.
            means = metrics.mean(numeric_only=True).round(3).to_dict()
            stds = metrics.std(numeric_only=True).round(3).to_dict()
            record = {
                display_name: _format_mean_std(
                    display_name,
                    means.get(raw_name, np.nan),
                    stds.get(raw_name, np.nan),
                )
                for raw_name, display_name in columns.items()
                # Identifier columns are filled in below, not read from the CSV.
                if display_name not in ("dataset", "method")
            }
            record["dataset"] = dataset.removesuffix("Dataset")
            record["method"] = method_name
            records.append(record)
    # Build the frame once instead of concatenating inside the loop.
    return pd.DataFrame(records, columns=header)


df_results = collect_cv_summary(datasets, methods)

In [19]:
# Display the assembled mean/std results table (one row per dataset/method pair).
df_results

Unnamed: 0,dataset,method,Coverage,validity,Prob. Plaus.,LOF,IsoForest,Log Dens.,L1,L2,Time
0,Moons,cbce,1.00$\pm$0.00,0.84$\pm$0.24,0.58$\pm$0.16,1.03$\pm$0.03,0.019$\pm$0.006,1.23$\pm$0.27,0.71$\pm$0.16,0.53$\pm$0.11,0.08$\pm$0.00
1,Moons,CEM,0.99$\pm$0.02,0.45$\pm$0.15,0.03$\pm$0.04,2.29$\pm$0.16,-0.075$\pm$0.010,-11.92$\pm$6.48,0.49$\pm$0.04,0.48$\pm$0.04,784.34$\pm$33.83
2,Moons,CEGP,0.97$\pm$0.03,0.33$\pm$0.13,0.06$\pm$0.07,1.39$\pm$0.10,-0.003$\pm$0.005,-3.67$\pm$1.16,0.29$\pm$0.07,0.23$\pm$0.05,1562.34$\pm$119.65
3,Moons,wach,1.00$\pm$0.01,0.56$\pm$0.06,0.04$\pm$0.05,1.52$\pm$0.12,-0.009$\pm$0.009,-3.71$\pm$2.54,0.28$\pm$0.09,0.24$\pm$0.08,1452.01$\pm$99.08
4,Moons,ppcef,1.00$\pm$0.00,0.98$\pm$0.01,1.00$\pm$0.00,0.99$\pm$0.02,0.034$\pm$0.005,1.62$\pm$0.04,0.44$\pm$0.05,0.34$\pm$0.04,20.44$\pm$1.75
5,Law,cbce,1.00$\pm$0.00,0.79$\pm$0.11,0.36$\pm$0.40,1.05$\pm$0.02,0.026$\pm$0.031,1.18$\pm$0.45,0.67$\pm$0.08,0.44$\pm$0.05,0.28$\pm$0.02
6,Law,CEM,1.00$\pm$0.00,0.61$\pm$0.04,0.27$\pm$0.01,1.26$\pm$0.01,-0.017$\pm$0.004,-0.22$\pm$0.11,0.29$\pm$0.01,0.28$\pm$0.01,1413.03$\pm$97.23
7,Law,CEGP,0.93$\pm$0.02,0.28$\pm$0.01,0.53$\pm$0.05,1.07$\pm$0.00,0.041$\pm$0.003,1.23$\pm$0.16,0.24$\pm$0.02,0.17$\pm$0.01,2986.44$\pm$60.71
8,Law,wach,1.00$\pm$0.00,0.74$\pm$0.04,0.42$\pm$0.03,1.14$\pm$0.01,0.014$\pm$0.003,0.58$\pm$0.09,0.38$\pm$0.01,0.29$\pm$0.01,2459.88$\pm$78.30
9,Law,ppcef,1.00$\pm$0.00,0.95$\pm$0.01,1.00$\pm$0.00,1.03$\pm$0.00,0.071$\pm$0.003,2.04$\pm$0.02,0.40$\pm$0.01,0.24$\pm$0.01,20.63$\pm$1.08


In [20]:
# Emit the table as LaTeX source; index=False drops the row-number column.
# escape=False is passed explicitly because the cells already contain LaTeX
# markup ("$\pm$"): pandas versions whose default is to escape special
# characters would otherwise rewrite the "$" and "\" and break the table.
print(df_results.to_latex(index=False, escape=False))

\begin{tabular}{lllllllllll}
\toprule
dataset & method & Coverage & validity & Prob. Plaus. & LOF & IsoForest & Log Dens. & L1 & L2 & Time \\
\midrule
Moons & cbce & 1.00$\pm$0.00 & 0.84$\pm$0.24 & 0.58$\pm$0.16 & 1.03$\pm$0.03 & 0.019$\pm$0.006 & 1.23$\pm$0.27 & 0.71$\pm$0.16 & 0.53$\pm$0.11 & 0.08$\pm$0.00 \\
Moons & CEM & 0.99$\pm$0.02 & 0.45$\pm$0.15 & 0.03$\pm$0.04 & 2.29$\pm$0.16 & -0.075$\pm$0.010 & -11.92$\pm$6.48 & 0.49$\pm$0.04 & 0.48$\pm$0.04 & 784.34$\pm$33.83 \\
Moons & CEGP & 0.97$\pm$0.03 & 0.33$\pm$0.13 & 0.06$\pm$0.07 & 1.39$\pm$0.10 & -0.003$\pm$0.005 & -3.67$\pm$1.16 & 0.29$\pm$0.07 & 0.23$\pm$0.05 & 1562.34$\pm$119.65 \\
Moons & wach & 1.00$\pm$0.01 & 0.56$\pm$0.06 & 0.04$\pm$0.05 & 1.52$\pm$0.12 & -0.009$\pm$0.009 & -3.71$\pm$2.54 & 0.28$\pm$0.09 & 0.24$\pm$0.08 & 1452.01$\pm$99.08 \\
Moons & ppcef & 1.00$\pm$0.00 & 0.98$\pm$0.01 & 1.00$\pm$0.00 & 0.99$\pm$0.02 & 0.034$\pm$0.005 & 1.62$\pm$0.04 & 0.44$\pm$0.05 & 0.34$\pm$0.04 & 20.44$\pm$1.75 \\
Law & cbce & 1.00$\