In [55]:
import os
import numpy as np
import pandas as pd

In [58]:
def check_tags(tags):
    """Map a run's tags to the method label used in the results table.

    The checks run in a fixed order (ARES, GLOBAL_CE, PPCEF) and the first
    match wins; when none of them is present the run is labelled "OUR".

    Args:
        tags: Any object supporting the ``in`` operator (the caller passes
            the evaluated content of the "Tags" CSV column).

    Returns:
        str: The method label for the run.
    """
    if "ARES" in tags:
        return "ARES"
    if "GLOBAL_CE" in tags:
        # Raw string: the label carries a literal backslash before the
        # underscore (it shows up as GLOBAL\_CE in the printed LaTeX table).
        # The original non-raw "\_" was an invalid escape sequence.
        return r"GLOBAL\_CE"
    if "PPCEF" in tags:
        return "PPCEF"
    return "OUR"


# Load the per-run metrics exported for all counterfactual methods.
df = pd.read_csv("../Counterfactuals_plaus_all_methods.csv")
# NOTE(review): eval() executes whatever code is stored in the "Tags" column.
# This is only acceptable while the CSV is a trusted local export; if the
# cells hold plain Python literals, ast.literal_eval is the safe replacement
# -- TODO confirm and switch.
df["Tags"] = df["Tags"].apply(eval).apply(check_tags)

# Raw column name in the CSV -> header used in the report.
# The insertion order below is also the column order of the resulting frame.
column_mapping = {
    "Tags": "Method",
    "parameters/disc_model/model_name": "Model",
    "parameters/dataset": "Dataset",
    "parameters/counterfactuals/origin_class": "Origin Class",
    "metrics/cf/K_vectors": "K",
    "metrics/cf/valid_cf_disc": "Validity",
    "metrics/cf/flow_prob_condition_acc": "Prob. Plaus.",
    "metrics/cf/cf_belongs_to_group": "CFs assigned to group",
    "metrics/cf/flow_log_density_cfs": "Log Dens.",
    "metrics/cf/dissimilarity_proximity_continuous_manhatan": "L1",
    "metrics/cf/dissimilarity_proximity_continuous_euclidean": "L2",
    "metrics/cf/isolation_forest_scores_cfs": "IsoForest",
    "metrics/cf/lof_scores_cfs": "LOF",
}
# Keep only the mapped columns; an explicit list avoids indexing with a
# dict view.
df = df.rename(columns=column_mapping)[list(column_mapping.values())]

# Both logistic-regression variants are reported under the same short name.
df["Model"] = df["Model"].replace(
    {
        "MultinomialLogisticRegression": "LR",
        "LogisticRegression": "LR",
        "MultilayerPerceptron": "MLP",
    }
)
# Strip the "Dataset" suffix by name instead of blindly chopping seven
# characters, so a value without the suffix is left intact.
df["Dataset"] = df["Dataset"].apply(lambda name: name.removesuffix("Dataset"))

# Average every metric per (method, model, dataset). "Origin Class" is not
# reported, so it is dropped before aggregating rather than averaged first.
df = (
    df.drop(columns=["Origin Class"])
    .groupby(["Method", "Model", "Dataset"], as_index=False)
    .mean()
    .round(2)
)

In [59]:
# Print the aggregated table as LaTeX source: no index column, floats rendered with two decimals.
print(df.to_latex(index=False, float_format="%.2f"))

\begin{tabular}{lllrrrrrrrrr}
\toprule
Method & Model & Dataset & K & Validity & Prob. Plaus. & CFs assigned to group & Log Dens. & L1 & L2 & IsoForest & LOF \\
\midrule
ARES & LR & Blobs & 1.00 & 1.00 & 0.46 & 1.00 & 2.58 & 0.75 & 0.54 & 0.01 & 1.09 \\
ARES & LR & Law & 1.00 & 0.95 & 0.37 & 1.00 & 1.26 & 0.70 & 0.41 & 0.01 & 1.09 \\
ARES & LR & Moons & 1.00 & 0.92 & 0.23 & 1.00 & -0.18 & 0.65 & 0.47 & -0.01 & 1.29 \\
ARES & LR & Wine & 1.00 & 1.00 & 0.17 & 1.00 & 4.74 & 2.36 & 0.74 & 0.04 & 1.11 \\
ARES & MLP & Blobs & 1.00 & 1.00 & 0.46 & 1.00 & 2.58 & 0.74 & 0.53 & 0.01 & 1.09 \\
ARES & MLP & Law & 1.00 & 0.69 & 0.56 & 1.00 & 1.64 & 0.43 & 0.27 & 0.03 & 1.08 \\
ARES & MLP & Moons & 1.00 & 0.63 & 0.17 & 1.00 & -0.67 & 0.46 & 0.34 & -0.02 & 1.55 \\
ARES & MLP & Wine & 1.00 & 0.93 & 0.15 & 1.00 & 4.57 & 2.02 & 0.66 & 0.04 & 1.09 \\
GLOBAL\_CE & LR & Blobs & 1.00 & 1.00 & 0.74 & 1.00 & 2.88 & 0.69 & 0.50 & 0.03 & 1.05 \\
GLOBAL\_CE & LR & Law & 1.00 & 1.00 & 0.73 & 1.00 & 1.86 & 0.35 & 

In [None]:
# Root folder that holds one sub-folder of experiment outputs per dataset.
experiment_output_folder = "../models"

# Notebook-level settings.
dataset_name = "MoonsDataset"
METHOD = "ppcef"
# Model names listed by the author: MultilayerPerceptron, MultinomialLogisticRegression, LogisticRegression, NODE
disc_model = "MultinomialLogisticRegression"

# Metric name as stored in the metrics CSV files -> header used in the report.
# Insertion order matters: the aggregation cells select columns in this order.
columns = dict(
    [
        ("dataset", "dataset"),
        ("method", "method"),
        ("model_returned_smth", "Coverage"),
        ("valid_cf_disc", "validity"),
        ("flow_prob_condition_acc", "Prob. Plaus."),
        ("lof_scores_cfs", "LOF"),
        # disabled: ("lof_scores_xs", "LOF_x"),
        ("isolation_forest_scores_cfs", "IsoForest"),
        # disabled: ("isolation_forest_scores_xs", "IsoForest_x"),
        ("flow_log_density_cfs", "Log Dens."),
        ("dissimilarity_proximity_continuous_manhatan", "L1"),
        ("dissimilarity_proximity_continuous_euclidean", "L2"),
        ("time", "Time"),
    ]
)

In [None]:
# First batch
# Builds `df_results`: one row per (dataset, method), every metric taken from
# the first row of the corresponding metrics CSV and pre-formatted as a string.
datasets = [
    "MoonsDataset",
    "LawDataset",
    "AuditDataset",
    "HelocDataset",
    # "BlobsDataset",
    # "DigitsDataset",
    # "WineDataset",
]
methods = [
    # "cbce",
    # "CEGP",
    # "CEM",
    # "wach",
    "ppcef",
    # "artelth20"
]

# Display names that are filled in from the loop variables, not from the CSV.
_id_columns = ("dataset", "method")

# Collect plain dict records and build the frame once at the end; growing a
# DataFrame with pd.concat inside the loop is quadratic and starts from an
# empty frame, which recent pandas versions warn about.
records = []
for dataset_name in datasets:
    for method in methods:
        # Reading results must not create directories as a side effect, so the
        # path is only composed here; a missing file raises FileNotFoundError.
        save_folder = os.path.join(experiment_output_folder, dataset_name, method)
        df_part_results = pd.read_csv(
            os.path.join(save_folder, f"metrics_{disc_model}_cv.csv")
        )
        # Only the first row of the file is reported.
        means = df_part_results.iloc[0]

        record = {
            "dataset": dataset_name.removesuffix("Dataset"),
            "method": method,
        }
        for key, value in columns.items():
            if value in _id_columns:
                continue
            if value == "IsoForest":
                # IsoForest gets an extra decimal.
                record[value] = f"{means.get(key, np.nan):.3f}"
            elif value == "LOF" and not means.get(key, float("inf")) < 10:
                # Large (or missing / NaN) LOF values use scientific notation.
                record[value] = f"{means.get(key, np.nan):.2e}"
            else:
                record[value] = f"{means.get(key, np.nan):.2f}"
        records.append(record)

# Column order follows the `columns` mapping.
df_results = pd.DataFrame(records, columns=list(columns.values()))

In [None]:
# Display the collected results table.
df_results

In [None]:
# Print the results table as LaTeX source without the index column.
print(df_results.to_latex(index=False))

In [None]:
# All batches
# Builds `df_results`: one row per (dataset, method), each metric rendered as
# "mean$\pm$std" over all rows of the corresponding metrics CSV.
datasets = [
    # "MoonsDataset",
    # "LawDataset",
    # "AuditDataset",
    # "HelocDataset",
    "BlobsDataset",
    "DigitsDataset",
    "WineDataset",
]
methods = ["ppcef"]  # "artelth20","cbce", "CEM","CEGP","wach",

# Display names that are filled in from the loop variables, not from the CSV.
_id_columns = ("dataset", "method")


def _mean_pm_std(mean, std, spec):
    """Format ``mean`` and ``std`` with format spec ``spec``, joined by the LaTeX pm macro."""
    # "\\pm" yields a literal backslash; the original "\p" was an invalid
    # escape sequence that newer Python versions warn about.
    return f"{mean:{spec}}$\\pm${std:{spec}}"


# Collect plain dict records and build the frame once at the end; growing a
# DataFrame with pd.concat inside the loop is quadratic and starts from an
# empty frame, which recent pandas versions warn about.
records = []
for dataset_name in datasets:
    for method in methods:
        # Reading results must not create directories as a side effect, so the
        # path is only composed here; a missing file raises FileNotFoundError.
        save_folder = os.path.join(experiment_output_folder, dataset_name, method)
        df_part_results = pd.read_csv(
            os.path.join(save_folder, f"metrics_{disc_model}_CE_cv.csv")
        )
        # numeric_only=True keeps the aggregation working when the CSV also
        # holds non-numeric columns (pandas >= 2 raises on those otherwise).
        means = df_part_results.mean(numeric_only=True).round(3).to_dict()
        stds = df_part_results.std(numeric_only=True).round(3).to_dict()

        record = {
            "dataset": dataset_name.removesuffix("Dataset"),
            "method": method,
        }
        for key, value in columns.items():
            if value in _id_columns:
                continue
            if value == "IsoForest":
                # IsoForest gets an extra decimal.
                spec = ".3f"
            elif value == "LOF" and not means.get(key, float("inf")) < 10:
                # Large (or missing / NaN) LOF values use scientific notation.
                spec = ".2e"
            else:
                spec = ".2f"
            record[value] = _mean_pm_std(
                means.get(key, np.nan), stds.get(key, np.nan), spec
            )
        records.append(record)

# Column order follows the `columns` mapping.
df_results = pd.DataFrame(records, columns=list(columns.values()))

In [None]:
# Display the collected mean/std results table.
df_results

In [None]:
# NOTE(review): this cell repeats the display of df_results from the cell before it.
df_results

In [None]:
# Print the results table as LaTeX source without the index column.
print(df_results.to_latex(index=False))

In [None]:
# Lambda sweep: average the metrics of each run and collect them side by side.
# `df_res` ends up with one row per metric (plus a "lambda" row) and one
# column per lambda value, labelled 0..n-1 in sweep order.
lambda_runs = []
for i in [1, 2, 5, 10, 100, 1000]:
    # A dedicated name is used so the notebook-level `df` (the aggregated
    # comparison table) is not overwritten by a Series.
    # numeric_only=True keeps the mean working if the CSV holds text columns.
    run_means = pd.read_csv(
        f"../models/LawDataset/ppcef/metrics_LogisticRegression_lambda_{i}_cv.csv"
    ).mean(numeric_only=True)
    run_means["lambda"] = i
    lambda_runs.append(run_means)

# A single concat replaces growing the frame inside the loop, and gives every
# run its own column label instead of six columns that are all named 0.
df_res = pd.concat(lambda_runs, axis=1)

In [None]:
# Metric name in the lambda-sweep frame -> header used in the report.
# Insertion order is the column order of the displayed table.
# NOTE(review): this rebinds the notebook-level name `columns`, which the
# batch aggregation cells also read; re-running those cells after this one
# picks up this mapping instead of the one from the configuration cell.
columns = {
    "lambda": "lambda",
    # disabled: "dataset": "dataset",
    # disabled: "method": "method",
    "model_returned_smth": "Coverage",
    "valid_cf_disc": "validity",
    "flow_prob_condition_acc": "Prob. Plaus.",
    "lof_scores_cfs": "LOF",
    # disabled: "lof_scores_xs": "LOF_x",
    "isolation_forest_scores_cfs": "IsoForest",
    # disabled: "isolation_forest_scores_xs": "IsoForest_x",
    "flow_log_density_cfs": "Log Dens.",
    "dissimilarity_proximity_continuous_manhatan": "L1",
    "dissimilarity_proximity_continuous_euclidean": "L2",
    "time": "Time",
}

# One row per run: transpose, apply the report headers, keep only the mapped
# columns in mapping order, and round for display.
(
    df_res.T
    .rename(columns=columns)
    .loc[:, list(columns.values())]
    .round(2)
)