In [49]:
import warnings

warnings.simplefilter(action="ignore", category=FutureWarning)
import pandas as pd  # noqa: E402

In [50]:
def get_results(dataset, disc_model, method, columns):
    print(dataset, disc_model, method)

    df = pd.DataFrame(columns=columns)

    for fold_n in range(5):
        try:
            metrics_path = f"../models/{dataset}/{method}/fold_{fold_n}/cf_metrics_{disc_model}.csv"
            df_ = pd.read_csv(metrics_path)
            df = pd.concat([df, df_], axis=0)
        except Exception as e:
            pass
            # print(f"File not found: {metrics_path}")
            # df_ = pd.DataFrame(columns=columns)
            # df = pd.concat([df, df_], axis=0)
    # print(df.shape)
    df["dataset"] = dataset
    df["disc_model"] = disc_model
    df["method"] = method

    return df

In [None]:
# Columns every collected result frame is seeded with.
columns = [
    "dataset",
    "disc_model",
    "method",
    "K_vectors",
    "validity",
    "prob_plausibility",
    "cf_belongs_to_group",
    "log_density_cf",
    "proximity_continuous_manhattan",
    "proximity_continuous_euclidean",
    "isolation_forest_scores_cf",
    "lof_scores_cf",
    "time",
]
datasets = [
    "BlobsDataset",
    "LawDataset",
    "MoonsDataset",
    "WineDataset",
    "HelocDataset",
    "DigitsDataset",
]
disc_models = ["MultinomialLogisticRegression", "MultilayerPerceptron"]
global_methods = [
    "AReS",
    "GLOBE_CE",
    "PUMAL_GLOBAL",
]
local_methods = ["wach", "Artelt", "PUMAL_LOCAL"]
group_methods = ["PUMAL", "GlobalGLANCE", "ArteltGW"]
methods = global_methods + local_methods + group_methods

# Gather one frame per (model, dataset, global method) and concatenate once,
# instead of re-copying the growing frame on every iteration. The leading
# empty frame keeps `columns` present even if nothing could be loaded.
result_frames = [pd.DataFrame(columns=columns)]
for disc_model in disc_models:
    for dataset in datasets:
        for method in global_methods:
            result_frames.append(get_results(dataset, disc_model, method, columns))
df_all = pd.concat(result_frames, axis=0)

In [52]:
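# Optional export: uncomment to (re)write results_GLOBAL.csv from the frame built
# above; the table cells further down load that file with pd.read_csv.
# df_all.to_csv("results_GLOBAL.csv", index=False)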

In [None]:
# Per-(model, dataset, method) mean of every remaining column, rounded to two
# decimals; being the last expression of the cell, the result is displayed.
(
    df_all
    .groupby(["disc_model", "dataset", "method"])
    .mean()
    .round(2)
)

In [None]:
# Columns every collected result frame is seeded with.
columns = [
    "dataset",
    "disc_model",
    "method",
    "K_vectors",
    "validity",
    "prob_plausibility",
    "cf_belongs_to_group",
    "log_density_cf",
    "proximity_continuous_manhattan",
    "proximity_continuous_euclidean",
    "isolation_forest_scores_cf",
    "lof_scores_cf",
    "time",
]
datasets = [
    "BlobsDataset",
    "LawDataset",
    "MoonsDataset",
    "WineDataset",
    "HelocDataset",
    "DigitsDataset",
]
disc_models = ["MultinomialLogisticRegression", "MultilayerPerceptron"]
global_methods = ["AReS", "GLOBE_CE", "PUMAL_GLOBAL"]  # "GCE"
local_methods = ["wach", "Artelt", "DiCE", "PUMAL_LOCAL"]  # "PPCEF_2"
group_methods = ["PUMAL"]  # "GLANCE",
methods = global_methods + local_methods + group_methods

# Gather one frame per (model, dataset, local method) and concatenate once,
# instead of re-copying the growing frame on every iteration. The leading
# empty frame keeps `columns` present even if nothing could be loaded.
result_frames = [pd.DataFrame(columns=columns)]
for disc_model in disc_models:
    for dataset in datasets:
        for method in local_methods:
            result_frames.append(get_results(dataset, disc_model, method, columns))
df_all = pd.concat(result_frames, axis=0)

In [55]:
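# Optional export: uncomment to (re)write results_LOCAL.csv from the frame built
# above; the table cells further down load that file with pd.read_csv.
# df_all.to_csv("results_LOCAL.csv", index=False)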

In [None]:
# Per-(model, dataset, method) mean of every remaining column, rounded to two
# decimals; being the last expression of the cell, the result is displayed.
(
    df_all
    .groupby(["disc_model", "dataset", "method"])
    .mean()
    .round(2)
)

In [None]:
# Columns every collected result frame is seeded with.
columns = [
    "dataset",
    "disc_model",
    "method",
    "K_vectors",
    "validity",
    "prob_plausibility",
    "cf_belongs_to_group",
    "log_density_cf",
    "proximity_continuous_manhattan",
    "proximity_continuous_euclidean",
    "isolation_forest_scores_cf",
    "lof_scores_cf",
    "time",
]
datasets = [
    "BlobsDataset",
    "LawDataset",
    "MoonsDataset",
    "WineDataset",
    "HelocDataset",
    "DigitsDataset",
]
disc_models = ["MultinomialLogisticRegression", "MultilayerPerceptron"]
global_methods = ["AReS", "GLOBE_CE", "PUMAL_GLOBAL"]  # "GCE"
local_methods = ["wach", "Artelt", "PUMAL_LOCAL"]
group_methods = ["PUMAL", "GlobalGLANCE", "ArteltGW"]
methods = global_methods + local_methods + group_methods

# Gather one frame per (model, dataset, group method) and concatenate once,
# instead of re-copying the growing frame on every iteration. The leading
# empty frame keeps `columns` present even if nothing could be loaded.
result_frames = [pd.DataFrame(columns=columns)]
for disc_model in disc_models:
    for dataset in datasets:
        for method in group_methods:
            result_frames.append(get_results(dataset, disc_model, method, columns))
df_all = pd.concat(result_frames, axis=0)

In [67]:
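# Optional export: uncomment to (re)write results_GROUP.csv from the frame built
# above; the table cells further down load that file with pd.read_csv.
# df_all.to_csv("results_GROUP.csv", index=False)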

In [64]:
# Subset of columns selected from the grouped `df_all` by the summary / LaTeX
# cell that follows. "coverage" is not part of the `columns` seed list, so it
# presumably comes from the loaded CSV files -- TODO confirm.
cols = [
    "K_vectors",
    "coverage",
    "validity",
    "proximity_continuous_euclidean",
    "prob_plausibility",
    "log_density_cf",
    "isolation_forest_scores_cf",
    "lof_scores_cf",
    "time",
]

In [None]:
# Mean of the selected columns per (model, dataset, method), rounded to two
# decimals. Computed once and reused for both the LaTeX dump and the rich
# display (the original evaluated the identical aggregation twice).
summary_by_method = (
    df_all.groupby(["disc_model", "dataset", "method"])[cols].mean().round(2)
)
print(summary_by_method.to_latex(float_format="%.2f"))
summary_by_method

In [None]:
# Raw CSV column name -> header used in the LaTeX table. Only the columns
# listed here are kept.
columns_map = {
    "dataset": "dataset",
    "disc_model": "model",
    "method": "method",
    "validity": "Validity",
    "proximity_continuous_euclidean": "L2",
    "prob_plausibility": "Prob. Plaus.",
    "log_density_cf": "Log Density",
    "isolation_forest_scores_cf": "IsoForest",
    "lof_scores_cf": "LOF",
    "time": "Time",
}
group_keys = ["model", "dataset", "method"]

df_global = pd.read_csv("results_GLOBAL.csv")
# Pass an explicit list: a dict view is not a documented DataFrame indexer.
df_global = df_global.rename(columns=columns_map)[list(columns_map.values())]

# Group once; mean and std share the same keys, so after reset_index their
# rows line up positionally.
grouped_global = df_global.groupby(group_keys)
df_global_mean = grouped_global.mean().reset_index().round(2)
df_global_std = grouped_global.std().reset_index().round(2)

# Turn every metric cell into "$mean\pm std$". The values are formatted
# explicitly because to_latex's float_format is not applied to string cells
# (astype(str) would render 0.50 as "0.5"). The raw string avoids the invalid
# "\p" escape sequence warning.
for column in df_global_mean.columns:
    if column in group_keys:
        continue
    df_global_mean[column] = (
        "$"
        + df_global_mean[column].map("{:.2f}".format)
        + r"\pm"
        + df_global_std[column].map("{:.2f}".format)
        + "$"
    )

print(df_global_mean.to_latex(float_format="%.2f", escape=False))

In [None]:
# Raw CSV column name -> header used in the LaTeX table. Only the columns
# listed here are kept.
columns_map = {
    "dataset": "dataset",
    "disc_model": "model",
    "method": "method",
    "K_vectors": "# of Groups",
    "coverage": "Coverage",
    "validity": "Validity",
    "proximity_continuous_euclidean": "L2",
    "prob_plausibility": "Prob. Plaus.",
    "log_density_cf": "Log Density",
    "isolation_forest_scores_cf": "IsoForest",
    "lof_scores_cf": "LOF",
    "time": "Time",
}
group_keys = ["model", "dataset", "method"]

df_group = pd.read_csv("results_GROUP.csv")
# Pass an explicit list: a dict view is not a documented DataFrame indexer.
df_group = df_group.rename(columns=columns_map)[list(columns_map.values())]

# Group once; mean and std share the same keys, so after reset_index their
# rows line up positionally. (A discarded duplicate of the mean aggregation
# that used to sit here was removed.)
grouped_group = df_group.groupby(group_keys)
df_mean = grouped_group.mean().reset_index().round(2)
df_std = grouped_group.std().reset_index().round(2)

# Turn every metric cell into "$mean\pm std$". The values are formatted
# explicitly because to_latex's float_format is not applied to string cells
# (astype(str) would render 0.50 as "0.5"). The raw string avoids the invalid
# "\p" escape sequence warning.
for column in df_mean.columns:
    if column in group_keys:
        continue
    df_mean[column] = (
        "$"
        + df_mean[column].map("{:.2f}".format)
        + r"\pm"
        + df_std[column].map("{:.2f}".format)
        + "$"
    )

print(df_mean.to_latex(float_format="%.2f", escape=False))

In [None]:
# Raw CSV column name -> header used in the LaTeX table. Only the columns
# listed here are kept.
columns_map = {
    "dataset": "dataset",
    "disc_model": "model",
    "method": "method",
    "coverage": "Coverage",
    "validity": "Validity",
    "proximity_continuous_euclidean": "L2",
    "prob_plausibility": "Prob. Plaus.",
    "log_density_cf": "Log Density",
    "isolation_forest_scores_cf": "IsoForest",
    "lof_scores_cf": "LOF",
    "time": "Time",
}
group_keys = ["model", "dataset", "method"]

df_local = pd.read_csv("results_LOCAL.csv")
# Pass an explicit list: a dict view is not a documented DataFrame indexer.
df_local = df_local.rename(columns=columns_map)[list(columns_map.values())]

# Group once; mean and std share the same keys, so after reset_index their
# rows line up positionally. (A discarded duplicate of the mean aggregation
# that used to sit here was removed.)
grouped_local = df_local.groupby(group_keys)
df_mean = grouped_local.mean().reset_index().round(2)
df_std = grouped_local.std().reset_index().round(2)

# Turn every metric cell into "$mean\pm std$". The values are formatted
# explicitly because to_latex's float_format is not applied to string cells
# (astype(str) would render 0.50 as "0.5"). The raw string avoids the invalid
# "\p" escape sequence warning.
for column in df_mean.columns:
    if column in group_keys:
        continue
    df_mean[column] = (
        "$"
        + df_mean[column].map("{:.2f}".format)
        + r"\pm"
        + df_std[column].map("{:.2f}".format)
        + "$"
    )

print(df_mean.to_latex(float_format="%.2f", escape=False))

In [48]:
# Raw CSV column name -> display header. Only the columns listed here are kept.
columns_map = {
    "dataset": "dataset",
    "disc_model": "model",
    "method": "method",
    "coverage": "Coverage",
    "validity": "Validity",
    "proximity_continuous_euclidean": "L2",
    "prob_plausibility": "Prob. Plaus.",
    "log_density_cf": "Log Density",
    "isolation_forest_scores_cf": "IsoForest",
    "lof_scores_cf": "LOF",
    "time": "Time",
}
# Load, rename/select, then swap the method identifiers for their LaTeX
# labels. Chained, non-inplace replaces keep the cell idempotent on re-run.
# The former rename of "disc_model" -> "model" was a no-op (columns_map
# already performs it) and the discarded groupby expression was dead code.
df_local = (
    pd.read_csv("results_LOCAL.csv")
    .rename(columns=columns_map)[list(columns_map.values())]
    .replace("PUMAL_LOCAL", "$OUR_{LOCAL}$")
    .replace("Artelt", "$Artelt$")
    .replace("wach", "$Wach$")
)
df_mean = df_local.groupby(["model", "dataset", "method"]).mean().reset_index().round(2)
# df_mean.to_csv("results_LOCAL_grouped.csv", index=False)

In [70]:
# Raw CSV column name -> display header. Only the columns listed here are kept.
columns_map = {
    "dataset": "dataset",
    "disc_model": "model",
    "method": "method",
    "coverage": "Coverage",
    "validity": "Validity",
    "proximity_continuous_euclidean": "L2",
    "prob_plausibility": "Prob. Plaus.",
    "log_density_cf": "Log Density",
    "isolation_forest_scores_cf": "IsoForest",
    "lof_scores_cf": "LOF",
    "time": "Time",
}
# NOTE: the variable is still called `df_local` (name kept unchanged for any
# code relying on it) although this cell loads the GROUP results.
# Load, rename/select, then swap the method identifiers for their LaTeX
# labels. Chained, non-inplace replaces keep the cell idempotent on re-run.
# The former rename of "disc_model" -> "model" was a no-op (columns_map
# already performs it) and the discarded groupby expression was dead code.
df_local = (
    pd.read_csv("results_GROUP.csv")
    .rename(columns=columns_map)[list(columns_map.values())]
    .replace("PUMAL", "$OUR_{GROUP}$")
    .replace("GlobalGLANCE", "$GLANCE$")
    .replace("ArteltGW", "$EA$")
)
df_mean = df_local.groupby(["model", "dataset", "method"]).mean().reset_index().round(2)
# df_mean.to_csv("results_GROUP_grouped.csv", index=False)

In [47]:
# Raw CSV column name -> display header. Only the columns listed here are kept.
columns_map = {
    "dataset": "dataset",
    "disc_model": "model",
    "method": "method",
    "coverage": "Coverage",
    "validity": "Validity",
    "proximity_continuous_euclidean": "L2",
    "prob_plausibility": "Prob. Plaus.",
    "log_density_cf": "Log Density",
    "isolation_forest_scores_cf": "IsoForest",
    "lof_scores_cf": "LOF",
    "time": "Time",
}
# NOTE: the variable is still called `df_local` (name kept unchanged for any
# code relying on it) although this cell loads the GLOBAL results.
# Load, rename/select, then swap the method identifiers for their LaTeX
# labels. Chained, non-inplace replaces keep the cell idempotent on re-run.
# The former rename of "disc_model" -> "model" was a no-op (columns_map
# already performs it) and the discarded groupby expression was dead code.
df_local = (
    pd.read_csv("results_GLOBAL.csv")
    .rename(columns=columns_map)[list(columns_map.values())]
    .replace("PUMAL_GLOBAL", "$OUR_{GLOBAL}$")
    .replace("AReS", "$AReS$")
    .replace("GLOBE_CE", "$GLOBE-CE$")
)
df_mean = df_local.groupby(["model", "dataset", "method"]).mean().reset_index().round(2)
# df_mean.to_csv("results_GLOBAL_grouped.csv", index=False)

# Ablation Study Results

In [None]:
import pandas as pd

# lambda_d values exactly as they are spelled in the metrics file names.
ds = ["0.0", "0.1", "10.0", "100", "1000"]

# Collect one frame per (fold, lambda_d) and concatenate once at the end.
ablation_frames = []
for fold_n in range(5):
    for d in ds:
        metrics_path = f"../models/MoonsDataset/PUMAL/fold_{fold_n}/cf_metrics_MultilayerPerceptron_lambda_d_{d}.csv"
        try:
            df = pd.read_csv(metrics_path)
        except FileNotFoundError:
            print(f"File not found: {metrics_path}")
            continue
        except Exception as exc:
            # Any other failure is still skipped (best-effort), but reported
            # as what it is rather than as a missing file.
            print(f"Could not read {metrics_path}: {exc!r}")
            continue
        df["d"] = d  # tag the rows with the lambda_d value they belong to
        ablation_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame.
df_all = pd.concat(ablation_frames, axis=0) if ablation_frames else pd.DataFrame()

In [None]:
# Show the column index of the current `df_all` (the ablation frame when the
# cells are run in order).
df_all.columns

In [None]:
# Columns reported for the ablation over `d`.
columns = [
    "validity",
    "log_density_cf",
    "proximity_continuous_euclidean",
    "pairwise_cosine_sim_min",
    "distance_to_centroid_mean",
]
# Select the columns before aggregating: same values as aggregating the whole
# frame and selecting afterwards, but no work is spent on unused columns and a
# non-numeric column elsewhere in `df_all` cannot make the mean fail.
df_all.groupby(["d"])[columns].mean().round(2)

In [None]:
# Columns reported for the ablation over `d`.
columns = [
    "validity",
    "log_density_cf",
    "proximity_continuous_euclidean",
    "pairwise_cosine_sim_min",
    "distance_to_centroid_mean",
]
# Select the columns before aggregating: same values as aggregating the whole
# frame and selecting afterwards, but no work is spent on unused columns and a
# non-numeric column elsewhere in `df_all` cannot make the std fail.
df_all.groupby(["d"])[columns].std().round(2)