In [2]:
import numpy as np
import pandas as pd

# Load the raw results of the lambda-ablation experiment.
df = pd.read_csv("../Counterfactuals_lambda_ablation.csv")

# Raw column name in the CSV -> short display name used in the tables below.
# Only the columns listed here are kept; commented-out entries are metrics
# that are deliberately left out of this analysis.
column_mapping = {
    "parameters/method": "Method",
    "parameters/disc_model/model_name": "Model",
    "parameters/dataset": "Dataset",
    "parameters/counterfactuals/origin_class": "Origin Class",
    # "parameters/counterfactuals/K": "K",
    "metrics/cf/K_vectors": "K",
    "metrics/cf/valid_cf_disc": "Validity",
    "metrics/cf/flow_prob_condition_acc": "Prob. Plaus.",
    "metrics/cf/cf_belongs_to_group": "CFs assigned to group",
    # "metrics/cf/flow_log_density_cfs": "Log Dens.",
    "metrics/cf/dissimilarity_proximity_continuous_manhatan": "L1",
    "metrics/cf/dissimilarity_proximity_continuous_euclidean": "L2",
    # "metrics/cf/isolation_forest_scores_cfs": "IsoForest",
    # "metrics/cf/lof_scores_cfs": "LOF",
    # "metrics/cf/plausibility": "Plaus.",
    "parameters/counterfactuals/alpha": "Alpha",
    "parameters/counterfactuals/alpha_k": "Alpha K",
    "parameters/counterfactuals/alpha_s": "Alpha S",
}

# Rename, then keep only the mapped columns in mapping order. An explicit list
# is passed because selecting with a dict view relies on implicit coercion.
df = df.rename(columns=column_mapping)[list(column_mapping.values())]

# Shorten model names; both logistic-regression variants collapse to "LR".
df["Model"] = df["Model"].replace(
    {
        "MultinomialLogisticRegression": "LR",
        "LogisticRegression": "LR",
        "MultilayerPerceptron": "MLP",
    }
)

# Strip the trailing 7 characters from every dataset name.
# NOTE(review): presumably this removes a fixed "Dataset" suffix - confirm
# against the raw CSV values.
DATASET_SUFFIX_LEN = 7
df["Dataset"] = df["Dataset"].str[:-DATASET_SUFFIX_LEN]

# One row per (model, dataset, method, lambda setting): average the remaining
# columns over the rows sharing that key, round to two decimals, and drop the
# columns that are not reported. Because rounding happens here, the per-lambda
# tables computed further down are means of already-rounded values.
data = (
    df.groupby(
        ["Model", "Dataset", "Method", "Alpha", "Alpha K", "Alpha S"], as_index=False
    )
    .mean()
    .round(2)
    .drop(columns=["Origin Class", "Method"])
)

Analysis of the influence of $\lambda_K$ on the number of created subgroups.

In [3]:
# Average every reported metric over all rows sharing the same "Alpha K" value.
alpha_k_metrics = ["Validity", "Prob. Plaus.", "L1", "L2", "K"]
data.groupby("Alpha K")[alpha_k_metrics].mean()

Unnamed: 0_level_0,Validity,Prob. Plaus.,L1,L2,K
Alpha K,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,0.988516,0.966875,0.836875,0.418984,26.480469
10,0.985703,0.943281,0.875078,0.433047,15.719766
100,0.98,0.896016,0.9,0.440156,6.385469
1000,0.973984,0.848047,0.905078,0.441406,2.497734


In [11]:
# Per-"Alpha K" metric means, emitted as a LaTeX table with two decimals.
alpha_k_table = data.groupby("Alpha K")[
    ["Validity", "Prob. Plaus.", "L1", "L2", "K"]
].mean()
print(alpha_k_table.to_latex(float_format="%.2f"))

\begin{tabular}{lrrrrr}
\toprule
 & Validity & Prob. Plaus. & L1 & L2 & K \\
Alpha K &  &  &  &  &  \\
\midrule
1 & 0.99 & 0.97 & 0.84 & 0.42 & 26.48 \\
10 & 0.99 & 0.94 & 0.88 & 0.43 & 15.72 \\
100 & 0.98 & 0.90 & 0.90 & 0.44 & 6.39 \\
1000 & 0.97 & 0.85 & 0.91 & 0.44 & 2.50 \\
\bottomrule
\end{tabular}



We can observe that as $\lambda_K$ increases, the number of subgroups decreases, which yields clearer explanations. However, as expected, it negatively affects validity, probabilistic plausibility, and the distances.

Analysis of the influence of $\lambda_S$ on the number of CFs correctly assigned to a group, i.e., CFs for which exactly one subgroup was selected.

In [5]:
# Average every reported metric over all rows sharing the same "Alpha S" value.
alpha_s_metrics = ["Validity", "Prob. Plaus.", "L1", "L2", "CFs assigned to group"]
data.groupby("Alpha S")[alpha_s_metrics].mean()

Unnamed: 0_level_0,Validity,Prob. Plaus.,L1,L2,CFs assigned to group
Alpha S,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,0.983516,0.909687,0.900312,0.439766,0.947109
10,0.981406,0.907422,0.888516,0.435859,0.969297
100,0.982344,0.913203,0.871875,0.43125,0.985859
1000,0.980938,0.923906,0.856328,0.426719,0.997891


In [10]:
# Per-"Alpha S" metric means, emitted as a LaTeX table with two decimals.
alpha_s_table = data.groupby("Alpha S")[
    ["Validity", "Prob. Plaus.", "L1", "L2", "CFs assigned to group"]
].mean()
print(alpha_s_table.to_latex(float_format="%.2f"))

\begin{tabular}{lrrrrr}
\toprule
 & Validity & Prob. Plaus. & L1 & L2 & CFs assigned to group \\
Alpha S &  &  &  &  &  \\
\midrule
1 & 0.98 & 0.91 & 0.90 & 0.44 & 0.95 \\
10 & 0.98 & 0.91 & 0.89 & 0.44 & 0.97 \\
100 & 0.98 & 0.91 & 0.87 & 0.43 & 0.99 \\
1000 & 0.98 & 0.92 & 0.86 & 0.43 & 1.00 \\
\bottomrule
\end{tabular}



We can observe that as $\lambda_S$ increases, the share of CFs correctly assigned to the relevant shifting vector increases, together with an improvement in probabilistic plausibility and the distances, while validity remains essentially unchanged.

Analysis of the influence of $\lambda$ on validity, plausibility, and distance.

In [7]:
# Average every reported metric over all rows sharing the same "Alpha" value.
alpha_metrics = ["Validity", "Prob. Plaus.", "L1", "L2"]
data.groupby("Alpha")[alpha_metrics].mean()

Unnamed: 0_level_0,Validity,Prob. Plaus.,L1,L2
Alpha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,0.969609,0.818984,0.772266,0.389531
10,0.979766,0.902266,0.810859,0.404687
100,0.986328,0.948203,0.910937,0.444141
1000,0.9925,0.984766,1.022969,0.495234


In [9]:
# Per-"Alpha" metric means, emitted as a LaTeX table with two decimals.
alpha_table = data.groupby("Alpha")[["Validity", "Prob. Plaus.", "L1", "L2"]].mean()
print(alpha_table.to_latex(float_format="%.2f"))

\begin{tabular}{lrrrr}
\toprule
 & Validity & Prob. Plaus. & L1 & L2 \\
Alpha &  &  &  &  \\
\midrule
1 & 0.97 & 0.82 & 0.77 & 0.39 \\
10 & 0.98 & 0.90 & 0.81 & 0.40 \\
100 & 0.99 & 0.95 & 0.91 & 0.44 \\
1000 & 0.99 & 0.98 & 1.02 & 0.50 \\
\bottomrule
\end{tabular}



We can observe that as $\lambda$ increases, validity and probabilistic plausibility increase, but at the cost of distance. This is an expected trade-off, as valid and plausible CFs usually lie farther away from the starting observation.

(Optional??) Average results of K, CFs assigned to group, and the other metrics for the single configuration with $\lambda = \lambda_S = \lambda_K = 1000$.

In [6]:
# Boolean row mask: keep only the rows where all three lambda settings
# ("Alpha", "Alpha S", "Alpha K") are equal to 1000.
f = (
    (data["Alpha"] == 1000)
    & (data["Alpha S"] == 1000)
    & (data["Alpha K"] == 1000)
)

# Column-wise mean of the reported metrics over the selected rows.
reported_metrics = ["Validity", "Prob. Plaus.", "L1", "L2", "K", "CFs assigned to group"]
data.loc[f, reported_metrics].mean()

Validity                 0.98875
Prob. Plaus.             0.94375
L1                       1.02750
L2                       0.50250
K                        5.50000
CFs assigned to group    0.98000
dtype: float64