In [28]:
import pandas as pd
import config as cfg
from sklearn.preprocessing import MinMaxScaler
from pingouin import friedman
import numpy as np
from scikit_posthocs import posthoc_nemenyi_friedman
from scipy.stats import rankdata

In [29]:
def normalize_proximity_metrics(df: pd.DataFrame) -> pd.DataFrame:
    """
    Normalize proximity metrics (L1, L2, DTW) to [0,1] within each Dataset.

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe containing 'Dataset' and proximity metric columns:
        - 'Proximity L1'
        - 'Proximity L2'
        - 'Proximity DTW'

    Returns
    -------
    pd.DataFrame
        Copy of the dataframe with three new normalized columns:
        - 'Proximity L1_norm'
        - 'Proximity L2_norm'
        - 'Proximity DTW_norm'
    """
    proximity_cols = ["Proximity L1", "Proximity L2", "Proximity DTW"]
    data = df.copy()

    for col in proximity_cols:
        data[col] = (
            data.groupby("Dataset")[col]
              .transform(lambda x: MinMaxScaler().fit_transform(x.values.reshape(-1, 1)).flatten())
        )
    return data


In [30]:
def print_values_with_ranks(metric: str, metric_df: pd.DataFrame, ranked_df: pd.DataFrame) -> None:
    combined = metric_df.copy()
    for col in metric_df.columns:
        combined[col] = metric_df[col].map("{:.3f}".format) + " (" + ranked_df[col].map("{:.1f}".format) + ")"

    avg_ranks = ranked_df.mean(axis=0).to_frame().T
    avg_ranks.index = ["Avg Rank"]
    avg_ranks = avg_ranks.map("{:.2f}".format)
    print(f"\n=== {metric} ===")
    print(combined.to_string(index=False))
    print("\nAverage ranks:")
    print(avg_ranks.to_string(index=False))

In [31]:
def analyze_results(results: pd.DataFrame, metric: str, direction: str = "low"):

    averages = results.groupby("Explainer").mean(numeric_only=True).reset_index()
    stds = results.groupby("Explainer").std(numeric_only=True).reset_index()

    ALPHA = 0.05

    metric_df = results.pivot_table(index="Dataset", columns="Explainer", values=metric)
    if direction == "high":
        ranked_values = rankdata(-metric_df, axis=1)
    else:
        ranked_values = rankdata(metric_df, axis=1)
    ranked_df = pd.DataFrame(ranked_values, columns=metric_df.columns, index=metric_df.index)

    print_values_with_ranks(metric, metric_df, ranked_df)
    if direction == "high":
        metric_df = -metric_df

    stats = friedman(metric_df, method="f")
    p = stats.loc["Friedman", "p-unc"]
    f = stats.loc["Friedman", "F"]

    if not np.isnan(p) and p < ALPHA:
        print(f"Significant differences found in {metric} (p = {p:.4f}, F = {f:.3f}).")
        nemenyi_matrix = posthoc_nemenyi_friedman(metric_df)

        for i in range(len(nemenyi_matrix)):
            for j in range(i + 1, len(nemenyi_matrix)):
                p_value = nemenyi_matrix.iloc[i, j]
                if p_value < ALPHA:
                    method1 = nemenyi_matrix.index[i]
                    method2 = nemenyi_matrix.columns[j]
                    average_ranks = ranked_df.mean(axis=0)
                    average1 = average_ranks[method1]
                    average2 = average_ranks[method2]
                    print(f"  {method1} (Avg. Rank: {average1}) vs {method2} (Avg. Rank: {average2}): p = {p_value:.4f}")
    else:
        print(f"No significant differences found in {metric} (p = {p:.4f}, F = {f:.3f}).")

In [32]:
results = pd.read_csv(cfg.EXPERIMENT_OBJECTIVES / "objectives_summary.csv")
results = normalize_proximity_metrics(results)

prettify_names = {"CO_SP_PR" : "All Objectives",
                  "CO_SP" : "Coverage/Sparsity",
                  "CO_PR" : "Coverage/Proximity",
                  "SP_PR" : "Sparsity/Proximity",}

results['Explainer'] = results['Explainer'].map(prettify_names)

In [33]:
results.head()

Unnamed: 0,Computation Time,Confidence,Coverage,Dataset,Explainer,Proximity DTW,Proximity L1,Proximity L2,Sparsity,Validity,yNN
0,236.746095,0.774062,100.0,ArticularyWordRecognition,All Objectives,0.220096,0.295917,0.325587,0.89556,1.0,0.99599
1,242.473747,0.814648,100.0,ArticularyWordRecognition,Coverage/Sparsity,1.0,0.585576,0.883996,0.895099,1.0,0.996125
2,266.92721,0.800402,100.0,ArticularyWordRecognition,Coverage/Proximity,0.320415,1.0,1.0,0.882231,1.0,0.99608
3,312.507864,0.617674,100.0,BasicMotions,All Objectives,0.343184,0.535843,0.432317,0.844057,1.0,0.9965
4,301.789208,0.666854,100.0,BasicMotions,Coverage/Sparsity,1.0,1.0,1.0,0.844605,1.0,0.996737


In [34]:
analyze_results(results, 'Coverage', direction="high")


=== Coverage ===
All Objectives Coverage/Proximity Coverage/Sparsity Sparsity/Proximity
 100.000 (2.5)      100.000 (2.5)     100.000 (2.5)      100.000 (2.5)
 100.000 (2.5)      100.000 (2.5)     100.000 (2.5)      100.000 (2.5)
 100.000 (2.5)      100.000 (2.5)     100.000 (2.5)      100.000 (2.5)
 100.000 (2.5)      100.000 (2.5)     100.000 (2.5)      100.000 (2.5)
 100.000 (2.5)      100.000 (2.5)     100.000 (2.5)      100.000 (2.5)
 100.000 (2.5)      100.000 (2.5)     100.000 (2.5)      100.000 (2.5)
 100.000 (2.5)      100.000 (2.5)     100.000 (2.5)      100.000 (2.5)

Average ranks:
All Objectives Coverage/Proximity Coverage/Sparsity Sparsity/Proximity
          2.50               2.50              2.50               2.50
No significant differences found in Coverage (p = nan, F = nan).


  W = (12 * ssbn - 3 * n**2 * k * (k + 1) ** 2) / (n**2 * k * (k - 1) * (k + 1) - n * ties)


In [35]:
analyze_results(results, 'Validity', direction="high")


=== Validity ===
All Objectives Coverage/Proximity Coverage/Sparsity Sparsity/Proximity
   1.000 (2.5)        1.000 (2.5)       1.000 (2.5)        1.000 (2.5)
   1.000 (2.5)        1.000 (2.5)       1.000 (2.5)        1.000 (2.5)
   1.000 (2.5)        1.000 (2.5)       1.000 (2.5)        1.000 (2.5)
   1.000 (2.5)        1.000 (2.5)       1.000 (2.5)        1.000 (2.5)
   1.000 (2.5)        1.000 (2.5)       1.000 (2.5)        1.000 (2.5)
   1.000 (2.5)        1.000 (2.5)       1.000 (2.5)        1.000 (2.5)
   1.000 (2.5)        1.000 (2.5)       1.000 (2.5)        1.000 (2.5)

Average ranks:
All Objectives Coverage/Proximity Coverage/Sparsity Sparsity/Proximity
          2.50               2.50              2.50               2.50
No significant differences found in Validity (p = nan, F = nan).


  W = (12 * ssbn - 3 * n**2 * k * (k + 1) ** 2) / (n**2 * k * (k - 1) * (k + 1) - n * ties)


In [36]:
analyze_results(results, 'Sparsity', direction="high")


=== Sparsity ===
All Objectives Coverage/Proximity Coverage/Sparsity Sparsity/Proximity
   0.896 (2.0)        0.882 (4.0)       0.895 (3.0)        0.899 (1.0)
   0.844 (3.0)        0.827 (4.0)       0.845 (2.0)        0.854 (1.0)
   0.915 (3.0)        0.896 (4.0)       0.916 (2.0)        0.930 (1.0)
   0.817 (3.0)        0.798 (4.0)       0.817 (2.0)        0.821 (1.0)
   0.820 (2.0)        0.792 (4.0)       0.819 (3.0)        0.844 (1.0)
   0.867 (2.0)        0.850 (4.0)       0.864 (3.0)        0.873 (1.0)
   0.916 (2.0)        0.893 (4.0)       0.916 (3.0)        0.932 (1.0)

Average ranks:
All Objectives Coverage/Proximity Coverage/Sparsity Sparsity/Proximity
          2.43               4.00              2.57               1.00
Significant differences found in Sparsity (p = 0.0000, F = 55.250).
  Coverage/Proximity (Avg. Rank: 4.0) vs Sparsity/Proximity (Avg. Rank: 1.0): p = 0.0001


In [37]:
analyze_results(results, 'Confidence', direction="high")


=== Confidence ===
All Objectives Coverage/Proximity Coverage/Sparsity Sparsity/Proximity
   0.774 (3.0)        0.800 (2.0)       0.815 (1.0)        0.727 (4.0)
   0.618 (3.0)        0.635 (2.0)       0.667 (1.0)        0.540 (4.0)
   0.790 (3.0)        0.820 (2.0)       0.821 (1.0)        0.660 (4.0)
   0.640 (3.0)        0.643 (2.0)       0.684 (1.0)        0.525 (4.0)
   0.723 (2.0)        0.705 (3.0)       0.731 (1.0)        0.634 (4.0)
   0.626 (3.0)        0.675 (2.0)       0.711 (1.0)        0.528 (4.0)
   0.643 (3.0)        0.683 (2.0)       0.720 (1.0)        0.576 (4.0)

Average ranks:
All Objectives Coverage/Proximity Coverage/Sparsity Sparsity/Proximity
          2.86               2.14              1.00               4.00
Significant differences found in Confidence (p = 0.0000, F = 116.500).
  All Objectives (Avg. Rank: 2.857142857142857) vs Coverage/Sparsity (Avg. Rank: 1.0): p = 0.0358
  Coverage/Proximity (Avg. Rank: 2.142857142857143) vs Sparsity/Proximity (Avg. Rank:

In [38]:
analyze_results(results, 'Proximity L1')


=== Proximity L1 ===
All Objectives Coverage/Proximity Coverage/Sparsity Sparsity/Proximity
   0.296 (2.0)        1.000 (4.0)       0.586 (3.0)        0.000 (1.0)
   0.536 (2.0)        0.817 (3.0)       1.000 (4.0)        0.000 (1.0)
   0.409 (2.0)        1.000 (4.0)       0.515 (3.0)        0.000 (1.0)
   0.405 (2.0)        1.000 (4.0)       0.801 (3.0)        0.000 (1.0)
   0.527 (2.0)        1.000 (4.0)       0.741 (3.0)        0.000 (1.0)
   0.212 (2.0)        0.688 (3.0)       1.000 (4.0)        0.000 (1.0)
   0.601 (2.0)        0.946 (3.0)       1.000 (4.0)        0.000 (1.0)

Average ranks:
All Objectives Coverage/Proximity Coverage/Sparsity Sparsity/Proximity
          2.00               3.57              3.43               1.00
Significant differences found in Proximity L1 (p = 0.0000, F = 55.250).
  Coverage/Proximity (Avg. Rank: 3.5714285714285716) vs Sparsity/Proximity (Avg. Rank: 1.0): p = 0.0011
  Coverage/Sparsity (Avg. Rank: 3.4285714285714284) vs Sparsity/Proximity (A

In [39]:
analyze_results(results, 'Proximity L2')


=== Proximity L2 ===
All Objectives Coverage/Proximity Coverage/Sparsity Sparsity/Proximity
   0.326 (2.0)        1.000 (4.0)       0.884 (3.0)        0.000 (1.0)
   0.432 (2.0)        0.523 (3.0)       1.000 (4.0)        0.000 (1.0)
   0.368 (2.0)        1.000 (4.0)       0.640 (3.0)        0.000 (1.0)
   0.435 (2.0)        0.711 (3.0)       1.000 (4.0)        0.000 (1.0)
   0.488 (2.0)        0.932 (3.0)       1.000 (4.0)        0.000 (1.0)
   0.171 (2.0)        0.417 (3.0)       1.000 (4.0)        0.000 (1.0)
   0.553 (3.0)        0.471 (2.0)       1.000 (4.0)        0.000 (1.0)

Average ranks:
All Objectives Coverage/Proximity Coverage/Sparsity Sparsity/Proximity
          2.14               3.14              3.71               1.00
Significant differences found in Proximity L2 (p = 0.0000, F = 34.833).
  Coverage/Proximity (Avg. Rank: 3.142857142857143) vs Sparsity/Proximity (Avg. Rank: 1.0): p = 0.0103
  Coverage/Sparsity (Avg. Rank: 3.7142857142857144) vs Sparsity/Proximity (Av

In [40]:
analyze_results(results, 'Proximity DTW')


=== Proximity DTW ===
All Objectives Coverage/Proximity Coverage/Sparsity Sparsity/Proximity
   0.220 (2.0)        0.320 (3.0)       1.000 (4.0)        0.000 (1.0)
   0.343 (3.0)        0.334 (2.0)       1.000 (4.0)        0.000 (1.0)
   0.337 (2.0)        0.417 (3.0)       1.000 (4.0)        0.000 (1.0)
   0.392 (3.0)        0.261 (2.0)       1.000 (4.0)        0.000 (1.0)
   0.318 (3.0)        0.218 (2.0)       1.000 (4.0)        0.000 (1.0)
   0.198 (2.0)        0.417 (3.0)       1.000 (4.0)        0.000 (1.0)
   0.252 (2.0)        0.416 (3.0)       1.000 (4.0)        0.000 (1.0)

Average ranks:
All Objectives Coverage/Proximity Coverage/Sparsity Sparsity/Proximity
          2.43               2.57              4.00               1.00
Significant differences found in Proximity DTW (p = 0.0000, F = 55.250).
  Coverage/Sparsity (Avg. Rank: 4.0) vs Sparsity/Proximity (Avg. Rank: 1.0): p = 0.0001


In [41]:
analyze_results(results, 'yNN', direction="high")


=== yNN ===
All Objectives Coverage/Proximity Coverage/Sparsity Sparsity/Proximity
   0.996 (3.0)        0.996 (2.0)       0.996 (1.0)        0.996 (4.0)
   0.997 (3.0)        0.997 (2.0)       0.997 (1.0)        0.996 (4.0)
   0.990 (1.0)        0.990 (2.0)       0.990 (4.0)        0.990 (3.0)
   0.997 (3.0)        0.997 (2.0)       0.997 (1.0)        0.997 (4.0)
   0.990 (2.0)        0.989 (3.5)       0.990 (1.0)        0.989 (3.5)
   0.989 (2.5)        0.989 (2.5)       0.989 (1.0)        0.989 (4.0)
   0.982 (3.0)        0.983 (2.0)       0.983 (1.0)        0.982 (4.0)

Average ranks:
All Objectives Coverage/Proximity Coverage/Sparsity Sparsity/Proximity
          2.50               2.29              1.43               3.79
Significant differences found in yNN (p = 0.0016, F = 8.497).
  Coverage/Sparsity (Avg. Rank: 1.4285714285714286) vs Sparsity/Proximity (Avg. Rank: 3.7857142857142856): p = 0.0036
