# Fairness Results Visualisation Notebook

This notebook loads fairness experiment results stored as JSON files and produces
summary tables and plots. It is tailored to the JSON structure produced by your
framework, for example:

```json
{
  "dataset": "adult",
  "protected_attr": "gender",
  "dp": false,
  "model_type": "logreg",
  "global_metrics": {
    "accuracy_overall": 0.79,
    "accuracy_by_group": { ... },
    "ks_stat": ...,
    "ks_pvalue": ...,
    "welch_t_stat": ...,
    "welch_t_pvalue": ...,
    "tv_distance": ...,
    "epsilon_empirical": ...
  },
  "groups": [
    {
      "group_id": 0,
      "group_label": "Female",
      "num_samples": ...,
      "accuracy": ...,
      "ks_stat": ...,
      "ks_pvalue": ...,
      "welch_t_stat": ...,
      "welch_t_pvalue": ...,
      "tv_distance": ...,
      "epsilon_empirical": ...
    },
    ...
  ]
}
```

You can point it at a directory containing files like:

- `fairness_adult_gender_dpFalse.json`
- `fairness_compas_gender_dpFalse.json`
- `fairness_compas_race_dpTrue.json`

and it will aggregate and visualise the results.


In [None]:
import json
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Make plots inline if you're in a Jupyter environment
%matplotlib inline

# Notebook-wide plotting defaults: figure size in inches and a background grid
plt.rcParams.update({
    'figure.figsize': (8, 4),
    'axes.grid': True,
})

## Load and parse JSON result files

In [None]:
def load_fairness_results(results_dir: str):
    """
    Load all fairness_*.json files from a directory and return two DataFrames.

    Parameters
    ----------
    results_dir : str or path-like
        Directory searched (non-recursively) for files named ``fairness_*.json``.
        Files are read in sorted filename order.

    Returns
    -------
    (global_df, groups_df)
        - global_df: one row per experiment (dataset/protected_attr/dp/model_type)
          with the values from the file's ``"global_metrics"`` object.
        - groups_df: one row per group per experiment, taken from ``"groups"``.

        Both frames always carry their full set of columns, even when no file
        matched, so downstream cells can test ``.empty`` or select columns
        without hitting a ``KeyError``.

    Raises
    ------
    KeyError
        If a file lacks ``"global_metrics"``, ``"groups"``, or one of the
        metric keys listed below.
    """
    # Columns that identify the experiment; repeated on every row of both frames.
    id_columns = ["file", "dataset", "protected_attr", "dp", "model_type"]
    # Statistical metrics reported both globally and per group.
    shared_metric_keys = [
        "ks_stat",
        "ks_pvalue",
        "welch_t_stat",
        "welch_t_pvalue",
        "tv_distance",
        "epsilon_empirical",
    ]
    global_metric_keys = ["accuracy_overall"] + shared_metric_keys
    group_metric_keys = (
        ["group_id", "group_label", "num_samples", "accuracy"] + shared_metric_keys
    )

    global_rows = []
    group_rows = []

    for path in sorted(Path(results_dir).glob("fairness_*.json")):
        # JSON is UTF-8 by specification; do not depend on the platform default
        # encoding (group labels may contain non-ASCII characters).
        with path.open("r", encoding="utf-8") as f:
            data = json.load(f)

        experiment = {
            "file": path.name,
            "dataset": data.get("dataset"),
            "protected_attr": data.get("protected_attr"),
            "dp": bool(data.get("dp")),
            "model_type": data.get("model_type", ""),
        }

        gm = data["global_metrics"]
        global_rows.append({**experiment, **{key: gm[key] for key in global_metric_keys}})

        for g in data["groups"]:
            group_rows.append({**experiment, **{key: g[key] for key in group_metric_keys}})

    # Passing `columns` keeps the schema stable when the row lists are empty.
    global_df = pd.DataFrame(global_rows, columns=id_columns + global_metric_keys)
    groups_df = pd.DataFrame(group_rows, columns=id_columns + group_metric_keys)
    return global_df, groups_df

## Load your results

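Set `RESULTS_DIR` to the directory that contains your JSON result files.
Only files whose names match `fairness_*.json` are loaded. If your project writes
them to `results/` (relative to the notebook's working directory), you can leave
the default unchanged.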


In [None]:
RESULTS_DIR = "results"  # change this if needed

# Parse every result file once, then show both tables with their headings.
global_df, groups_df = load_fairness_results(RESULTS_DIR)

for heading, frame in (
    ("Global metrics:", global_df),
    ("\nPer-group metrics:", groups_df),
):
    print(heading)
    display(frame)

## Global comparison: accuracy and ε by dataset / attribute / DP

In [None]:
# Side-by-side bar charts of overall accuracy and empirical epsilon, one bar per
# experiment, each panel sorted ascending by its own metric.
#
# Guard first: when no result files were loaded the frame has no rows (and
# possibly no columns), so the column lookups below would raise.
if global_df.empty:
    print("No results loaded; nothing to plot.")
else:
    # Short label per experiment, used for the x-axis ticks here and in later cells.
    # `.assign` returns a new frame, so re-running this cell is idempotent.
    global_df = global_df.assign(
        config=(
            global_df["dataset"] + "_" +
            global_df["protected_attr"] + "_dp" + global_df["dp"].astype(int).astype(str)
        )
    )

    fig, axes = plt.subplots(1, 2, figsize=(12, 4))

    # (metric column, y-axis label, panel title) for each subplot, left to right.
    panels = [
        ("accuracy_overall", "Accuracy", "Overall accuracy by configuration"),
        ("epsilon_empirical", "Empirical epsilon", "Empirical ε by configuration"),
    ]
    positions = np.arange(len(global_df))

    for ax, (column, ylabel, title) in zip(axes, panels):
        # Sort bars by the metric and reorder the tick labels with the same index.
        order = np.argsort(global_df[column].values)
        ax.bar(positions, global_df[column].values[order])
        ax.set_xticks(positions)
        ax.set_xticklabels(global_df["config"].values[order], rotation=45, ha="right")
        ax.set_ylabel(ylabel)
        ax.set_title(title)

    plt.tight_layout()
    plt.show()

## Per-group metrics

Pick a specific configuration (dataset, protected attribute, DP on/off) and
visualise accuracy and ε per group.


In [None]:
# Configuration to drill into (edit these three values)
CFG_DATASET = "compas"       # e.g. "adult" or "compas"
CFG_PROTECTED = "gender"     # e.g. "gender" or "race"
CFG_DP = False               # True or False

# Keep only the rows where all three settings match the chosen configuration.
mask = (
    groups_df["dataset"].eq(CFG_DATASET)
    & groups_df["protected_attr"].eq(CFG_PROTECTED)
    & groups_df["dp"].eq(CFG_DP)
)
subset = groups_df.loc[mask].copy()

if not subset.empty:
    subset = subset.sort_values("group_id")
    print("Groups for", CFG_DATASET, CFG_PROTECTED, "DP:", CFG_DP)
    display(subset[["group_id", "group_label", "num_samples", "accuracy", "epsilon_empirical"]])

    fig, axes = plt.subplots(1, 2, figsize=(12, 4))

    bar_positions = np.arange(len(subset))
    group_names = subset["group_label"].values
    # (metric column, y-axis label, panel title), left panel first.
    panel_specs = (
        ("accuracy", "Accuracy", "Accuracy per group"),
        ("epsilon_empirical", "Empirical epsilon", "Empirical ε per group"),
    )
    for ax, (metric, y_label, heading) in zip(axes, panel_specs):
        ax.bar(bar_positions, subset[metric].values)
        ax.set_xticks(bar_positions)
        ax.set_xticklabels(group_names, rotation=45, ha="right")
        ax.set_ylabel(y_label)
        ax.set_title(heading)

    plt.tight_layout()
    plt.show()
else:
    print("No groups found for this configuration.")

## KS p-values across configurations

In [None]:
# Bar chart of the global KS-test p-value per experiment, sorted ascending.
# Tick labels come from the "config" column of global_df.
#
# Guard first: with no results loaded there is nothing to plot and the column
# lookups below would raise.
if global_df.empty:
    print("No results loaded; nothing to plot.")
else:
    fig, ax = plt.subplots(figsize=(8, 4))

    positions = np.arange(len(global_df))
    # One sort index drives both the bar heights and their tick labels.
    sorted_idx = np.argsort(global_df["ks_pvalue"].values)
    ax.bar(positions, global_df["ks_pvalue"].values[sorted_idx])
    ax.set_xticks(positions)
    ax.set_xticklabels(global_df["config"].values[sorted_idx], rotation=45, ha="right")
    ax.set_ylabel("KS p-value")
    ax.set_title("KS test p-values by configuration")
    plt.tight_layout()
    plt.show()