# Setup

In [None]:
# Switch the kernel's working directory; the relative paths used further down
# (e.g. output/runs/<run>/...) are resolved against it.
# NOTE(review): this home-relative path is machine-specific -- presumably the
# repository root; adjust when running elsewhere.
%cd ~/cma/CMA_Fairness

In [None]:
import os
# Sanity check: show the directory that relative paths will resolve against.
print("Current working directory:", os.getcwd())

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

In [None]:
# Identifier of the run to analyse; everything read or written by this
# notebook lives under output/runs/<RUN_TO_ANALYSE>/ (relative to the cwd).
RUN_TO_ANALYSE = "1"
CP_DIR = Path("output", "runs", str(RUN_TO_ANALYSE))

# Load that run's combined metrics table.
metrics_csv = CP_DIR / "combined_cp_metrics.csv"
df = pd.read_csv(metrics_csv)

In [None]:
# Coverage columns to compare, in plotting order. The box plot further down
# labels them "overall", "female", "non-german", "non-german male" and
# "non-german female" respectively.
cols = [
    "coverage",
    "cov_frau1",
    "cov_nongerman",
    "cov_nongerman_male",
    "cov_nongerman_female",
]

In [None]:
# Keep only the coverage columns listed in `cols` (all rows).
df_sub = df.loc[:, cols]

In [None]:
# Display the selected coverage columns for a quick visual check.
df_sub

In [None]:
# Box plot of the coverage columns in `cols` (one box per column), with the
# individual values overlaid as jittered points and a reference line at the
# target coverage. The figure is saved to CP_DIR / "coverage_subgroups.png".
TARGET_COVERAGE = 0.90  # height of the dashed reference line
JITTER_SEED = 0         # fixed seed so the saved figure is identical on every re-run

fig, ax = plt.subplots(figsize=(8, 6))

# Positions for each box
positions = np.arange(1, len(cols) + 1)

# matplotlib's boxplot does not skip NaN: a missing value in a column leaves
# that column's box empty. Drop missing values per column (a no-op when the
# columns are complete) and reuse the same samples for the scatter overlay.
samples = [df_sub[col].dropna() for col in cols]

# Draw the boxplots
bp = ax.boxplot(
    samples,
    positions=positions,
    widths=0.6,
    patch_artist=True,
    notch=True,
)

colors = ["darkgray", "#CAB2D6", "lightskyblue", "aquamarine", "#FFFF99"]

# Style the boxes and medians
for patch, median, color in zip(bp["boxes"], bp["medians"], colors):
    patch.set_facecolor(color)
    patch.set_edgecolor("black")
    patch.set_linewidth(1.5)
    median.set_color("black")
    median.set_linewidth(2)

# Style whiskers and caps
for whisker in bp["whiskers"]:
    whisker.set_color("black")
    whisker.set_linewidth(1.5)
for cap in bp["caps"]:
    cap.set_color("black")
    cap.set_linewidth(1.5)

# Overlay the actual data points with a little horizontal jitter. A local,
# seeded generator keeps the jitter reproducible without touching NumPy's
# global random state.
rng = np.random.default_rng(JITTER_SEED)
for i, y in enumerate(samples):
    x = rng.normal(positions[i], 0.04, size=len(y))
    ax.scatter(
        x,
        y,
        alpha=0.7,
        color=colors[i],
        edgecolors="black",
        linewidths=0.5,
        s=40,
    )

# Add a horizontal line at the target coverage
ax.axhline(
    y=TARGET_COVERAGE,
    color="red",
    linestyle="--",
    linewidth=1.5,
    label=f"Target coverage = {TARGET_COVERAGE:.2f}",
)
# Add a light grid
ax.yaxis.grid(True, linestyle="--", linewidth=0.7, alpha=0.7)

# Human-readable tick labels, in the same order as `cols`
new_labels = [
    "overall",
    "female",
    "non-german",
    "non-german male",
    "non-german female",
]
ax.set_xticks(positions)
ax.set_xticklabels(new_labels, rotation=30)

ax.set_ylabel("Coverage", fontsize=20)

ax.tick_params(axis="x", labelsize=18)
ax.tick_params(axis="y", labelsize=18)

# The reference line carries a label; without a legend it is never shown.
ax.legend(fontsize=14)

fig.tight_layout()

output_path = CP_DIR / "coverage_subgroups.png"
fig.savefig(output_path, dpi=300, bbox_inches="tight")

plt.show()

In [None]:
# Histogram of the avg_size column, saved to
# CP_DIR / "avg_size_distribution.png".
hist_fig, hist_ax = plt.subplots(figsize=(7, 4))
hist_ax.hist(df["avg_size"], bins=30, color="aquamarine", edgecolor="black")

hist_ax.tick_params(axis="both", which="major", labelsize=16)

# Hide the top and right frame lines.
for side in ("top", "right"):
    hist_ax.spines[side].set_visible(False)

hist_ax.set_xlabel("Average Prediction Set Size", fontsize=18)
hist_ax.set_ylabel("Count", fontsize=18)
hist_ax.grid(False)
hist_fig.tight_layout()

output_path = CP_DIR / "avg_size_distribution.png"
hist_fig.savefig(output_path, dpi=300, bbox_inches="tight")

plt.show()

In [None]:
# Report the largest and smallest value in the avg_size column.
avg_size_col = df["avg_size"]
max_value, min_value = avg_size_col.max(), avg_size_col.min()

print("Max avg_size:", max_value)
print("Min avg_size:", min_value)