# Qualitative Bias Probing

Use this notebook to inspect the most toxic and most stereotyped generations for each model. To point at a different results file or change how many examples are shown, edit `RESULTS_DIR`, `METRICS_FILE`, or `TOP_K` in the configuration section of the code cell below.


In [None]:
from pathlib import Path
import pandas as pd
from IPython.display import Markdown, display


# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Directory searched for results_with_metrics_*.csv when METRICS_FILE is unset.
RESULTS_DIR = Path("results")
# When left falsy (None or ""), the loader falls back to auto-discovery and
# picks the matching file in RESULTS_DIR whose name sorts last.
METRICS_FILE = None  # Set to a specific CSV path to override auto-discovery
TOP_K = 5            # Number of examples to show per model/metric

def load_metrics_dataframe() -> pd.DataFrame:
    """Load the metrics CSV into a DataFrame.

    Uses ``METRICS_FILE`` when it is set to a truthy value; otherwise
    auto-discovers ``results_with_metrics_*.csv`` files in ``RESULTS_DIR``
    and picks the one whose name sorts last.

    Returns:
        The contents of the selected CSV file.

    Raises:
        FileNotFoundError: If auto-discovery finds no matching file, or if
            the selected path does not exist.
    """
    if METRICS_FILE:
        metrics_path = Path(METRICS_FILE)
    else:
        metrics_files = sorted(RESULTS_DIR.glob("results_with_metrics_*.csv"))
        if not metrics_files:
            # Report the directory that was actually searched so the message
            # stays accurate when RESULTS_DIR is reconfigured.
            raise FileNotFoundError(
                f"No results_with_metrics_*.csv files found in '{RESULTS_DIR}/'."
            )
        # Lexicographic order: the last name is taken as the one to load.
        metrics_path = metrics_files[-1]
    if not metrics_path.exists():
        raise FileNotFoundError(f"Metrics file not found: {metrics_path}")
    print(f"Loading metrics from: {metrics_path}")
    return pd.read_csv(metrics_path)


def show_top_examples(df: pd.DataFrame):
    """Display each model's highest-scoring responses for both metrics.

    For every distinct value of the ``model`` column, renders a heading and
    then, per metric, the ``TOP_K`` rows with the largest score alongside
    their dataset, prompt and response.

    Args:
        df: Metrics table; must contain the columns ``model``, ``dataset``,
            ``prompt``, ``response``, ``toxicity_score`` and
            ``stereotype_score``.

    Raises:
        ValueError: If any of the required columns is absent.
    """
    expected = {"model", "dataset", "prompt", "response", "toxicity_score", "stereotype_score"}
    missing = expected - set(df.columns)
    if missing:
        raise ValueError(f"Missing required columns: {missing}")

    context_cols = ["dataset", "prompt", "response"]
    # (section heading, score column) pairs, rendered in this order per model.
    sections = (
        ("**Most toxic responses**", "toxicity_score"),
        ("**Most stereotyped responses**", "stereotype_score"),
    )

    for model_name, rows in df.groupby("model"):
        display(Markdown(f"## {model_name}"))

        for heading, score_col in sections:
            ranked = rows.sort_values(score_col, ascending=False)
            display(Markdown(heading))
            display(ranked.head(TOP_K)[context_cols + [score_col]])

# Load the metrics CSV, then render the top examples for each model.
metrics_df = load_metrics_dataframe()
show_top_examples(metrics_df)



FileNotFoundError: No results_with_metrics_*.csv files found in 'results/'.