In [11]:
import pandas as pd
from inspect_ai.analysis import samples_df
from inspect_viz import Data
from inspect_viz.plot import plot, legend
from inspect_viz.mark import cell, text, bar_y

# Two sets of runs over the same four eval/model combinations. Judging by the
# constant names, they differ in which judge model graded them (TODO confirm).
# CLAUDE_JUDGE_FILES is kept so the notebook can be pointed at the other set by
# changing the samples_df() argument below.
CLAUDE_JUDGE_FILES = [
    '../evals/results/inspect/base/honesty_eval/2026-02-22T23-02-24+00-00_honesty-eval_9NeaHRd62N3LtW69m6GAFx.eval',
    '../evals/results/inspect/base/pushback_eval/2026-02-22T23-10-39+00-00_pushback-eval_4na78AdFByeVSG6qnryDH4.eval',
    '../evals/results/inspect/finetuned/honesty_eval/2026-02-22T23-20-27+00-00_honesty-eval_WNLy55DTTBs3cMQ7LfAhSb.eval',
    '../evals/results/inspect/finetuned/pushback_eval/2026-02-22T23-25-36+00-00_pushback-eval_h6XauAuZBqQYr4cXuirFtD.eval',
]
GPT_JUDGE_FILES = [
    '../evals/results/inspect/base/honesty_eval/2026-02-24T00-24-04+00-00_honesty-eval_nSBVJNJKHnkuCv3pCP36jH.eval',
    '../evals/results/inspect/base/pushback_eval/2026-02-24T00-38-18+00-00_pushback-eval_m9LBqUjL2nWKwKp77weuzE.eval',
    '../evals/results/inspect/finetuned/honesty_eval/2026-02-24T00-49-57+00-00_honesty-eval_Rk6zds7wSZTquzwxjSGTTB.eval',
    '../evals/results/inspect/finetuned/pushback_eval/2026-02-24T00-52-54+00-00_pushback-eval_SorZb6v2RqiMg23cMpAoxM.eval',
]

# Load exactly one set of logs. Both sets live under ../evals/results/inspect,
# so the previous directory-wide load read every log there only to have its
# result overwritten by this call before it was ever used.
df = samples_df(GPT_JUDGE_FILES)

# Split the samples by task: a row belongs to a task when that task's scorer
# column is populated. .copy() detaches each slice from `df`.
honesty_df = df[df["score_honesty_scorer"].notna()].copy()
pushback_df = df[df["score_pushback_scorer"].notna()].copy()

# ── Honesty heatmap ───────────────────────────────────────────────────────────

# Mean honesty score for every (log, clarity, cost) combination; each log file
# gets its own heatmap below.
honesty_agg = (
    honesty_df
    .groupby(["log", "metadata_clarity", "metadata_cost"])
    ["score_honesty_scorer"]
    .mean()
    .reset_index()
    .rename(columns={"score_honesty_scorer": "honesty_rate"})
)

print("── HONESTY RATE: Clarity x Cost ─────────────────────────")
for log_name, group in honesty_agg.groupby("log"):
    # Match "base" as a whole path component rather than as a substring of the
    # full path: a substring test would also fire on a parent directory such as
    # "codebase" or on a run id that happens to contain "base", labelling every
    # log "base".
    path_parts = str(log_name).replace("\\", "/").split("/")
    label = "base" if "base" in path_parts else "finetuned"

    # The aggregated rows are handed to inspect_viz through a parquet scratch
    # file; the same path is rewritten on every iteration.
    group.to_parquet("/tmp/honesty_heatmap.parquet", index=False)
    data = Data.from_file("/tmp/honesty_heatmap.parquet")

    print(f"\n{label}")
    display(plot(
        # Coloured cell per (cost, clarity) pair, with the rate overlaid as text.
        cell(data, x="metadata_cost", y="metadata_clarity", fill="honesty_rate"),
        text(data, x="metadata_cost", y="metadata_clarity", text="honesty_rate", fill="white"),
        color_scheme="blues",
        x_label="Cost", y_label="Clarity",
        margin_left=100,
        height=250,
    ))

# ── Pushback heatmap ──────────────────────────────────────────────────────────

# Mean pushback score for every (log, pushback type, cost) combination; each
# log file gets its own heatmap below.
pushback_agg = (
    pushback_df
    .groupby(["log", "metadata_pushback_type", "metadata_cost"])
    ["score_pushback_scorer"]
    .mean()
    .reset_index()
    .rename(columns={"score_pushback_scorer": "maintenance_rate"})
)

print("\n── PUSHBACK MAINTENANCE: Pushback Type x Cost ───────────")
for log_name, group in pushback_agg.groupby("log"):
    # Match "base" as a whole path component rather than as a substring of the
    # full path: a substring test would also fire on a parent directory such as
    # "codebase" or on a run id that happens to contain "base", labelling every
    # log "base".
    path_parts = str(log_name).replace("\\", "/").split("/")
    label = "base" if "base" in path_parts else "finetuned"

    # The aggregated rows are handed to inspect_viz through a parquet scratch
    # file; the same path is rewritten on every iteration.
    group.to_parquet("/tmp/pushback_heatmap.parquet", index=False)
    data = Data.from_file("/tmp/pushback_heatmap.parquet")

    print(f"\n{label}")
    display(plot(
        # Coloured cell per (cost, pushback type) pair, with the rate overlaid as text.
        cell(data, x="metadata_cost", y="metadata_pushback_type", fill="maintenance_rate"),
        text(data, x="metadata_cost", y="metadata_pushback_type", text="maintenance_rate", fill="white"),
        color_scheme="greens",
        x_label="Cost", y_label="Pushback Type",
        margin_left=100,
        height=250,
    ))

# ── Overall scores bar chart ────────────────────────────────────────────────


def _model_label(log_path):
    """Return "base" if the log path has a ``base`` directory component, else "finetuned".

    Whole path components are compared instead of doing a substring search so
    that an unrelated parent directory (e.g. "codebase") or a run id containing
    "base" cannot cause finetuned logs to be grouped under the base model.
    """
    parts = str(log_path).replace("\\", "/").split("/")
    return "base" if "base" in parts else "finetuned"


def _overall_by_model(task_df, score_column, task):
    """Return one row per model with the mean of ``score_column``.

    The result has the columns ``model``, ``score`` and ``task`` (the latter
    filled with the given ``task`` name) so frames for different tasks can be
    stacked.
    """
    return (
        task_df
        .assign(model=task_df["log"].map(_model_label))
        .groupby("model")[score_column]
        .mean()
        .reset_index()
        .rename(columns={score_column: "score"})
        .assign(task=task)
    )


# Build a combined dataframe with scores from both tasks
honesty_overall = _overall_by_model(honesty_df, "score_honesty_scorer", "honesty")
pushback_overall = _overall_by_model(pushback_df, "score_pushback_scorer", "pushback")

# ignore_index gives the stacked frame a clean 0..n-1 index instead of
# repeating each input frame's own index labels.
combined = pd.concat([honesty_overall, pushback_overall], ignore_index=True)

# The combined rows are handed to inspect_viz through a parquet scratch file.
combined.to_parquet("/tmp/combined_scores.parquet", index=False)
data = Data.from_file("/tmp/combined_scores.parquet")

print("\n── HONESTY + PUSHBACK: Base vs Finetuned ────────────────")
display(plot(
    # One bar per (model, task) pair, coloured by task.
    bar_y(data, x="model", y="score", fill="task", dodge="task"),
    legend=legend("color"),
    x_label="Model",
    y_label="Score",
    height=350,
))

── HONESTY RATE: Clarity x Cost ─────────────────────────

base


<inspect_viz._core.component.Component object at 0x145fc9f50>


finetuned


<inspect_viz._core.component.Component object at 0x1466129d0>


── PUSHBACK MAINTENANCE: Pushback Type x Cost ───────────

base


<inspect_viz._core.component.Component object at 0x145ace650>


finetuned


<inspect_viz._core.component.Component object at 0x145be7150>


── HONESTY + PUSHBACK: Base vs Finetuned ────────────────


<inspect_viz._core.component.Component object at 0x145a3bd50>