In [1]:
import pandas as pd
from pathlib import Path

# Locate the results CSV: try <cwd>/outputs first, and fall back to
# <cwd>/../outputs when the first location does not exist.
base = Path.cwd()
results_relpath = Path("outputs", "trustreg_results.csv")
primary = base / results_relpath
candidate = primary if primary.exists() else base.parent / results_relpath

# pd.read_csv raises if the chosen path does not exist either.
df = pd.read_csv(candidate)


In [2]:
# Show the column names of the loaded results as a plain Python list.
print(df.columns.to_list())


['question', 'source_text', 'binary_violation', 'prompt_type', 'llm_answer', 'confidence', 'RetrievalMismatch', 'FactRisk', 'InterpretationRisk', 'ConfidenceGap', 'GovScore', 'TrustRegDecision']


In [3]:
def decision_harm(predicted, true, false_positive_cost=5, false_negative_cost=1):
    """Return the harm score for a single binary prediction.

    The two kinds of mismatch are weighted asymmetrically; the weights are
    parameters so the sensitivity of the totals to them can be explored.

    Args:
        predicted: Predicted label (0 or 1).
        true: Ground-truth label (0 or 1).
        false_positive_cost: Harm returned when ``predicted`` is 1 and
            ``true`` is 0. Defaults to 5.
        false_negative_cost: Harm returned when ``predicted`` is 0 and
            ``true`` is 1. Defaults to 1.

    Returns:
        ``false_positive_cost`` or ``false_negative_cost`` for the matching
        mismatch case, otherwise 0 (this includes agreeing labels and any
        values outside {0, 1}).
    """
    if predicted == 1 and true == 0:
        return false_positive_cost
    if predicted == 0 and true == 1:
        return false_negative_cost
    return 0

def trustreg_binary(decision):
    """Encode a TrustReg decision as 1 for exactly "APPROVE", else 0.

    The comparison is an exact, case-sensitive match; any other value
    (including non-string values) maps to 0.
    """
    return 1 if decision == "APPROVE" else 0

# Baseline: the raw LLM is scored as predicting 1 on every row.
df["LLM_pred"] = 1
# TrustReg prediction is 1 only where the decision is exactly "APPROVE".
df["TrustReg_pred"] = df["TrustRegDecision"].map(trustreg_binary)

# NOTE(review): predictions use 1 for "APPROVE", while the truth column is
# named `binary_violation` (presumably 1 = violation). If so, approving a true
# violation scores as a match (harm 0) — confirm the two encodings line up
# before interpreting the totals below.
for pred_col, harm_col in (("LLM_pred", "LLM_harm"), ("TrustReg_pred", "TrustReg_harm")):
    df[harm_col] = df.apply(
        lambda row, col=pred_col: decision_harm(row[col], row["binary_violation"]),
        axis=1,
    )

print("Raw LLM harm:", df["LLM_harm"].sum())
print("TrustReg harm:", df["TrustReg_harm"].sum())


Raw LLM harm: 1860
TrustReg harm: 1914


In [4]:
# Recompute the TrustReg prediction and harm columns using the shared helper
# functions defined earlier in the notebook.
# NOTE(review): this repeats the same encoding ("APPROVE" -> 1) and the same
# harm function as the previous cell, so the total printed here is expected to
# equal the earlier "TrustReg harm" figure — confirm whether a different
# mapping was intended.
df["TrustReg_pred"] = df["TrustRegDecision"].map(trustreg_binary)

trustreg_row_harm = df.apply(
    lambda row: decision_harm(row["TrustReg_pred"], row["binary_violation"]),
    axis=1,
)
df["TrustReg_harm"] = trustreg_row_harm

print("New TrustReg harm:", df["TrustReg_harm"].sum())


New TrustReg harm: 1914
