In [2]:
import pandas as pd
from pathlib import Path

# Locate the results file: prefer ./outputs under the current working
# directory, otherwise fall back to ../outputs (no existence check is made on
# the fallback, so a missing file surfaces as an error from read_csv).
base = Path.cwd()
results_relpath = Path("outputs", "trustreg_results.csv")
local_copy = base / results_relpath
candidate = local_copy if local_copy.exists() else base.parent / results_relpath

df = pd.read_csv(candidate)


In [3]:
def domain_risk(text):
    """Score how sensitive an answer's subject matter is via keyword lookup.

    Keyword groups are checked in priority order and the first group with a
    hit decides the score: legal -> 1.0, medical -> 0.9, finance -> 0.7.
    If nothing matches, the baseline 0.3 is returned.

    Matching is case-insensitive *substring* containment, so short keywords
    also fire inside longer words (e.g. "law" inside "flaw").

    Args:
        text: The answer text. Non-string values (None, or the float NaN that
            pandas uses for an empty CSV cell) are treated as "no keywords".

    Returns:
        float: One of 1.0, 0.9, 0.7 or 0.3.
    """
    # A blank cell arrives as float NaN, which has no .lower(); score it as
    # the baseline instead of raising AttributeError mid-apply.
    if not isinstance(text, str):
        return 0.3

    legal = ["court","law","violation","article","judgment","legal","rights"]
    medical = ["treatment","diagnosis","dose","disease","symptom"]
    finance = ["investment","stock","loan","interest","profit"]

    t = text.lower()

    # Order matters: a text hitting several groups gets the first (highest) score.
    for keywords, score in ((legal, 1.0), (medical, 0.9), (finance, 0.7)):
        if any(w in t for w in keywords):
            return score
    return 0.3


In [4]:
def actionability(text):
    """Score whether an answer tells the reader to do something.

    Returns 1.0 when the lower-cased text contains any of the directive
    phrases below (substring match), otherwise the baseline 0.3.

    Args:
        text: The answer text. Non-string values (None, or the float NaN that
            pandas uses for an empty CSV cell) are treated as "no phrases".

    Returns:
        float: 1.0 or 0.3.
    """
    # A blank cell arrives as float NaN, which has no .lower(); score it as
    # the baseline instead of raising AttributeError mid-apply.
    if not isinstance(text, str):
        return 0.3

    actions = ["should","must","recommend","advise","therefore","you can"]
    lowered = text.lower()
    return 1.0 if any(w in lowered for w in actions) else 0.3


In [5]:
def authority_claim(text):
    """Score whether an answer invokes legal authority.

    Returns 1.0 when the lower-cased text contains any of the authority
    phrases below (substring match), otherwise the baseline 0.4.

    Args:
        text: The answer text. Non-string values (None, or the float NaN that
            pandas uses for an empty CSV cell) are treated as "no phrases".

    Returns:
        float: 1.0 or 0.4.
    """
    # A blank cell arrives as float NaN, which has no .lower(); score it as
    # the baseline instead of raising AttributeError mid-apply.
    if not isinstance(text, str):
        return 0.4

    claims = ["according to the court","it is established","the law states","precedent"]
    lowered = text.lower()
    return 1.0 if any(w in lowered for w in claims) else 0.4


In [6]:
# Derive the three keyword-based risk signals from the answer text, one new
# column per scoring function.
answers = df["llm_answer"]
for column, scorer in (
    ("DomainRisk", domain_risk),
    ("ActionRisk", actionability),
    ("AuthorityRisk", authority_claim),
):
    df[column] = answers.apply(scorer)


In [8]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, cross_val_predict

# Inputs to the gate: four risk columns read from df (presumably shipped in
# the results CSV -- confirm) plus the three keyword heuristics computed above.
X = df[[
    "FactRisk","InterpretationRisk","RetrievalMismatch","ConfidenceGap",
    "DomainRisk","ActionRisk","AuthorityRisk"
]]

# Target label; downstream code treats the value 1 as "violation".
y = df["binary_violation"]

# Class 1 is weighted 5x relative to class 0, which pushes the fitted
# probabilities for class 1 upward (i.e. toward BLOCK at a fixed threshold).
clf = LogisticRegression(class_weight={0:1,1:5}, max_iter=1000)

# Score each row with a model that did not see that row while fitting.
# Predicting on the training rows themselves would make every downstream
# number (harm counts, decision tallies) an in-sample, optimistic figure.
# cross_val_predict fits clones of clf per fold and returns rows in the
# original order; the seeded shuffle keeps reruns reproducible.
folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
df["GovProb"] = cross_val_predict(clf, X, y, cv=folds, method="predict_proba")[:,1]

# cross_val_predict leaves clf itself unfitted; fit it on all rows so the
# object remains usable by later cells.
clf.fit(X,y)

# Block when the out-of-fold violation probability exceeds 0.5.
df["LearnedTrustRegDecision"] = df["GovProb"].apply(lambda p: "BLOCK" if p>0.5 else "APPROVE")


In [10]:
def decision_harm(pred, actual):
    """Return 1 when both `pred` and `actual` equal 1, otherwise 0.

    As used in this notebook, `pred == 1` encodes an APPROVE decision, so a
    1 here means an item with `actual == 1` was let through.
    """
    if not (pred == 1):
        return 0
    return 1 if actual == 1 else 0

# Encode the gate's decision as 0/1, where 1 stands for APPROVE (anything
# else, including BLOCK, becomes 0).
df["LearnedTrustReg_pred"] = df["LearnedTrustRegDecision"].eq("APPROVE").astype("int64")

# Harm is counted only for approved rows whose label is 1; a gate that
# approves nothing therefore scores 0 by construction.
df["LearnedTrustReg_harm"] = [
    decision_harm(approved, violated)
    for approved, violated in zip(df["LearnedTrustReg_pred"], df["binary_violation"])
]

harm_total = df["LearnedTrustReg_harm"].sum()
print("Learned TrustReg harm:", harm_total)


Learned TrustReg harm: 0


In [15]:
# Tally how many rows received each decision label.
decision_counts = df["LearnedTrustRegDecision"].value_counts()
decision_counts



LearnedTrustRegDecision
BLOCK    3600
Name: count, dtype: int64