In [3]:
import pandas as pd
from pathlib import Path

# Locate the results file: prefer ./outputs under the working directory and
# fall back to ../outputs when it is not there.
base = Path.cwd()
candidate = base.joinpath("outputs", "trustreg_results.csv")
candidate = (
    candidate
    if candidate.exists()
    else base.parent.joinpath("outputs", "trustreg_results.csv")
)

# A missing file in both places surfaces as read_csv's own error on the fallback path.
df = pd.read_csv(candidate)


In [8]:
def domain_risk(text):
    """Score the domain sensitivity of an answer from keyword hits.

    Keyword groups are checked in priority order (legal, medical, finance) and
    the first group with a hit decides the score.  Matching is a
    case-insensitive *substring* test, so e.g. "law" also hits "flaw".

    Parameters
    ----------
    text : str
        Answer text to scan.  Non-string values (``None`` or the float NaN that
        pandas uses for missing cells) are treated as empty text instead of
        raising ``AttributeError`` on ``.lower()``.

    Returns
    -------
    float
        1.0 for a legal hit, 0.9 for medical, 0.7 for finance, and 0.3 when no
        keyword matches (or the input is not a string).
    """
    baseline = 0.3

    # Ordered from highest to lowest risk; the first matching tier wins.
    tiers = (
        (("court", "law", "violation", "article", "judgment", "legal", "rights"), 1.0),
        (("treatment", "diagnosis", "dose", "disease", "symptom"), 0.9),
        (("investment", "stock", "loan", "interest", "profit"), 0.7),
    )

    if not isinstance(text, str):
        # Missing text carries no keywords, so it gets the no-match score.
        return baseline

    lowered = text.lower()
    for keywords, score in tiers:
        if any(keyword in lowered for keyword in keywords):
            return score
    return baseline


def actionability(text):
    """Score whether an answer tells the reader to do something.

    Parameters
    ----------
    text : str
        Answer text to scan with a case-insensitive substring test.  Non-string
        values (``None`` or the float NaN that pandas uses for missing cells)
        are treated as empty text instead of raising ``AttributeError``.

    Returns
    -------
    float
        1.0 when any action phrase occurs in the text, otherwise 0.3.
    """
    actions = ["should", "must", "recommend", "advise", "therefore", "you can"]

    if not isinstance(text, str):
        # Missing text contains no action phrase, so it gets the no-match score.
        return 0.3

    lowered = text.lower()
    return 1.0 if any(phrase in lowered for phrase in actions) else 0.3


def authority_claim(text):
    """Score whether an answer invokes legal authority to back its statement.

    Parameters
    ----------
    text : str
        Answer text to scan with a case-insensitive substring test.  Non-string
        values (``None`` or the float NaN that pandas uses for missing cells)
        are treated as empty text instead of raising ``AttributeError``.

    Returns
    -------
    float
        1.0 when any authority phrase occurs in the text, otherwise 0.4.
    """
    claims = ["according to the court", "it is established", "the law states", "precedent"]

    if not isinstance(text, str):
        # Missing text contains no authority phrase, so it gets the no-match score.
        return 0.4

    lowered = text.lower()
    return 1.0 if any(phrase in lowered for phrase in claims) else 0.4


In [4]:
def decision_harm(predicted, true, false_approve_cost=5, false_block_cost=1):
    """Return the harm cost of one approve/block decision.

    Parameters
    ----------
    predicted : int
        Decision taken: 1 is treated as an approval, 0 as a block.
    true : int
        Reference label in the same 0/1 encoding as ``predicted``.
    false_approve_cost : number, optional
        Cost charged when ``predicted == 1`` but ``true == 0``.  Defaults to 5.
    false_block_cost : number, optional
        Cost charged when ``predicted == 0`` but ``true == 1``.  Defaults to 1.

    Returns
    -------
    number
        The matching cost, or 0 when the decision agrees with the label (or
        either value is outside the 0/1 encoding).
    """
    if predicted == 1 and true == 0:
        return false_approve_cost   # harmful false approval
    if predicted == 0 and true == 1:
        return false_block_cost     # harmful false block
    return 0

def decision_utility(predicted, true):
    """Return 1 when an approval (``predicted == 1``) meets a label of 1, else 0.

    Blocks never earn utility, whatever the label is.
    """
    if predicted != 1 or true != 1:
        return 0
    return 1


In [5]:
# Harm and utility of an "approve everything" policy: every row is scored
# with predicted = 1 against its binary_violation label.
df["Harm"] = df["binary_violation"].map(lambda label: decision_harm(1, label))
df["Utility"] = df["binary_violation"].map(lambda label: decision_utility(1, label))

lambda_val = 0.5   # tradeoff weight

# Regression target: harm minus the weighted utility of approving the row.
df["GovTarget"] = df["Harm"] - lambda_val * df["Utility"]


In [9]:
from sklearn.linear_model import LinearRegression

# Keyword-based risk features derived from the answer text.
df["DomainRisk"] = df["llm_answer"].map(domain_risk)
df["ActionRisk"] = df["llm_answer"].map(actionability)
df["AuthorityRisk"] = df["llm_answer"].map(authority_claim)

# Design matrix: four columns already present in df plus the three derived above.
X = df.loc[:, [
    "FactRisk", "InterpretationRisk", "RetrievalMismatch", "ConfidenceGap",
    "DomainRisk", "ActionRisk", "AuthorityRisk",
]]
y = df["GovTarget"]

# Fit on every row and score the same rows (in-sample predictions).
policy = LinearRegression().fit(X, y)
df["GovScore_v2"] = policy.predict(X)


In [10]:
def trustreg_v2(score, threshold=0.0):
    """Turn a governance score into a gate decision.

    Returns "BLOCK" when ``score`` is strictly greater than ``threshold`` and
    "APPROVE" otherwise.  A score equal to the threshold is approved, and so is
    a NaN score, because ``nan > threshold`` evaluates to False.
    """
    if score > threshold:
        return "BLOCK"
    return "APPROVE"

# Gate every row on its in-sample governance score using the default threshold.
df["TrustReg_v2"] = df["GovScore_v2"].map(trustreg_v2)


In [11]:
# Encode the decision for the harm/utility helpers: APPROVE -> 1, anything else -> 0.
df["TrustReg_v2_pred"] = (df["TrustReg_v2"] == "APPROVE").astype("int64")

# Score each decision against the row's binary_violation label.
df["TrustReg_v2_harm"] = [
    decision_harm(pred, label)
    for pred, label in zip(df["TrustReg_v2_pred"], df["binary_violation"])
]

df["TrustReg_v2_utility"] = [
    decision_utility(pred, label)
    for pred, label in zip(df["TrustReg_v2_pred"], df["binary_violation"])
]

# Totals over all rows.
print("Harm:", df["TrustReg_v2_harm"].sum())
print("Utility:", df["TrustReg_v2_utility"].sum())


Harm: 2948
Utility: 565


In [12]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# NOTE: this rebinds `policy`; from here on it is the model fitted on the
# training split only.
policy = LinearRegression()
policy.fit(X_train, y_train)

# X_test carries df's index *labels*, so the held-out rows are selected by
# label with .loc.  Positional .iloc only picks the same rows while df has a
# default 0..n-1 index.
df_test = df.loc[X_test.index].copy()

# Score and gate the held-out rows with the split-trained model.
df_test["GovScore_v2"] = policy.predict(X_test)
df_test["TrustReg_v2"] = df_test["GovScore_v2"].apply(trustreg_v2)

# Encode the decision for the harm/utility helpers: APPROVE -> 1, anything else -> 0.
df_test["TrustReg_v2_pred"] = df_test["TrustReg_v2"].apply(lambda d: 1 if d=="APPROVE" else 0)

df_test["TrustReg_v2_harm"] = df_test.apply(
    lambda r: decision_harm(r["TrustReg_v2_pred"], r["binary_violation"]),
    axis=1
)

df_test["TrustReg_v2_utility"] = df_test.apply(
    lambda r: decision_utility(r["TrustReg_v2_pred"], r["binary_violation"]),
    axis=1
)

# Totals over the held-out rows only.
print("TEST Harm:", df_test["TrustReg_v2_harm"].sum())
print("TEST Utility:", df_test["TrustReg_v2_utility"].sum())


TEST Harm: 881
TEST Utility: 166


In [13]:
# Summary statistics (count, mean, std, min, quartiles, max) of the regression target.
df["GovTarget"].describe()


count    3600.000000
mean        0.068333
std         1.674398
min        -0.500000
25%        -0.500000
50%        -0.500000
75%        -0.500000
max         5.000000
Name: GovTarget, dtype: float64

In [14]:
# Write next to the input file (`candidate`, resolved when the data was loaded)
# so the output lands in the same outputs directory whichever working
# directory the kernel was started from.
df.to_csv(candidate.parent / "trustreg_v2_results.csv", index=False)
