In [2]:
import pandas as pd
from pathlib import Path
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# --------------------------------------------------
# Load Data
# --------------------------------------------------

base = Path.cwd()
candidate = base / "outputs" / "trustreg_v2_results.csv"
if not candidate.exists():
    candidate = base.parent / "outputs" / "trustreg_v2_results.csv"

df = pd.read_csv(candidate)

# --------------------------------------------------
# Harm & Utility Definitions
# --------------------------------------------------

def decision_harm(predicted, true):
    if predicted==1 and true==0: return 5
    if predicted==0 and true==1: return 1
    return 0

def decision_utility(predicted, true):
    return 1 if predicted==1 and true==1 else 0

df["Harm"] = df.apply(lambda r: decision_harm(1, r["binary_violation"]), axis=1)
df["Utility"] = df.apply(lambda r: decision_utility(1, r["binary_violation"]), axis=1)

lambda_val = 0.5
df["GovTarget"] = df["Harm"] - lambda_val * df["Utility"]

# --------------------------------------------------
# Governance Feature Functions
# --------------------------------------------------

def domain_risk(text):
    legal = ["court","law","violation","article","judgment","legal","rights"]
    medical = ["treatment","diagnosis","dose","disease","symptom"]
    finance = ["investment","stock","loan","interest","profit"]

    t = text.lower()
    if any(w in t for w in legal): return 1.0
    if any(w in t for w in medical): return 0.9
    if any(w in t for w in finance): return 0.7
    return 0.3

def actionability(text):
    actions = ["should","must","recommend","advise","therefore","you can"]
    return 1.0 if any(w in text.lower() for w in actions) else 0.3

def authority_claim(text):
    claims = ["according to the court","it is established","the law states","precedent"]
    return 1.0 if any(w in text.lower() for w in claims) else 0.4

def severity_score(text):
    severe = ["torture","death","imprisonment","discrimination","inhuman","degrading"]
    moderate = ["detention","restriction","delay"]
    t = text.lower()
    if any(w in t for w in severe): return 1.0
    if any(w in t for w in moderate): return 0.6
    return 0.3

def article_risk(text):
    high = ["article 2","article 3","article 5","article 6"]
    medium = ["article 8","article 10","article 14"]
    t = text.lower()
    if any(w in t for w in high): return 1.0
    if any(w in t for w in medium): return 0.7
    return 0.4

def impact_risk(text):
    impact = ["appeal","file a case","legal action","court","claim compensation"]
    return 1.0 if any(w in text.lower() for w in impact) else 0.4

# --------------------------------------------------
# Apply v3 Features
# --------------------------------------------------

df["DomainRisk"] = df["llm_answer"].apply(domain_risk)
df["ActionRisk"] = df["llm_answer"].apply(actionability)
df["AuthorityRisk"] = df["llm_answer"].apply(authority_claim)
df["SeverityRisk"] = df["llm_answer"].apply(severity_score)
df["ArticleRisk"] = df["llm_answer"].apply(article_risk)
df["ImpactRisk"] = df["llm_answer"].apply(impact_risk)

# --------------------------------------------------
# Feature Matrix
# --------------------------------------------------

X = df[[
    "FactRisk","InterpretationRisk","RetrievalMismatch","ConfidenceGap",
    "DomainRisk","ActionRisk","AuthorityRisk",
    "SeverityRisk","ArticleRisk","ImpactRisk"
]]

y = df["GovTarget"]

# --------------------------------------------------
# Train/Test Split
# --------------------------------------------------

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

policy = LinearRegression()
policy.fit(X_train, y_train)

# --------------------------------------------------
# Governance Decision
# --------------------------------------------------

def trustreg_v3(score, threshold=0.0):
    return "BLOCK" if score>threshold else "APPROVE"

df_test = df.loc[X_test.index].copy()

df_test["GovScore_v3"] = policy.predict(X_test)
df_test["TrustReg_v3"] = df_test["GovScore_v3"].apply(trustreg_v3)
df_test["TrustReg_v3_pred"] = df_test["TrustReg_v3"].apply(lambda d: 1 if d=="APPROVE" else 0)

# --------------------------------------------------
# Evaluation
# --------------------------------------------------

df_test["TrustReg_v3_harm"] = df_test.apply(
    lambda r: decision_harm(r["TrustReg_v3_pred"], r["binary_violation"]),
    axis=1
)

df_test["TrustReg_v3_utility"] = df_test.apply(
    lambda r: decision_utility(r["TrustReg_v3_pred"], r["binary_violation"]),
    axis=1
)

print("TEST TrustReg v3 Harm:", df_test["TrustReg_v3_harm"].sum())
print("TEST TrustReg v3 Utility:", df_test["TrustReg_v3_utility"].sum())


TEST TrustReg v3 Harm: 740
TEST TrustReg v3 Utility: 427


In [27]:
df.to_csv("../outputs/trustreg_v3_results.csv", index=False)

In [3]:
from sklearn.ensemble import RandomForestRegressor

rf = RandomForestRegressor(n_estimators=200, max_depth=6, random_state=42)
rf.fit(X_train, y_train)

df_test["GovScore_rf"] = rf.predict(X_test)
df_test["TrustReg_rf"] = df_test["GovScore_rf"].apply(trustreg_v3)

print("TEST TrustReg RF Harm:", df_test.apply(
    lambda r: decision_harm(1 if r["TrustReg_rf"]=="APPROVE" else 0, r["binary_violation"]),
    axis=1
).sum())
print("TEST TrustReg RF Utility:", df_test.apply(
    lambda r: decision_utility(1 if r["TrustReg_rf"]=="APPROVE" else 0, r["binary_violation"]),
    axis=1
).sum())

TEST TrustReg RF Harm: 676
TEST TrustReg RF Utility: 461


In [9]:
# ...existing code...
from xgboost import XGBRegressor

xgb = XGBRegressor(n_estimators=300, max_depth=5, learning_rate=0.05)
xgb.fit(X_train, y_train)

df_test["GovScore_xgb"] = xgb.predict(X_test)
df_test["TrustReg_xgb"] = df_test["GovScore_xgb"].apply(trustreg_v3)

# ...existing code...
df_test["TrustReg_xgb_harm"] = df_test.apply(
    lambda r: decision_harm(1 if r["TrustReg_xgb"]=="APPROVE" else 0, r["binary_violation"]),
    axis=1
)

df_test["TrustReg_xgb_utility"] = df_test.apply(
    lambda r: decision_utility(1 if r["TrustReg_xgb"]=="APPROVE" else 0, r["binary_violation"]),
    axis=1
)

print("TEST TrustReg XGB Harm:", df_test["TrustReg_xgb_harm"].sum())
print("TEST TrustReg XGB Utility:", df_test["TrustReg_xgb_utility"].sum())
# ...existing code...

TEST TrustReg XGB Harm: 684
TEST TrustReg XGB Utility: 533


In [11]:
# Assuming X_train, X_test, rf, xgb, trustreg_v3 already trained

df_test = df.loc[X_test.index].copy()

df_test["TrustReg_rf"] = rf.predict(X_test)
df_test["TrustReg_rf"] = df_test["TrustReg_rf"].apply(trustreg_v3)

df_test["TrustReg_xgb"] = xgb.predict(X_test)
df_test["TrustReg_xgb"] = df_test["TrustReg_xgb"].apply(trustreg_v3)

# Also keep LR
df_test["TrustReg_v3"] = df_test["GovScore"].apply(trustreg_v3)

# Save final comparison CSV
df_test.to_csv("../outputs/trustreg_model_comparison.csv", index=False)
