In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score

# === Load synthetic LAW data ===
df_synthetic = pd.read_csv("generated_data_Our_prompts_Law.csv")    # Our Synthetic Data for LAW

# === Sensitive attribute ===
sensitive_attr = "race"

# === Decode byte strings in synthetic data if any ===
for col in df_synthetic.columns:
    if df_synthetic[col].dtype == object:
        df_synthetic[col] = df_synthetic[col].apply(lambda x: x.decode("utf-8") if isinstance(x, bytes) else x)

# === Convert categorical columns to numeric ===
categorical_columns = [
    "race", "sex", "age", "lsat", "ugpa", "region_first", "region_kirch", "region_midatl",
    "region_neweng", "region_other", "region_pacific", "region_south", "region_southwest", "region_west"
]
for col in categorical_columns:
    if col in df_synthetic.columns:
        df_synthetic[col] = pd.to_numeric(df_synthetic[col], errors="coerce")

# === Drop missing values ===
df_synthetic = df_synthetic.dropna()

# === Train-test split (70% train, 30% test) ===
X = df_synthetic.drop(columns=["bar_passed"])
y = df_synthetic["bar_passed"]
race = df_synthetic[sensitive_attr]

X_train, X_test, y_train, y_test, race_train, race_test = train_test_split(
    X, y, race, test_size=0.3, random_state=42, stratify=y
)

# === Define models ===
models = {
    "Decision_Tree": DecisionTreeClassifier(random_state=42),
    "Logistic_Regression": LogisticRegression(max_iter=1000),
    "Random_Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "SVM": SVC(probability=True),
    "XGBoost": XGBClassifier(eval_metric="logloss", random_state=42)
}

# === Train, predict, evaluate ===
print("\nModel Performance (Accuracy, Precision, Recall, AUROC):")
print("---------------------------------------------------------")
for name, model in models.items():
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    if hasattr(model, "predict_proba"):
        proba = model.predict_proba(X_test)[:, 1]
    elif hasattr(model, "decision_function"):
        proba = model.decision_function(X_test)
    else:
        proba = preds

    acc = accuracy_score(y_test, preds)
    prec = precision_score(y_test, preds, zero_division=0)
    rec = recall_score(y_test, preds, zero_division=0)
    auroc = roc_auc_score(y_test, proba)

    print(f"{name:20s} Acc={acc:.4f}  Prec={prec:.4f}  Rec={rec:.4f}  AUROC={auroc:.4f}")

    # Save predictions
    out_df = X_test.copy()
    out_df["bar_passed"] = preds
    out_df["race"] = race_test.values 
    out_df.to_csv(f"Our_TS_LAW_predictions_{name}.csv", index=False)

print("\nSaved prediction CSVs for all models.")



Model Performance (Accuracy, Precision, Recall, AUROC):
---------------------------------------------------------
Decision_Tree        Acc=0.9064  Prec=0.8933  Rec=0.9302  AUROC=0.9056
Logistic_Regression  Acc=0.9540  Prec=0.9556  Rec=0.9556  AUROC=0.9922
Random_Forest        Acc=0.9573  Prec=0.9769  Rec=0.9397  AUROC=0.9911
SVM                  Acc=0.9360  Prec=0.9423  Rec=0.9333  AUROC=0.9824
XGBoost              Acc=0.9442  Prec=0.9607  Rec=0.9302  AUROC=0.9894

Saved prediction CSVs for all models.


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score

# === Load LAW real data only ===
df_law = pd.read_csv("bar_pass_prediction (processed version).csv")  # Real LAW data
df_law["bar_passed"] = df_law["bar_passed"].astype(int)

# === Sensitive attribute ===
sensitive_attr = "race"

# === Convert categorical columns to numeric ===
categorical_columns = [
    "race", "sex", "age", "lsat", "ugpa", "region_first", "region_kirch", "region_midatl",
    "region_neweng", "region_other", "region_pacific", "region_south", "region_southwest", "region_west"
]
for col in categorical_columns:
    if col in df_law.columns:
        df_law[col] = pd.to_numeric(df_law[col], errors="coerce")

df_law = df_law.dropna()

# === Extract sensitive attribute ===
race_law = df_law[sensitive_attr]

# === Train-test split: 70% train, 30% test ===
X_law = df_law.drop(columns=["bar_passed"])
y_law = df_law["bar_passed"]
X_train, X_test, y_train, y_test, race_train, race_test = train_test_split(
    X_law, y_law, race_law, test_size=0.3, random_state=42, stratify=y_law
)

# === Define models ===
models = {
    "Decision_Tree": DecisionTreeClassifier(random_state=42),
    "Logistic_Regression": LogisticRegression(max_iter=1000),
    "Random_Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "SVM": SVC(probability=True),
    "XGBoost": XGBClassifier(eval_metric="logloss", random_state=42)
}

# === Train, predict, evaluate ===
print("\nModel Performance (Accuracy, Precision, Recall, AUROC):")
print("---------------------------------------------------------")
for name, model in models.items():
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    if hasattr(model, "predict_proba"):
        proba = model.predict_proba(X_test)[:, 1]
    elif hasattr(model, "decision_function"):
        proba = model.decision_function(X_test)
    else:
        proba = preds

    acc = accuracy_score(y_test, preds)
    prec = precision_score(y_test, preds, zero_division=0)
    rec = recall_score(y_test, preds, zero_division=0)
    auroc = roc_auc_score(y_test, proba)

    print(f"{name:20s} Acc={acc:.4f}  Prec={prec:.4f}  Rec={rec:.4f}  AUROC={auroc:.4f}")

    # Save predictions
    out_df = X_test.copy()
    out_df["bar_passed"] = preds
    out_df["race"] = race_test.loc[X_test.index].values  # preserve exact row order
    out_df.to_csv(f"LAW_predictions_{name}.csv", index=False)

print("\nSaved prediction CSVs for all models.")



Model Performance (Accuracy, Precision, Recall, AUROC):
---------------------------------------------------------
Decision_Tree        Acc=0.9134  Prec=0.9575  Rec=0.9509  AUROC=0.5876
Logistic_Regression  Acc=0.9493  Prec=0.9523  Rec=0.9965  AUROC=0.8580
Random_Forest        Acc=0.9468  Prec=0.9533  Rec=0.9926  AUROC=0.7983
SVM                  Acc=0.9484  Prec=0.9484  Rec=1.0000  AUROC=0.6191
XGBoost              Acc=0.9465  Prec=0.9538  Rec=0.9916  AUROC=0.8222

Saved prediction CSVs for all models.


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score

# === Load synthetic LAW data ===
df_synthetic = pd.read_csv("generated_data_CLLM_prompt_Law.csv")   # CLLM Synthetic Data for LAW

# === Sensitive attribute ===
sensitive_attr = "race"

# === Decode byte strings in synthetic data if any ===
for col in df_synthetic.columns:
    if df_synthetic[col].dtype == object:
        df_synthetic[col] = df_synthetic[col].apply(lambda x: x.decode("utf-8") if isinstance(x, bytes) else x)

# === Convert categorical columns to numeric ===
categorical_columns = [
    "race", "sex", "age", "lsat", "ugpa", "region_first", "region_kirch", "region_midatl",
    "region_neweng", "region_other", "region_pacific", "region_south", "region_southwest", "region_west"
]
for col in categorical_columns:
    if col in df_synthetic.columns:
        df_synthetic[col] = pd.to_numeric(df_synthetic[col], errors="coerce")

# === Drop missing values ===
df_synthetic = df_synthetic.dropna()

# === Train-test split on synthetic data only ===
X = df_synthetic.drop(columns=["bar_passed"])
y = df_synthetic["bar_passed"]
race = df_synthetic[sensitive_attr]

X_train, X_test, y_train, y_test, race_train, race_test = train_test_split(
    X, y, race, test_size=0.3, random_state=42, stratify=y
)

# === Define models ===
models = {
    "Decision_Tree": DecisionTreeClassifier(random_state=42),
    "Logistic_Regression": LogisticRegression(max_iter=1000),
    "Random_Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "SVM": SVC(probability=True),  # Enable for AUROC
    "XGBoost": XGBClassifier(eval_metric="logloss", random_state=42)
}

# === Train, predict, evaluate ===
print("\nModel Performance (Accuracy, Precision, Recall, AUROC):")
print("---------------------------------------------------------")
for name, model in models.items():
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    # Get prediction probabilities
    if hasattr(model, "predict_proba"):
        proba = model.predict_proba(X_test)[:, 1]
    elif hasattr(model, "decision_function"):
        proba = model.decision_function(X_test)
    else:
        proba = preds  # fallback if unavailable

    acc = accuracy_score(y_test, preds)
    prec = precision_score(y_test, preds, zero_division=0)
    rec = recall_score(y_test, preds, zero_division=0)
    auroc = roc_auc_score(y_test, proba)

    print(f"{name:20s} Acc={acc:.4f}  Prec={prec:.4f}  Rec={rec:.4f}  AUROC={auroc:.4f}")

    # Save predictions
    out_df = X_test.copy()
    out_df["bar_passed"] = preds
    out_df["race"] = race_test.values  # ensures full alignment without NaNs
    out_df.to_csv(f"CLLM_TS_LAW_predictions_{name}.csv", index=False)

print("\nSaved prediction CSVs for all models.")



Model Performance (Accuracy, Precision, Recall, AUROC):
---------------------------------------------------------
Decision_Tree        Acc=0.9300  Prec=0.9434  Rec=0.9259  AUROC=0.9304
Logistic_Regression  Acc=0.9500  Prec=0.9868  Rec=0.9198  AUROC=0.9907
Random_Forest        Acc=0.9467  Prec=0.9803  Rec=0.9198  AUROC=0.9859
SVM                  Acc=0.9067  Prec=0.8941  Rec=0.9383  AUROC=0.9725
XGBoost              Acc=0.9433  Prec=0.9677  Rec=0.9259  AUROC=0.9874

Saved prediction CSVs for all models.


In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score

# === Load dataset ===
df = pd.read_csv("synthetic_law_data_decaf.csv")  # DECaF synthetic LAW data
df["bar_passed"] = df["bar_passed"].astype(int)
sensitive_col = 'race'

# === Prepare features and labels ===
X = df.drop(columns=["bar_passed"])
y = df["bar_passed"]

# === Initialize models ===
models = {
    "Decision_Tree": DecisionTreeClassifier(random_state=42),
    "Logistic_Regression": LogisticRegression(max_iter=1000),
    "Random_Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "SVM": SVC(probability=True),
    "XGBoost": XGBClassifier(eval_metric="logloss", random_state=42)
}

# === Initialize results storage and save tracking ===
results = {model: {"Accuracy": [], "Precision": [], "Recall": [], "AUROC": []} for model in models}
saved_flags = {model: False for model in models}

# === Repeat evaluation for 25 seeds ===
for seed in range(25):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=seed)
    race_test = X_test[sensitive_col].values

    for name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_prob = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else None

        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred, zero_division=0)
        rec = recall_score(y_test, y_pred, zero_division=0)
        auc = roc_auc_score(y_test, y_prob) if y_prob is not None else np.nan

        results[name]["Accuracy"].append(acc)
        results[name]["Precision"].append(prec)
        results[name]["Recall"].append(rec)
        results[name]["AUROC"].append(auc)

        # Save once per model when recall < 1
        if rec < 1.0 and not saved_flags[name]:
            out_df = X_test.copy()
            out_df[f"{name}_pred"] = y_pred
            out_df["race"] = race_test
            out_df["bar_passed"] = y_test.values
            out_df.to_csv(f"DECAF_predictions_{name}.csv", index=False)
            saved_flags[name] = True

# === Format and print results table only ===
def fmt(metric_list):
    return f"{np.mean(metric_list):.3f} ± {np.std(metric_list):.3f}"

print(f"{'Model':<20} {'Accuracy↑':<15} {'Precision↑':<15} {'Recall↑':<15} {'AUROC↑':<15}")
print("-" * 80)

for name in models:
    acc = fmt(results[name]["Accuracy"])
    p = fmt(results[name]["Precision"])
    r = fmt(results[name]["Recall"])
    a = fmt(results[name]["AUROC"])
    print(f"{name:<20} {acc:<15} {p:<15} {r:<15} {a:<15}")


Model                Accuracy↑       Precision↑      Recall↑         AUROC↑         
--------------------------------------------------------------------------------
Decision_Tree        0.938 ± 0.012   0.969 ± 0.009   0.967 ± 0.014   0.495 ± 0.024  
Logistic_Regression  0.969 ± 0.009   0.969 ± 0.009   1.000 ± 0.000   0.412 ± 0.080  
Random_Forest        0.969 ± 0.009   0.969 ± 0.009   1.000 ± 0.000   0.401 ± 0.102  
SVM                  0.969 ± 0.009   0.969 ± 0.009   1.000 ± 0.000   0.497 ± 0.105  
XGBoost              0.965 ± 0.009   0.969 ± 0.009   0.996 ± 0.003   0.371 ± 0.081  
