In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import precision_score, recall_score, roc_auc_score

# === Load dataset ===
df = pd.read_csv("bar_pass_prediction (processed version).csv")    #Original dataset
df["bar_passed"] = df["bar_passed"].astype(int)
sensitive_col = 'race'  # Change if needed

# === Prepare features and labels ===
X = df.drop(columns=["bar_passed"])
y = df["bar_passed"]

# === Initialize models ===
models = {
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "SVM": SVC(probability=True),
    "XGBoost": XGBClassifier(eval_metric="logloss", random_state=42)
}

# === Repeated Evaluation (for mean ± std) ===
N_REPEATS = 5
results = {model: {"Precision": [], "Recall": [], "AUC": [], "FTU": [], "DP": []} for model in models}

for seed in range(N_REPEATS):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)
    for name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_prob = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else None

        # Save metrics
        results[name]["Precision"].append(precision_score(y_test, y_pred))
        results[name]["Recall"].append(recall_score(y_test, y_pred))
        results[name]["AUC"].append(roc_auc_score(y_test, y_prob) if y_prob is not None else np.nan)

        # === FTU ===
        X_test_flipped = X_test.copy()
        if sensitive_col in X_test_flipped.columns:
            X_test_flipped[sensitive_col] = 1 - X_test_flipped[sensitive_col]
            y_pred_flipped = model.predict(X_test_flipped)
            ftu = np.mean(np.abs(y_pred - y_pred_flipped))
        else:
            ftu = np.nan
        results[name]["FTU"].append(ftu)

        # === DP (Demographic Parity) ===
        if sensitive_col in X_test.columns:
            mask_priv = X_test[sensitive_col] == 7
            mask_unpriv = ~mask_priv
            p_priv = y_pred[mask_priv].mean() if np.any(mask_priv) else 0
            p_unpriv = y_pred[mask_unpriv].mean() if np.any(mask_unpriv) else 0
            dp = abs(p_priv - p_unpriv)
        else:
            dp = np.nan
        results[name]["DP"].append(dp)

# === Format Results Table ===
def format_metric(values):
    mean = np.mean(values)
    std = np.std(values)
    return f"{mean:.3f} ± {std:.3f}"

# Print Header
print(f"{'Model':<20} {'Precision↑':<15} {'Recall↑':<15} {'AUROC↑':<15} {'FTU↓':<15} {'DP↓':<15}")
print("-" * 95)

# Print formatted table
for model in models:
    row = [model]
    for metric in ["Precision", "Recall", "AUC", "FTU", "DP"]:
        row.append(format_metric(results[model][metric]))
    print(f"{row[0]:<20} {row[1]:<15} {row[2]:<15} {row[3]:<15} {row[4]:<15} {row[5]:<15}")


Model                Precision↑      Recall↑         AUROC↑          FTU↓            DP↓            
-----------------------------------------------------------------------------------------------
Decision Tree        0.958 ± 0.002   0.946 ± 0.003   0.583 ± 0.007   0.083 ± 0.023   0.132 ± 0.009  
Logistic Regression  0.953 ± 0.003   0.996 ± 0.000   0.855 ± 0.005   0.008 ± 0.002   0.049 ± 0.004  
Random Forest        0.955 ± 0.003   0.991 ± 0.002   0.807 ± 0.008   0.010 ± 0.002   0.067 ± 0.008  
SVM                  0.949 ± 0.003   1.000 ± 0.000   0.565 ± 0.055   0.000 ± 0.000   0.000 ± 0.000  
XGBoost              0.955 ± 0.003   0.990 ± 0.001   0.826 ± 0.006   0.010 ± 0.001   0.074 ± 0.007  


In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import precision_score, recall_score, roc_auc_score

# === Load dataset ===
df = pd.read_csv("generated_data_Our_prompts_Law.csv")    # Our prompt with Law data
df["bar_passed"] = df["bar_passed"].astype(int)
sensitive_col = 'race'  # Update this if the sensitive feature is different

# === Prepare features and labels ===
X = df.drop(columns=["bar_passed"])
y = df["bar_passed"]

# === Initialize models ===
models = {
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "SVM": SVC(probability=True),
    "XGBoost": XGBClassifier(eval_metric="logloss", random_state=42)
}

# === Repeated Evaluation (for mean ± std) ===
N_REPEATS = 5
results = {model: {"Precision": [], "Recall": [], "AUC": [], "FTU": [], "DP": []} for model in models}

for seed in range(N_REPEATS):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)
    for name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_prob = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else None

        # Save metrics
        results[name]["Precision"].append(precision_score(y_test, y_pred))
        results[name]["Recall"].append(recall_score(y_test, y_pred))
        results[name]["AUC"].append(roc_auc_score(y_test, y_prob) if y_prob is not None else np.nan)

        # === FTU ===
        X_test_flipped = X_test.copy()
        if sensitive_col in X_test_flipped.columns:
            X_test_flipped[sensitive_col] = 1 - X_test_flipped[sensitive_col]
            y_pred_flipped = model.predict(X_test_flipped)
            ftu = np.mean(np.abs(y_pred - y_pred_flipped))
        else:
            ftu = np.nan
        results[name]["FTU"].append(ftu)

        # === DP (Demographic Parity) ===
        if sensitive_col in X_test.columns:
            mask_priv = X_test[sensitive_col] == 7
            mask_unpriv = ~mask_priv
            p_priv = y_pred[mask_priv].mean() if np.any(mask_priv) else 0
            p_unpriv = y_pred[mask_unpriv].mean() if np.any(mask_unpriv) else 0
            dp = abs(p_priv - p_unpriv)
        else:
            dp = np.nan
        results[name]["DP"].append(dp)

# === Format Results Table ===
def format_metric(values):
    mean = np.mean(values)
    std = np.std(values)
    return f"{mean:.3f} ± {std:.3f}"

# Print Header
print(f"{'Model':<20} {'Precision↑':<15} {'Recall↑':<15} {'AUROC↑':<15} {'FTU↓':<15} {'DP↓':<15}")
print("-" * 95)

# Print formatted table
for model in models:
    row = [model]
    for metric in ["Precision", "Recall", "AUC", "FTU", "DP"]:
        row.append(format_metric(results[model][metric]))
    print(f"{row[0]:<20} {row[1]:<15} {row[2]:<15} {row[3]:<15} {row[4]:<15} {row[5]:<15}")


Model                Precision↑      Recall↑         AUROC↑          FTU↓            DP↓            
-----------------------------------------------------------------------------------------------
Decision Tree        0.906 ± 0.023   0.926 ± 0.020   0.914 ± 0.012   0.056 ± 0.018   0.051 ± 0.021  
Logistic Regression  0.958 ± 0.015   0.937 ± 0.017   0.988 ± 0.004   0.042 ± 0.010   0.059 ± 0.026  
Random Forest        0.958 ± 0.020   0.931 ± 0.016   0.985 ± 0.005   0.022 ± 0.007   0.057 ± 0.022  
SVM                  0.941 ± 0.022   0.916 ± 0.018   0.982 ± 0.005   0.010 ± 0.004   0.048 ± 0.027  
XGBoost              0.942 ± 0.020   0.929 ± 0.020   0.983 ± 0.006   0.032 ± 0.010   0.058 ± 0.026  


In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import precision_score, recall_score, roc_auc_score

# === Load dataset ===
df = pd.read_csv("synthetic_law_data_decaf.csv")    # DECAF Generated Data  for Law data
df["bar_passed"] = df["bar_passed"].astype(int)
sensitive_col = 'race'  # Update this if the sensitive feature is different

# === Prepare features and labels ===
X = df.drop(columns=["bar_passed"])
y = df["bar_passed"]

# === Initialize models ===
models = {
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "SVM": SVC(probability=True),
    "XGBoost": XGBClassifier(eval_metric="logloss", random_state=42)
}

# === Repeated Evaluation (for mean ± std) ===
N_REPEATS = 5
results = {model: {"Precision": [], "Recall": [], "AUC": [], "FTU": [], "DP": []} for model in models}

for seed in range(N_REPEATS):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)
    for name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_prob = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else None

        # Save metrics
        results[name]["Precision"].append(precision_score(y_test, y_pred))
        results[name]["Recall"].append(recall_score(y_test, y_pred))
        results[name]["AUC"].append(roc_auc_score(y_test, y_prob) if y_prob is not None else np.nan)

        # === FTU ===
        X_test_flipped = X_test.copy()
        if sensitive_col in X_test_flipped.columns:
            X_test_flipped[sensitive_col] = 1 - X_test_flipped[sensitive_col]
            y_pred_flipped = model.predict(X_test_flipped)
            ftu = np.mean(np.abs(y_pred - y_pred_flipped))
        else:
            ftu = np.nan
        results[name]["FTU"].append(ftu)

        # === DP (Demographic Parity) ===
        if sensitive_col in X_test.columns:
            mask_priv = X_test[sensitive_col] == 7
            mask_unpriv = ~mask_priv
            p_priv = y_pred[mask_priv].mean() if np.any(mask_priv) else 0
            p_unpriv = y_pred[mask_unpriv].mean() if np.any(mask_unpriv) else 0
            dp = abs(p_priv - p_unpriv)
        else:
            dp = np.nan
        results[name]["DP"].append(dp)

# === Format Results Table ===
def format_metric(values):
    mean = np.mean(values)
    std = np.std(values)
    return f"{mean:.3f} ± {std:.3f}"

# Print Header
print(f"{'Model':<20} {'Precision↑':<15} {'Recall↑':<15} {'AUROC↑':<15} {'FTU↓':<15} {'DP↓':<15}")
print("-" * 95)

# Print formatted table
for model in models:
    row = [model]
    for metric in ["Precision", "Recall", "AUC", "FTU", "DP"]:
        row.append(format_metric(results[model][metric]))
    print(f"{row[0]:<20} {row[1]:<15} {row[2]:<15} {row[3]:<15} {row[4]:<15} {row[5]:<15}")


Model                Precision↑      Recall↑         AUROC↑          FTU↓            DP↓            
-----------------------------------------------------------------------------------------------
Decision Tree        0.969 ± 0.006   0.975 ± 0.004   0.513 ± 0.030   0.013 ± 0.013   0.040 ± 0.044  
Logistic Regression  0.968 ± 0.007   1.000 ± 0.000   0.453 ± 0.142   0.000 ± 0.000   0.000 ± 0.000  
Random Forest        0.968 ± 0.007   1.000 ± 0.000   0.454 ± 0.172   0.000 ± 0.000   0.000 ± 0.000  
SVM                  0.968 ± 0.007   1.000 ± 0.000   0.404 ± 0.185   0.000 ± 0.000   0.000 ± 0.000  
XGBoost              0.968 ± 0.008   0.997 ± 0.003   0.479 ± 0.067   0.001 ± 0.002   0.018 ± 0.015  


In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import precision_score, recall_score, roc_auc_score

# === Load dataset ===
df = pd.read_csv("generated_data_CLLM_prompt_Law.csv")    # CLLM Generated Data  for Law data
df["bar_passed"] = df["bar_passed"].astype(int)
sensitive_col = 'race'  # Update this if the sensitive feature is different

# === Prepare features and labels ===
X = df.drop(columns=["bar_passed"])
y = df["bar_passed"]

# === Initialize models ===
models = {
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "SVM": SVC(probability=True),
    "XGBoost": XGBClassifier(eval_metric="logloss", random_state=42)
}

# === Repeated Evaluation (for mean ± std) ===
N_REPEATS = 5
results = {model: {"Precision": [], "Recall": [], "AUC": [], "FTU": [], "DP": []} for model in models}

for seed in range(N_REPEATS):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)
    for name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_prob = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else None

        # Save metrics
        results[name]["Precision"].append(precision_score(y_test, y_pred))
        results[name]["Recall"].append(recall_score(y_test, y_pred))
        results[name]["AUC"].append(roc_auc_score(y_test, y_prob) if y_prob is not None else np.nan)

        # === FTU ===
        X_test_flipped = X_test.copy()
        if sensitive_col in X_test_flipped.columns:
            X_test_flipped[sensitive_col] = 1 - X_test_flipped[sensitive_col]
            y_pred_flipped = model.predict(X_test_flipped)
            ftu = np.mean(np.abs(y_pred - y_pred_flipped))
        else:
            ftu = np.nan
        results[name]["FTU"].append(ftu)

        # === DP (Demographic Parity) ===
        if sensitive_col in X_test.columns:
            mask_priv = X_test[sensitive_col] == 7
            mask_unpriv = ~mask_priv
            p_priv = y_pred[mask_priv].mean() if np.any(mask_priv) else 0
            p_unpriv = y_pred[mask_unpriv].mean() if np.any(mask_unpriv) else 0
            dp = abs(p_priv - p_unpriv)
        else:
            dp = np.nan
        results[name]["DP"].append(dp)

# === Format Results Table ===
def format_metric(values):
    mean = np.mean(values)
    std = np.std(values)
    return f"{mean:.3f} ± {std:.3f}"

# Print Header
print(f"{'Model':<20} {'Precision↑':<15} {'Recall↑':<15} {'AUROC↑':<15} {'FTU↓':<15} {'DP↓':<15}")
print("-" * 95)

# Print formatted table
for model in models:
    row = [model]
    for metric in ["Precision", "Recall", "AUC", "FTU", "DP"]:
        row.append(format_metric(results[model][metric]))
    print(f"{row[0]:<20} {row[1]:<15} {row[2]:<15} {row[3]:<15} {row[4]:<15} {row[5]:<15}")


Model                Precision↑      Recall↑         AUROC↑          FTU↓            DP↓            
-----------------------------------------------------------------------------------------------
Decision Tree        0.912 ± 0.041   0.909 ± 0.018   0.902 ± 0.022   0.040 ± 0.015   0.115 ± 0.107  
Logistic Regression  0.926 ± 0.033   0.927 ± 0.011   0.983 ± 0.004   0.013 ± 0.012   0.105 ± 0.117  
Random Forest        0.926 ± 0.030   0.921 ± 0.032   0.981 ± 0.003   0.025 ± 0.013   0.111 ± 0.095  
SVM                  0.866 ± 0.050   0.907 ± 0.014   0.951 ± 0.011   0.006 ± 0.006   0.074 ± 0.069  
XGBoost              0.918 ± 0.048   0.929 ± 0.025   0.980 ± 0.004   0.039 ± 0.006   0.139 ± 0.079  
