In [None]:
import pandas as pd
import numpy as np
from scipy.io import arff
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import precision_score, recall_score, roc_auc_score

# === Load and preprocess dataset ===
data, meta = arff.loadarff("compas.arff")    #Compas Real dataset
df = pd.DataFrame(data)

# Decode byte string columns; values that are not bytes pass through unchanged
for col in df.columns:
    if df[col].dtype == object:
        df[col] = df[col].apply(lambda x: x.decode("utf-8") if isinstance(x, bytes) else x)

# Convert binary/categorical columns to int
categorical_columns = [
    "sex", "age_cat_25-45", "age_cat_Greaterthan45", "age_cat_Lessthan25",
    "race_African-American", "race_Caucasian", "c_charge_degree_F", "c_charge_degree_M"
]
for col in categorical_columns:
    df[col] = df[col].astype(int)

# === Split features/labels ===
X = df.drop(columns=["two_year_recid"])
y = df["two_year_recid"].astype(int)
sensitive_col = "race_African-American"
# Every split has the same columns, so the presence check is done once here
# instead of once per model per seed.
has_sensitive = sensitive_col in X.columns

# === Define models ===
models = {
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    # probability=True makes SVC draw random numbers while fitting its probability
    # estimates; pin the seed like the other models so the AUC column is reproducible.
    "SVM": SVC(probability=True, random_state=42),
    "XGBoost": XGBClassifier(eval_metric="logloss", random_state=42)
}

# === Repeat for N seeds
N_REPEATS = 5
results = {model: {"Precision": [], "Recall": [], "AUC": [], "FTU": [], "DP": []} for model in models}

for seed in range(N_REPEATS):
    # A different random 80/20 split per repeat; the estimators are refit on each split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

    if has_sensitive:
        # Group membership and the flipped test set depend only on the split,
        # so build them once and share them across all models.
        in_group = (X_test[sensitive_col] == 1).to_numpy()
        out_group = ~in_group
        # NOTE(review): only `sensitive_col` is toggled; "race_Caucasian" is left as-is.
        # Confirm this is the intended counterfactual for the FTU metric.
        X_test_flipped = X_test.copy()
        X_test_flipped[sensitive_col] = 1 - X_test_flipped[sensitive_col]

    for name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_prob = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else None

        # === Performance Metrics ===
        results[name]["Precision"].append(precision_score(y_test, y_pred))
        results[name]["Recall"].append(recall_score(y_test, y_pred))
        results[name]["AUC"].append(roc_auc_score(y_test, y_prob) if y_prob is not None else np.nan)

        # === FTU: fraction of test rows whose prediction changes when the
        # sensitive attribute is flipped
        if has_sensitive:
            y_pred_flipped = model.predict(X_test_flipped)
            ftu = np.mean(np.abs(y_pred - y_pred_flipped))
        else:
            ftu = np.nan
        results[name]["FTU"].append(ftu)

        # === DP (Demographic Parity): absolute gap in positive-prediction rate
        # between rows with sensitive_col == 1 and all other rows.
        # If either group is missing from the test split the gap is undefined, so
        # record NaN rather than treating the missing group's rate as 0.
        if has_sensitive and in_group.any() and out_group.any():
            dp = abs(y_pred[in_group].mean() - y_pred[out_group].mean())
        else:
            dp = np.nan
        results[name]["DP"].append(dp)

# === Format and print results
def format_metric(values):
    """Summarise repeated scores as ``"mean ± std"`` with three decimals.

    Args:
        values: Sequence of numeric scores, one per repeat.

    Returns:
        A string such as ``"0.652 ± 0.033"``. The spread is ``np.std`` with
        its default settings.
    """
    center = np.mean(values)
    spread = np.std(values)
    return "%.3f ± %.3f" % (center, spread)

# === Print table
# One 20-wide label column followed by five 15-wide metric columns, all left-aligned.
row_template = " ".join(["{:<20}"] + ["{:<15}"] * 5)
metric_keys = ["Precision", "Recall", "AUC", "FTU", "DP"]

print(row_template.format("Model", "Precision↑", "Recall↑", "AUROC↑", "FTU↓", "DP↓"))
print("-" * 95)
for model_name in models:
    # Each cell is the "mean ± std" summary over the repeated splits.
    summaries = [format_metric(results[model_name][key]) for key in metric_keys]
    print(row_template.format(model_name, *summaries))


Model                Precision↑      Recall↑         AUROC↑          FTU↓            DP↓            
-----------------------------------------------------------------------------------------------
Decision Tree        0.592 ± 0.040   0.518 ± 0.013   0.630 ± 0.021   0.162 ± 0.037   0.161 ± 0.019  
Logistic Regression  0.652 ± 0.033   0.600 ± 0.009   0.720 ± 0.015   0.111 ± 0.020   0.270 ± 0.034  
Random Forest        0.598 ± 0.035   0.597 ± 0.026   0.673 ± 0.025   0.166 ± 0.016   0.167 ± 0.014  
SVM                  0.659 ± 0.040   0.590 ± 0.010   0.721 ± 0.015   0.012 ± 0.003   0.265 ± 0.033  
XGBoost              0.632 ± 0.033   0.577 ± 0.015   0.700 ± 0.023   0.152 ± 0.023   0.202 ± 0.012  


In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import precision_score, recall_score, roc_auc_score

# === Load and preprocess dataset ===
df = pd.read_csv("generated_data_Our_prompt_COMPAS.csv")   #Our generated dataset

# Decode any bytes values in object columns. Values that are not bytes pass
# through unchanged, so for text loaded by read_csv this is expected to be a no-op.
for col in df.columns:
    if df[col].dtype == object:
        df[col] = df[col].apply(lambda x: x.decode("utf-8") if isinstance(x, bytes) else x)

# Cast binary/categorical features to int
categorical_columns = [
    "sex", "age_cat_25-45", "age_cat_Greaterthan45", "age_cat_Lessthan25",
    "race_African-American", "race_Caucasian", "c_charge_degree_F", "c_charge_degree_M"
]
for col in categorical_columns:
    df[col] = df[col].astype(int)

# === Define inputs ===
X = df.drop(columns=["two_year_recid"])
y = df["two_year_recid"].astype(int)
sensitive_col = "race_African-American"
# Every split has the same columns, so the presence check is done once here
# instead of once per model per seed.
has_sensitive = sensitive_col in X.columns

# === Models ===
models = {
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    # probability=True makes SVC draw random numbers while fitting its probability
    # estimates; pin the seed like the other models so the AUC column is reproducible.
    "SVM": SVC(probability=True, random_state=42),
    "XGBoost": XGBClassifier(eval_metric="logloss", random_state=42)
}

# === Repeated Evaluation ===
N_REPEATS = 5
results = {model: {"Precision": [], "Recall": [], "AUC": [], "FTU": [], "DP": []} for model in models}

for seed in range(N_REPEATS):
    # A different random 80/20 split per repeat; the estimators are refit on each split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

    if has_sensitive:
        # Group membership and the flipped test set depend only on the split,
        # so build them once and share them across all models.
        in_group = (X_test[sensitive_col] == 1).to_numpy()
        out_group = ~in_group
        # NOTE(review): only `sensitive_col` is toggled; "race_Caucasian" is left as-is.
        # Confirm this is the intended counterfactual for the FTU metric.
        X_test_flipped = X_test.copy()
        X_test_flipped[sensitive_col] = 1 - X_test_flipped[sensitive_col]

    for name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_prob = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else None

        # --- Main metrics ---
        results[name]["Precision"].append(precision_score(y_test, y_pred))
        results[name]["Recall"].append(recall_score(y_test, y_pred))
        results[name]["AUC"].append(roc_auc_score(y_test, y_prob) if y_prob is not None else np.nan)

        # --- FTU: fraction of test rows whose prediction changes when the
        # sensitive attribute is flipped ---
        if has_sensitive:
            y_pred_flipped = model.predict(X_test_flipped)
            ftu = np.mean(np.abs(y_pred - y_pred_flipped))
        else:
            ftu = np.nan
        results[name]["FTU"].append(ftu)

        # --- DP: absolute gap in positive-prediction rate between rows with
        # sensitive_col == 1 and all other rows ---
        # If either group is missing from the test split the gap is undefined, so
        # record NaN rather than treating the missing group's rate as 0.
        if has_sensitive and in_group.any() and out_group.any():
            dp = abs(y_pred[in_group].mean() - y_pred[out_group].mean())
        else:
            dp = np.nan
        results[name]["DP"].append(dp)

# === Print final table ===
def format_metric(values):
    """Render a list of per-repeat scores as ``"<mean> ± <std>"``.

    Args:
        values: Sequence of numeric scores collected across repeats.

    Returns:
        The mean and ``np.std`` of ``values``, each formatted to three
        decimal places and joined by ``" ± "``.
    """
    return "{avg:.3f} ± {sd:.3f}".format(avg=np.mean(values), sd=np.std(values))

# Table layout: a 20-wide model column, then five 15-wide metric columns, left-aligned.
column_widths = [20, 15, 15, 15, 15, 15]
header_cells = ["Model", "Precision↑", "Recall↑", "AUROC↑", "FTU↓", "DP↓"]
metric_keys = ["Precision", "Recall", "AUC", "FTU", "DP"]

print(" ".join(cell.ljust(width) for cell, width in zip(header_cells, column_widths)))
print("-" * 95)
for model_name in models:
    # First cell is the model label, the rest are "mean ± std" summaries.
    cells = [model_name] + [format_metric(results[model_name][key]) for key in metric_keys]
    print(" ".join(cell.ljust(width) for cell, width in zip(cells, column_widths)))


Model                Precision↑      Recall↑         AUROC↑          FTU↓            DP↓            
-----------------------------------------------------------------------------------------------
Decision Tree        0.845 ± 0.020   0.775 ± 0.032   0.817 ± 0.018   0.060 ± 0.035   0.064 ± 0.029  
Logistic Regression  0.846 ± 0.011   0.841 ± 0.019   0.916 ± 0.006   0.014 ± 0.010   0.063 ± 0.040  
Random Forest        0.852 ± 0.026   0.840 ± 0.025   0.928 ± 0.008   0.078 ± 0.018   0.053 ± 0.022  
SVM                  0.863 ± 0.008   0.834 ± 0.025   0.926 ± 0.005   0.008 ± 0.012   0.032 ± 0.034  
XGBoost              0.850 ± 0.023   0.829 ± 0.019   0.933 ± 0.005   0.083 ± 0.020   0.054 ± 0.034  


In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import precision_score, recall_score, roc_auc_score

# === Load and preprocess dataset ===
df = pd.read_csv("compas_synthetic_data_1000_200_epochs.csv")  # DECAF generated dataset

# Decode any bytes values in object columns. Values that are not bytes pass
# through unchanged, so for text loaded by read_csv this is expected to be a no-op.
for col in df.columns:
    if df[col].dtype == object:
        df[col] = df[col].apply(lambda x: x.decode("utf-8") if isinstance(x, bytes) else x)

# Cast binary/categorical features to int
categorical_columns = [
    "sex", "age_cat_25-45", "age_cat_Greaterthan45", "age_cat_Lessthan25",
    "race_African-American", "race_Caucasian", "c_charge_degree_F", "c_charge_degree_M"
]
for col in categorical_columns:
    df[col] = df[col].astype(int)

# === Define inputs ===
X = df.drop(columns=["two_year_recid"])
y = df["two_year_recid"].astype(int)
sensitive_col = "race_African-American"
# Every split has the same columns, so the presence check is done once here
# instead of once per model per seed.
has_sensitive = sensitive_col in X.columns

# === Models ===
models = {
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    # probability=True makes SVC draw random numbers while fitting its probability
    # estimates; pin the seed like the other models so the AUC column is reproducible.
    "SVM": SVC(probability=True, random_state=42),
    "XGBoost": XGBClassifier(eval_metric="logloss", random_state=42)
}

# === Repeated Evaluation ===
N_REPEATS = 5
results = {model: {"Precision": [], "Recall": [], "AUC": [], "FTU": [], "DP": []} for model in models}

for seed in range(N_REPEATS):
    # A different random 80/20 split per repeat; the estimators are refit on each split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

    if has_sensitive:
        # Group membership and the flipped test set depend only on the split,
        # so build them once and share them across all models.
        in_group = (X_test[sensitive_col] == 1).to_numpy()
        out_group = ~in_group
        # NOTE(review): only `sensitive_col` is toggled; "race_Caucasian" is left as-is.
        # Confirm this is the intended counterfactual for the FTU metric.
        X_test_flipped = X_test.copy()
        X_test_flipped[sensitive_col] = 1 - X_test_flipped[sensitive_col]

    for name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_prob = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else None

        # --- Main metrics ---
        results[name]["Precision"].append(precision_score(y_test, y_pred))
        results[name]["Recall"].append(recall_score(y_test, y_pred))
        results[name]["AUC"].append(roc_auc_score(y_test, y_prob) if y_prob is not None else np.nan)

        # --- FTU: fraction of test rows whose prediction changes when the
        # sensitive attribute is flipped ---
        if has_sensitive:
            y_pred_flipped = model.predict(X_test_flipped)
            ftu = np.mean(np.abs(y_pred - y_pred_flipped))
        else:
            ftu = np.nan
        results[name]["FTU"].append(ftu)

        # --- DP: absolute gap in positive-prediction rate between rows with
        # sensitive_col == 1 and all other rows ---
        # If either group is missing from the test split the gap is undefined, so
        # record NaN rather than treating the missing group's rate as 0.
        if has_sensitive and in_group.any() and out_group.any():
            dp = abs(y_pred[in_group].mean() - y_pred[out_group].mean())
        else:
            dp = np.nan
        results[name]["DP"].append(dp)

# === Print final table ===
def format_metric(values):
    """Format repeated scores as mean and spread, e.g. ``"0.632 ± 0.032"``.

    Args:
        values: Sequence of numeric scores, one entry per repeat.

    Returns:
        ``np.mean(values)`` and ``np.std(values)`` rendered with three
        decimals and separated by ``" ± "``.
    """
    stats = (np.mean(values), np.std(values))
    return " ± ".join(format(stat, ".3f") for stat in stats)

# One 20-wide label column followed by five 15-wide metric columns, all left-aligned.
row_template = "{:<20} {:<15} {:<15} {:<15} {:<15} {:<15}"

print(row_template.format("Model", "Precision↑", "Recall↑", "AUROC↑", "FTU↓", "DP↓"))
print("-" * 95)
for model_name, per_metric in ((key, results[key]) for key in models):
    # Summarise each metric's per-repeat scores as "mean ± std", in column order.
    print(row_template.format(
        model_name,
        format_metric(per_metric["Precision"]),
        format_metric(per_metric["Recall"]),
        format_metric(per_metric["AUC"]),
        format_metric(per_metric["FTU"]),
        format_metric(per_metric["DP"]),
    ))


Model                Precision↑      Recall↑         AUROC↑          FTU↓            DP↓            
-----------------------------------------------------------------------------------------------
Decision Tree        0.589 ± 0.023   0.571 ± 0.040   0.556 ± 0.019   0.041 ± 0.022   0.083 ± 0.041  
Logistic Regression  0.632 ± 0.032   0.603 ± 0.046   0.632 ± 0.020   0.071 ± 0.046   0.102 ± 0.044  
Random Forest        0.569 ± 0.024   0.569 ± 0.020   0.570 ± 0.034   0.215 ± 0.017   0.080 ± 0.051  
SVM                  0.664 ± 0.029   0.398 ± 0.027   0.628 ± 0.022   0.004 ± 0.006   0.049 ± 0.038  
XGBoost              0.587 ± 0.029   0.563 ± 0.031   0.593 ± 0.027   0.175 ± 0.023   0.073 ± 0.021  


In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import precision_score, recall_score, roc_auc_score

# === Load and preprocess dataset ===
df = pd.read_csv("generated_data_CLLM_prompt_COMPAS.csv")  # CLLM generated dataset

# Decode any bytes values in object columns. Values that are not bytes pass
# through unchanged, so for text loaded by read_csv this is expected to be a no-op.
for col in df.columns:
    if df[col].dtype == object:
        df[col] = df[col].apply(lambda x: x.decode("utf-8") if isinstance(x, bytes) else x)

# Cast binary/categorical features to int
categorical_columns = [
    "sex", "age_cat_25-45", "age_cat_Greaterthan45", "age_cat_Lessthan25",
    "race_African-American", "race_Caucasian", "c_charge_degree_F", "c_charge_degree_M"
]
for col in categorical_columns:
    df[col] = df[col].astype(int)

# === Define inputs ===
X = df.drop(columns=["two_year_recid"])
y = df["two_year_recid"].astype(int)
sensitive_col = "race_African-American"
# Every split has the same columns, so the presence check is done once here
# instead of once per model per seed.
has_sensitive = sensitive_col in X.columns

# === Models ===
models = {
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    # probability=True makes SVC draw random numbers while fitting its probability
    # estimates; pin the seed like the other models so the AUC column is reproducible.
    "SVM": SVC(probability=True, random_state=42),
    "XGBoost": XGBClassifier(eval_metric="logloss", random_state=42)
}

# === Repeated Evaluation ===
N_REPEATS = 5
results = {model: {"Precision": [], "Recall": [], "AUC": [], "FTU": [], "DP": []} for model in models}

for seed in range(N_REPEATS):
    # A different random 80/20 split per repeat; the estimators are refit on each split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

    if has_sensitive:
        # Group membership and the flipped test set depend only on the split,
        # so build them once and share them across all models.
        in_group = (X_test[sensitive_col] == 1).to_numpy()
        out_group = ~in_group
        # NOTE(review): only `sensitive_col` is toggled; "race_Caucasian" is left as-is.
        # Confirm this is the intended counterfactual for the FTU metric.
        X_test_flipped = X_test.copy()
        X_test_flipped[sensitive_col] = 1 - X_test_flipped[sensitive_col]

    for name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_prob = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else None

        # --- Main metrics ---
        results[name]["Precision"].append(precision_score(y_test, y_pred))
        results[name]["Recall"].append(recall_score(y_test, y_pred))
        results[name]["AUC"].append(roc_auc_score(y_test, y_prob) if y_prob is not None else np.nan)

        # --- FTU: fraction of test rows whose prediction changes when the
        # sensitive attribute is flipped ---
        if has_sensitive:
            y_pred_flipped = model.predict(X_test_flipped)
            ftu = np.mean(np.abs(y_pred - y_pred_flipped))
        else:
            ftu = np.nan
        results[name]["FTU"].append(ftu)

        # --- DP: absolute gap in positive-prediction rate between rows with
        # sensitive_col == 1 and all other rows ---
        # If either group is missing from the test split the gap is undefined, so
        # record NaN rather than treating the missing group's rate as 0.
        if has_sensitive and in_group.any() and out_group.any():
            dp = abs(y_pred[in_group].mean() - y_pred[out_group].mean())
        else:
            dp = np.nan
        results[name]["DP"].append(dp)

# === Print final table ===
def format_metric(values):
    """Collapse per-repeat scores into a ``"mean ± std"`` table cell.

    Args:
        values: Sequence of numeric scores gathered over the repeats.

    Returns:
        A string with the mean and ``np.std`` of ``values``, both shown to
        three decimal places.
    """
    mean_text = format(np.mean(values), ".3f")
    std_text = format(np.std(values), ".3f")
    return mean_text + " ± " + std_text

# Column order of the report; each entry pairs the results key with its printed heading.
report_columns = [
    ("Precision", "Precision↑"),
    ("Recall", "Recall↑"),
    ("AUC", "AUROC↑"),
    ("FTU", "FTU↓"),
    ("DP", "DP↓"),
]

# Header: 20-wide model label, then 15-wide metric headings, left-aligned.
print(" ".join([f"{'Model':<20}"] + [f"{heading:<15}" for _, heading in report_columns]))
print("-" * 95)
for model_name in models:
    # One "mean ± std" summary per metric, padded to the same widths as the header.
    padded = [f"{format_metric(results[model_name][key]):<15}" for key, _ in report_columns]
    print(" ".join([f"{model_name:<20}"] + padded))


Model                Precision↑      Recall↑         AUROC↑          FTU↓            DP↓            
-----------------------------------------------------------------------------------------------
Decision Tree        0.866 ± 0.034   0.869 ± 0.039   0.868 ± 0.029   0.027 ± 0.015   0.126 ± 0.067  
Logistic Regression  0.884 ± 0.026   0.900 ± 0.035   0.944 ± 0.005   0.006 ± 0.012   0.112 ± 0.054  
Random Forest        0.866 ± 0.021   0.912 ± 0.032   0.948 ± 0.009   0.044 ± 0.016   0.111 ± 0.072  
SVM                  0.803 ± 0.026   0.878 ± 0.033   0.919 ± 0.015   0.005 ± 0.005   0.122 ± 0.043  
XGBoost              0.881 ± 0.033   0.908 ± 0.031   0.960 ± 0.009   0.051 ± 0.022   0.107 ± 0.063  
