In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, roc_curve, auc
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from fairlearn.preprocessing import CorrelationRemover
from aif360.algorithms.preprocessing import DisparateImpactRemover, Reweighing
from aif360.datasets import StandardDataset

# === Load dataset ===
df = pd.read_csv("bar_pass_prediction (processed version).csv")
df["bar_passed"] = df["bar_passed"].astype(int)

# === Train-test split ===
X = df.drop(columns=["bar_passed"])
y = df["bar_passed"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
y_test = y_test.reset_index(drop=True)
race_test = X_test["race"].reset_index(drop=True)

# === Model training function ===
def train_models(X_train, y_train, sample_weight=None):
    models = {
        "Decision Tree": DecisionTreeClassifier(random_state=42),
        "Logistic Regression": LogisticRegression(max_iter=1000),
        "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
        "SVM": SVC(probability=True),
        "XGBoost": XGBClassifier(eval_metric="logloss", random_state=42)
    }
    for name, model in models.items():
        if sample_weight is not None:
            model.fit(X_train, y_train, sample_weight=sample_weight)
        else:
            model.fit(X_train, y_train)
    return models

# === Fairness metric evaluation ===
def evaluate_fairness(models, X_test, y_test, race_test, title):
    print(f"\nFairness Metrics: {title}")
    print("--------------------------------------------------------------------------")
    print(f"{'Model':<20} {'ABROCA':<10} {'ERD':<10} {'TPRD':<10} {'Fairness':<10}")
    print("--------------------------------------------------------------------------")

    for name, model in models.items():
        y_pred = model.predict(X_test)
        y_prob = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else None

        group_priv = race_test == 7
        group_unpriv = race_test != 7

        err_priv = np.mean(y_pred[group_priv] != y_test[group_priv])
        err_unpriv = np.mean(y_pred[group_unpriv] != y_test[group_unpriv])
        erd = err_unpriv - err_priv

        def tpr(y_true, y_pred):
            tp = np.sum((y_true == 1) & (y_pred == 1))
            fn = np.sum((y_true == 1) & (y_pred == 0))
            return tp / (tp + fn) if (tp + fn) > 0 else 0

        tpr_priv = tpr(y_test[group_priv], y_pred[group_priv])
        tpr_unpriv = tpr(y_test[group_unpriv], y_pred[group_unpriv])
        tprd = tpr_unpriv - tpr_priv

        abroca = np.nan
        if y_prob is not None:
            fpr_priv, tpr_priv_vals, _ = roc_curve(y_test[group_priv], y_prob[group_priv])
            fpr_unpriv, tpr_unpriv_vals, _ = roc_curve(y_test[group_unpriv], y_prob[group_unpriv])
            auc_priv = auc(fpr_priv, tpr_priv_vals)
            auc_unpriv = auc(fpr_unpriv, tpr_unpriv_vals)
            abroca = abs(auc_priv - auc_unpriv)

        fairness = (3 - abs(abroca) - abs(erd) - abs(tprd)) / 3 if not np.isnan(abroca) else float("nan")

        print(f"{name:<20} {abroca:<10.4f} {erd:<10.4f} {tprd:<10.4f} {fairness:<10.4f}")

# === 0. No Mitigation (Original) ===
models_abs = train_models(X_train, y_train)
evaluate_fairness(models_abs, X_test, y_test, race_test, "No Mitigation")

# === 1. Suppression (Drop 'race') ===
X_train_sup = X_train.drop(columns=["race"])
X_test_sup = X_test.drop(columns=["race"])
models_sup = train_models(X_train_sup, y_train)
evaluate_fairness(models_sup, X_test_sup, y_test, race_test, "Suppression")

# === 2. Correlation Remover ===
cr = CorrelationRemover(sensitive_feature_ids=["race"])
cr.fit(X_train)
X_train_cr = cr.transform(X_train)
X_test_cr = cr.transform(X_test)
models_cr = train_models(X_train_cr, y_train)
evaluate_fairness(models_cr, X_test_cr, y_test, race_test, "Correlation Remover")

# === 3. Disparate Impact Remover ===
df_train_dir = pd.concat([X_train, y_train.rename("bar_passed")], axis=1)
dataset_dir = StandardDataset(df_train_dir,
                               label_name="bar_passed",
                               favorable_classes=[1],
                               protected_attribute_names=["race"],
                               privileged_classes=[[7]])
dir = DisparateImpactRemover(repair_level=1.0)
dataset_dir_transformed = dir.fit_transform(dataset_dir)
X_train_dir = pd.DataFrame(dataset_dir_transformed.features, columns=X_train.columns)
models_dir = train_models(X_train_dir, y_train)
evaluate_fairness(models_dir, X_test, y_test, race_test, "Disparate Impact Remover")

# === 4. Reweighing ===
dataset_rw = StandardDataset(df_train_dir,
                              label_name="bar_passed",
                              favorable_classes=[1],
                              protected_attribute_names=["race"],
                              privileged_classes=[[7]])
rw = Reweighing(privileged_groups=[{"race": 7}],
                unprivileged_groups=[{"race": i} for i in range(1, 9) if i != 7])
dataset_rw_transformed = rw.fit_transform(dataset_rw)
X_train_rw = pd.DataFrame(dataset_rw_transformed.features, columns=X_train.columns)
y_train_rw = dataset_rw_transformed.labels.ravel()
models_rw = train_models(X_train_rw, y_train_rw, sample_weight=dataset_rw_transformed.instance_weights)
evaluate_fairness(models_rw, X_test, y_test, race_test, "Reweighing")


  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (
pip install 'aif360[inFairness]'



Fairness Metrics: No Mitigation
--------------------------------------------------------------------------
Model                ABROCA     ERD        TPRD       Fairness  
--------------------------------------------------------------------------
Decision Tree        0.0897     0.1534     -0.1190    0.8793    
Logistic Regression  0.0328     0.1097     -0.0343    0.9411    
Random Forest        0.0012     0.1076     -0.0382    0.9510    
SVM                  0.1003     0.1049     0.0000     0.9316    
XGBoost              0.0353     0.1158     -0.0482    0.9336    

Fairness Metrics: Suppression
--------------------------------------------------------------------------
Model                ABROCA     ERD        TPRD       Fairness  
--------------------------------------------------------------------------
Decision Tree        0.0644     0.1439     -0.1021    0.8965    
Logistic Regression  0.0324     0.1088     -0.0297    0.9430    
Random Forest        0.0035     0.1108     -0.0499 

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, roc_curve, auc
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from fairlearn.preprocessing import CorrelationRemover
from aif360.algorithms.preprocessing import DisparateImpactRemover, Reweighing
from aif360.datasets import StandardDataset

# === Load dataset ===
df = pd.read_csv("generated_data_CLLM_prompt_Law.csv")
df["bar_passed"] = df["bar_passed"].astype(int)

# === Train-test split ===
X = df.drop(columns=["bar_passed"])
y = df["bar_passed"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
y_test = y_test.reset_index(drop=True)
race_test = X_test["race"].reset_index(drop=True)

# === Model training function ===
def train_models(X_train, y_train, sample_weight=None):
    models = {
        "Decision Tree": DecisionTreeClassifier(random_state=42),
        "Logistic Regression": LogisticRegression(max_iter=1000),
        "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
        "SVM": SVC(probability=True),
        "XGBoost": XGBClassifier(eval_metric="logloss", random_state=42)
    }
    for name, model in models.items():
        if sample_weight is not None:
            model.fit(X_train, y_train, sample_weight=sample_weight)
        else:
            model.fit(X_train, y_train)
    return models

# === Fairness metric evaluation ===
def evaluate_fairness(models, X_test, y_test, race_test, title):
    print(f"\nFairness Metrics: {title}")
    print("--------------------------------------------------------------------------")
    print(f"{'Model':<20} {'ABROCA':<10} {'ERD':<10} {'TPRD':<10} {'Fairness':<10}")
    print("--------------------------------------------------------------------------")

    for name, model in models.items():
        y_pred = model.predict(X_test)
        y_prob = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else None

        group_priv = race_test == 7
        group_unpriv = race_test != 7

        err_priv = np.mean(y_pred[group_priv] != y_test[group_priv])
        err_unpriv = np.mean(y_pred[group_unpriv] != y_test[group_unpriv])
        erd = err_unpriv - err_priv

        def tpr(y_true, y_pred):
            tp = np.sum((y_true == 1) & (y_pred == 1))
            fn = np.sum((y_true == 1) & (y_pred == 0))
            return tp / (tp + fn) if (tp + fn) > 0 else 0

        tpr_priv = tpr(y_test[group_priv], y_pred[group_priv])
        tpr_unpriv = tpr(y_test[group_unpriv], y_pred[group_unpriv])
        tprd = tpr_unpriv - tpr_priv

        abroca = np.nan
        if y_prob is not None:
            fpr_priv, tpr_priv_vals, _ = roc_curve(y_test[group_priv], y_prob[group_priv])
            fpr_unpriv, tpr_unpriv_vals, _ = roc_curve(y_test[group_unpriv], y_prob[group_unpriv])
            auc_priv = auc(fpr_priv, tpr_priv_vals)
            auc_unpriv = auc(fpr_unpriv, tpr_unpriv_vals)
            abroca = abs(auc_priv - auc_unpriv)

        fairness = (3 - abs(abroca) - abs(erd) - abs(tprd)) / 3 if not np.isnan(abroca) else float("nan")

        print(f"{name:<20} {abroca:<10.4f} {erd:<10.4f} {tprd:<10.4f} {fairness:<10.4f}")

# === 0. No Mitigation (Original) ===
models_abs = train_models(X_train, y_train)
evaluate_fairness(models_abs, X_test, y_test, race_test, "No Mitigation")

# === 1. Suppression (Drop 'race') ===
X_train_sup = X_train.drop(columns=["race"])
X_test_sup = X_test.drop(columns=["race"])
models_sup = train_models(X_train_sup, y_train)
evaluate_fairness(models_sup, X_test_sup, y_test, race_test, "Suppression")

# === 2. Correlation Remover ===
cr = CorrelationRemover(sensitive_feature_ids=["race"])
cr.fit(X_train)
X_train_cr = cr.transform(X_train)
X_test_cr = cr.transform(X_test)
models_cr = train_models(X_train_cr, y_train)
evaluate_fairness(models_cr, X_test_cr, y_test, race_test, "Correlation Remover")

# === 3. Disparate Impact Remover ===
df_train_dir = pd.concat([X_train, y_train.rename("bar_passed")], axis=1)
dataset_dir = StandardDataset(df_train_dir,
                               label_name="bar_passed",
                               favorable_classes=[1],
                               protected_attribute_names=["race"],
                               privileged_classes=[[7]])
dir = DisparateImpactRemover(repair_level=1.0)
dataset_dir_transformed = dir.fit_transform(dataset_dir)
X_train_dir = pd.DataFrame(dataset_dir_transformed.features, columns=X_train.columns)
models_dir = train_models(X_train_dir, y_train)
evaluate_fairness(models_dir, X_test, y_test, race_test, "Disparate Impact Remover")

# === 4. Reweighing ===
dataset_rw = StandardDataset(df_train_dir,
                              label_name="bar_passed",
                              favorable_classes=[1],
                              protected_attribute_names=["race"],
                              privileged_classes=[[7]])
rw = Reweighing(privileged_groups=[{"race": 7}],
                unprivileged_groups=[{"race": i} for i in range(1, 9) if i != 7])
dataset_rw_transformed = rw.fit_transform(dataset_rw)
X_train_rw = pd.DataFrame(dataset_rw_transformed.features, columns=X_train.columns)
y_train_rw = dataset_rw_transformed.labels.ravel()
models_rw = train_models(X_train_rw, y_train_rw, sample_weight=dataset_rw_transformed.instance_weights)
evaluate_fairness(models_rw, X_test, y_test, race_test, "Reweighing")



Fairness Metrics: No Mitigation
--------------------------------------------------------------------------
Model                ABROCA     ERD        TPRD       Fairness  
--------------------------------------------------------------------------
Decision Tree        0.0418     0.0507     -0.0638    0.9479    
Logistic Regression  0.0075     0.0280     -0.0851    0.9598    
Random Forest        0.0122     0.0243     -0.0638    0.9666    
SVM                  0.0042     0.0464     -0.0226    0.9756    
XGBoost              0.0176     0.0394     -0.0780    0.9550    

Fairness Metrics: Suppression
--------------------------------------------------------------------------
Model                ABROCA     ERD        TPRD       Fairness  
--------------------------------------------------------------------------
Decision Tree        0.0283     0.0394     -0.0851    0.9491    
Logistic Regression  0.0072     -0.0005    -0.0851    0.9691    
Random Forest        0.0020     0.0167     -0.0567 

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, roc_curve, auc
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from fairlearn.preprocessing import CorrelationRemover
from aif360.algorithms.preprocessing import DisparateImpactRemover, Reweighing
from aif360.datasets import StandardDataset

# === Load dataset ===
df = pd.read_csv("generated_data_Our_prompts_Law.csv")
df["bar_passed"] = df["bar_passed"].astype(int)

# === Train-test split ===
X = df.drop(columns=["bar_passed"])
y = df["bar_passed"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
y_test = y_test.reset_index(drop=True)
race_test = X_test["race"].reset_index(drop=True)

# === Model training function ===
def train_models(X_train, y_train, sample_weight=None):
    models = {
        "Decision Tree": DecisionTreeClassifier(random_state=42),
        "Logistic Regression": LogisticRegression(max_iter=1000),
        "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
        "SVM": SVC(probability=True),
        "XGBoost": XGBClassifier(eval_metric="logloss", random_state=42)
    }
    for name, model in models.items():
        if sample_weight is not None:
            model.fit(X_train, y_train, sample_weight=sample_weight)
        else:
            model.fit(X_train, y_train)
    return models

# === Fairness metric evaluation ===
def evaluate_fairness(models, X_test, y_test, race_test, title):
    print(f"\nFairness Metrics: {title}")
    print("--------------------------------------------------------------------------")
    print(f"{'Model':<20} {'ABROCA':<10} {'ERD':<10} {'TPRD':<10} {'Fairness':<10}")
    print("--------------------------------------------------------------------------")

    for name, model in models.items():
        y_pred = model.predict(X_test)
        y_prob = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else None

        group_priv = race_test == 7
        group_unpriv = race_test != 7

        err_priv = np.mean(y_pred[group_priv] != y_test[group_priv])
        err_unpriv = np.mean(y_pred[group_unpriv] != y_test[group_unpriv])
        erd = err_unpriv - err_priv

        def tpr(y_true, y_pred):
            tp = np.sum((y_true == 1) & (y_pred == 1))
            fn = np.sum((y_true == 1) & (y_pred == 0))
            return tp / (tp + fn) if (tp + fn) > 0 else 0

        tpr_priv = tpr(y_test[group_priv], y_pred[group_priv])
        tpr_unpriv = tpr(y_test[group_unpriv], y_pred[group_unpriv])
        tprd = tpr_unpriv - tpr_priv

        abroca = np.nan
        if y_prob is not None:
            fpr_priv, tpr_priv_vals, _ = roc_curve(y_test[group_priv], y_prob[group_priv])
            fpr_unpriv, tpr_unpriv_vals, _ = roc_curve(y_test[group_unpriv], y_prob[group_unpriv])
            auc_priv = auc(fpr_priv, tpr_priv_vals)
            auc_unpriv = auc(fpr_unpriv, tpr_unpriv_vals)
            abroca = abs(auc_priv - auc_unpriv)

        fairness = (3 - abs(abroca) - abs(erd) - abs(tprd)) / 3 if not np.isnan(abroca) else float("nan")

        print(f"{name:<20} {abroca:<10.4f} {erd:<10.4f} {tprd:<10.4f} {fairness:<10.4f}")

# === 0. No Mitigation (Original) ===
models_abs = train_models(X_train, y_train)
evaluate_fairness(models_abs, X_test, y_test, race_test, "No Mitigation")

# === 1. Suppression (Drop 'race') ===
X_train_sup = X_train.drop(columns=["race"])
X_test_sup = X_test.drop(columns=["race"])
models_sup = train_models(X_train_sup, y_train)
evaluate_fairness(models_sup, X_test_sup, y_test, race_test, "Suppression")

# === 2. Correlation Remover ===
cr = CorrelationRemover(sensitive_feature_ids=["race"])
cr.fit(X_train)
X_train_cr = cr.transform(X_train)
X_test_cr = cr.transform(X_test)
models_cr = train_models(X_train_cr, y_train)
evaluate_fairness(models_cr, X_test_cr, y_test, race_test, "Correlation Remover")

# === 3. Disparate Impact Remover ===
df_train_dir = pd.concat([X_train, y_train.rename("bar_passed")], axis=1)
dataset_dir = StandardDataset(df_train_dir,
                               label_name="bar_passed",
                               favorable_classes=[1],
                               protected_attribute_names=["race"],
                               privileged_classes=[[7]])
dir = DisparateImpactRemover(repair_level=1.0)
dataset_dir_transformed = dir.fit_transform(dataset_dir)
X_train_dir = pd.DataFrame(dataset_dir_transformed.features, columns=X_train.columns)
models_dir = train_models(X_train_dir, y_train)
evaluate_fairness(models_dir, X_test, y_test, race_test, "Disparate Impact Remover")

# === 4. Reweighing ===
dataset_rw = StandardDataset(df_train_dir,
                              label_name="bar_passed",
                              favorable_classes=[1],
                              protected_attribute_names=["race"],
                              privileged_classes=[[7]])
rw = Reweighing(privileged_groups=[{"race": 7}],
                unprivileged_groups=[{"race": i} for i in range(1, 9) if i != 7])
dataset_rw_transformed = rw.fit_transform(dataset_rw)
X_train_rw = pd.DataFrame(dataset_rw_transformed.features, columns=X_train.columns)
y_train_rw = dataset_rw_transformed.labels.ravel()
models_rw = train_models(X_train_rw, y_train_rw, sample_weight=dataset_rw_transformed.instance_weights)
evaluate_fairness(models_rw, X_test, y_test, race_test, "Reweighing")



Fairness Metrics: No Mitigation
--------------------------------------------------------------------------
Model                ABROCA     ERD        TPRD       Fairness  
--------------------------------------------------------------------------
Decision Tree        0.0790     0.0791     -0.0896    0.9175    
Logistic Regression  0.0152     0.0550     -0.0858    0.9480    
Random Forest        0.0187     0.0569     -0.0647    0.9532    
SVM                  0.0230     0.0316     -0.0970    0.9495    
XGBoost              0.0203     0.0497     -0.0647    0.9551    

Fairness Metrics: Suppression
--------------------------------------------------------------------------
Model                ABROCA     ERD        TPRD       Fairness  
--------------------------------------------------------------------------
Decision Tree        0.0332     0.0350     -0.0175    0.9714    
Logistic Regression  0.0150     0.0422     -0.0933    0.9499    
Random Forest        0.0173     0.0071     -0.0535 

In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, roc_curve, auc
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from fairlearn.preprocessing import CorrelationRemover
from aif360.algorithms.preprocessing import DisparateImpactRemover, Reweighing
from aif360.datasets import StandardDataset

# === Load dataset ===
df = pd.read_csv("synthetic_law_data_decaf.csv")
df["bar_passed"] = df["bar_passed"].astype(int)

# === Train-test split ===
X = df.drop(columns=["bar_passed"])
y = df["bar_passed"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
y_test = y_test.reset_index(drop=True)
race_test = X_test["race"].reset_index(drop=True)

# === Model training function ===
def train_models(X_train, y_train, sample_weight=None):
    models = {
        "Decision Tree": DecisionTreeClassifier(random_state=42),
        "Logistic Regression": LogisticRegression(max_iter=1000),
        "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
        "SVM": SVC(probability=True),
        "XGBoost": XGBClassifier(eval_metric="logloss", random_state=42)
    }
    for name, model in models.items():
        if sample_weight is not None:
            model.fit(X_train, y_train, sample_weight=sample_weight)
        else:
            model.fit(X_train, y_train)
    return models

# === Fairness metric evaluation ===
def evaluate_fairness(models, X_test, y_test, race_test, title):
    print(f"\nFairness Metrics: {title}")
    print("--------------------------------------------------------------------------")
    print(f"{'Model':<20} {'ABROCA':<10} {'ERD':<10} {'TPRD':<10} {'Fairness':<10}")
    print("--------------------------------------------------------------------------")

    for name, model in models.items():
        y_pred = model.predict(X_test)
        y_prob = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else None

        group_priv = race_test == 7
        group_unpriv = race_test != 7

        err_priv = np.mean(y_pred[group_priv] != y_test[group_priv])
        err_unpriv = np.mean(y_pred[group_unpriv] != y_test[group_unpriv])
        erd = err_unpriv - err_priv

        def tpr(y_true, y_pred):
            tp = np.sum((y_true == 1) & (y_pred == 1))
            fn = np.sum((y_true == 1) & (y_pred == 0))
            return tp / (tp + fn) if (tp + fn) > 0 else 0

        tpr_priv = tpr(y_test[group_priv], y_pred[group_priv])
        tpr_unpriv = tpr(y_test[group_unpriv], y_pred[group_unpriv])
        tprd = tpr_unpriv - tpr_priv

        abroca = np.nan
        if y_prob is not None:
            fpr_priv, tpr_priv_vals, _ = roc_curve(y_test[group_priv], y_prob[group_priv])
            fpr_unpriv, tpr_unpriv_vals, _ = roc_curve(y_test[group_unpriv], y_prob[group_unpriv])
            auc_priv = auc(fpr_priv, tpr_priv_vals)
            auc_unpriv = auc(fpr_unpriv, tpr_unpriv_vals)
            abroca = abs(auc_priv - auc_unpriv)

        fairness = (3 - abs(abroca) - abs(erd) - abs(tprd)) / 3 if not np.isnan(abroca) else float("nan")

        print(f"{name:<20} {abroca:<10.4f} {erd:<10.4f} {tprd:<10.4f} {fairness:<10.4f}")

# === 0. No Mitigation (Original) ===
models_abs = train_models(X_train, y_train)
evaluate_fairness(models_abs, X_test, y_test, race_test, "No Mitigation")

# === 1. Suppression (Drop 'race') ===
X_train_sup = X_train.drop(columns=["race"])
X_test_sup = X_test.drop(columns=["race"])
models_sup = train_models(X_train_sup, y_train)
evaluate_fairness(models_sup, X_test_sup, y_test, race_test, "Suppression")

# === 2. Correlation Remover ===
cr = CorrelationRemover(sensitive_feature_ids=["race"])
cr.fit(X_train)
X_train_cr = cr.transform(X_train)
X_test_cr = cr.transform(X_test)
models_cr = train_models(X_train_cr, y_train)
evaluate_fairness(models_cr, X_test_cr, y_test, race_test, "Correlation Remover")

# === 3. Disparate Impact Remover ===
df_train_dir = pd.concat([X_train, y_train.rename("bar_passed")], axis=1)
dataset_dir = StandardDataset(df_train_dir,
                               label_name="bar_passed",
                               favorable_classes=[1],
                               protected_attribute_names=["race"],
                               privileged_classes=[[7]])
dir = DisparateImpactRemover(repair_level=1.0)
dataset_dir_transformed = dir.fit_transform(dataset_dir)
X_train_dir = pd.DataFrame(dataset_dir_transformed.features, columns=X_train.columns)
models_dir = train_models(X_train_dir, y_train)
evaluate_fairness(models_dir, X_test, y_test, race_test, "Disparate Impact Remover")

# === 4. Reweighing ===
dataset_rw = StandardDataset(df_train_dir,
                              label_name="bar_passed",
                              favorable_classes=[1],
                              protected_attribute_names=["race"],
                              privileged_classes=[[7]])
rw = Reweighing(privileged_groups=[{"race": 7}],
                unprivileged_groups=[{"race": i} for i in range(1, 9) if i != 7])
dataset_rw_transformed = rw.fit_transform(dataset_rw)
X_train_rw = pd.DataFrame(dataset_rw_transformed.features, columns=X_train.columns)
y_train_rw = dataset_rw_transformed.labels.ravel()
models_rw = train_models(X_train_rw, y_train_rw, sample_weight=dataset_rw_transformed.instance_weights)
evaluate_fairness(models_rw, X_test, y_test, race_test, "Reweighing")



Fairness Metrics: No Mitigation
--------------------------------------------------------------------------
Model                ABROCA     ERD        TPRD       Fairness  
--------------------------------------------------------------------------
Decision Tree        nan        -0.0264    -0.0114    nan       
Logistic Regression  nan        -0.0398    0.0000     nan       
Random Forest        nan        -0.0398    0.0000     nan       
SVM                  nan        -0.0398    0.0000     nan       
XGBoost              nan        -0.0398    0.0000     nan       





Fairness Metrics: Suppression
--------------------------------------------------------------------------
Model                ABROCA     ERD        TPRD       Fairness  
--------------------------------------------------------------------------
Decision Tree        nan        -0.0797    0.0415     nan       
Logistic Regression  nan        -0.0398    0.0000     nan       
Random Forest        nan        -0.0398    0.0000     nan       
SVM                  nan        -0.0398    0.0000     nan       
XGBoost              nan        -0.0398    0.0000     nan       





Fairness Metrics: Correlation Remover
--------------------------------------------------------------------------
Model                ABROCA     ERD        TPRD       Fairness  
--------------------------------------------------------------------------
Decision Tree        nan        0.0303     -0.0688    nan       
Logistic Regression  nan        -0.0398    0.0000     nan       
Random Forest        nan        -0.0398    0.0000     nan       
SVM                  nan        -0.0398    0.0000     nan       
XGBoost              nan        -0.0438    0.0041     nan       





Fairness Metrics: Disparate Impact Remover
--------------------------------------------------------------------------
Model                ABROCA     ERD        TPRD       Fairness  
--------------------------------------------------------------------------
Decision Tree        nan        -0.0508    0.0131     nan       
Logistic Regression  nan        -0.0398    0.0000     nan       
Random Forest        nan        -0.0398    0.0000     nan       
SVM                  nan        -0.0398    0.0000     nan       
XGBoost              nan        -0.0314    -0.0080    nan       





Fairness Metrics: Reweighing
--------------------------------------------------------------------------
Model                ABROCA     ERD        TPRD       Fairness  
--------------------------------------------------------------------------
Decision Tree        nan        -0.0185    -0.0197    nan       
Logistic Regression  nan        -0.0398    0.0000     nan       
Random Forest        nan        -0.0398    0.0000     nan       
SVM                  nan        -0.0398    0.0000     nan       
XGBoost              nan        -0.0398    0.0000     nan       




In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, roc_curve, auc
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from fairlearn.preprocessing import CorrelationRemover
from aif360.algorithms.preprocessing import DisparateImpactRemover, Reweighing
from aif360.datasets import StandardDataset

# === Load datasets ===
df_orig = pd.read_csv("bar_pass_prediction (processed version).csv")
df_synth = pd.read_csv("synthetic_law_data_decaf.csv")
df_orig["bar_passed"] = df_orig["bar_passed"].astype(int)
df_synth["bar_passed"] = df_synth["bar_passed"].astype(int)

# === Split datasets ===
X_orig = df_orig.drop(columns=["bar_passed"])
y_orig = df_orig["bar_passed"]
X_synth = df_synth.drop(columns=["bar_passed"])
y_synth = df_synth["bar_passed"]

X_orig_train, X_orig_test, y_orig_train, y_orig_test = train_test_split(X_orig, y_orig, test_size=0.3, random_state=42)
X_synth_train, X_synth_test, y_synth_train, y_synth_test = train_test_split(X_synth, y_synth, test_size=0.000000001, random_state=42)

X_train_combined = pd.concat([X_orig_train, X_synth_train], axis=0).reset_index(drop=True)
y_train_combined = pd.concat([y_orig_train, y_synth_train], axis=0).reset_index(drop=True)
X_test_combined = pd.concat([X_orig_test, X_synth_test], axis=0).reset_index(drop=True)
y_test_combined = pd.concat([y_orig_test, y_synth_test], axis=0).reset_index(drop=True)
race_test = pd.concat([X_orig_test["race"], X_synth_test["race"]], axis=0).reset_index(drop=True)

# === Model training ===
def train_models(X_train, y_train, sample_weight=None):
    models = {
        "Decision Tree": DecisionTreeClassifier(random_state=42),
        "Logistic Regression": LogisticRegression(max_iter=1000),
        "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
        "SVM": SVC(probability=True),
        "XGBoost": XGBClassifier(eval_metric="logloss", random_state=42)
    }
    for name, model in models.items():
        if sample_weight is not None:
            model.fit(X_train, y_train, sample_weight=sample_weight)
        else:
            model.fit(X_train, y_train)
    return models

# === Fairness evaluation ===
def evaluate_fairness(models, X_test, y_test, race_test, title):
    print(f"\nFairness Metrics: {title}")
    print("--------------------------------------------------------------------------")
    print(f"{'Model':<20} {'ABROCA':<10} {'ERD':<10} {'TPRD':<10} {'Fairness':<10}")
    print("--------------------------------------------------------------------------")
    for name, model in models.items():
        y_pred = model.predict(X_test)
        y_prob = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else None

        group_priv = race_test == 7
        group_unpriv = race_test != 7

        err_priv = np.mean(y_pred[group_priv] != y_test[group_priv])
        err_unpriv = np.mean(y_pred[group_unpriv] != y_test[group_unpriv])
        erd = err_unpriv - err_priv

        def tpr(y_true, y_pred):
            tp = np.sum((y_true == 1) & (y_pred == 1))
            fn = np.sum((y_true == 1) & (y_pred == 0))
            return tp / (tp + fn) if (tp + fn) > 0 else 0

        tpr_priv = tpr(y_test[group_priv], y_pred[group_priv])
        tpr_unpriv = tpr(y_test[group_unpriv], y_pred[group_unpriv])
        tprd = tpr_unpriv - tpr_priv

        abroca = np.nan
        if y_prob is not None:
            fpr_priv, tpr_priv_vals, _ = roc_curve(y_test[group_priv], y_prob[group_priv])
            fpr_unpriv, tpr_unpriv_vals, _ = roc_curve(y_test[group_unpriv], y_prob[group_unpriv])
            auc_priv = auc(fpr_priv, tpr_priv_vals)
            auc_unpriv = auc(fpr_unpriv, tpr_unpriv_vals)
            abroca = abs(auc_priv - auc_unpriv)

        fairness = (3 - abs(abroca) - abs(erd) - abs(tprd)) / 3 if not np.isnan(abroca) else float("nan")

        print(f"{name:<20} {abroca:<10.4f} {erd:<10.4f} {tprd:<10.4f} {fairness:<10.4f}")

# === 0. No mitigation ===
models_abs = train_models(X_train_combined, y_train_combined)
evaluate_fairness(models_abs, X_test_combined, y_test_combined, race_test, "Absolute (No Mitigation)")

# === 1. Suppression ===
X_sup = X_train_combined.drop(columns=["race"])
X_test_sup = X_test_combined.drop(columns=["race"])
models_sup = train_models(X_sup, y_train_combined)
evaluate_fairness(models_sup, X_test_sup, y_test_combined, race_test, "Suppression")

# === 2. Correlation Remover (with fallback for older versions) ===
cr = CorrelationRemover(sensitive_feature_ids=["race"])
cr.fit(X_train_combined)
X_cor_train = cr.transform(X_train_combined)
X_cor_test = cr.transform(X_test_combined)
models_cor = train_models(X_cor_train, y_train_combined)
evaluate_fairness(models_cor, X_cor_test, y_test_combined, race_test, "Correlation Remover")

# === 3. Disparate Impact Remover ===
df_train_dir = pd.concat([X_train_combined, y_train_combined.rename("bar_passed")], axis=1)
dataset_dir = StandardDataset(
    df_train_dir,
    label_name="bar_passed",
    favorable_classes=[1],
    protected_attribute_names=["race"],
    privileged_classes=[[7]]
)
dir = DisparateImpactRemover(repair_level=1.0)
dataset_dir_transformed = dir.fit_transform(dataset_dir)
X_dir = pd.DataFrame(dataset_dir_transformed.features, columns=X_train_combined.columns)
models_dir = train_models(X_dir, y_train_combined)
evaluate_fairness(models_dir, X_test_combined, y_test_combined, race_test, "Disparate Impact Remover")

# === 4. Reweighing ===
df_train_rw = pd.concat([X_train_combined, y_train_combined.rename("bar_passed")], axis=1)
dataset_rw = StandardDataset(
    df_train_rw,
    label_name="bar_passed",
    favorable_classes=[1],
    protected_attribute_names=["race"],
    privileged_classes=[[7]]
)
rw = Reweighing(
    privileged_groups=[{"race": 7}],
    unprivileged_groups=[{"race": i} for i in range(1, 9) if i != 7]
)
dataset_rw_transformed = rw.fit_transform(dataset_rw)
X_rw = pd.DataFrame(dataset_rw_transformed.features, columns=X_train_combined.columns)
y_rw = dataset_rw_transformed.labels.ravel()
models_rw = train_models(X_rw, y_rw, sample_weight=dataset_rw_transformed.instance_weights)
evaluate_fairness(models_rw, X_test_combined, y_test_combined, race_test, "Reweighing")
