## Bias Mitigation using AIF360 - Heart Failure Prediction Dataset (Source: https://www.kaggle.com/datasets/fedesoriano/heart-failure-prediction/data)
Model Training and Evaluation

In [1]:
#load preprocessed data 
import pandas as pd
train_df = pd.read_csv("./data_subsets/train_75M_25F.csv")

X_test = pd.read_csv("./data_splits/X_test.csv")
y_test = pd.read_csv("./data_splits/y_test.csv")

#check out the data
train_df.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,61,1,3,146.0,241.0,0,0,148.0,1,3.0,0,1
1,52,1,1,120.0,284.0,0,0,118.0,0,0.0,2,0
2,48,0,3,150.0,227.0,0,0,130.0,1,1.0,1,0
3,49,1,3,128.0,212.0,0,0,96.0,1,0.0,1,1
4,56,1,3,120.0,236.0,0,1,148.0,0,0.0,1,1


In [2]:
TARGET = "HeartDisease"
SENSITIVE = "Sex"   # 1 = Male, 0 = Female

categorical_cols = ['Sex','ChestPainType','FastingBS','RestingECG','ExerciseAngina','ST_Slope']
continuous_cols  = ['Age','RestingBP','Cholesterol','MaxHR','Oldpeak']

In [3]:
# Split train into X / y and keep sensitive feature for fairness evaluation
X_train = train_df.drop(columns=[TARGET])
y_train = train_df[TARGET]

In [4]:
# scale numeric features only, fit on train, transform test
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X_train_num_scaled = pd.DataFrame(
    scaler.fit_transform(X_train[continuous_cols]),
    columns=continuous_cols, index=X_train.index
)
X_test_num_scaled = pd.DataFrame(
    scaler.transform(X_test[continuous_cols]),
    columns=continuous_cols, index=X_test.index
)

In [5]:
#one-hot encode categoricals; numeric are kept as is 
from sklearn.preprocessing import OneHotEncoder

ohe = OneHotEncoder(handle_unknown="ignore", drop="if_binary", sparse_output=False)
ohe.fit(X_train[categorical_cols])

X_train_cat = pd.DataFrame(
    ohe.transform(X_train[categorical_cols]),
    columns=ohe.get_feature_names_out(categorical_cols),
    index=X_train.index
)
X_test_cat = pd.DataFrame(
    ohe.transform(X_test[categorical_cols]),
    columns=ohe.get_feature_names_out(categorical_cols),
    index=X_test.index
)

In [6]:
# Assemble final matrices
X_train_ready = pd.concat([X_train_cat, X_train_num_scaled], axis=1)
X_test_ready  = pd.concat([X_test_cat,  X_test_num_scaled],  axis=1)

print("Final feature shapes:", X_train_ready.shape, X_test_ready.shape)

Final feature shapes: (600, 18) (184, 18)


In [7]:
# create sensitive attribute arrays - after creating X_train_ready and X_test_ready
A_train = X_train["Sex"].astype(int).to_numpy().ravel()  # 1=Male, 0=Female
A_test  = X_test["Sex"].astype(int).to_numpy().ravel()

In [8]:
# setup for AIF360
import numpy as np, pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, confusion_matrix
from sklearn.base import clone
from IPython.display import display 

from aif360.datasets import BinaryLabelDataset
from aif360.algorithms.preprocessing import Reweighing
from aif360.algorithms.postprocessing import EqOddsPostprocessing
from aif360.metrics import ClassificationMetric

# Config 
protected_attr = "Sex"  # 1=Male, 0=Female
PRIV_VALUE = 1          # privileged = Male
label_name = "label"
favorable_label, unfavorable_label = 1, 0
privileged_groups   = [{protected_attr: PRIV_VALUE}]
unprivileged_groups = [{protected_attr: 1 - PRIV_VALUE}]

# Ensure 1-D ints for targets
y_train = np.asarray(y_train).astype(int).ravel()
y_test  = np.asarray(y_test).astype(int).ravel()

# Sensitive attribute arrays
A_train = X_train["Sex"].astype(int).to_numpy().ravel()
A_test  = X_test["Sex"].astype(int).to_numpy().ravel()

def _to_bld(y, A):
    y = (y.values if hasattr(y,'values') else np.asarray(y)).ravel()
    A = (A.values if hasattr(A,'values') else np.asarray(A)).ravel()
    df = pd.DataFrame({"dummy": np.zeros(len(y)), label_name: y, protected_attr: A})
    return BinaryLabelDataset(
        df=df,
        label_names=[label_name],
        protected_attribute_names=[protected_attr],
        favorable_label=favorable_label,
        unfavorable_label=unfavorable_label
    )

def fair_metrics(y_true, y_pred, A, y_scores=None, absolute=True):
    """AIF360-based DP and EO (equal opportunity) differences."""
    t = _to_bld(y_true, A)
    p = _to_bld(y_pred, A)
    if y_scores is not None:
        p.scores = np.asarray(y_scores).reshape(-1, 1)
    cm = ClassificationMetric(
        t, p,
        privileged_groups=privileged_groups,
        unprivileged_groups=unprivileged_groups
    )
    dp = cm.statistical_parity_difference()
    eo = cm.equal_opportunity_difference()
    return (abs(dp), abs(eo)) if absolute else (dp, eo)

def get_scores(model, X):
    if hasattr(model, "predict_proba"):
        return model.predict_proba(X)[:, 1]
    if hasattr(model, "decision_function"):
        z = model.decision_function(X)
        return (z - z.min()) / (z.max() - z.min() + 1e-12)
    return model.predict(X).astype(float)

def selection_rate(y_pred, positive=1):
    y_pred = np.asarray(y_pred).ravel()
    return np.mean(y_pred == positive)

def per_group_table(y_true, y_pred, A, positive=1, group_name="Sex"):
    """Keeps your existing API (positive=...), uses sklearn metrics."""
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    A = np.asarray(A).ravel()
    rows = []
    for g in np.unique(A):
        idx = (A == g)
        yt, yp = y_true[idx], y_pred[idx]
        tn, fp, fn, tp = confusion_matrix(yt, yp, labels=[0, 1]).ravel()
        tpr = tp / (tp + fn) if (tp + fn) else 0.0
        fpr = fp / (fp + tn) if (fp + tn) else 0.0
        rec = recall_score(yt, yp, pos_label=positive)   # equals TPR for binary
        sr  = selection_rate(yp, positive=positive)
        acc = accuracy_score(yt, yp)
        rows.append({group_name: g, "TPR": tpr, "FPR": fpr,
                     "Recall": rec, "SelectionRate": sr, "Accuracy": acc})
    return pd.DataFrame(rows).set_index(group_name)

def aif_diffs(y_true, y_pred, A, *, abs_vals=True):
    """Alternative disparities (AIF360): DP and average odds difference."""
    t = _to_bld(y_true, A)
    p = _to_bld(y_pred, A)
    cm = ClassificationMetric(
        t, p,
        privileged_groups=privileged_groups,
        unprivileged_groups=unprivileged_groups
    )
    dp = cm.statistical_parity_difference()
    eo = cm.average_odds_difference()   # avg of TPR/FPR diffs
    if abs_vals:
        dp, eo = abs(dp), abs(eo)
    return dp, eo

def print_row(title, acc, dp, eo, note=""):
    print(f"{title:>24s} | Acc {acc:.4f} | DP {dp:.4f} | EO {eo:.4f} {('|' if note else '')} {note}")

# to print a model cleanly (fixed call sites)
def report_model(name, y_true, y_pred, A, scores=None, note=""):
    acc = accuracy_score(y_true, y_pred)
    dp, eo = fair_metrics(y_true, y_pred, A, y_scores=scores, absolute=True)  # no pos_label here
    tbl = per_group_table(y_true, y_pred, A, positive=favorable_label, group_name="Sex").round(6)
    
    print(f"\n=== {name} ===")
    display(tbl)
    print(f"Overall -> Accuracy: {acc:.4f} | DP diff: {dp:.4f} | EO diff: {eo:.4f}"
          + (f" | {note}" if note else ""))
    
    return {"Model": name, "Accuracy": acc, "DP diff": dp, "EO diff": eo}

# Pre: compute reweighing weights ONCE on TRAIN
_bld_train = BinaryLabelDataset(
    df=pd.concat([pd.DataFrame(X_train_ready),
                  pd.Series(y_train, name=label_name),
                  pd.Series(A_train, name=protected_attr)], axis=1),
    label_names=[label_name],
    protected_attribute_names=[protected_attr],
    favorable_label=favorable_label,
    unfavorable_label=unfavorable_label
)
_rw = Reweighing(unprivileged_groups=unprivileged_groups,
                 privileged_groups=privileged_groups).fit(_bld_train)
_rw_weights = _rw.transform(_bld_train).instance_weights.ravel()

# Turn weights into a resampled training set
def resample_by_weights(X, y, A, weights, n_samples=None, random_state=42):
    rng = np.random.default_rng(random_state)
    Xn = np.asarray(X); yn = np.asarray(y).ravel(); An = np.asarray(A).ravel()
    w = np.clip(np.asarray(weights, dtype=float), 1e-12, None)
    p = w / w.sum()
    n = n_samples or len(yn)
    idx = rng.choice(len(yn), size=n, replace=True, p=p)
    return Xn[idx], yn[idx], An[idx]

Xrw, yrw, Arw = resample_by_weights(
    X_train_ready, y_train, A_train, _rw_weights,
    n_samples=len(y_train), random_state=42
)

# Post: make a small TRAIN-based calibration split (no test leakage)
trn_X, cal_X, trn_y, cal_y, trn_A, cal_A = train_test_split(
    X_train_ready, y_train, A_train, test_size=0.12, stratify=y_train, random_state=42
)

# Make types consistent to avoid the PCA warning 
X_test_np = np.asarray(X_test_ready)
trn_X_np  = np.asarray(trn_X)
cal_X_np  = np.asarray(cal_X)

  vect_normalized_discounted_cumulative_gain = vmap(
  monte_carlo_vect_ndcg = vmap(vect_normalized_discounted_cumulative_gain, in_dims=(0,))


In [9]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="sklearn")

### Traditional ML Models - Baseline: K-Nearest Neighbors (KNN) & Decision Tree (DT)

In [10]:
#import required libraries
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    classification_report, confusion_matrix
)

#define a function 
def evaluate_model(y_true, y_pred, model_name):
    print(f"=== {model_name} Evaluation ===")
    print("Accuracy :", accuracy_score(y_true, y_pred))
    print("Precision:", precision_score(y_true, y_pred, average='binary'))
    print("Recall   :", recall_score(y_true, y_pred, average='binary'))
    print("F1 Score :", f1_score(y_true, y_pred, average='binary'))
    print("\nClassification Report:\n", classification_report(y_true, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred))
    print("\n" + "="*40 + "\n")

### PCA-KNN

In [11]:
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    classification_report, confusion_matrix
)
import numpy as np

#1) PCA + KNN pipeline (on one-hot encoded + scaled features)
pca_knn = Pipeline([
    ('pca', PCA(n_components=0.95, random_state=42)),  # keep 95% variance
    ('knn', KNeighborsClassifier(
        n_neighbors=15, metric='manhattan', weights='distance'
    ))
])

pca_knn.fit(X_train_ready, y_train)

# Inspect PCA details
n_comp = pca_knn.named_steps['pca'].n_components_
expl_var = pca_knn.named_steps['pca'].explained_variance_ratio_.sum()
print(f"PCA components: {n_comp} | Explained variance retained: {expl_var:.3f}")

# 2) Evaluate 
y_pred_pca_knn = pca_knn.predict(X_test_ready)
probs_pca_knn = pca_knn.predict_proba(X_test_ready)[:, 1]
  
evaluate_model(y_test, y_pred_pca_knn, "KNN (best params)")

PCA components: 12 | Explained variance retained: 0.967
=== KNN (best params) Evaluation ===
Accuracy : 0.8858695652173914
Precision: 0.9090909090909091
Recall   : 0.8823529411764706
F1 Score : 0.8955223880597015

Classification Report:
               precision    recall  f1-score   support

           0       0.86      0.89      0.87        82
           1       0.91      0.88      0.90       102

    accuracy                           0.89       184
   macro avg       0.88      0.89      0.88       184
weighted avg       0.89      0.89      0.89       184

Confusion Matrix:
 [[73  9]
 [12 90]]




### Tuned Decision Tree (DT)

In [12]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.metrics import (
    classification_report, confusion_matrix, accuracy_score,
    precision_score, recall_score, f1_score
)

# 1) Base model
dt = DecisionTreeClassifier(random_state=42)

# 2) Hyperparameter grid 
param_grid = {
    "criterion": ["gini", "entropy"],
    "max_depth": [3, 5, 7, 9, None],
    "min_samples_split": [2, 5, 10, 20],
    "min_samples_leaf": [1, 2, 4, 6, 10],
}

# 3) Cross-validation setup
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# 4) Grid search 
grid_dt = GridSearchCV(
    estimator=dt,
    param_grid=param_grid,
    cv=cv,
    scoring="f1",      
    n_jobs=-1,
    verbose=0
)

grid_dt.fit(X_train_ready, y_train)

print("Best Decision Tree params:", grid_dt.best_params_)
print("Best CV F1:", grid_dt.best_score_)

# 5) Train & evaluate best DT
tuned_dt = grid_dt.best_estimator_
y_pred_dt_best = tuned_dt.predict(X_test_ready)
y_prob_dt_best = tuned_dt.predict_proba(X_test_ready)[:, 1] 

evaluate_model(y_test, y_pred_dt_best, "Tuned Decision Tree (best params)")

Best Decision Tree params: {'criterion': 'entropy', 'max_depth': 9, 'min_samples_leaf': 2, 'min_samples_split': 2}
Best CV F1: 0.8593494246061409
=== Tuned Decision Tree (best params) Evaluation ===
Accuracy : 0.8097826086956522
Precision: 0.819047619047619
Recall   : 0.8431372549019608
F1 Score : 0.8309178743961353

Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.77      0.78        82
           1       0.82      0.84      0.83       102

    accuracy                           0.81       184
   macro avg       0.81      0.81      0.81       184
weighted avg       0.81      0.81      0.81       184

Confusion Matrix:
 [[63 19]
 [16 86]]




### Ensemble Model - Random Forest (RF)

In [13]:
from sklearn.ensemble import RandomForestClassifier

# Initialize Random Forest
rf = RandomForestClassifier(random_state=42)

# Train the model
rf.fit(X_train_ready, y_train)

# Predict on test set
y_pred_rf = rf.predict(X_test_ready)
y_prob_rf = rf.predict_proba(X_test_ready)[:, 1]  
evaluate_model(y_test, y_pred_rf, "Random Forest")

=== Random Forest Evaluation ===
Accuracy : 0.8804347826086957
Precision: 0.8703703703703703
Recall   : 0.9215686274509803
F1 Score : 0.8952380952380953

Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.83      0.86        82
           1       0.87      0.92      0.90       102

    accuracy                           0.88       184
   macro avg       0.88      0.88      0.88       184
weighted avg       0.88      0.88      0.88       184

Confusion Matrix:
 [[68 14]
 [ 8 94]]




### Deep Learning - Multi-layer Perceptron

In [14]:
#import required library 
from sklearn.neural_network import MLPClassifier

### Adam MLP + Early Stopping

In [16]:
#Adam + Early Stopping 
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix

adammlp = MLPClassifier(
    hidden_layer_sizes=(64, 32),   # slightly smaller/deeper can help
    activation='relu',
    solver='adam',
    learning_rate_init=1e-3,       # smaller step can stabilize
    alpha=1e-3,                    # L2 regularization to reduce overfitting
    batch_size=32,
    max_iter=1000,                 # increased max_iter
    early_stopping=True,           # use a validation split internally
    validation_fraction=0.15,
    n_iter_no_change=25,          
    tol=1e-4,
    random_state=42
)

adammlp.fit(X_train_ready, y_train)  
y_pred_mlp = adammlp.predict(X_test_ready)                     
y_prob_mlp = adammlp.predict_proba(X_test_ready)[:, 1]         

evaluate_model(y_test, y_pred_mlp, "(Adam + EarlyStopping)")

=== (Adam + EarlyStopping) Evaluation ===
Accuracy : 0.8586956521739131
Precision: 0.8877551020408163
Recall   : 0.8529411764705882
F1 Score : 0.87

Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.87      0.85        82
           1       0.89      0.85      0.87       102

    accuracy                           0.86       184
   macro avg       0.86      0.86      0.86       184
weighted avg       0.86      0.86      0.86       184

Confusion Matrix:
 [[71 11]
 [15 87]]




### Bias Mitigation AIF360

In [17]:
# Setup: install AIF360
# Uncomment the next line if running locally for the first time
#!pip install aif360

In [18]:
import aif360
print("AIF360 version:", aif360.__version__)

AIF360 version: 0.6.1


### Bias Mitigation AIF 360 - KNN 

In [19]:
#get the Fairlearn KNN Baseline for AIF360 bias mitigation
knn_base = pca_knn

yhat_knn_base   = knn_base.predict(X_test_ready)         
scores_knn_base = get_scores(knn_base, X_test_ready)

res_knn_base = report_model("Fairlearn - KNN baseline", y_test, yhat_knn_base, A_test, scores=scores_knn_base)


#Pre (Reweighing)
knn_pre        = clone(pca_knn).fit(Xrw, yrw)
yhat_knn_pre   = knn_pre.predict(X_test_ready)
scores_knn_pre = get_scores(knn_pre, X_test_ready)
res_knn_pre    = report_model("KNN pre: Reweigh",
                              y_test, yhat_knn_pre, A_test,
                              scores=scores_knn_pre,
                              note="resampled by AIF360 weights")

#Post (Equalized Odds)
cal_scores_knn   = get_scores(knn_base, cal_X_np)  # baseline KNN on CAL
post_knn = EqOddsPostprocessing(privileged_groups=privileged_groups,
                                unprivileged_groups=unprivileged_groups)
post_knn.fit(_to_bld(cal_y, cal_A),
             _to_bld((cal_scores_knn >= 0.5).astype(int), cal_A))

pred_knn_post_bld = post_knn.predict(_to_bld((scores_knn_base >= 0.5).astype(int), A_test))
yhat_knn_post     = pred_knn_post_bld.labels.ravel().astype(int)

res_knn_post = report_model("KNN post: EqOdds",
                            y_test, yhat_knn_post, A_test,
                            scores=scores_knn_base,
                            note="calibrated on held-out TRAIN")


=== Fairlearn - KNN baseline ===


Unnamed: 0_level_0,TPR,FPR,Recall,SelectionRate,Accuracy
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,0.833333,0.125,0.833333,0.236842,0.868421
1,0.885417,0.1,0.885417,0.616438,0.890411


Overall -> Accuracy: 0.8859 | DP diff: 0.3796 | EO diff: 0.0521

=== KNN pre: Reweigh ===


Unnamed: 0_level_0,TPR,FPR,Recall,SelectionRate,Accuracy
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,1.0,0.09375,1.0,0.236842,0.921053
1,0.84375,0.14,0.84375,0.60274,0.849315


Overall -> Accuracy: 0.8641 | DP diff: 0.3659 | EO diff: 0.1562 | resampled by AIF360 weights

=== KNN post: EqOdds ===


Unnamed: 0_level_0,TPR,FPR,Recall,SelectionRate,Accuracy
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,0.833333,0.125,0.833333,0.236842,0.868421
1,0.885417,0.1,0.885417,0.616438,0.890411


Overall -> Accuracy: 0.8859 | DP diff: 0.3796 | EO diff: 0.0521 | calibrated on held-out TRAIN


## KNN + AIF360

### Results overview
| Variant             | Accuracy | DP diff | EO diff (TPR gap) | DP+EO |
|---------------------|---------:|--------:|------------------:|------:|
| Baseline            | 0.8859   | 0.3796  | **0.0521**        | **0.4317** |
| Pre: Reweigh        | 0.8641   | **0.3659** | 0.1562         | 0.5221 |
| Post: EqualizedOdds | 0.8859   | 0.3796  | **0.0521**        | **0.4317** |

---

### Per-group behavior (Female → 0, Male → 1)

#### Baseline
- **Selection rate:** 0 **0.237**, 1 **0.616** → DP gap **0.380**.  
- **TPR (Recall):** 0 **0.833**, 1 **0.885** → recall gap **0.052** (quite small).  
- **FPR:** 0 **0.125**, 1 **0.100**.  
- **Note:** High accuracy and already good EO, but selection disparity remains.

#### Pre-processing: Reweigh
- **Selection rate:** 0 **0.237**, 1 **0.603** → DP improves slightly to **0.366**.  
- **TPR (Recall):** 0 **1.000**, 1 **0.844** → EO gap increases to **0.156**.  
- **FPR:** 0 **0.094**, 1 **0.140** (both ↑).  
- **Note:** Some DP improvement, but EO worsens and accuracy drops modestly.

#### Post-processing: Equalized Odds
- **Selection rate:** 0 **0.237**, 1 **0.616** → DP gap **0.380** (same as baseline).  
- **TPR (Recall):** 0 **0.833**, 1 **0.885** → EO gap unchanged (**0.052**).  
- **FPR:** 0 **0.125**, 1 **0.100** (same as baseline).  
- **Note:** Identical to baseline — no fairness gains.

---

### Implications
- **Most fair overall:** **Baseline** already strikes the best balance (small EO, higher accuracy).  
- **Pre: Reweigh** reduces DP a bit but worsens EO and lowers accuracy.  
- **Post: EqOdds** does not change results in this setup.  

---

In [20]:
#get the Fairlearn DT Baseline for AIF360 bias mitigation
dt_base = tuned_dt

yhat_dt_base   = dt_base.predict(X_test_ready)         
scores_dt_base = get_scores(dt_base, X_test_ready)

res_dt_base = report_model("Fairlearn - DT baseline", y_test, yhat_dt_base, A_test, scores=scores_dt_base)

# Pre (Reweighing)
dt_pre = clone(tuned_dt).fit(Xrw, yrw)
yhat_dt_pre = dt_pre.predict(X_test_np)
scores_dt_pre = get_scores(dt_pre, X_test_np)
_ = report_model("DT pre: Reweigh", y_test, yhat_dt_pre, A_test, scores=scores_dt_pre,
                 note="resampled by AIF360 weights")

# Post (Equalized Odds)
cal_scores_dt = get_scores(dt_base, cal_X_np)
post_dt = EqOddsPostprocessing(privileged_groups=privileged_groups,
                               unprivileged_groups=unprivileged_groups)
post_dt.fit(_to_bld(cal_y, cal_A),
            _to_bld((cal_scores_dt >= 0.5).astype(int), cal_A))
yhat_dt_post = post_dt.predict(_to_bld((scores_dt_base >= 0.5).astype(int), A_test)).labels.ravel().astype(int)
_ = report_model("DT post: EqOdds", y_test, yhat_dt_post, A_test, scores=scores_dt_base,
                 note="calibrated on held-out TRAIN")


=== Fairlearn - DT baseline ===


Unnamed: 0_level_0,TPR,FPR,Recall,SelectionRate,Accuracy
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,0.833333,0.28125,0.833333,0.368421,0.736842
1,0.84375,0.2,0.84375,0.623288,0.828767


Overall -> Accuracy: 0.8098 | DP diff: 0.2549 | EO diff: 0.0104

=== DT pre: Reweigh ===


Unnamed: 0_level_0,TPR,FPR,Recall,SelectionRate,Accuracy
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,1.0,0.25,1.0,0.368421,0.789474
1,0.802083,0.16,0.802083,0.582192,0.815068


Overall -> Accuracy: 0.8098 | DP diff: 0.2138 | EO diff: 0.1979 | resampled by AIF360 weights

=== DT post: EqOdds ===


Unnamed: 0_level_0,TPR,FPR,Recall,SelectionRate,Accuracy
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,0.833333,0.375,0.833333,0.447368,0.657895
1,0.864583,0.2,0.864583,0.636986,0.842466


Overall -> Accuracy: 0.8043 | DP diff: 0.1896 | EO diff: 0.0312 | calibrated on held-out TRAIN


## DT + AIF360

### Results overview
| Variant             | Accuracy | DP diff | EO diff (TPR gap) | DP+EO |
|---------------------|---------:|--------:|------------------:|------:|
| Baseline            | 0.8098   | 0.2549  | **0.0104**        | **0.2653** |
| Pre: Reweigh        | 0.8098   | 0.2138  | 0.1979            | 0.4117 |
| Post: EqualizedOdds | 0.8043   | **0.1896** | 0.0312          | 0.2208 |

---

### Per-group reading (Female → 0, Male → 1)

#### Baseline
- **Selection rate:** 0 **0.369**, 1 **0.624** → DP gap **0.255**.  
- **TPR (Recall):** 0 **0.833**, 1 **0.844** → EO gap **0.010** (excellent parity).  
- **FPR:** 0 **0.281**, 1 **0.200** (higher for females).  
- **Note:** High balance overall; smallest EO and decent accuracy.

#### Pre-processing: Reweigh
- **Selection rate:** 0 **0.368**, 1 **0.582** → DP improves to **0.214**.  
- **TPR (Recall):** 0 **1.000**, 1 **0.802** → EO worsens to **0.198**.  
- **FPR:** 0 **0.250**, 1 **0.160**.  
- **Note:** Best DP vs. baseline, but EO gap widens significantly.

#### Post-processing: Equalized Odds
- **Selection rate:** 0 **0.447**, 1 **0.637** → DP improves further to **0.190**.  
- **TPR (Recall):** 0 **0.833**, 1 **0.865** → EO gap **0.031** (still small).  
- **FPR:** 0 **0.375** (↑), 1 **0.200** (≈ baseline).  
- **Note:** Strong DP improvement, EO still low, but accuracy slightly reduced.

---

### Implications
- **Most balanced overall:** **Post: EqOdds**, with the lowest combined gap (DP+EO ≈ 0.221).  
- **Baseline** already has excellent EO (≈0.01) and good accuracy but higher DP.  
- **Pre: Reweigh** reduces DP but significantly harms EO parity.  
- **Takeaway:** Post-processing seems the more effective fairness strategy for DT here, though baseline is also competitive thanks to its minimal EO gap.

---

In [21]:
# Random Forest (RF) with AIF360

# -get Fairlearn baseline
yhat_rf_base    = rf.predict(X_test_ready)
scores_rf_base  = get_scores(rf, X_test_ready)
res_rf_base     = report_model("Fairlearn - RF baseline", y_test, yhat_rf_base, A_test, scores=scores_rf_base)

# Pre (Reweighing via sample_weight)
rf_pre          = clone(rf).fit(X_train_ready, y_train, sample_weight=_rw_weights)
yhat_rf_pre     = rf_pre.predict(X_test_ready)
scores_rf_pre   = get_scores(rf_pre, X_test_ready)
res_rf_pre      = report_model("RF pre: Reweigh (sample_weight)",
                               y_test, yhat_rf_pre, A_test, scores=scores_rf_pre)

# Post (Equalized Odds) learned on CAL
cal_scores_rf   = get_scores(rf, cal_X_np)  # baseline rf on calibration split
post_rf = EqOddsPostprocessing(privileged_groups=privileged_groups,
                               unprivileged_groups=unprivileged_groups)
post_rf.fit(_to_bld(cal_y, cal_A),
            _to_bld((cal_scores_rf >= 0.5).astype(int), cal_A))

pred_rf_post_bld = post_rf.predict(_to_bld((scores_rf_base >= 0.5).astype(int), A_test))
yhat_rf_post     = pred_rf_post_bld.labels.ravel().astype(int)

res_rf_post = report_model("RF post: EqOdds",
                           y_test, yhat_rf_post, A_test,
                           scores=scores_rf_base,
                           note="calibrated on held-out TRAIN")


=== Fairlearn - RF baseline ===


Unnamed: 0_level_0,TPR,FPR,Recall,SelectionRate,Accuracy
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,1.0,0.125,1.0,0.263158,0.894737
1,0.916667,0.2,0.916667,0.671233,0.876712


Overall -> Accuracy: 0.8804 | DP diff: 0.4081 | EO diff: 0.0833

=== RF pre: Reweigh (sample_weight) ===


Unnamed: 0_level_0,TPR,FPR,Recall,SelectionRate,Accuracy
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,1.0,0.125,1.0,0.263158,0.894737
1,0.916667,0.2,0.916667,0.671233,0.876712


Overall -> Accuracy: 0.8804 | DP diff: 0.4081 | EO diff: 0.0833

=== RF post: EqOdds ===


Unnamed: 0_level_0,TPR,FPR,Recall,SelectionRate,Accuracy
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,1.0,0.15625,1.0,0.289474,0.868421
1,0.916667,0.2,0.916667,0.671233,0.876712


Overall -> Accuracy: 0.8750 | DP diff: 0.3818 | EO diff: 0.0833 | calibrated on held-out TRAIN


## RF + AIF360 

### Results overview
| Variant             | Accuracy | DP diff | EO diff (TPR gap) | DP+EO |
|---------------------|---------:|--------:|------------------:|------:|
| Baseline            | 0.8804   | 0.4081  | **0.0833**        | 0.4914 |
| Pre: Reweigh        | 0.8804   | 0.4081  | **0.0833**        | 0.4914 |
| Post: EqualizedOdds | 0.8750   | **0.3818** | **0.0833**      | **0.4651** |

---

### Per-group behavior (Female → 0, Male → 1)

#### Baseline
- **Selection rate:** 0 **0.263**, 1 **0.671** → DP gap **0.408** (large).  
- **TPR (Recall):** 0 **1.000**, 1 **0.917** → EO gap **0.083** (small).  
- **FPR:** 0 **0.125**, 1 **0.200**.  
- **Note:** High accuracy, EO relatively low, but strong selection disparity.

#### Pre-processing: Reweigh
- **Selection rate:** unchanged → DP **0.408**.  
- **TPR (Recall):** unchanged (**1.000 vs 0.917**) → EO **0.083**.  
- **FPR:** unchanged.  
- **Note:** Exactly the same as baseline due to sample weighting.

#### Post-processing: Equalized Odds
- **Selection rate:** 0 **0.289**, 1 **0.671** → DP improves slightly to **0.382**.  
- **TPR (Recall):** unchanged (**1.000 vs 0.917**) → EO **0.083**.  
- **FPR:** 0 **0.156**, 1 **0.200** (female FPR ↑).  
- **Note:** Accuracy drops a bit (0.875 vs 0.880), fairness slightly improved on DP, EO unchanged.

---

### Implications
- **Most fair overall:** **Post: EqOdds** achieves the lowest combined gap (DP+EO ≈ 0.465), though at a minor accuracy cost.  
- **Baseline** already gives good EO parity but with higher DP disparity.  
- **Pre: Reweigh** has no effect in this configuration.  
- **Takeaway:** RF baseline is strong, but post-processing offers a marginal DP improvement without hurting EO.  

---

In [22]:
#get the Fairlearn MLP Baseline for AIF360 bias mitigation
mlp_base = adammlp
yhat_mlp_base   = mlp_base.predict(X_test_ready)         
scores_mlp_base = get_scores(mlp_base, X_test_ready)

res_mlp_base = report_model("Fairlearn - MLP baseline", y_test, yhat_mlp_base, A_test, scores=scores_mlp_base)

# Pre (Reweighing)
mlp_pre = clone(adammlp).fit(Xrw, yrw)
yhat_mlp_pre = mlp_pre.predict(X_test_np)
scores_mlp_pre = get_scores(mlp_pre, X_test_np)
_ = report_model("MLP pre: Reweigh", y_test, yhat_mlp_pre, A_test, scores=scores_mlp_pre,
                 note="resampled by AIF360 weights")

# Post (Equalized Odds)
cal_scores_mlp = get_scores(mlp_base, cal_X_np)
post_mlp = EqOddsPostprocessing(privileged_groups=privileged_groups,
                                unprivileged_groups=unprivileged_groups)
post_mlp.fit(_to_bld(cal_y, cal_A),
             _to_bld((cal_scores_mlp >= 0.5).astype(int), cal_A))
yhat_mlp_post = post_mlp.predict(_to_bld((scores_mlp_base >= 0.5).astype(int), A_test)).labels.ravel().astype(int)
_ = report_model("MLP post: EqOdds", y_test, yhat_mlp_post, A_test, scores=scores_mlp_base,
                 note="calibrated on held-out TRAIN")


=== Fairlearn - MLP baseline ===


Unnamed: 0_level_0,TPR,FPR,Recall,SelectionRate,Accuracy
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,1.0,0.125,1.0,0.263158,0.894737
1,0.84375,0.14,0.84375,0.60274,0.849315


Overall -> Accuracy: 0.8587 | DP diff: 0.3396 | EO diff: 0.1562

=== MLP pre: Reweigh ===


Unnamed: 0_level_0,TPR,FPR,Recall,SelectionRate,Accuracy
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,1.0,0.15625,1.0,0.289474,0.868421
1,0.885417,0.2,0.885417,0.650685,0.856164


Overall -> Accuracy: 0.8587 | DP diff: 0.3612 | EO diff: 0.1146 | resampled by AIF360 weights

=== MLP post: EqOdds ===


Unnamed: 0_level_0,TPR,FPR,Recall,SelectionRate,Accuracy
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,1.0,0.125,1.0,0.263158,0.894737
1,0.791667,0.22,0.791667,0.59589,0.787671


Overall -> Accuracy: 0.8098 | DP diff: 0.3327 | EO diff: 0.2083 | calibrated on held-out TRAIN


## MLP + AIF360

### Results overview
| Variant             | Accuracy | DP diff | EO diff (TPR gap) | DP+EO |
|---------------------|---------:|--------:|------------------:|------:|
| Baseline            | 0.8587   | **0.3396** | 0.1562          | 0.4958 |
| Pre: Reweigh        | 0.8587   | 0.3612  | **0.1146**        | **0.4758** |
| Post: EqualizedOdds | 0.8098   | 0.3327  | 0.2083            | 0.5410 |

---

### Per-group behavior (Female → 0, Male → 1)

#### Baseline
- **Selection rate:** 0 **0.263**, 1 **0.603** → DP gap **0.340**.  
- **TPR (Recall):** 0 **1.000**, 1 **0.844** → EO gap **0.156**.  
- **FPR:** 0 **0.125**, 1 **0.140**.  
- **Note:** Highest accuracy, with moderate DP disparity and a noticeable EO gap.

#### Pre-processing: Reweigh
- **Selection rate:** 0 **0.289**, 1 **0.651** → DP worsens slightly to **0.361**.  
- **TPR (Recall):** 0 **1.000**, 1 **0.885** → **best EO** (**0.115**).  
- **FPR:** 0 **0.156**, 1 **0.200** (both ↑ compared to baseline).  
- **Note:** Improves EO but increases DP a bit; accuracy unchanged.

#### Post-processing: Equalized Odds
- **Selection rate:** 0 **0.263**, 1 **0.596** → DP **0.333** (slightly better than baseline).  
- **TPR (Recall):** 0 **1.000**, 1 **0.792** → EO worsens to **0.208**.  
- **FPR:** 0 **0.125**, 1 **0.220** (female parity maintained, male ↑ sharply).  
- **Note:** Accuracy drops most (0.810); DP improved, but EO worsened.

---

### Implications
- **Most fair overall (lowest DP+EO):** **Pre: Reweigh** (≈0.476) — best EO, but slightly worse DP.  
- **Baseline** remains strong in accuracy and a decent fairness balance.  
- **Post: EqOdds** sacrifices accuracy and EO, despite a small DP improvement.  

For MLP, **Reweighing** appears to be the most effective trade-off, prioritizing equal opportunity while keeping accuracy high.

---

First fairness mitigation: pre- and post-processing was performed on the designated best performing models (KNN, DT, RF, MLP) for CVD prediction.  In addition, these results are compared to a fairness-aware in-processing model - Adversarial Debiasing offered by AIF360.

In [23]:
#Adversarial Debiasing - In-processing by AIF360
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

try:
    import tensorflow as tf
    from aif360.algorithms.inprocessing import AdversarialDebiasing

    # TF1 graph mode - required by AIF360's implementation 
    tf.compat.v1.disable_eager_execution()
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

    # Build AIF360 datasets with FEATURES + label + sensitive attribute
    bld_tr = BinaryLabelDataset(
        df=pd.concat([
            pd.DataFrame(X_train_ready).reset_index(drop=True),
            pd.Series(y_train, name=label_name),
            pd.Series(A_train, name=protected_attr)
        ], axis=1),
        label_names=[label_name],
        protected_attribute_names=[protected_attr],
        favorable_label=favorable_label,
        unfavorable_label=unfavorable_label
    )

    bld_te = BinaryLabelDataset(
        df=pd.concat([
            pd.DataFrame(X_test_ready).reset_index(drop=True),
            pd.Series(y_test, name=label_name),
            pd.Series(A_test, name=protected_attr)
        ], axis=1),
        label_names=[label_name],
        protected_attribute_names=[protected_attr],
        favorable_label=favorable_label,
        unfavorable_label=unfavorable_label
    )

    # Train + predict inside a TF1 session
    sess = tf.compat.v1.Session()
    with sess.as_default():
        adv = AdversarialDebiasing(
            privileged_groups=privileged_groups,
            unprivileged_groups=unprivileged_groups,
            scope_name="adv_debias",
            debias=True,
            sess=sess
        )
        adv.fit(bld_tr)
        pred_te = adv.predict(bld_te)

        # Extract labels and (if available) scores
        yhat_adv = pred_te.labels.ravel().astype(int)
        scores_adv = getattr(pred_te, "scores", None)
        if scores_adv is None:
            scores_adv = yhat_adv.astype(float)

    # Clean up TF graph
    tf.compat.v1.reset_default_graph()
    sess.close()

    # Same structured output as other models
    _ = report_model(
        "ADV in-proc (AIF360)",
        y_test, yhat_adv, A_test,
        scores=scores_adv,
        note="trained on X_train_ready"
    )

except Exception as e:
    print("AdversarialDebiasing skipped:", type(e).__name__, e)



epoch 0; iter: 0; batch classifier loss: 0.735763; batch adversarial loss: 0.681147
epoch 1; iter: 0; batch classifier loss: 0.677662; batch adversarial loss: 0.682407
epoch 2; iter: 0; batch classifier loss: 0.607165; batch adversarial loss: 0.679510
epoch 3; iter: 0; batch classifier loss: 0.569618; batch adversarial loss: 0.679133
epoch 4; iter: 0; batch classifier loss: 0.505279; batch adversarial loss: 0.676523
epoch 5; iter: 0; batch classifier loss: 0.493380; batch adversarial loss: 0.680220
epoch 6; iter: 0; batch classifier loss: 0.475112; batch adversarial loss: 0.675369
epoch 7; iter: 0; batch classifier loss: 0.466881; batch adversarial loss: 0.674108
epoch 8; iter: 0; batch classifier loss: 0.373924; batch adversarial loss: 0.667348
epoch 9; iter: 0; batch classifier loss: 0.396961; batch adversarial loss: 0.669007
epoch 10; iter: 0; batch classifier loss: 0.365351; batch adversarial loss: 0.665717
epoch 11; iter: 0; batch classifier loss: 0.420213; batch adversarial los

Unnamed: 0_level_0,TPR,FPR,Recall,SelectionRate,Accuracy
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,1.0,0.15625,1.0,0.289474,0.868421
1,0.822917,0.1,0.822917,0.575342,0.849315


Overall -> Accuracy: 0.8533 | DP diff: 0.2859 | EO diff: 0.1771 | trained on X_train_ready


## ADV In-processing (AIF360)

### Results overview
| Variant        | Accuracy | DP diff | EO diff (TPR gap) | DP+EO |
|----------------|---------:|--------:|------------------:|------:|
| ADV in-proc    | 0.8533   | **0.2859** | 0.1771          | 0.4630 |

---

### Per-group behavior (Female → 0, Male → 1)

#### ADV in-proc
- **Selection rate:** 0 **0.289**, 1 **0.575** → DP gap **0.286** (moderate).  
- **TPR (Recall):** 0 **1.000**, 1 **0.823** → EO gap **0.177** (females perfectly recalled, males lower).  
- **FPR:** 0 **0.156**, 1 **0.100** (female higher).  
- **Accuracy:** Female **0.868**, Male **0.849** → both solid, female slightly higher.  

---

### Implications
- **DP disparity improves** notably (0.286 vs ~0.38 in earlier ADV runs).  
- **EO gap worsens** (0.177 vs ~0.03 previously), as female recall is perfect while male recall lags.  
- **Accuracy (0.853)** is competitive, only slightly below prior ADV results.  
- **Interpretation:** This configuration balances selection rates better, but trades off by creating a larger recall disparity between sexes.

---

In [24]:
# Grid-tune AIF360 AdversarialDebiasing for better DP/EO balance and print with report_model
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

import tensorflow as tf
from aif360.algorithms.inprocessing import AdversarialDebiasing

# small search over key knobs; widen if needed
ADV_GRID = dict(
    adversary_loss_weight=[0.02, 0.05, 0.1, 0.2, 0.3],
    num_epochs=[40, 60, 80],
    batch_size=[64, 128],
    classifier_num_hidden_units=[32, 64]  # size of main net
)

def run_adv(loss_w=0.1, epochs=50, bs=128, hidden=64, seed=42):
    tf.compat.v1.reset_default_graph()
    tf.compat.v1.disable_eager_execution()
    tf.compat.v1.set_random_seed(seed)
    sess = tf.compat.v1.Session()
    with sess.as_default():
        adv = AdversarialDebiasing(
            privileged_groups=privileged_groups,
            unprivileged_groups=unprivileged_groups,
            debias=True,
            scope_name=f"adv_w{loss_w}_e{epochs}_b{bs}_h{hidden}",
            adversary_loss_weight=loss_w,
            num_epochs=epochs,
            batch_size=bs,
            classifier_num_hidden_units=hidden,
            sess=sess
        )
        adv.fit(bld_tr)
        pred_te = adv.predict(bld_te)
        yhat = pred_te.labels.ravel().astype(int)
        scores = getattr(pred_te, "scores", None)
        if scores is None:
            scores = yhat.astype(float)
    sess.close()
    tf.compat.v1.reset_default_graph()
    return yhat, scores

# Build once (as you did)
bld_tr = BinaryLabelDataset(
    df=pd.concat([pd.DataFrame(X_train_ready).reset_index(drop=True),
                  pd.Series(y_train, name=label_name),
                  pd.Series(A_train, name=protected_attr)], axis=1),
    label_names=[label_name], protected_attribute_names=[protected_attr],
    favorable_label=favorable_label, unfavorable_label=unfavorable_label
)
bld_te = BinaryLabelDataset(
    df=pd.concat([pd.DataFrame(X_test_ready).reset_index(drop=True),
                  pd.Series(y_test, name=label_name),
                  pd.Series(A_test, name=protected_attr)], axis=1),
    label_names=[label_name], protected_attribute_names=[protected_attr],
    favorable_label=favorable_label, unfavorable_label=unfavorable_label
)

# Search & pick the best by minimizing (DP + EO) with an accuracy floor
best = None
acc_floor = 0.86  # keep close to your current accuracy; adjust as you like
results = []
for w in ADV_GRID["adversary_loss_weight"]:
    for e in ADV_GRID["num_epochs"]:
        for bs in ADV_GRID["batch_size"]:
            for h in ADV_GRID["classifier_num_hidden_units"]:
                yhat, scores = run_adv(w, e, bs, h)
                acc = accuracy_score(y_test, yhat)
                dp, eo = fair_metrics(y_test, yhat, A_test, scores, absolute=True)
                obj = dp + eo
                results.append((obj, acc, dp, eo, w, e, bs, h, yhat, scores))
                if (best is None or obj < best[0]) and acc >= acc_floor:
                    best = (obj, acc, dp, eo, w, e, bs, h, yhat, scores)

# Report best and (optionally) a few runners-up
if best is None:
    # fallback: take global best even if below floor
    best = sorted(results, key=lambda t: t[0])[0]

obj, acc, dp, eo, w, e, bs, h, yhat_best, scores_best = best
_ = report_model(
    f"ADV in-proc (best) w={w}, e={e}, b={bs}, h={h}",
    y_test, yhat_best, A_test, scores=scores_best,
    note=f"combined gap (DP+EO)={obj:.4f}; acc={acc:.4f}"
)

epoch 0; iter: 0; batch classifier loss: 0.807135; batch adversarial loss: 0.665331
epoch 1; iter: 0; batch classifier loss: 0.762230; batch adversarial loss: 0.647444
epoch 2; iter: 0; batch classifier loss: 0.676140; batch adversarial loss: 0.644139
epoch 3; iter: 0; batch classifier loss: 0.622245; batch adversarial loss: 0.697847
epoch 4; iter: 0; batch classifier loss: 0.550433; batch adversarial loss: 0.640313
epoch 5; iter: 0; batch classifier loss: 0.569255; batch adversarial loss: 0.632424
epoch 6; iter: 0; batch classifier loss: 0.603947; batch adversarial loss: 0.642563
epoch 7; iter: 0; batch classifier loss: 0.535557; batch adversarial loss: 0.613861
epoch 8; iter: 0; batch classifier loss: 0.468385; batch adversarial loss: 0.641418
epoch 9; iter: 0; batch classifier loss: 0.601277; batch adversarial loss: 0.613495
epoch 10; iter: 0; batch classifier loss: 0.393018; batch adversarial loss: 0.597638
epoch 11; iter: 0; batch classifier loss: 0.394670; batch adversarial loss:

Unnamed: 0_level_0,TPR,FPR,Recall,SelectionRate,Accuracy
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,0.833333,0.15625,0.833333,0.263158,0.842105
1,0.864583,0.1,0.864583,0.60274,0.876712


Overall -> Accuracy: 0.8696 | DP diff: 0.3396 | EO diff: 0.0312 | combined gap (DP+EO)=0.3708; acc=0.8696


## ADV In-processing (tuned)

### Results overview
| Variant            | Accuracy | DP diff | EO diff (TPR gap) | DP+EO |
|--------------------|---------:|--------:|------------------:|------:|
| ADV in-proc (best) | 0.8696   | 0.3396  | **0.0312**        | **0.3708** |

---

### Per-group behavior (Female → 0, Male → 1)

#### ADV in-proc (best, tuned)
- **Selection rate:** 0 **0.263**, 1 **0.603** → DP gap **0.340** (moderate).  
- **TPR (Recall):** 0 **0.833**, 1 **0.865** → **near parity** (EO gap **0.031**, very small).  
- **FPR:** 0 **0.156**, 1 **0.100** (slightly higher for females).  
- **Accuracy:** Female **0.842**, Male **0.877** → both groups solid, males a bit higher.  

---

### Implications
- **EO gap is very low (0.031)**, showing the model balances recall well between groups.  
- **DP gap (0.340)** is still present but improved compared to earlier ADV results.  
- **Accuracy (0.870)** is the highest among ADV runs so far, showing good overall performance.  
- **Overall:** This tuned ADV configuration achieves the **best balance so far** — high accuracy, minimal EO gap, and moderate DP disparity.

---

# Overall Comparison of Bias Mitigation Results

## Results overview
| Model / Variant       | Accuracy | DP diff | EO diff | DP+EO | Key Point                                                      |
|------------------------|---------:|--------:|--------:|------:|----------------------------------------------------------------|
| **KNN – Baseline**     | 0.8859   | 0.3796  | **0.0521** | **0.4317** | Strong accuracy, small EO, but DP remains high.                |
| KNN – Pre: Reweigh     | 0.8641   | **0.3659** | 0.1562 | 0.5221 | Slight DP gain, EO worsens, accuracy lower.                    |
| KNN – Post: EqOdds     | 0.8859   | 0.3796  | **0.0521** | **0.4317** | Identical to baseline.                                         |
| **DT – Baseline**      | 0.8098   | 0.2549  | **0.0104** | 0.2653 | Very low EO, but DP moderate.                                  |
| DT – Pre: Reweigh      | 0.8098   | 0.2138  | 0.1979 | 0.4117 | Best DP, but EO worsens sharply.                               |
| DT – Post: EqOdds      | 0.8043   | **0.1896** | 0.0312 | **0.2208** | Best balance (lowest DP+EO), slight accuracy drop.             |
| **RF – Baseline**      | 0.8804   | 0.4081  | **0.0833** | 0.4914 | High accuracy, good EO, DP high.                              |
| RF – Pre: Reweigh      | 0.8804   | 0.4081  | **0.0833** | 0.4914 | Same as baseline.                                              |
| RF – Post: EqOdds      | 0.8750   | **0.3818** | **0.0833** | **0.4651** | Slight DP gain, accuracy small drop.                           |
| **MLP – Baseline**     | 0.8587   | **0.3396** | 0.1562 | 0.4958 | Strong accuracy, moderate DP, EO gap.                         |
| MLP – Pre: Reweigh     | 0.8587   | 0.3612  | **0.1146** | **0.4758** | Best EO, DP worsens, accuracy stable.                          |
| MLP – Post: EqOdds     | 0.8098   | 0.3327  | 0.2083 | 0.5410 | Accuracy loss, EO worsens, DP slightly better.                 |
| **ADV in-proc**        | 0.8533   | **0.2859** | 0.1771 | 0.4630 | Best DP among ADV runs, EO worsens.                            |
| **ADV in-proc (tuned)**| 0.8696   | 0.3396  | **0.0312** | **0.3708** | Best ADV overall: strong accuracy + minimal EO.                |

---

## Cross-Model Insights
- **Best DP parity:** DT Post (0.1896) → lowest selection gap, with still-low EO (0.031).  
- **Best EO parity:** DT Baseline (0.010) and ADV tuned (0.031) → almost perfect recall equality.  
- **Best combined (DP+EO):** DT Post (0.221) → best fairness overall, though accuracy is lower.  
- **Best accuracy with fairness:** KNN Baseline (0.886, EO 0.052) and ADV tuned (0.870, EO 0.031).  
- **Weakest performers:** MLP Post (accuracy down, EO up) and RF Pre (no changes vs baseline).  

---

## Takeaways
- **KNN:** Baseline already the best trade-off — small EO, high accuracy.  
- **DT:** Post-processing Equalized Odds clearly dominates, minimizing both DP and EO.  
- **RF:** Baseline is strong; mitigation adds little benefit.  
- **MLP:** Reweighing best — reduces EO while holding accuracy.  
- **ADV (tuned):** Best global balance across models — **high accuracy, excellent EO parity, and moderate DP**.  

Overall, **DT Post and ADV tuned** stand out as the strongest fairness strategies:  
- DT Post is the **fairest** (lowest DP+EO overall).  
- ADV tuned is the **most balanced** (high accuracy + very small EO gap).

---