# In [None]:
# Jupyter notebook cell content for COMPAS fairness audit and mitigation
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from aif360.datasets import CompasDataset
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
from aif360.algorithms.preprocessing import Reweighing
from sklearn.model_selection import train_test_split

# Load the COMPAS dataset with AIF360's default configuration.
# No explicit NA handling is done here: AIF360 dataset objects expose no
# `dropna()` method (calling it raises AttributeError), and StandardDataset
# already drops rows containing missing values while it loads the data.
dataset = CompasDataset()

#  Train/Test split
# Fix the shuffle seed so the 70/30 split -- and every accuracy/fairness
# number derived from it -- is reproducible from run to run.
RANDOM_SEED = 42
train, test = dataset.split([0.7], shuffle=True, seed=RANDOM_SEED)

# Baseline model: standardise the features, then fit a logistic regression.
# Feature matrices come straight from the AIF360 splits; the label arrays are
# flattened with ravel() before being handed to scikit-learn.
X_train, y_train = train.features, train.labels.ravel()
X_test, y_test = test.features, test.labels.ravel()

pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("clf", LogisticRegression(solver='liblinear')),
])

# Pipeline.fit returns the fitted pipeline, so fitting and predicting on the
# held-out split can be chained into a single expression.
y_pred = pipeline.fit(X_train, y_train).predict(X_test)

#  Evaluate fairness (SPD, EOD)
# Group definitions, shared by every metric and by the reweighing step so they
# cannot drift apart. With AIF360's default CompasDataset configuration the
# `race` attribute is encoded as 1.0 = Caucasian (privileged class) and
# 0.0 = not Caucasian (unprivileged). Swapping the two flips the sign of every
# difference metric reported below.
PRIVILEGED_GROUPS = [{"race": 1}]
UNPRIVILEGED_GROUPS = [{"race": 0}]

# Copy the test split and overwrite its labels with the baseline predictions;
# ClassificationMetric compares this against the ground-truth `test` dataset.
test_pred = test.copy()
test_pred.labels = y_pred.reshape(-1, 1)

metric = ClassificationMetric(
    test, test_pred,
    unprivileged_groups=UNPRIVILEGED_GROUPS,
    privileged_groups=PRIVILEGED_GROUPS,
)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Statistical Parity Difference:", metric.statistical_parity_difference())
# equal_opportunity_difference() is the true-positive-rate gap between groups
# (equal opportunity), not equalized odds, so the label says exactly that.
print("Equal Opportunity Difference:", metric.equal_opportunity_difference())

# Mitigation using Reweighing
# Reweighing assigns instance weights per (group, label) combination; the
# transformed dataset's features and labels are then used with those weights.
RW = Reweighing(
    unprivileged_groups=UNPRIVILEGED_GROUPS,
    privileged_groups=PRIVILEGED_GROUPS,
)
RW.fit(train)
train_transf = RW.transform(train)

X_train_rw = train_transf.features
y_train_rw = train_transf.labels.ravel()
sample_weights = train_transf.instance_weights

# NOTE: this refits the same `pipeline` object in place, so the baseline model
# is replaced; the baseline predictions were already captured in `y_pred`.
# The `clf__` prefix routes the weights to the logistic-regression step only.
pipeline.fit(X_train_rw, y_train_rw, clf__sample_weight=sample_weights)
y_pred_rw = pipeline.predict(X_test)

# Post-mitigation metrics
test_pred_rw = test.copy()
test_pred_rw.labels = y_pred_rw.reshape(-1, 1)

metric_rw = ClassificationMetric(
    test, test_pred_rw,
    unprivileged_groups=UNPRIVILEGED_GROUPS,
    privileged_groups=PRIVILEGED_GROUPS,
)

print("Accuracy (Reweighed):", accuracy_score(y_test, y_pred_rw))
print("SPD (Reweighed):", metric_rw.statistical_parity_difference())
print("EOD (Reweighed):", metric_rw.equal_opportunity_difference())