In [17]:
!pip install aif360 pandas matplotlib scikit-learn

# Step 2: Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from aif360.sklearn.datasets import fetch_compas
from aif360.metrics import ClassificationMetric
from aif360.algorithms.preprocessing import Reweighing
from aif360.sklearn.preprocessing import Reweighing as RW_sklearn

# Step 3: Load COMPAS dataset using fetch_compas()
data = fetch_compas(binary_race=True)

# Optional: view first few rows
df = pd.concat([data.X, data.y], axis=1)
print(df.head())

# Step 4: Split data into train/test
X = data.X
y = data.y
race = data.X['race'].values  # FIXED

X_train, X_test, y_train, y_test, race_train, race_test = train_test_split(
    X, y, race, test_size=0.3, random_state=42
)

# Step 5: Apply reweighing to address bias
train_df = pd.DataFrame(X_train, columns=X.columns)  # FIXED
train_df['y'] = y_train
train_df['race'] = race_train

rew = RW_sklearn(prot_attr='race', priv_group=1, unpriv_group=0)
X_train_rw, y_train_rw, w_train = rew.fit_transform(train_df[X.columns],
                                                   train_df['y'])

# Step 6: Scale features and train model
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_rw)
X_test_scaled = scaler.transform(X_test)

clf = LogisticRegression(max_iter=1000)
clf.fit(X_train_scaled, y_train_rw, sample_weight=w_train)

# Step 7: Predict on test set
y_pred = clf.predict(X_test_scaled)

# Step 8: Fairness evaluation
test_df = pd.DataFrame(X_test, columns=X.columns)
test_df['y_true'] = y_test
test_df['y_pred'] = y_pred
test_df['race'] = race_test

from aif360.datasets import BinaryLabelDataset

test_data = BinaryLabelDataset(df=test_df,
                               label_names=['y_true'],
                               protected_attribute_names=['race'])

pred_data = test_data.copy()
pred_data.labels = test_df['y_pred'].values.reshape(-1,1)

metric = ClassificationMetric(test_data, pred_data,
                              unprivileged_groups=[{'race': 0}],
                              privileged_groups=[{'race': 1}])

print("False Positive Rate Difference:", metric.false_positive_rate_difference())
print("Equal Opportunity Difference:", metric.equal_opportunity_difference())

# Step 9: Visualize FPR
fpr_priv = metric.false_positive_rate(privileged=True)
fpr_unpriv = metric.false_positive_rate(privileged=False)

plt.bar(['Privileged (White)', 'Unprivileged (Black)'],
        [fpr_priv, fpr_unpriv],
        color=['green', 'red'])
plt.ylabel('False Positive Rate')
plt.title('FPR by Race Group')
plt.show()


                            sex  age       age_cat              race  \
sex    race                                                            
Male   African-American    Male   34       25 - 45  African-American   
       African-American    Male   24  Less than 25  African-American   
       Caucasian           Male   41       25 - 45         Caucasian   
Female Caucasian         Female   39       25 - 45         Caucasian   
Male   Caucasian           Male   27       25 - 45         Caucasian   

                         juv_fel_count  juv_misd_count  juv_other_count  \
sex    race                                                               
Male   African-American              0               0                0   
       African-American              0               0                1   
       Caucasian                     0               0                0   
Female Caucasian                     0               0                0   
Male   Caucasian                     0       

TypeError: Reweighing.__init__() got an unexpected keyword argument 'priv_group'