In [None]:

# Import necessary libraries
import sys
# Put the parent directory on the module search path at index 1 (right after
# the first entry). NOTE(review): presumably this lets a local aif360 checkout
# be imported -- confirm against the repository layout.
sys.path.insert(1, "../")  

import numpy as np
# Seed NumPy's global random number generator so that steps drawing from it
# produce the same values on a fresh kernel run.
np.random.seed(0)

# AIF360 pieces used below: the COMPAS dataset wrapper, dataset-level fairness
# metrics, and the Reweighing pre-processing algorithm.
from aif360.datasets import CompasDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.algorithms.preprocessing import Reweighing

# Rich-output helpers used to render Markdown headings from code cells.
from IPython.display import Markdown, display


In [None]:

# Load the COMPAS dataset with race as the only protected attribute
dataset_orig = CompasDataset(
    protected_attribute_names=['race'],            # Protected attribute: race
    privileged_classes=[['Caucasian']],           # Caucasians are privileged
    features_to_drop=['sex']                      # Dropping sex attribute for simplicity
)

# Seed for the shuffled split. Passing it explicitly (instead of relying on
# whatever state the global NumPy RNG happens to be in) makes this cell return
# the same partition every time it is executed, including on re-runs and
# out-of-order execution. The value matches the notebook-wide
# np.random.seed(0), so a fresh top-to-bottom run should be unchanged as long
# as nothing draws from the RNG between the two cells.
# Note: split() forwards the seed to np.random.seed, so the global RNG state
# is reset here as a side effect.
SPLIT_SEED = 0

# Split dataset into training (first 70% after shuffling) and test (remaining 30%) sets
dataset_orig_train, dataset_orig_test = dataset_orig.split(
    [0.7], shuffle=True, seed=SPLIT_SEED
)

# Group definitions consumed by the metric and the reweighing cells:
# race == 1 is the privileged group (the class listed in privileged_classes
# above) and race == 0 is the unprivileged group.
privileged_groups = [{'race': 1}]
unprivileged_groups = [{'race': 0}]


In [None]:

# Baseline fairness check: build a dataset-level metric over the untouched
# training split, comparing the unprivileged group against the privileged one.
metric_orig_train = BinaryLabelDatasetMetric(
    dataset_orig_train,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)

# Heading first, then the mean-outcome gap (unprivileged minus privileged).
display(Markdown("#### Original training dataset"))
print(
    "Difference in mean outcomes between unprivileged and privileged groups = %f"
    % metric_orig_train.mean_difference()
)


In [None]:

# Bias mitigation step: configure Reweighing with the same group definitions
# used for the metrics, then fit it on the training split and obtain the
# transformed training dataset in a single fit_transform call.
RW = Reweighing(
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)
dataset_transf_train = RW.fit_transform(dataset_orig_train)


In [None]:

# Post-mitigation fairness check: same metric as for the original data, but
# computed on the training dataset returned by the Reweighing step.
metric_transf_train = BinaryLabelDatasetMetric(
    dataset_transf_train,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)

# Heading first, then the mean-outcome gap (unprivileged minus privileged).
display(Markdown("#### Transformed training dataset"))
print(
    "Difference in mean outcomes between unprivileged and privileged groups = %f"
    % metric_transf_train.mean_difference()
)
