In [13]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from aif360.datasets import CompasDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.algorithms.preprocessing import Reweighing
from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_compas

# Collect COMPAS dataset

In [5]:
# Fixed seed for the shuffled train/test split so that a fresh
# Restart & Run All reproduces the same partition (and therefore the same
# fairness numbers) every time.
# NOTE: the outputs recorded in this notebook came from an unseeded run, so
# re-executing with this seed may shift the reported values slightly.
SPLIT_SEED = 42

# Load the preprocessed COMPAS dataset via the AIF360 helper.
dataset = load_preproc_data_compas()

# Shuffle, then cut at the 0.7 mark: first part for training, remainder for testing.
dataset_train, dataset_test = dataset.split([0.7], shuffle=True, seed=SPLIT_SEED)

In [6]:
# Report the dimensions of the training split's feature matrix.
train_feature_shape = dataset_train.features.shape
print(train_feature_shape)

(3694, 10)


In [7]:
# List the attributes the dataset flags as protected.
protected_names = dataset_train.protected_attribute_names
print(protected_names)

['sex', 'race']


In [8]:
# Show the feature column names of the training split.
train_feature_names = dataset_train.feature_names
print(train_feature_names)

['sex', 'race', 'age_cat=25 to 45', 'age_cat=Greater than 45', 'age_cat=Less than 25', 'priors_count=0', 'priors_count=1 to 3', 'priors_count=More than 3', 'c_charge_degree=F', 'c_charge_degree=M']


# Establish initial accuracy and fairness metrics

In [9]:
# Group definitions keyed on the 'race' protected attribute; they are reused
# by the mitigation and metric cells further down.
unprivileged_groups = [{'race': 0}]
privileged_groups = [{'race': 1}]

# Fairness metric computed on the untransformed training split.
metric_train = BinaryLabelDatasetMetric(
    dataset_train,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)

# Baseline gap in mean outcomes between the two groups.
mean_diff_train = metric_train.mean_difference()
print("Difference in mean outcomes between unprivileged and privileged groups = %f" % mean_diff_train)

Difference in mean outcomes between unprivileged and privileged groups = -0.140138


# Mitigation

In [14]:
# Configure Reweighing with the same group definitions used for the metrics.
reweighing_kwargs = {
    'unprivileged_groups': unprivileged_groups,
    'privileged_groups': privileged_groups,
}
RW = Reweighing(**reweighing_kwargs)

# Fit on the training split and produce its transformed counterpart in one call.
dataset_transf_train = RW.fit_transform(dataset_train)

In [15]:
# Same fairness metric as before, now evaluated on the transformed training split.
metric_transf_train = BinaryLabelDatasetMetric(
    dataset_transf_train,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)

# Gap in mean outcomes between the groups after the Reweighing transform.
mean_diff_transf_train = metric_transf_train.mean_difference()
print("Difference in mean outcomes between unprivileged and privileged groups = %f" % mean_diff_transf_train)

Difference in mean outcomes between unprivileged and privileged groups = -0.000000


# Differences in accuracy and fairness