In [1]:
from aif360.datasets import BinaryLabelDataset
from aif360.algorithms.preprocessing.reweighing import Reweighing

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

Import requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit
Import of 'jit' requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit


In [2]:
def _positive_rate(preds, mask):
    """Return the mean of ``preds`` over ``mask``, or NaN when ``mask`` selects nothing."""
    count = np.count_nonzero(mask)
    if count == 0:
        # An empty subgroup has no defined rate; NaN propagates into the gap
        # instead of aborting the whole evaluation with a ZeroDivisionError.
        return np.nan
    return preds[mask].sum() / count


def evaluate_fairness(y_true, y_pred, sensitive_features):
    """
    Compute per-group prediction rates and the fairness gaps between groups.

    For every distinct value in ``sensitive_features`` three rates are computed:

    * ``dp``    -- mean of ``y_pred`` over all members of the group
      (demographic parity),
    * ``eo_y0`` -- mean of ``y_pred`` over group members with ``y_true == 0``,
    * ``eo_y1`` -- mean of ``y_pred`` over group members with ``y_true == 1``
      (the two equalized-odds terms).

    #NOTE: defined in the meta_algo file, but we chose:
    a0 := African-American (COMPAS), Female (Adult)
    a1 := Caucasian (COMPAS), Male (Adult)

    The three inputs are matched by position, not by pandas index label, so
    Series with a non-default index (e.g. straight out of a train/test split)
    are handled correctly.

    :param y_true: array-like of ground-truth labels (0/1).
    :param y_pred: array-like of predicted labels, same length as ``y_true``.
    :param sensitive_features: array-like of group membership, same length.
    :return: tuple ``(groups, group_metrics, gaps)``.
        ``groups`` is the sorted array of distinct sensitive values.
        ``group_metrics`` maps ``'dp'``, ``'eo_y0'`` and ``'eo_y1'`` to a dict
        of ``{group: rate}``; a rate is NaN when the subgroup is empty.
        ``gaps`` maps the same three keys to the absolute difference between
        the rates of ``groups[0]`` and ``groups[1]``.
    :raises ValueError: if the inputs differ in length or fewer than two
        groups are present.
    """
    sensitive = np.asarray(sensitive_features).ravel()
    labels = np.asarray(y_true).ravel()
    preds = np.asarray(y_pred).ravel()

    if not (len(sensitive) == len(labels) == len(preds)):
        raise ValueError(
            "y_true, y_pred and sensitive_features must have the same length; "
            "got {}, {} and {}".format(len(labels), len(preds), len(sensitive))
        )

    groups = np.unique(sensitive)
    if len(groups) < 2:
        raise ValueError("need at least two sensitive groups to compute a gap")

    # Label masks do not depend on the group, so build them once.
    is_negative = labels == 0
    is_positive = labels == 1

    dp_pct = {}
    eo_y0_pct = {}
    eo_y1_pct = {}

    for group in groups:
        in_group = sensitive == group

        # Demographic Parity
        dp_pct[group] = _positive_rate(preds, in_group)

        # Equalized Odds
        eo_y0_pct[group] = _positive_rate(preds, in_group & is_negative)
        eo_y1_pct[group] = _positive_rate(preds, in_group & is_positive)

    group_metrics = {'dp': dp_pct, 'eo_y0': eo_y0_pct, 'eo_y1': eo_y1_pct}  # a dictionary of dictionaries

    # Gaps compare the first two groups only (binary sensitive attribute).
    first, second = groups[0], groups[1]
    gaps = {
        name: abs(rates[first] - rates[second])
        for name, rates in group_metrics.items()
    }

    return groups, group_metrics, gaps

In [3]:
# Load the five pre-generated Adult train/test splits and collect, per split,
# the feature frame, the 'income' label column and the binarized 'sex' column.
X_train_arr = []
X_test_arr = []
y_train_arr = []
y_test_arr = []
sensitive_features_train_arr = []
sensitive_features_test_arr = []

for i in range(5):
    # Split files are numbered from 1 on disk.
    X_train = pd.read_csv('./../../data/processed/adult/adult_train{}_X.csv'.format(i + 1))
    X_test = pd.read_csv('./../../data/processed/adult/adult_test{}_X.csv'.format(i + 1))
    y_train = pd.read_csv('./../../data/processed/adult/adult_train{}_y.csv'.format(i + 1))
    y_test = pd.read_csv('./../../data/processed/adult/adult_test{}_y.csv'.format(i + 1))

    y_train = y_train['income']
    y_test = y_test['income']

    # Map 'sex' to {0, 1} (<= 0 -> 0, > 0 -> 1) directly on the frame.
    # Later cells select groups with {'sex': 0} / {'sex': 1} on these frames,
    # so the column itself must be binarized. Assigning through a Series
    # taken from the frame only reaches the frame via implicit write-through,
    # which triggers SettingWithCopyWarning and is disabled under pandas
    # Copy-on-Write.
    for features in (X_train, X_test):
        sex = features['sex']
        features['sex'] = sex.mask(sex <= 0, 0).mask(sex > 0, 1)

    sensitive_features_train = X_train['sex'].reset_index(drop=True)
    sensitive_features_test = X_test['sex'].reset_index(drop=True)

    X_train_arr.append(X_train)
    X_test_arr.append(X_test)
    y_train_arr.append(y_train)
    y_test_arr.append(y_test)
    sensitive_features_train_arr.append(sensitive_features_train)
    sensitive_features_test_arr.append(sensitive_features_test)

In [4]:
# Work on the first split only. The label is attached to copies of the feature
# frames so the originals in X_*_arr stay label-free for model fitting.
y_train_copy = y_train_arr[0].copy()
y_test_copy = y_test_arr[0].copy()
X_train_copy = X_train_arr[0].assign(income=y_train_copy)
X_test_copy = X_test_arr[0].assign(income=y_test_copy)

sensitive_features_test = sensitive_features_test_arr[0]

# Group definitions on the binarized 'sex' column.
unprivileged_groups = [{'sex': 0}]
privileged_groups = [{'sex': 1}]

In [5]:
# Wrap the labelled frames as AIF360 datasets: 'income' is the label
# (1 = favorable, 0 = unfavorable) and 'sex' the protected attribute.
dataset_orig_train = BinaryLabelDataset(
    favorable_label=1,
    unfavorable_label=0,
    df=X_train_copy,
    label_names=['income'],
    protected_attribute_names=['sex'],
)
dataset_orig_test = BinaryLabelDataset(
    favorable_label=1,
    unfavorable_label=0,
    df=X_test_copy,
    label_names=['income'],
    protected_attribute_names=['sex'],
)

In [7]:
# Fit the reweighing pre-processor on the training set and obtain the
# transformed dataset whose instance_weights are used for model fitting below.
RW = Reweighing(
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)
dataset_transf_train = RW.fit_transform(dataset_orig_train)

In [8]:
### Testing
# The transformed weights must add up to the same total as the original ones.
total_weight_before = dataset_orig_train.instance_weights.sum()
total_weight_after = dataset_transf_train.instance_weights.sum()
assert np.abs(total_weight_after - total_weight_before) < 1e-6

In [9]:
# Side-by-side view of the uniform original weights and the reweighed ones,
# shown for a random sample of 15 training rows.
n_train_rows = X_train_copy.shape[0]
weights_preview = pd.DataFrame({
    'Sex': X_train_copy.sex,
    'Income': y_train_copy,
    'Original_weight': np.ones(shape=(n_train_rows,)),
    'new_weight': dataset_transf_train.instance_weights,
})
weights_preview.sample(15)

Unnamed: 0,Sex,Income,Original_weight,new_weight
1589,0.0,1,1.0,0.854641
353,0.0,0,1.0,1.209897
1028,0.0,1,1.0,0.854641
394,0.0,0,1.0,1.209897
219,0.0,1,1.0,0.854641
751,1.0,0,1.0,0.666474
1562,1.0,0,1.0,0.666474
1481,1.0,0,1.0,0.666474
603,1.0,0,1.0,0.666474
1352,1.0,0,1.0,0.666474


In [10]:
# Logistic regression trained on split 0 with the reweighed instance weights,
# followed by predictions on the matching test split.
reweighed_sample_weight = dataset_transf_train.instance_weights

lmod = LogisticRegression()
lmod.fit(
    X_train_arr[0],
    y_train_arr[0],
    sample_weight=reweighed_sample_weight,
)

y_pred = lmod.predict(X_test_arr[0])

In [11]:
# Test accuracy of the most recently fitted model. Arguments are passed by
# keyword in scikit-learn's (y_true, y_pred) order; the value is unchanged
# because accuracy is symmetric, but the call is now safe to copy for
# asymmetric metrics.
accuracy_score(y_true=y_test_arr[0], y_pred=y_pred)

0.8366336633663366

In [12]:
# Fairness rates and gaps for the current predictions on test split 0.
evaluate_fairness(
    y_true=y_test_arr[0],
    y_pred=y_pred,
    sensitive_features=sensitive_features_test,
)

(array([0., 1.]),
 {'dp': {0.0: 0.5469798657718121, 1.0: 0.3018867924528302},
  'eo_y0': {0.0: 0.1937984496124031, 1.0: 0.1},
  'eo_y1': {0.0: 0.8165680473372781, 1.0: 0.9230769230769231}},
 {'dp': 0.24509307331898195,
  'eo_y0': 0.0937984496124031,
  'eo_y1': 0.10650887573964507})

In [13]:
# Baseline for comparison: the same classifier fitted without sample weights.
# NOTE: this overwrites y_pred, so the evaluation cells that follow refer to
# the baseline model when the notebook is run top to bottom.
logreg = LogisticRegression().fit(X_train_arr[0], y_train_arr[0])
y_pred = logreg.predict(X_test_arr[0])

In [14]:
# Test accuracy of the most recently fitted model. Arguments are passed by
# keyword in scikit-learn's (y_true, y_pred) order; the value is unchanged
# because accuracy is symmetric, but the call is now safe to copy for
# asymmetric metrics.
accuracy_score(y_true=y_test_arr[0], y_pred=y_pred)

0.8316831683168316

In [15]:
# Fairness rates and gaps for the current predictions on test split 0.
evaluate_fairness(
    y_true=y_test_arr[0],
    y_pred=y_pred,
    sensitive_features=sensitive_features_test,
)

(array([0., 1.]),
 {'dp': {0.0: 0.587248322147651, 1.0: 0.24528301886792453},
  'eo_y0': {0.0: 0.24806201550387597, 1.0: 0.0625},
  'eo_y1': {0.0: 0.8461538461538461, 1.0: 0.8076923076923077}},
 {'dp': 0.34196530327972646,
  'eo_y0': 0.18556201550387597,
  'eo_y1': 0.038461538461538436})