In [24]:
from aif360.datasets import BinaryLabelDataset
from aif360.algorithms.preprocessing.reweighing import Reweighing

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np

In [25]:
X_train_arr = []
X_test_arr = []
y_train_arr = []
y_test_arr = []
sensitive_features_train_arr = []
sensitive_features_test_arr = []

for i in range(5):
    X_train = pd.read_csv('./../../data/processed/adult/adult_train{}_X.csv'.format(i + 1))
    X_test = pd.read_csv('./../../data/processed/adult/adult_test{}_X.csv'.format(i + 1))
    y_train = pd.read_csv('./../../data/processed/adult/adult_train{}_y.csv'.format(i + 1))
    y_test = pd.read_csv('./../../data/processed/adult/adult_test{}_y.csv'.format(i + 1))

    y_train = y_train['income']
    y_test = y_test['income']
    
    sensitive_features_train = X_train['sex']
    sensitive_features_test = X_test['sex']

    sensitive_features_train[sensitive_features_train <= 0] = 0
    sensitive_features_train[sensitive_features_train > 0] = 1
    sensitive_features_train = sensitive_features_train.reset_index(drop=True)
    
    sensitive_features_test[sensitive_features_test <= 0] = 0
    sensitive_features_test[sensitive_features_test > 0] = 1
    sensitive_features_test = sensitive_features_test.reset_index(drop=True)
    
    X_train_arr.append(X_train)
    X_test_arr.append(X_test)
    y_train_arr.append(y_train)
    y_test_arr.append(y_test)
    sensitive_features_train_arr.append(sensitive_features_train)
    sensitive_features_test_arr.append(sensitive_features_test)

In [26]:
X_train = X_train_arr[0]
y_train = y_train_arr[0]
X_test = X_test_arr[0]
y_test = y_test_arr[0]
sensitive_features_test = sensitive_features_test_arr[0]

X_train['income'] = y_train
X_test['income'] = y_test

privileged_groups = [{'sex': 1}]
unprivileged_groups = [{'sex': 0}]

In [27]:
# Create Dataset object
binary = BinaryLabelDataset(1, 0, df=X_train, label_names=['income'], protected_attribute_names=['sex'])
binary_test = BinaryLabelDataset(1, 0, df=X_test, label_names=['income'], protected_attribute_names=['sex'])

In [28]:
RW = Reweighing(unprivileged_groups=unprivileged_groups,
               privileged_groups=privileged_groups)
RW.fit(binary)
dataset_transf_train = RW.transform(binary)

In [29]:
scale_transf = StandardScaler()
X_train = scale_transf.fit_transform(dataset_transf_train.features)
y_train = dataset_transf_train.labels.ravel()

In [30]:
lmod = LogisticRegression()
lmod.fit(X_train, y_train,
        sample_weight=dataset_transf_train.instance_weights)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [31]:
dataset_transf_test_pred = binary_test.copy(deepcopy=True)
X_test = scale_transf.fit_transform(dataset_transf_test_pred.features)

In [34]:
accuracy_score(lmod.predict(X_test), y_test_arr[0])

0.8341584158415841

In [35]:
%matplotlib inline

import matplotlib.pyplot as plt
import matplotlib.cm as cm

def evaluate_fairness(y_true, y_pred, sensitive_features):
    """
    Evaluates fairness of the final majority vote classifier over T_inner hypotheses
    on the test set.
    #NOTE: defined in the meta_algo file, but we chose:
    a0 := African-American (COMPAS), Female (Adult)
    a1 := Caucasian (COMPAS), Male (Adult)

    :return: list. subgroups in sensitive_features.
    :return: list, dict, dict. groups is a list of the sensitive features in the dataset. 
    group_metrics is a dictionary containing dictionaries that have Delta_dp, Delta_eoy0, 
    and Delta_eoy1 for each group. gaps is a dictionary that contains the fairness gap
    for dp, eo_y0 and eo_y1.
    """
    groups = np.unique(sensitive_features.values)
    pos_count = {}
    dp_pct = {}
    eo_y0_pct = {}
    eo_y1_pct = {}

    for index, group in enumerate(groups):
        # Demographic Parity
        indices = {}
        indices[group] = sensitive_features.index[sensitive_features == group]
        dp_pct[group] = sum(y_pred[indices[group]])/len(indices[group])

        # Equalized Odds
        y1_indices = {}
        y0_indices = {}
        y1_indices[group] = sensitive_features.index[(sensitive_features == group) & (y_true == 1)]
        y0_indices[group] = sensitive_features.index[(sensitive_features == group) & (y_true == 0)]
        eo_y0_pct[group] = sum(y_pred[y0_indices[group]])/len(y0_indices[group])   
        eo_y1_pct[group] = sum(y_pred[y1_indices[group]])/len(y1_indices[group])

    gaps = {}
    group_metrics = {} # a dictionary of dictionaries

    gaps['dp'] = abs(dp_pct[groups[0]] - dp_pct[groups[1]])
    gaps['eo_y0'] = abs(eo_y0_pct[groups[0]] - eo_y0_pct[groups[1]])
    gaps['eo_y1'] = abs(eo_y1_pct[groups[0]] - eo_y1_pct[groups[1]])
    group_metrics['dp'] = dp_pct
    group_metrics['eo_y0'] = eo_y0_pct
    group_metrics['eo_y1'] = eo_y1_pct

    return groups, group_metrics, gaps

In [36]:
evaluate_fairness(lmod.predict(X_test), y_test_arr[0], sensitive_features_test)

(array([0., 1.]),
 {'dp': {0.0: 0.5671140939597316, 1.0: 0.24528301886792453},
  'eo_y0': {0.0: 0.26174496644295303, 1.0: 0.02666666666666667},
  'eo_y1': {0.0: 0.87248322147651, 1.0: 0.7741935483870968}},
 {'dp': 0.32183107509180703,
  'eo_y0': 0.23507829977628636,
  'eo_y1': 0.09828967308941328})

In [None]:
logreg = LogisticRegression()
logreg.fit( X_train_arr[0], y_train_arr[0])

evaluate_fairness(logreg.predict(X_test_arr[0]), y_test_arr[0], sensitive_features_test_arr[0])

In [None]:
accuracy_score(logreg.predict(X_test_arr[0]), y_test)

In [None]:
logreg.predict(X_test_arr[0])

In [None]:
y_test_arr[0]