In this notebook, we find the baseline fairness using each of the three fairness algorithms we would like to compare: Reweighing (Preprocessing), Reductions Approach to Fair Classficiation (Inprocessing), and Equality of Opportunity in Supervised Learning (Postprocessing).

In [28]:
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import numpy as np

In [29]:
%matplotlib inline

import matplotlib.pyplot as plt
import matplotlib.cm as cm

def evaluate_fairness(y_true, y_pred, sensitive_features):
    """
    Evaluates fairness of the final majority vote classifier over T_inner hypotheses
    on the test set.
    #NOTE: defined in the meta_algo file, but we chose:
    a0 := African-American (COMPAS), Female (Adult)
    a1 := Caucasian (COMPAS), Male (Adult)

    :return: list. subgroups in sensitive_features.
    :return: list, dict, dict. groups is a list of the sensitive features in the dataset. 
    group_metrics is a dictionary containing dictionaries that have Delta_dp, Delta_eoy0, 
    and Delta_eoy1 for each group. gaps is a dictionary that contains the fairness gap
    for dp, eo_y0 and eo_y1.
    """
    groups = np.unique(sensitive_features.values)
    pos_count = {}
    dp_pct = {}
    eo_y0_pct = {}
    eo_y1_pct = {}

    for index, group in enumerate(groups):
        # Demographic Parity
        indices = {}
        indices[group] = sensitive_features.index[sensitive_features == group]
        dp_pct[group] = sum(y_pred[indices[group]])/len(indices[group])

        # Equalized Odds
        y1_indices = {}
        y0_indices = {}
        y1_indices[group] = sensitive_features.index[(sensitive_features == group) & (y_true == 1)]
        y0_indices[group] = sensitive_features.index[(sensitive_features == group) & (y_true == 0)]
        eo_y0_pct[group] = sum(y_pred[y0_indices[group]])/len(y0_indices[group])   
        eo_y1_pct[group] = sum(y_pred[y1_indices[group]])/len(y1_indices[group])

    gaps = {}
    group_metrics = {} # a dictionary of dictionaries

    gaps['dp'] = abs(dp_pct[groups[0]] - dp_pct[groups[1]])
    gaps['eo_y0'] = abs(eo_y0_pct[groups[0]] - eo_y0_pct[groups[1]])
    gaps['eo_y1'] = abs(eo_y1_pct[groups[0]] - eo_y1_pct[groups[1]])
    group_metrics['dp'] = dp_pct
    group_metrics['eo_y0'] = eo_y0_pct
    group_metrics['eo_y1'] = eo_y1_pct

    return groups, group_metrics, gaps

# Compas

In [30]:
X_train = pd.read_csv('./../../data/processed/compas/compas_train1_X.csv')
X_test = pd.read_csv('./../../data/processed/compas/compas_test1_X.csv')
y_train = pd.read_csv('./../../data/processed/compas/compas_train1_y.csv')
y_train = y_train['two_year_recid']
y_test = pd.read_csv('./../../data/processed/compas/compas_test1_y.csv')
y_test = y_test['two_year_recid']

sensitive_features_train = X_train['race']
sensitive_features_test = X_test['race']

## Unfair

In [31]:
logreg = LogisticRegression()
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)

In [32]:
print("COMPAS Test Accuracy (Unfair): {}".format(accuracy_score(y_pred, y_test)))

COMPAS Test Accuracy (Unfair): 0.6926315789473684


In [33]:
groups, group_metrics, gaps = evaluate_fairness(y_test, y_pred, sensitive_features_test)

In [34]:
for group in groups:
    print("P[h(X) = 1 | A = {}] = {}".format(group, group_metrics['dp'][group]))
print("Delta_dp = {}".format(gaps['dp']))
for group in groups:
    print("P[h(X) = 1 | A = {}, Y = 1] = {}".format(group, group_metrics['eo_y1'][group]))
    print("P[h(X) = 1 | A = {}, Y = 0] = {}".format(group, group_metrics['eo_y0'][group]))
print("Delta_eo1 = {}".format(gaps['eo_y1']))
print("Delta_eo0 = {}".format(gaps['eo_y0']))

P[h(X) = 1 | A = 0.0] = 0.4963768115942029
P[h(X) = 1 | A = 1.0] = 0.24623115577889448
Delta_dp = 0.2501456558153084
P[h(X) = 1 | A = 0.0, Y = 1] = 0.6948529411764706
P[h(X) = 1 | A = 0.0, Y = 0] = 0.30357142857142855
P[h(X) = 1 | A = 1.0, Y = 1] = 0.41216216216216217
P[h(X) = 1 | A = 1.0, Y = 0] = 0.148
Delta_eo1 = 0.2826907790143084
Delta_eo0 = 0.15557142857142855


## Preprocessing

## Inprocessing

In [35]:
from fairlearn.reductions import ExponentiatedGradient, DemographicParity, EqualizedOdds

expgrad_dp = ExponentiatedGradient(
    LogisticRegression(solver='liblinear', fit_intercept=True, class_weight='balanced'),
    constraints=DemographicParity(),
    eps=0.05,
    nu=1e-6)

expgrad_eo = ExponentiatedGradient(
    LogisticRegression(solver='liblinear', fit_intercept=True, class_weight='balanced'),
    constraints=EqualizedOdds(),
    eps=0.05,
    nu=1e-6)

expgrad_dp.fit(X_train, y_train, sensitive_features=sensitive_features_train)
expgrad_eo.fit(X_train, y_train, sensitive_features=sensitive_features_train)
y_pred_dp = expgrad_dp.predict(X_test)
y_pred_eo = expgrad_eo.predict(X_test)

In [36]:
print("COMPAS Test Accuracy (Inprocessing, DP): {}".format(accuracy_score(y_pred_dp, y_test)))
print("COMPAS Test Accuracy (Inprocessing, EO): {}".format(accuracy_score(y_pred_eo, y_test)))

COMPAS Test Accuracy (Inprocessing, DP): 0.68
COMPAS Test Accuracy (Inprocessing, EO): 0.6757894736842105


In [37]:
groups, group_metrics_dp, gaps_dp = evaluate_fairness(y_test, y_pred_dp, sensitive_features_test)
groups, group_metrics_eo, gaps_eo = evaluate_fairness(y_test, y_pred_eo, sensitive_features_test)

In [38]:
for group in groups:
    print("P[h(X) = 1 | A = {}] = {}".format(group, group_metrics_dp['dp'][group]))
print("Delta_dp = {}".format(gaps_dp['dp']))
for group in groups:
    print("P[h(X) = 1 | A = {}, Y = 1] = {}".format(group, group_metrics_eo['eo_y1'][group]))
    print("P[h(X) = 1 | A = {}, Y = 0] = {}".format(group, group_metrics_eo['eo_y0'][group]))
print("Delta_eo1 = {}".format(gaps_eo['eo_y1']))
print("Delta_eo0 = {}".format(gaps_eo['eo_y0']))

P[h(X) = 1 | A = 0.0] = 0.49818840579710144
P[h(X) = 1 | A = 1.0] = 0.39949748743718594
Delta_dp = 0.0986909183599155
P[h(X) = 1 | A = 0.0, Y = 1] = 0.6948529411764706
P[h(X) = 1 | A = 0.0, Y = 0] = 0.32142857142857145
P[h(X) = 1 | A = 1.0, Y = 1] = 0.5743243243243243
P[h(X) = 1 | A = 1.0, Y = 0] = 0.288
Delta_eo1 = 0.12052861685214622
Delta_eo0 = 0.033428571428571474


## Postprocessing

In [39]:
from fairlearn.postprocessing import ThresholdOptimizer
from sklearn import clone
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_is_fitted
from sklearn.exceptions import NotFittedError

class LogisticRegressionAsRegression(BaseEstimator, ClassifierMixin):
    def __init__(self, logistic_regression_estimator):
        self.logistic_regression_estimator = logistic_regression_estimator

    def fit(self, X, y):
        try:
            check_is_fitted(self.logistic_regression_estimator)
            self.logistic_regression_estimator_ = self.logistic_regression_estimator
        except NotFittedError:
            self.logistic_regression_estimator_ = clone(
                self.logistic_regression_estimator
            ).fit(X, y)
        return self

    def predict(self, X):
        # use predict_proba to get real values instead of 0/1, select only prob for 1
        scores = self.logistic_regression_estimator_.predict_proba(X)[:, 1]
        return scores

estimator_wrapper = LogisticRegressionAsRegression(logreg).fit(X_train, y_train)
postprocessed_predictor_dp = ThresholdOptimizer(estimator=estimator_wrapper, constraints="demographic_parity", prefit=True)
postprocessed_predictor_eo = ThresholdOptimizer(estimator=estimator_wrapper, constraints="equalized_odds", prefit=True)

postprocessed_predictor_dp.fit(X_train, y_train, sensitive_features=sensitive_features_train)
postprocessed_predictor_eo.fit(X_train, y_train, sensitive_features=sensitive_features_train)

y_pred_dp = postprocessed_predictor_dp.predict(X_test, sensitive_features=sensitive_features_test)
y_pred_eo = postprocessed_predictor_eo.predict(X_test, sensitive_features=sensitive_features_test)

In [40]:
print("COMPAS Test Accuracy (Postprocessing, DP): {}".format(accuracy_score(y_pred_dp, y_test)))
print("COMPAS Test Accuracy (Postprocessing, EO): {}".format(accuracy_score(y_pred_eo, y_test)))

COMPAS Test Accuracy (Postprocessing, DP): 0.6736842105263158
COMPAS Test Accuracy (Postprocessing, EO): 0.6357894736842106


In [41]:
groups, group_metrics_dp, gaps_dp = evaluate_fairness(y_test, y_pred_dp, sensitive_features_test)
groups, group_metrics_eo, gaps_eo = evaluate_fairness(y_test, y_pred_eo, sensitive_features_test)

In [42]:
for group in groups:
    print("P[h(X) = 1 | A = {}] = {}".format(group, group_metrics_dp['dp'][group]))
print("Delta_dp = {}".format(gaps_dp['dp']))
for group in groups:
    print("P[h(X) = 1 | A = {}, Y = 1] = {}".format(group, group_metrics_eo['eo_y1'][group]))
    print("P[h(X) = 1 | A = {}, Y = 0] = {}".format(group, group_metrics_eo['eo_y0'][group]))
print("Delta_eo1 = {}".format(gaps_eo['eo_y1']))
print("Delta_eo0 = {}".format(gaps_eo['eo_y0']))

P[h(X) = 1 | A = 0.0] = 0.4855072463768116
P[h(X) = 1 | A = 1.0] = 0.46733668341708545
Delta_dp = 0.018170562959726133
P[h(X) = 1 | A = 0.0, Y = 1] = 0.7205882352941176
P[h(X) = 1 | A = 0.0, Y = 0] = 0.4107142857142857
P[h(X) = 1 | A = 1.0, Y = 1] = 0.7094594594594594
P[h(X) = 1 | A = 1.0, Y = 0] = 0.448
Delta_eo1 = 0.011128775834658211
Delta_eo0 = 0.03728571428571431


# Adult

In [43]:
X_train = pd.read_csv('./../../data/processed/adult/adult_train1_X.csv')
X_test = pd.read_csv('./../../data/processed/adult/adult_test1_X.csv')
y_train = pd.read_csv('./../../data/processed/adult/adult_train1_y.csv')
y_train = y_train['income']
y_test = pd.read_csv('./../../data/processed/adult/adult_test1_y.csv')
y_test = y_test['income']

In [44]:
sensitive_features_train = X_train['sex']
sensitive_features_test = X_test['sex']

sensitive_features_train[sensitive_features_train < 0] = 0
sensitive_features_train[sensitive_features_train > 0] = 1
sensitive_features_train = sensitive_features_train.reset_index(drop=True)

sensitive_features_test[sensitive_features_test < 0] = 0
sensitive_features_test[sensitive_features_test > 0] = 1
sensitive_features_test = sensitive_features_test.reset_index(drop=True)

## Unfair

In [45]:
logreg = LogisticRegression()
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)

In [46]:
print("Adult Test Accuracy (Unfair): {}".format(accuracy_score(y_pred, y_test)))

Adult Test Accuracy (Unfair): 0.8316831683168316


In [47]:
groups, group_metrics, gaps = evaluate_fairness(y_test, y_pred, sensitive_features_test)

In [48]:
for group in groups:
    print("P[h(X) = 1 | A = {}] = {}".format(group, group_metrics['dp'][group]))
print("Delta_dp = {}".format(gaps['dp']))
for group in groups:
    print("P[h(X) = 1 | A = {}, Y = 1] = {}".format(group, group_metrics['eo_y1'][group]))
    print("P[h(X) = 1 | A = {}, Y = 0] = {}".format(group, group_metrics['eo_y0'][group]))
print("Delta_eo1 = {}".format(gaps['eo_y1']))
print("Delta_eo0 = {}".format(gaps['eo_y0']))

P[h(X) = 1 | A = 0.0] = 0.587248322147651
P[h(X) = 1 | A = 1.0] = 0.24528301886792453
Delta_dp = 0.34196530327972646
P[h(X) = 1 | A = 0.0, Y = 1] = 0.8461538461538461
P[h(X) = 1 | A = 0.0, Y = 0] = 0.24806201550387597
P[h(X) = 1 | A = 1.0, Y = 1] = 0.8076923076923077
P[h(X) = 1 | A = 1.0, Y = 0] = 0.0625
Delta_eo1 = 0.038461538461538436
Delta_eo0 = 0.18556201550387597


## Preprocessing

## Inprocessing

In [49]:
from fairlearn.reductions import ExponentiatedGradient, DemographicParity, EqualizedOdds

expgrad_dp = ExponentiatedGradient(
    LogisticRegression(solver='liblinear', fit_intercept=True, class_weight='balanced'),
    constraints=DemographicParity(),
    eps=0.05,
    nu=1e-6)

expgrad_eo = ExponentiatedGradient(
    LogisticRegression(solver='liblinear', fit_intercept=True, class_weight='balanced'),
    constraints=EqualizedOdds(),
    eps=0.05,
    nu=1e-6)

expgrad_dp.fit(X_train, y_train, sensitive_features=sensitive_features_train)
expgrad_eo.fit(X_train, y_train, sensitive_features=sensitive_features_train)
y_pred_dp = expgrad_dp.predict(X_test)
y_pred_eo = expgrad_eo.predict(X_test)

In [50]:
print("Adult Test Accuracy (Inprocessing, DP): {}".format(accuracy_score(y_pred_dp, y_test)))
print("Adult Test Accuracy (Inprocessing, EO): {}".format(accuracy_score(y_pred_eo, y_test)))

Adult Test Accuracy (Inprocessing, DP): 0.7995049504950495
Adult Test Accuracy (Inprocessing, EO): 0.8217821782178217


In [51]:
groups, group_metrics_dp, gaps_dp = evaluate_fairness(y_test, y_pred_dp, sensitive_features_test)
groups, group_metrics_eo, gaps_eo = evaluate_fairness(y_test, y_pred_eo, sensitive_features_test)

In [52]:
for group in groups:
    print("P[h(X) = 1 | A = {}] = {}".format(group, group_metrics_dp['dp'][group]))
print("Delta_dp = {}".format(gaps_dp['dp']))
for group in groups:
    print("P[h(X) = 1 | A = {}, Y = 1] = {}".format(group, group_metrics_eo['eo_y1'][group]))
    print("P[h(X) = 1 | A = {}, Y = 0] = {}".format(group, group_metrics_eo['eo_y0'][group]))
print("Delta_eo1 = {}".format(gaps_eo['eo_y1']))
print("Delta_eo0 = {}".format(gaps_eo['eo_y0']))

P[h(X) = 1 | A = 0.0] = 0.5100671140939598
P[h(X) = 1 | A = 1.0] = 0.4528301886792453
Delta_dp = 0.05723692541471448
P[h(X) = 1 | A = 0.0, Y = 1] = 0.8106508875739645
P[h(X) = 1 | A = 0.0, Y = 0] = 0.20155038759689922
P[h(X) = 1 | A = 1.0, Y = 1] = 0.9615384615384616
P[h(X) = 1 | A = 1.0, Y = 0] = 0.1625
Delta_eo1 = 0.15088757396449703
Delta_eo0 = 0.039050387596899216


## Postprocessing

In [53]:
from fairlearn.postprocessing import ThresholdOptimizer
from sklearn import clone
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_is_fitted
from sklearn.exceptions import NotFittedError

class LogisticRegressionAsRegression(BaseEstimator, ClassifierMixin):
    def __init__(self, logistic_regression_estimator):
        self.logistic_regression_estimator = logistic_regression_estimator

    def fit(self, X, y):
        try:
            check_is_fitted(self.logistic_regression_estimator)
            self.logistic_regression_estimator_ = self.logistic_regression_estimator
        except NotFittedError:
            self.logistic_regression_estimator_ = clone(
                self.logistic_regression_estimator
            ).fit(X, y)
        return self

    def predict(self, X):
        # use predict_proba to get real values instead of 0/1, select only prob for 1
        scores = self.logistic_regression_estimator_.predict_proba(X)[:, 1]
        return scores

estimator_wrapper = LogisticRegressionAsRegression(logreg).fit(X_train, y_train)
postprocessed_predictor_dp = ThresholdOptimizer(estimator=estimator_wrapper, constraints="demographic_parity", prefit=True)
postprocessed_predictor_eo = ThresholdOptimizer(estimator=estimator_wrapper, constraints="equalized_odds", prefit=True)

postprocessed_predictor_dp.fit(X_train, y_train, sensitive_features=sensitive_features_train)
postprocessed_predictor_eo.fit(X_train, y_train, sensitive_features=sensitive_features_train)

y_pred_dp = postprocessed_predictor_dp.predict(X_test, sensitive_features=sensitive_features_test)
y_pred_eo = postprocessed_predictor_eo.predict(X_test, sensitive_features=sensitive_features_test)

In [54]:
print("Adult Test Accuracy (Postprocessing, DP): {}".format(accuracy_score(y_pred_dp, y_test)))
print("Adult Test Accuracy (Postprocessing, EO): {}".format(accuracy_score(y_pred_eo, y_test)))

Adult Test Accuracy (Postprocessing, DP): 0.7871287128712872
Adult Test Accuracy (Postprocessing, EO): 0.8168316831683168


In [55]:
groups, group_metrics_dp, gaps_dp = evaluate_fairness(y_test, y_pred_dp, sensitive_features_test)
groups, group_metrics_eo, gaps_eo = evaluate_fairness(y_test, y_pred_eo, sensitive_features_test)

In [56]:
for group in groups:
    print("P[h(X) = 1 | A = {}] = {}".format(group, group_metrics_dp['dp'][group]))
print("Delta_dp = {}".format(gaps_dp['dp']))
for group in groups:
    print("P[h(X) = 1 | A = {}, Y = 1] = {}".format(group, group_metrics_eo['eo_y1'][group]))
    print("P[h(X) = 1 | A = {}, Y = 0] = {}".format(group, group_metrics_eo['eo_y0'][group]))
print("Delta_eo1 = {}".format(gaps_eo['eo_y1']))
print("Delta_eo0 = {}".format(gaps_eo['eo_y0']))

P[h(X) = 1 | A = 0.0] = 0.5100671140939598
P[h(X) = 1 | A = 1.0] = 0.4811320754716981
Delta_dp = 0.028935038622261655
P[h(X) = 1 | A = 0.0, Y = 1] = 0.834319526627219
P[h(X) = 1 | A = 0.0, Y = 0] = 0.20930232558139536
P[h(X) = 1 | A = 1.0, Y = 1] = 0.9230769230769231
P[h(X) = 1 | A = 1.0, Y = 0] = 0.2125
Delta_eo1 = 0.08875739644970415
Delta_eo0 = 0.0031976744186046346


# Lawschool

In [57]:
X_train = pd.read_csv('./../../data/processed/lawschool/lawschool_train1_X.csv')
X_test = pd.read_csv('./../../data/processed/lawschool/lawschool_test1_X.csv')
y_train = pd.read_csv('./../../data/processed/lawschool/lawschool_train1_y.csv')
y_train = y_train['bar1']
y_test = pd.read_csv('./../../data/processed/lawschool/lawschool_test1_y.csv')
y_test = y_test['bar1']

sensitive_features_train = X_train['race7']
sensitive_features_test = X_test['race7']

sensitive_features_train[sensitive_features_train < 0] = 0
sensitive_features_train[sensitive_features_train > 0] = 1
sensitive_features_train = sensitive_features_train.reset_index(drop=True)

sensitive_features_test[sensitive_features_test < 0] = 0
sensitive_features_test[sensitive_features_test > 0] = 1
sensitive_features_test = sensitive_features_test.reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == '':
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()


## Unfair

In [58]:
logreg = LogisticRegression()
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [59]:
print("Lawschool Test Accuracy (Unfair): {}".format(accuracy_score(y_pred, y_test)))

Lawschool Test Accuracy (Unfair): 0.8136986301369863


In [60]:
groups, group_metrics, gaps = evaluate_fairness(y_test, y_pred, sensitive_features_test)

In [61]:
for group in groups:
    print("P[h(X) = 1 | A = {}] = {}".format(group, group_metrics['dp'][group]))
print("Delta_dp = {}".format(gaps['dp']))
for group in groups:
    print("P[h(X) = 1 | A = {}, Y = 1] = {}".format(group, group_metrics['eo_y1'][group]))
    print("P[h(X) = 1 | A = {}, Y = 0] = {}".format(group, group_metrics['eo_y0'][group]))
print("Delta_eo1 = {}".format(gaps['eo_y1']))
print("Delta_eo0 = {}".format(gaps['eo_y0']))

P[h(X) = 1 | A = 0] = 0.22727272727272727
P[h(X) = 1 | A = 1] = 0.6101083032490975
Delta_dp = 0.3828355759763702
P[h(X) = 1 | A = 0, Y = 1] = 0.6190476190476191
P[h(X) = 1 | A = 0, Y = 0] = 0.1044776119402985
P[h(X) = 1 | A = 1, Y = 1] = 0.8670886075949367
P[h(X) = 1 | A = 1, Y = 0] = 0.2689075630252101
Delta_eo1 = 0.2480409885473176
Delta_eo0 = 0.16442995108491157


## Preprocessing

## Inprocessing

In [62]:
from fairlearn.reductions import ExponentiatedGradient, DemographicParity, EqualizedOdds

expgrad_dp = ExponentiatedGradient(
    LogisticRegression(solver='liblinear', fit_intercept=True, class_weight='balanced'),
    constraints=DemographicParity(),
    eps=0.05,
    nu=1e-6)

expgrad_eo = ExponentiatedGradient(
    LogisticRegression(solver='liblinear', fit_intercept=True, class_weight='balanced'),
    constraints=EqualizedOdds(),
    eps=0.05,
    nu=1e-6)

expgrad_dp.fit(X_train, y_train, sensitive_features=sensitive_features_train)
expgrad_eo.fit(X_train, y_train, sensitive_features=sensitive_features_train)
y_pred_dp = expgrad_dp.predict(X_test)
y_pred_eo = expgrad_eo.predict(X_test)

In [63]:
print("Lawschool Test Accuracy (Inprocessing, DP): {}".format(accuracy_score(y_pred_dp, y_test)))
print("Lawschool Test Accuracy (Inprocessing, EO): {}".format(accuracy_score(y_pred_eo, y_test)))

Lawschool Test Accuracy (Inprocessing, DP): 0.7506849315068493
Lawschool Test Accuracy (Inprocessing, EO): 0.7671232876712328


In [64]:
groups, group_metrics_dp, gaps_dp = evaluate_fairness(y_test, y_pred_dp, sensitive_features_test)
groups, group_metrics_eo, gaps_eo = evaluate_fairness(y_test, y_pred_eo, sensitive_features_test)

In [65]:
for group in groups:
    print("P[h(X) = 1 | A = {}] = {}".format(group, group_metrics_dp['dp'][group]))
print("Delta_dp = {}".format(gaps_dp['dp']))
for group in groups:
    print("P[h(X) = 1 | A = {}, Y = 1] = {}".format(group, group_metrics_eo['eo_y1'][group]))
    print("P[h(X) = 1 | A = {}, Y = 0] = {}".format(group, group_metrics_eo['eo_y0'][group]))
print("Delta_eo1 = {}".format(gaps_eo['eo_y1']))
print("Delta_eo0 = {}".format(gaps_eo['eo_y0']))

P[h(X) = 1 | A = 0] = 0.5340909090909091
P[h(X) = 1 | A = 1] = 0.5306859205776173
Delta_dp = 0.00340498851329174
P[h(X) = 1 | A = 0, Y = 1] = 0.9047619047619048
P[h(X) = 1 | A = 0, Y = 0] = 0.34328358208955223
P[h(X) = 1 | A = 1, Y = 1] = 0.7721518987341772
P[h(X) = 1 | A = 1, Y = 0] = 0.20168067226890757
Delta_eo1 = 0.13261000602772754
Delta_eo0 = 0.14160290982064466


## Postprocessing

In [66]:
from fairlearn.postprocessing import ThresholdOptimizer
from sklearn import clone
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_is_fitted
from sklearn.exceptions import NotFittedError

class LogisticRegressionAsRegression(BaseEstimator, ClassifierMixin):
    def __init__(self, logistic_regression_estimator):
        self.logistic_regression_estimator = logistic_regression_estimator

    def fit(self, X, y):
        try:
            check_is_fitted(self.logistic_regression_estimator)
            self.logistic_regression_estimator_ = self.logistic_regression_estimator
        except NotFittedError:
            self.logistic_regression_estimator_ = clone(
                self.logistic_regression_estimator
            ).fit(X, y)
        return self

    def predict(self, X):
        # use predict_proba to get real values instead of 0/1, select only prob for 1
        scores = self.logistic_regression_estimator_.predict_proba(X)[:, 1]
        return scores

estimator_wrapper = LogisticRegressionAsRegression(logreg).fit(X_train, y_train)
postprocessed_predictor_dp = ThresholdOptimizer(estimator=estimator_wrapper, constraints="demographic_parity", prefit=True)
postprocessed_predictor_eo = ThresholdOptimizer(estimator=estimator_wrapper, constraints="equalized_odds", prefit=True)

postprocessed_predictor_dp.fit(X_train, y_train, sensitive_features=sensitive_features_train)
postprocessed_predictor_eo.fit(X_train, y_train, sensitive_features=sensitive_features_train)

y_pred_dp = postprocessed_predictor_dp.predict(X_test, sensitive_features=sensitive_features_test)
y_pred_eo = postprocessed_predictor_eo.predict(X_test, sensitive_features=sensitive_features_test)

In [67]:
print("Lawschool Test Accuracy (Postprocessing, DP): {}".format(accuracy_score(y_pred_dp, y_test)))
print("Lawschool Test Accuracy (Postprocessing, EO): {}".format(accuracy_score(y_pred_eo, y_test)))

Lawschool Test Accuracy (Postprocessing, DP): 0.7424657534246575
Lawschool Test Accuracy (Postprocessing, EO): 0.7095890410958904


In [68]:
groups, group_metrics_dp, gaps_dp = evaluate_fairness(y_test, y_pred_dp, sensitive_features_test)
groups, group_metrics_eo, gaps_eo = evaluate_fairness(y_test, y_pred_eo, sensitive_features_test)

In [69]:
for group in groups:
    print("P[h(X) = 1 | A = {}] = {}".format(group, group_metrics_dp['dp'][group]))
print("Delta_dp = {}".format(gaps_dp['dp']))
for group in groups:
    print("P[h(X) = 1 | A = {}, Y = 1] = {}".format(group, group_metrics_eo['eo_y1'][group]))
    print("P[h(X) = 1 | A = {}, Y = 0] = {}".format(group, group_metrics_eo['eo_y0'][group]))
print("Delta_eo1 = {}".format(gaps_eo['eo_y1']))
print("Delta_eo0 = {}".format(gaps_eo['eo_y0']))

P[h(X) = 1 | A = 0] = 0.6136363636363636
P[h(X) = 1 | A = 1] = 0.5379061371841155
Delta_dp = 0.07573022645224814
P[h(X) = 1 | A = 0, Y = 1] = 0.7619047619047619
P[h(X) = 1 | A = 0, Y = 0] = 0.2537313432835821
P[h(X) = 1 | A = 1, Y = 1] = 0.6455696202531646
P[h(X) = 1 | A = 1, Y = 0] = 0.23529411764705882
Delta_eo1 = 0.1163351416515973
Delta_eo0 = 0.01843722563652328


# Communities

In [70]:
X_train = pd.read_csv('./../../data/processed/communities/communities_train1_X.csv')
X_test = pd.read_csv('./../../data/processed/communities/communities_test1_X.csv')
y_train = pd.read_csv('./../../data/processed/communities/communities_train1_y.csv')
y_train = y_train['ViolentCrimesPerPop']
y_test = pd.read_csv('./../../data/processed/communities/communities_test1_y.csv')
y_test = y_test['ViolentCrimesPerPop']

sensitive_features_train = X_train['majority_white']
sensitive_features_test = X_test['majority_white']
sensitive_features_train[sensitive_features_train < 0] = 0
sensitive_features_train[sensitive_features_train > 0] = 1
sensitive_features_train = sensitive_features_train.reset_index(drop=True)

sensitive_features_test[sensitive_features_test < 0] = 0
sensitive_features_test[sensitive_features_test > 0] = 1
sensitive_features_test = sensitive_features_test.reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app


## Unfair

In [71]:
logreg = LogisticRegression()
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [72]:
print("Communities Test Accuracy (Unfair): {}".format(accuracy_score(y_pred, y_test)))

Communities Test Accuracy (Unfair): 0.8646616541353384


In [73]:
groups, group_metrics, gaps = evaluate_fairness(y_test, y_pred, sensitive_features_test)
for group in groups:
    print("P[h(X) = 1 | A = {}] = {}".format(group, group_metrics['dp'][group]))
print("Delta_dp = {}".format(gaps['dp']))
for group in groups:
    print("P[h(X) = 1 | A = {}, Y = 1] = {}".format(group, group_metrics['eo_y1'][group]))
    print("P[h(X) = 1 | A = {}, Y = 0] = {}".format(group, group_metrics['eo_y0'][group]))
print("Delta_eo1 = {}".format(gaps['eo_y1']))
print("Delta_eo0 = {}".format(gaps['eo_y0']))

P[h(X) = 1 | A = 0] = 0.6379310344827587
P[h(X) = 1 | A = 1] = 0.09187279151943463
Delta_dp = 0.5460582429633241
P[h(X) = 1 | A = 0, Y = 1] = 0.8026315789473685
P[h(X) = 1 | A = 0, Y = 0] = 0.325
P[h(X) = 1 | A = 1, Y = 1] = 0.5
P[h(X) = 1 | A = 1, Y = 0] = 0.024691358024691357
Delta_eo1 = 0.3026315789473685
Delta_eo0 = 0.30030864197530865


## Preprocessing

## Inprocessing

In [74]:
from fairlearn.reductions import ExponentiatedGradient, DemographicParity, EqualizedOdds

expgrad_dp = ExponentiatedGradient(
    LogisticRegression(solver='liblinear', fit_intercept=True, class_weight='balanced'),
    constraints=DemographicParity(),
    eps=0.05,
    nu=1e-6)

expgrad_eo = ExponentiatedGradient(
    LogisticRegression(solver='liblinear', fit_intercept=True, class_weight='balanced'),
    constraints=EqualizedOdds(),
    eps=0.05,
    nu=1e-6)

expgrad_dp.fit(X_train, y_train, sensitive_features=sensitive_features_train)
expgrad_eo.fit(X_train, y_train, sensitive_features=sensitive_features_train)
y_pred_dp = expgrad_dp.predict(X_test)
y_pred_eo = expgrad_eo.predict(X_test)

In [75]:
print("Communities Test Accuracy (Inprocessing, DP): {}".format(accuracy_score(y_pred_dp, y_test)))
print("Communities Test Accuracy (Inprocessing, EO): {}".format(accuracy_score(y_pred_eo, y_test)))

Communities Test Accuracy (Inprocessing, DP): 0.7368421052631579
Communities Test Accuracy (Inprocessing, EO): 0.8120300751879699


In [76]:
groups, group_metrics_dp, gaps_dp = evaluate_fairness(y_test, y_pred_dp, sensitive_features_test)
groups, group_metrics_eo, gaps_eo = evaluate_fairness(y_test, y_pred_eo, sensitive_features_test)
for group in groups:
    print("P[h(X) = 1 | A = {}] = {}".format(group, group_metrics_dp['dp'][group]))
print("Delta_dp = {}".format(gaps_dp['dp']))
for group in groups:
    print("P[h(X) = 1 | A = {}, Y = 1] = {}".format(group, group_metrics_eo['eo_y1'][group]))
    print("P[h(X) = 1 | A = {}, Y = 0] = {}".format(group, group_metrics_eo['eo_y0'][group]))
print("Delta_eo1 = {}".format(gaps_eo['eo_y1']))
print("Delta_eo0 = {}".format(gaps_eo['eo_y0']))

P[h(X) = 1 | A = 0] = 0.3879310344827586
P[h(X) = 1 | A = 1] = 0.3321554770318021
Delta_dp = 0.055775557450956526
P[h(X) = 1 | A = 0, Y = 1] = 0.75
P[h(X) = 1 | A = 0, Y = 0] = 0.275
P[h(X) = 1 | A = 1, Y = 1] = 0.8
P[h(X) = 1 | A = 1, Y = 0] = 0.1522633744855967
Delta_eo1 = 0.050000000000000044
Delta_eo0 = 0.12273662551440331


## Postprocessing

In [77]:
from fairlearn.postprocessing import ThresholdOptimizer
from sklearn import clone
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_is_fitted
from sklearn.exceptions import NotFittedError

class LogisticRegressionAsRegression(BaseEstimator, ClassifierMixin):
    def __init__(self, logistic_regression_estimator):
        self.logistic_regression_estimator = logistic_regression_estimator

    def fit(self, X, y):
        try:
            check_is_fitted(self.logistic_regression_estimator)
            self.logistic_regression_estimator_ = self.logistic_regression_estimator
        except NotFittedError:
            self.logistic_regression_estimator_ = clone(
                self.logistic_regression_estimator
            ).fit(X, y)
        return self

    def predict(self, X):
        # use predict_proba to get real values instead of 0/1, select only prob for 1
        scores = self.logistic_regression_estimator_.predict_proba(X)[:, 1]
        return scores

estimator_wrapper = LogisticRegressionAsRegression(logreg).fit(X_train, y_train)
postprocessed_predictor_dp = ThresholdOptimizer(estimator=estimator_wrapper, constraints="demographic_parity", prefit=True)
postprocessed_predictor_eo = ThresholdOptimizer(estimator=estimator_wrapper, constraints="equalized_odds", prefit=True)

postprocessed_predictor_dp.fit(X_train, y_train, sensitive_features=sensitive_features_train)
postprocessed_predictor_eo.fit(X_train, y_train, sensitive_features=sensitive_features_train)

y_pred_dp = postprocessed_predictor_dp.predict(X_test, sensitive_features=sensitive_features_test)
y_pred_eo = postprocessed_predictor_eo.predict(X_test, sensitive_features=sensitive_features_test)

In [78]:
print("Communities Test Accuracy (Postprocessing, DP): {}".format(accuracy_score(y_pred_dp, y_test)))
print("Communities Test Accuracy (Postprocessing, EO): {}".format(accuracy_score(y_pred_eo, y_test)))

Communities Test Accuracy (Postprocessing, DP): 0.7619047619047619
Communities Test Accuracy (Postprocessing, EO): 0.8270676691729323


In [79]:
groups, group_metrics_dp, gaps_dp = evaluate_fairness(y_test, y_pred_dp, sensitive_features_test)
groups, group_metrics_eo, gaps_eo = evaluate_fairness(y_test, y_pred_eo, sensitive_features_test)
for group in groups:
    print("P[h(X) = 1 | A = {}] = {}".format(group, group_metrics_dp['dp'][group]))
print("Delta_dp = {}".format(gaps_dp['dp']))
for group in groups:
    print("P[h(X) = 1 | A = {}, Y = 1] = {}".format(group, group_metrics_eo['eo_y1'][group]))
    print("P[h(X) = 1 | A = {}, Y = 0] = {}".format(group, group_metrics_eo['eo_y0'][group]))
print("Delta_eo1 = {}".format(gaps_eo['eo_y1']))
print("Delta_eo0 = {}".format(gaps_eo['eo_y0']))

P[h(X) = 1 | A = 0] = 0.16379310344827586
P[h(X) = 1 | A = 1] = 0.2049469964664311
Delta_dp = 0.04115389301815525
P[h(X) = 1 | A = 0, Y = 1] = 0.6710526315789473
P[h(X) = 1 | A = 0, Y = 0] = 0.225
P[h(X) = 1 | A = 1, Y = 1] = 0.775
P[h(X) = 1 | A = 1, Y = 0] = 0.10699588477366255
Delta_eo1 = 0.10394736842105268
Delta_eo0 = 0.11800411522633746
