In this notebook, we find the baseline fairness using each of the three fairness algorithms we would like to compare: Reweighing (Preprocessing), Reductions Approach to Fair Classficiation (Inprocessing), and Equality of Opportunity in Supervised Learning (Postprocessing).

In [82]:
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import numpy as np

In [83]:
%matplotlib inline

import matplotlib.pyplot as plt
import matplotlib.cm as cm

def evaluate_fairness(y_true, y_pred, sensitive_features):
    """
    Evaluates fairness of the final majority vote classifier over T_inner hypotheses
    on the test set.
    #NOTE: defined in the meta_algo file, but we chose:
    a0 := African-American (COMPAS), Female (Adult)
    a1 := Caucasian (COMPAS), Male (Adult)

    :return: list. subgroups in sensitive_features.
    :return: list, dict, dict. groups is a list of the sensitive features in the dataset. 
    group_metrics is a dictionary containing dictionaries that have Delta_dp, Delta_eoy0, 
    and Delta_eoy1 for each group. gaps is a dictionary that contains the fairness gap
    for dp, eo_y0 and eo_y1.
    """
    groups = np.unique(sensitive_features.values)
    pos_count = {}
    dp_pct = {}
    eo_y0_pct = {}
    eo_y1_pct = {}

    for index, group in enumerate(groups):
        # Demographic Parity
        indices = {}
        indices[group] = sensitive_features.index[sensitive_features == group]
        dp_pct[group] = sum(y_pred[indices[group]])/len(indices[group])

        # Equalized Odds
        y1_indices = {}
        y0_indices = {}
        y1_indices[group] = sensitive_features.index[(sensitive_features == group) & (y_true == 1)]
        y0_indices[group] = sensitive_features.index[(sensitive_features == group) & (y_true == 0)]
        eo_y0_pct[group] = sum(y_pred[y0_indices[group]])/len(y0_indices[group])   
        eo_y1_pct[group] = sum(y_pred[y1_indices[group]])/len(y1_indices[group])

    gaps = {}
    group_metrics = {} # a dictionary of dictionaries

    gaps['dp'] = abs(dp_pct[groups[0]] - dp_pct[groups[1]])
    gaps['eo_y0'] = abs(eo_y0_pct[groups[0]] - eo_y0_pct[groups[1]])
    gaps['eo_y1'] = abs(eo_y1_pct[groups[0]] - eo_y1_pct[groups[1]])
    group_metrics['dp'] = dp_pct
    group_metrics['eo_y0'] = eo_y0_pct
    group_metrics['eo_y1'] = eo_y1_pct

    return groups, group_metrics, gaps

# Compas

In [84]:
X_train = pd.read_csv('./../../data/processed/compas/compas_train1_X.csv')
X_test = pd.read_csv('./../../data/processed/compas/compas_test1_X.csv')
y_train = pd.read_csv('./../../data/processed/compas/compas_train1_y.csv')
y_train = y_train['two_year_recid']
y_test = pd.read_csv('./../../data/processed/compas/compas_test1_y.csv')
y_test = y_test['two_year_recid']

sensitive_features_train = X_train['race']
sensitive_features_test = X_test['race']

## Unfair

In [85]:
logreg = LogisticRegression()
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)

In [86]:
print("COMPAS Test Accuracy (Unfair): {}".format(accuracy_score(y_pred, y_test)))

COMPAS Test Accuracy (Unfair): 0.6926315789473684


In [87]:
groups, group_metrics, gaps = evaluate_fairness(y_test, y_pred, sensitive_features_test)

In [88]:
for group in groups:
    print("P[h(X) = 1 | A = {}] = {}".format(group, group_metrics['dp'][group]))
print("Delta_dp = {}".format(gaps['dp']))
for group in groups:
    print("P[h(X) = 1 | A = {}, Y = 1] = {}".format(group, group_metrics['eo_y1'][group]))
    print("P[h(X) = 1 | A = {}, Y = 0] = {}".format(group, group_metrics['eo_y0'][group]))
print("Delta_eo1 = {}".format(gaps['eo_y1']))
print("Delta_eo0 = {}".format(gaps['eo_y0']))

P[h(X) = 1 | A = 0.0] = 0.4963768115942029
P[h(X) = 1 | A = 1.0] = 0.24623115577889448
Delta_dp = 0.2501456558153084
P[h(X) = 1 | A = 0.0, Y = 1] = 0.6948529411764706
P[h(X) = 1 | A = 0.0, Y = 0] = 0.30357142857142855
P[h(X) = 1 | A = 1.0, Y = 1] = 0.41216216216216217
P[h(X) = 1 | A = 1.0, Y = 0] = 0.148
Delta_eo1 = 0.2826907790143084
Delta_eo0 = 0.15557142857142855


## Preprocessing

## Inprocessing

In [89]:
from fairlearn.reductions import ExponentiatedGradient, DemographicParity, EqualizedOdds

expgrad_dp = ExponentiatedGradient(
    LogisticRegression(solver='liblinear', fit_intercept=True, class_weight='balanced'),
    constraints=DemographicParity(),
    eps=0.05,
    nu=1e-6)

expgrad_eo = ExponentiatedGradient(
    LogisticRegression(solver='liblinear', fit_intercept=True, class_weight='balanced'),
    constraints=EqualizedOdds(),
    eps=0.05,
    nu=1e-6)

expgrad_dp.fit(X_train, y_train, sensitive_features=sensitive_features_train)
expgrad_eo.fit(X_train, y_train, sensitive_features=sensitive_features_train)
y_pred_dp = expgrad_dp.predict(X_test)
y_pred_eo = expgrad_eo.predict(X_test)

In [90]:
print("COMPAS Test Accuracy (Inprocessing, DP): {}".format(accuracy_score(y_pred_dp, y_test)))
print("COMPAS Test Accuracy (Inprocessing, EO): {}".format(accuracy_score(y_pred_eo, y_test)))

COMPAS Test Accuracy (Inprocessing, DP): 0.6789473684210526
COMPAS Test Accuracy (Inprocessing, EO): 0.6747368421052632


In [91]:
groups, group_metrics_dp, gaps_dp = evaluate_fairness(y_test, y_pred_dp, sensitive_features_test)
groups, group_metrics_eo, gaps_eo = evaluate_fairness(y_test, y_pred_eo, sensitive_features_test)

In [92]:
for group in groups:
    print("P[h(X) = 1 | A = {}] = {}".format(group, group_metrics_dp['dp'][group]))
print("Delta_dp = {}".format(gaps_dp['dp']))
for group in groups:
    print("P[h(X) = 1 | A = {}, Y = 1] = {}".format(group, group_metrics_eo['eo_y1'][group]))
    print("P[h(X) = 1 | A = {}, Y = 0] = {}".format(group, group_metrics_eo['eo_y0'][group]))
print("Delta_eo1 = {}".format(gaps_eo['eo_y1']))
print("Delta_eo0 = {}".format(gaps_eo['eo_y0']))

P[h(X) = 1 | A = 0.0] = 0.5018115942028986
P[h(X) = 1 | A = 1.0] = 0.4020100502512563
Delta_dp = 0.09980154395164226
P[h(X) = 1 | A = 0.0, Y = 1] = 0.7022058823529411
P[h(X) = 1 | A = 0.0, Y = 0] = 0.3142857142857143
P[h(X) = 1 | A = 1.0, Y = 1] = 0.5675675675675675
P[h(X) = 1 | A = 1.0, Y = 0] = 0.304
Delta_eo1 = 0.13463831478537358
Delta_eo0 = 0.010285714285714287


## Postprocessing

In [93]:
from fairlearn.postprocessing import ThresholdOptimizer
from sklearn import clone
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_is_fitted
from sklearn.exceptions import NotFittedError

class LogisticRegressionAsRegression(BaseEstimator, ClassifierMixin):
    def __init__(self, logistic_regression_estimator):
        self.logistic_regression_estimator = logistic_regression_estimator

    def fit(self, X, y):
        try:
            check_is_fitted(self.logistic_regression_estimator)
            self.logistic_regression_estimator_ = self.logistic_regression_estimator
        except NotFittedError:
            self.logistic_regression_estimator_ = clone(
                self.logistic_regression_estimator
            ).fit(X, y)
        return self

    def predict(self, X):
        # use predict_proba to get real values instead of 0/1, select only prob for 1
        scores = self.logistic_regression_estimator_.predict_proba(X)[:, 1]
        return scores

estimator_wrapper = LogisticRegressionAsRegression(logreg).fit(X_train, y_train)
postprocessed_predictor_dp = ThresholdOptimizer(estimator=estimator_wrapper, constraints="demographic_parity", prefit=True)
postprocessed_predictor_eo = ThresholdOptimizer(estimator=estimator_wrapper, constraints="equalized_odds", prefit=True)

postprocessed_predictor_dp.fit(X_train, y_train, sensitive_features=sensitive_features_train)
postprocessed_predictor_eo.fit(X_train, y_train, sensitive_features=sensitive_features_train)

y_pred_dp = postprocessed_predictor_dp.predict(X_test, sensitive_features=sensitive_features_test)
y_pred_eo = postprocessed_predictor_eo.predict(X_test, sensitive_features=sensitive_features_test)

In [94]:
print("COMPAS Test Accuracy (Postprocessing, DP): {}".format(accuracy_score(y_pred_dp, y_test)))
print("COMPAS Test Accuracy (Postprocessing, EO): {}".format(accuracy_score(y_pred_eo, y_test)))

COMPAS Test Accuracy (Postprocessing, DP): 0.6726315789473685
COMPAS Test Accuracy (Postprocessing, EO): 0.6368421052631579


In [95]:
groups, group_metrics_dp, gaps_dp = evaluate_fairness(y_test, y_pred_dp, sensitive_features_test)
groups, group_metrics_eo, gaps_eo = evaluate_fairness(y_test, y_pred_eo, sensitive_features_test)

In [96]:
for group in groups:
    print("P[h(X) = 1 | A = {}] = {}".format(group, group_metrics_dp['dp'][group]))
print("Delta_dp = {}".format(gaps_dp['dp']))
for group in groups:
    print("P[h(X) = 1 | A = {}, Y = 1] = {}".format(group, group_metrics_eo['eo_y1'][group]))
    print("P[h(X) = 1 | A = {}, Y = 0] = {}".format(group, group_metrics_eo['eo_y0'][group]))
print("Delta_eo1 = {}".format(gaps_eo['eo_y1']))
print("Delta_eo0 = {}".format(gaps_eo['eo_y0']))

P[h(X) = 1 | A = 0.0] = 0.4855072463768116
P[h(X) = 1 | A = 1.0] = 0.45979899497487436
Delta_dp = 0.02570825140193722
P[h(X) = 1 | A = 0.0, Y = 1] = 0.7242647058823529
P[h(X) = 1 | A = 0.0, Y = 0] = 0.4107142857142857
P[h(X) = 1 | A = 1.0, Y = 1] = 0.7094594594594594
P[h(X) = 1 | A = 1.0, Y = 0] = 0.448
Delta_eo1 = 0.014805246422893492
Delta_eo0 = 0.03728571428571431


# Adult

In [97]:
X_train = pd.read_csv('./../../data/processed/adult/adult_train1_X.csv')
X_test = pd.read_csv('./../../data/processed/adult/adult_test1_X.csv')
y_train = pd.read_csv('./../../data/processed/adult/adult_train1_y.csv')
y_train = y_train['income']
y_test = pd.read_csv('./../../data/processed/adult/adult_test1_y.csv')
y_test = y_test['income']

sensitive_features_train = X_train['sex']
sensitive_features_test = X_test['sex']

## Unfair

In [98]:
logreg = LogisticRegression()
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)

In [99]:
print("Adult Test Accuracy (Unfair): {}".format(accuracy_score(y_pred, y_test)))

Adult Test Accuracy (Unfair): 0.8341584158415841


In [100]:
groups, group_metrics, gaps = evaluate_fairness(y_test, y_pred, sensitive_features_test)

In [101]:
for group in groups:
    print("P[h(X) = 1 | A = {}] = {}".format(group, group_metrics['dp'][group]))
print("Delta_dp = {}".format(gaps['dp']))
for group in groups:
    print("P[h(X) = 1 | A = {}, Y = 1] = {}".format(group, group_metrics['eo_y1'][group]))
    print("P[h(X) = 1 | A = {}, Y = 0] = {}".format(group, group_metrics['eo_y0'][group]))
print("Delta_eo1 = {}".format(gaps['eo_y1']))
print("Delta_eo0 = {}".format(gaps['eo_y0']))

P[h(X) = 1 | A = -0.5903089186965219] = 0.5838926174496645
P[h(X) = 1 | A = 1.6940282762593677] = 0.24528301886792453
Delta_dp = 0.33860959858173995
P[h(X) = 1 | A = -0.5903089186965219, Y = 1] = 0.8461538461538461
P[h(X) = 1 | A = -0.5903089186965219, Y = 0] = 0.24031007751937986
P[h(X) = 1 | A = 1.6940282762593677, Y = 1] = 0.8076923076923077
P[h(X) = 1 | A = 1.6940282762593677, Y = 0] = 0.0625
Delta_eo1 = 0.038461538461538436
Delta_eo0 = 0.17781007751937986


## Preprocessing

## Inprocessing

In [102]:
from fairlearn.reductions import ExponentiatedGradient, DemographicParity, EqualizedOdds

expgrad_dp = ExponentiatedGradient(
    LogisticRegression(solver='liblinear', fit_intercept=True, class_weight='balanced'),
    constraints=DemographicParity(),
    eps=0.05,
    nu=1e-6)

expgrad_eo = ExponentiatedGradient(
    LogisticRegression(solver='liblinear', fit_intercept=True, class_weight='balanced'),
    constraints=EqualizedOdds(),
    eps=0.05,
    nu=1e-6)

expgrad_dp.fit(X_train, y_train, sensitive_features=sensitive_features_train)
expgrad_eo.fit(X_train, y_train, sensitive_features=sensitive_features_train)
y_pred_dp = expgrad_dp.predict(X_test)
y_pred_eo = expgrad_eo.predict(X_test)

In [103]:
print("Adult Test Accuracy (Inprocessing, DP): {}".format(accuracy_score(y_pred_dp, y_test)))
print("Adult Test Accuracy (Inprocessing, EO): {}".format(accuracy_score(y_pred_eo, y_test)))

Adult Test Accuracy (Inprocessing, DP): 0.7970297029702971
Adult Test Accuracy (Inprocessing, EO): 0.8242574257425742


In [104]:
groups, group_metrics_dp, gaps_dp = evaluate_fairness(y_test, y_pred_dp, sensitive_features_test)
groups, group_metrics_eo, gaps_eo = evaluate_fairness(y_test, y_pred_eo, sensitive_features_test)

In [105]:
for group in groups:
    print("P[h(X) = 1 | A = {}] = {}".format(group, group_metrics_dp['dp'][group]))
print("Delta_dp = {}".format(gaps_dp['dp']))
for group in groups:
    print("P[h(X) = 1 | A = {}, Y = 1] = {}".format(group, group_metrics_eo['eo_y1'][group]))
    print("P[h(X) = 1 | A = {}, Y = 0] = {}".format(group, group_metrics_eo['eo_y0'][group]))
print("Delta_eo1 = {}".format(gaps_eo['eo_y1']))
print("Delta_eo0 = {}".format(gaps_eo['eo_y0']))

P[h(X) = 1 | A = -0.5903089186965219] = 0.5067114093959731
P[h(X) = 1 | A = 1.6940282762593677] = 0.4528301886792453
Delta_dp = 0.05388122071672785
P[h(X) = 1 | A = -0.5903089186965219, Y = 1] = 0.8224852071005917
P[h(X) = 1 | A = -0.5903089186965219, Y = 0] = 0.20155038759689922
P[h(X) = 1 | A = 1.6940282762593677, Y = 1] = 0.8846153846153846
P[h(X) = 1 | A = 1.6940282762593677, Y = 0] = 0.15
Delta_eo1 = 0.062130177514792884
Delta_eo0 = 0.05155038759689923


## Postprocessing

In [106]:
from fairlearn.postprocessing import ThresholdOptimizer
from sklearn import clone
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_is_fitted
from sklearn.exceptions import NotFittedError

class LogisticRegressionAsRegression(BaseEstimator, ClassifierMixin):
    def __init__(self, logistic_regression_estimator):
        self.logistic_regression_estimator = logistic_regression_estimator

    def fit(self, X, y):
        try:
            check_is_fitted(self.logistic_regression_estimator)
            self.logistic_regression_estimator_ = self.logistic_regression_estimator
        except NotFittedError:
            self.logistic_regression_estimator_ = clone(
                self.logistic_regression_estimator
            ).fit(X, y)
        return self

    def predict(self, X):
        # use predict_proba to get real values instead of 0/1, select only prob for 1
        scores = self.logistic_regression_estimator_.predict_proba(X)[:, 1]
        return scores

estimator_wrapper = LogisticRegressionAsRegression(logreg).fit(X_train, y_train)
postprocessed_predictor_dp = ThresholdOptimizer(estimator=estimator_wrapper, constraints="demographic_parity", prefit=True)
postprocessed_predictor_eo = ThresholdOptimizer(estimator=estimator_wrapper, constraints="equalized_odds", prefit=True)

postprocessed_predictor_dp.fit(X_train, y_train, sensitive_features=sensitive_features_train)
postprocessed_predictor_eo.fit(X_train, y_train, sensitive_features=sensitive_features_train)

y_pred_dp = postprocessed_predictor_dp.predict(X_test, sensitive_features=sensitive_features_test)
y_pred_eo = postprocessed_predictor_eo.predict(X_test, sensitive_features=sensitive_features_test)

In [107]:
print("Adult Test Accuracy (Postprocessing, DP): {}".format(accuracy_score(y_pred_dp, y_test)))
print("Adult Test Accuracy (Postprocessing, EO): {}".format(accuracy_score(y_pred_eo, y_test)))

Adult Test Accuracy (Postprocessing, DP): 0.7846534653465347
Adult Test Accuracy (Postprocessing, EO): 0.8044554455445545


In [108]:
groups, group_metrics_dp, gaps_dp = evaluate_fairness(y_test, y_pred_dp, sensitive_features_test)
groups, group_metrics_eo, gaps_eo = evaluate_fairness(y_test, y_pred_eo, sensitive_features_test)

In [109]:
for group in groups:
    print("P[h(X) = 1 | A = {}] = {}".format(group, group_metrics_dp['dp'][group]))
print("Delta_dp = {}".format(gaps_dp['dp']))
for group in groups:
    print("P[h(X) = 1 | A = {}, Y = 1] = {}".format(group, group_metrics_eo['eo_y1'][group]))
    print("P[h(X) = 1 | A = {}, Y = 0] = {}".format(group, group_metrics_eo['eo_y0'][group]))
print("Delta_eo1 = {}".format(gaps_eo['eo_y1']))
print("Delta_eo0 = {}".format(gaps_eo['eo_y0']))

P[h(X) = 1 | A = -0.5903089186965219] = 0.5100671140939598
P[h(X) = 1 | A = 1.6940282762593677] = 0.49056603773584906
Delta_dp = 0.019501076358110714
P[h(X) = 1 | A = -0.5903089186965219, Y = 1] = 0.8224852071005917
P[h(X) = 1 | A = -0.5903089186965219, Y = 0] = 0.21705426356589147
P[h(X) = 1 | A = 1.6940282762593677, Y = 1] = 0.8461538461538461
P[h(X) = 1 | A = 1.6940282762593677, Y = 0] = 0.2125
Delta_eo1 = 0.023668639053254448
Delta_eo0 = 0.004554263565891475


# Lawschool

In [110]:
X_train = pd.read_csv('./../../data/processed/lawschool/lawschool_train1_X.csv')
X_test = pd.read_csv('./../../data/processed/lawschool/lawschool_test1_X.csv')
y_train = pd.read_csv('./../../data/processed/lawschool/lawschool_train1_y.csv')
y_train = y_train['bar1']
y_test = pd.read_csv('./../../data/processed/lawschool/lawschool_test1_y.csv')
y_test = y_test['bar1']

sensitive_features_train = X_train['race7']
sensitive_features_test = X_test['race7']

## Unfair

In [111]:
logreg = LogisticRegression()
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [112]:
print("Lawschool Test Accuracy (Unfair): {}".format(accuracy_score(y_pred, y_test)))

Lawschool Test Accuracy (Unfair): 0.8136986301369863


In [113]:
groups, group_metrics, gaps = evaluate_fairness(y_test, y_pred, sensitive_features_test)

In [114]:
for group in groups:
    print("P[h(X) = 1 | A = {}] = {}".format(group, group_metrics['dp'][group]))
print("Delta_dp = {}".format(gaps['dp']))
for group in groups:
    print("P[h(X) = 1 | A = {}, Y = 1] = {}".format(group, group_metrics['eo_y1'][group]))
    print("P[h(X) = 1 | A = {}, Y = 0] = {}".format(group, group_metrics['eo_y0'][group]))
print("Delta_eo1 = {}".format(gaps['eo_y1']))
print("Delta_eo0 = {}".format(gaps['eo_y0']))

P[h(X) = 1 | A = 0] = 0.22727272727272727
P[h(X) = 1 | A = 1] = 0.6101083032490975
Delta_dp = 0.3828355759763702
P[h(X) = 1 | A = 0, Y = 1] = 0.6190476190476191
P[h(X) = 1 | A = 0, Y = 0] = 0.1044776119402985
P[h(X) = 1 | A = 1, Y = 1] = 0.8670886075949367
P[h(X) = 1 | A = 1, Y = 0] = 0.2689075630252101
Delta_eo1 = 0.2480409885473176
Delta_eo0 = 0.16442995108491157


## Preprocessing

## Inprocessing

In [119]:
from fairlearn.reductions import ExponentiatedGradient, DemographicParity, EqualizedOdds

expgrad_dp = ExponentiatedGradient(
    LogisticRegression(solver='liblinear', fit_intercept=True, class_weight='balanced'),
    constraints=DemographicParity(),
    eps=0.05,
    nu=1e-6)

expgrad_eo = ExponentiatedGradient(
    LogisticRegression(solver='liblinear', fit_intercept=True, class_weight='balanced'),
    constraints=EqualizedOdds(),
    eps=0.05,
    nu=1e-6)

expgrad_dp.fit(X_train, y_train, sensitive_features=sensitive_features_train)
expgrad_eo.fit(X_train, y_train, sensitive_features=sensitive_features_train)
y_pred_dp = expgrad_dp.predict(X_test)
y_pred_eo = expgrad_eo.predict(X_test)

In [120]:
print("Lawschool Test Accuracy (Inprocessing, DP): {}".format(accuracy_score(y_pred_dp, y_test)))
print("Lawschool Test Accuracy (Inprocessing, EO): {}".format(accuracy_score(y_pred_eo, y_test)))

Lawschool Test Accuracy (Inprocessing, DP): 0.7452054794520548
Lawschool Test Accuracy (Inprocessing, EO): 0.7753424657534247


In [121]:
groups, group_metrics_dp, gaps_dp = evaluate_fairness(y_test, y_pred_dp, sensitive_features_test)
groups, group_metrics_eo, gaps_eo = evaluate_fairness(y_test, y_pred_eo, sensitive_features_test)

In [122]:
for group in groups:
    print("P[h(X) = 1 | A = {}] = {}".format(group, group_metrics_dp['dp'][group]))
print("Delta_dp = {}".format(gaps_dp['dp']))
for group in groups:
    print("P[h(X) = 1 | A = {}, Y = 1] = {}".format(group, group_metrics_eo['eo_y1'][group]))
    print("P[h(X) = 1 | A = {}, Y = 0] = {}".format(group, group_metrics_eo['eo_y0'][group]))
print("Delta_eo1 = {}".format(gaps_eo['eo_y1']))
print("Delta_eo0 = {}".format(gaps_eo['eo_y0']))

P[h(X) = 1 | A = 0] = 0.5681818181818182
P[h(X) = 1 | A = 1] = 0.5342960288808665
Delta_dp = 0.03388578930095176
P[h(X) = 1 | A = 0, Y = 1] = 0.8095238095238095
P[h(X) = 1 | A = 0, Y = 0] = 0.29850746268656714
P[h(X) = 1 | A = 1, Y = 1] = 0.7848101265822784
P[h(X) = 1 | A = 1, Y = 0] = 0.20168067226890757
Delta_eo1 = 0.02471368294153109
Delta_eo0 = 0.09682679041765957


## Postprocessing

In [123]:
from fairlearn.postprocessing import ThresholdOptimizer
from sklearn import clone
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_is_fitted
from sklearn.exceptions import NotFittedError

class LogisticRegressionAsRegression(BaseEstimator, ClassifierMixin):
    def __init__(self, logistic_regression_estimator):
        self.logistic_regression_estimator = logistic_regression_estimator

    def fit(self, X, y):
        try:
            check_is_fitted(self.logistic_regression_estimator)
            self.logistic_regression_estimator_ = self.logistic_regression_estimator
        except NotFittedError:
            self.logistic_regression_estimator_ = clone(
                self.logistic_regression_estimator
            ).fit(X, y)
        return self

    def predict(self, X):
        # use predict_proba to get real values instead of 0/1, select only prob for 1
        scores = self.logistic_regression_estimator_.predict_proba(X)[:, 1]
        return scores

estimator_wrapper = LogisticRegressionAsRegression(logreg).fit(X_train, y_train)
postprocessed_predictor_dp = ThresholdOptimizer(estimator=estimator_wrapper, constraints="demographic_parity", prefit=True)
postprocessed_predictor_eo = ThresholdOptimizer(estimator=estimator_wrapper, constraints="equalized_odds", prefit=True)

postprocessed_predictor_dp.fit(X_train, y_train, sensitive_features=sensitive_features_train)
postprocessed_predictor_eo.fit(X_train, y_train, sensitive_features=sensitive_features_train)

y_pred_dp = postprocessed_predictor_dp.predict(X_test, sensitive_features=sensitive_features_test)
y_pred_eo = postprocessed_predictor_eo.predict(X_test, sensitive_features=sensitive_features_test)

In [124]:
print("Lawschool Test Accuracy (Postprocessing, DP): {}".format(accuracy_score(y_pred_dp, y_test)))
print("Lawschool Test Accuracy (Postprocessing, EO): {}".format(accuracy_score(y_pred_eo, y_test)))

Lawschool Test Accuracy (Postprocessing, DP): 0.7643835616438356
Lawschool Test Accuracy (Postprocessing, EO): 0.7315068493150685


In [125]:
groups, group_metrics_dp, gaps_dp = evaluate_fairness(y_test, y_pred_dp, sensitive_features_test)
groups, group_metrics_eo, gaps_eo = evaluate_fairness(y_test, y_pred_eo, sensitive_features_test)

In [126]:
for group in groups:
    print("P[h(X) = 1 | A = {}] = {}".format(group, group_metrics_dp['dp'][group]))
print("Delta_dp = {}".format(gaps_dp['dp']))
for group in groups:
    print("P[h(X) = 1 | A = {}, Y = 1] = {}".format(group, group_metrics_eo['eo_y1'][group]))
    print("P[h(X) = 1 | A = {}, Y = 0] = {}".format(group, group_metrics_eo['eo_y0'][group]))
print("Delta_eo1 = {}".format(gaps_eo['eo_y1']))
print("Delta_eo0 = {}".format(gaps_eo['eo_y0']))

P[h(X) = 1 | A = 0] = 0.5227272727272727
P[h(X) = 1 | A = 1] = 0.5451263537906137
Delta_dp = 0.022399081063340986
P[h(X) = 1 | A = 0, Y = 1] = 0.7619047619047619
P[h(X) = 1 | A = 0, Y = 0] = 0.2537313432835821
P[h(X) = 1 | A = 1, Y = 1] = 0.6708860759493671
P[h(X) = 1 | A = 1, Y = 0] = 0.20168067226890757
Delta_eo1 = 0.09101868595539475
Delta_eo0 = 0.05205067101467453


# Communities

In [127]:
X_train = pd.read_csv('./../../data/processed/communities/communities_train1_X.csv')
X_test = pd.read_csv('./../../data/processed/communities/communities_test1_X.csv')
y_train = pd.read_csv('./../../data/processed/communities/communities_train1_y.csv')
y_train = y_train['ViolentCrimesPerPop']
y_test = pd.read_csv('./../../data/processed/communities/communities_test1_y.csv')
y_test = y_test['ViolentCrimesPerPop']

sensitive_features_train = X_train['majority_white']
sensitive_features_test = X_test['majority_white']

## Unfair

In [128]:
logreg = LogisticRegression()
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [131]:
print("Communities Test Accuracy (Unfair): {}".format(accuracy_score(y_pred, y_test)))

Communities Test Accuracy (Unfair): 0.8646616541353384


In [132]:
groups, group_metrics, gaps = evaluate_fairness(y_test, y_pred, sensitive_features_test)
for group in groups:
    print("P[h(X) = 1 | A = {}] = {}".format(group, group_metrics['dp'][group]))
print("Delta_dp = {}".format(gaps['dp']))
for group in groups:
    print("P[h(X) = 1 | A = {}, Y = 1] = {}".format(group, group_metrics['eo_y1'][group]))
    print("P[h(X) = 1 | A = {}, Y = 0] = {}".format(group, group_metrics['eo_y0'][group]))
print("Delta_eo1 = {}".format(gaps['eo_y1']))
print("Delta_eo0 = {}".format(gaps['eo_y0']))

P[h(X) = 1 | A = 0] = 0.6379310344827587
P[h(X) = 1 | A = 1] = 0.09187279151943463
Delta_dp = 0.5460582429633241
P[h(X) = 1 | A = 0, Y = 1] = 0.8026315789473685
P[h(X) = 1 | A = 0, Y = 0] = 0.325
P[h(X) = 1 | A = 1, Y = 1] = 0.5
P[h(X) = 1 | A = 1, Y = 0] = 0.024691358024691357
Delta_eo1 = 0.3026315789473685
Delta_eo0 = 0.30030864197530865


## Preprocessing

## Inprocessing

In [133]:
from fairlearn.reductions import ExponentiatedGradient, DemographicParity, EqualizedOdds

expgrad_dp = ExponentiatedGradient(
    LogisticRegression(solver='liblinear', fit_intercept=True, class_weight='balanced'),
    constraints=DemographicParity(),
    eps=0.05,
    nu=1e-6)

expgrad_eo = ExponentiatedGradient(
    LogisticRegression(solver='liblinear', fit_intercept=True, class_weight='balanced'),
    constraints=EqualizedOdds(),
    eps=0.05,
    nu=1e-6)

expgrad_dp.fit(X_train, y_train, sensitive_features=sensitive_features_train)
expgrad_eo.fit(X_train, y_train, sensitive_features=sensitive_features_train)
y_pred_dp = expgrad_dp.predict(X_test)
y_pred_eo = expgrad_eo.predict(X_test)

In [134]:
print("Communities Test Accuracy (Inprocessing, DP): {}".format(accuracy_score(y_pred_dp, y_test)))
print("Communities Test Accuracy (Inprocessing, EO): {}".format(accuracy_score(y_pred_eo, y_test)))

Communities Test Accuracy (Inprocessing, DP): 0.7268170426065163
Communities Test Accuracy (Inprocessing, EO): 0.8120300751879699


In [135]:
groups, group_metrics_dp, gaps_dp = evaluate_fairness(y_test, y_pred_dp, sensitive_features_test)
groups, group_metrics_eo, gaps_eo = evaluate_fairness(y_test, y_pred_eo, sensitive_features_test)
for group in groups:
    print("P[h(X) = 1 | A = {}] = {}".format(group, group_metrics_dp['dp'][group]))
print("Delta_dp = {}".format(gaps_dp['dp']))
for group in groups:
    print("P[h(X) = 1 | A = {}, Y = 1] = {}".format(group, group_metrics_eo['eo_y1'][group]))
    print("P[h(X) = 1 | A = {}, Y = 0] = {}".format(group, group_metrics_eo['eo_y0'][group]))
print("Delta_eo1 = {}".format(gaps_eo['eo_y1']))
print("Delta_eo0 = {}".format(gaps_eo['eo_y0']))

P[h(X) = 1 | A = 0] = 0.41379310344827586
P[h(X) = 1 | A = 1] = 0.34275618374558303
Delta_dp = 0.07103691970269282
P[h(X) = 1 | A = 0, Y = 1] = 0.75
P[h(X) = 1 | A = 0, Y = 0] = 0.275
P[h(X) = 1 | A = 1, Y = 1] = 0.8
P[h(X) = 1 | A = 1, Y = 0] = 0.1522633744855967
Delta_eo1 = 0.050000000000000044
Delta_eo0 = 0.12273662551440331


## Postprocessing

In [138]:
from fairlearn.postprocessing import ThresholdOptimizer
from sklearn import clone
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_is_fitted
from sklearn.exceptions import NotFittedError

class LogisticRegressionAsRegression(BaseEstimator, ClassifierMixin):
    def __init__(self, logistic_regression_estimator):
        self.logistic_regression_estimator = logistic_regression_estimator

    def fit(self, X, y):
        try:
            check_is_fitted(self.logistic_regression_estimator)
            self.logistic_regression_estimator_ = self.logistic_regression_estimator
        except NotFittedError:
            self.logistic_regression_estimator_ = clone(
                self.logistic_regression_estimator
            ).fit(X, y)
        return self

    def predict(self, X):
        # use predict_proba to get real values instead of 0/1, select only prob for 1
        scores = self.logistic_regression_estimator_.predict_proba(X)[:, 1]
        return scores

estimator_wrapper = LogisticRegressionAsRegression(logreg).fit(X_train, y_train)
postprocessed_predictor_dp = ThresholdOptimizer(estimator=estimator_wrapper, constraints="demographic_parity", prefit=True)
postprocessed_predictor_eo = ThresholdOptimizer(estimator=estimator_wrapper, constraints="equalized_odds", prefit=True)

postprocessed_predictor_dp.fit(X_train, y_train, sensitive_features=sensitive_features_train)
postprocessed_predictor_eo.fit(X_train, y_train, sensitive_features=sensitive_features_train)

y_pred_dp = postprocessed_predictor_dp.predict(X_test, sensitive_features=sensitive_features_test)
y_pred_eo = postprocessed_predictor_eo.predict(X_test, sensitive_features=sensitive_features_test)

In [139]:
print("Communities Test Accuracy (Postprocessing, DP): {}".format(accuracy_score(y_pred_dp, y_test)))
print("Communities Test Accuracy (Postprocessing, EO): {}".format(accuracy_score(y_pred_eo, y_test)))

Communities Test Accuracy (Postprocessing, DP): 0.7593984962406015
Communities Test Accuracy (Postprocessing, EO): 0.8295739348370927


In [140]:
groups, group_metrics_dp, gaps_dp = evaluate_fairness(y_test, y_pred_dp, sensitive_features_test)
groups, group_metrics_eo, gaps_eo = evaluate_fairness(y_test, y_pred_eo, sensitive_features_test)
for group in groups:
    print("P[h(X) = 1 | A = {}] = {}".format(group, group_metrics_dp['dp'][group]))
print("Delta_dp = {}".format(gaps_dp['dp']))
for group in groups:
    print("P[h(X) = 1 | A = {}, Y = 1] = {}".format(group, group_metrics_eo['eo_y1'][group]))
    print("P[h(X) = 1 | A = {}, Y = 0] = {}".format(group, group_metrics_eo['eo_y0'][group]))
print("Delta_eo1 = {}".format(gaps_eo['eo_y1']))
print("Delta_eo0 = {}".format(gaps_eo['eo_y0']))

P[h(X) = 1 | A = 0] = 0.1724137931034483
P[h(X) = 1 | A = 1] = 0.2049469964664311
Delta_dp = 0.03253320336298282
P[h(X) = 1 | A = 0, Y = 1] = 0.6710526315789473
P[h(X) = 1 | A = 0, Y = 0] = 0.2
P[h(X) = 1 | A = 1, Y = 1] = 0.75
P[h(X) = 1 | A = 1, Y = 0] = 0.102880658436214
Delta_eo1 = 0.07894736842105265
Delta_eo0 = 0.09711934156378602
