In [10]:
from ber_metrics import BEREstimator
import numpy as np
import pickle
import pandas as pd
import sklearn
from collections import defaultdict
import pprint

In [13]:
def save_estimates(load_path, save_path, black_key, white_key, female_key, drop_cols=None):
    """Estimate BER bounds on several slices of a training set and pickle them.

    The pickle at ``load_path`` is unpacked as
    ``(X_train, y_train, X_test, y_test)``; only the training pair is used.
    ``X_train`` must support DataFrame-style column selection and ``drop``.

    NOTE(review): the MIMIC-III cells in this notebook unpack their pickle as
    ``(X_train, X_test, y_train, y_test)`` -- confirm the stored order before
    pointing this function at that file.

    Bounds are computed for six slices, in this order:

    * ``all_with_demographic`` -- every row, every column
    * ``all_no_demographic``   -- every row, ``drop_cols`` removed
    * ``black``  -- rows where ``black_key == 1``, ``drop_cols`` removed
    * ``white``  -- rows where ``white_key == 1``, ``drop_cols`` removed
    * ``female`` -- rows where ``female_key == 1``, ``drop_cols`` removed
    * ``male``   -- rows where ``female_key == 0``, ``drop_cols`` removed

    Args:
        load_path: Path of the pickle holding the data splits.
        save_path: Path the resulting estimates are pickled to.
        black_key: Column name of the indicator selecting the "black" slice.
        white_key: Column name of the indicator selecting the "white" slice.
        female_key: Column name of the indicator selecting "female" (== 1)
            and "male" (== 0) slices.
        drop_cols: Column names removed from the features for every slice
            except ``all_with_demographic``. ``None`` (the default) drops
            nothing.

    Returns:
        None. The estimates are written to ``save_path`` as a
        ``defaultdict(dict)`` keyed ``estimates[bound][slice]`` with bounds
        ``'maha'``, ``'bhatt'`` and ``'nn'``, and are pretty-printed.
    """
    # Avoid a shared mutable default; copy so the caller's list is never aliased.
    drop_cols = [] if drop_cols is None else list(drop_cols)

    # NOTE: pickle.load can execute arbitrary code -- only open trusted files.
    # `with` guarantees the handle is closed once the data is loaded.
    with open(load_path, 'rb') as pkl_file:
        X_train, y_train, _X_test, _y_test = pickle.load(pkl_file)

    estimates = defaultdict(dict)

    def _record(slice_name, features, labels):
        # One estimator per slice; the three bounds are queried in a fixed order.
        estimator = BEREstimator(np.array(features), np.array(labels))
        estimates['maha'][slice_name] = estimator.mahalanobis_bound()
        estimates['bhatt'][slice_name] = estimator.bhattacharyya_bound()
        estimates['nn'][slice_name] = estimator.nn_bound()

    _record('all_with_demographic', X_train, y_train)

    # Every remaining slice works on the features without the protected columns.
    features_no_demo = X_train.drop(labels=drop_cols, axis=1)
    _record('all_no_demographic', features_no_demo, y_train)

    group_filters = (
        ('black', black_key, 1),
        ('white', white_key, 1),
        ('female', female_key, 1),
        ('male', female_key, 0),  # male is defined as "not female"
    )
    for slice_name, column, value in group_filters:
        # The mask comes from the un-dropped frame because `column` is usually
        # one of the columns listed in drop_cols.
        row_mask = X_train[column] == value
        _record(slice_name, features_no_demo[row_mask], y_train[row_mask])

    with open(save_path, 'wb') as out_file:
        pickle.dump(estimates, out_file)
    pprint.pprint(estimates)

## Adult Income

In [68]:
# Adult Income: BER bounds computed on every input feature, protected
# attributes included.
# NOTE: pickle.load can execute arbitrary code -- only open trusted files.
with open('Data/adult_income/processed_data.pkl', 'rb') as pkl_file:
    # `with` guarantees the file handle is closed once the splits are loaded.
    X_train, y_train, X_test, y_test = pickle.load(pkl_file)
# Only the training arrays feed the estimator; the test arrays are converted
# but not used in this cell.
X_train, y_train, X_test, y_test = np.array(X_train), np.array(y_train), np.array(X_test), np.array(y_test)
adult_estimator = BEREstimator(X_train, y_train)
print("--- ALL INPUT FEATURES INCLUDING PROTECTED VARS ---")
# The recorded output shows a single number for the Mahalanobis bound and a
# pair of numbers for the other two (presumably (lower, upper) -- confirm
# against ber_metrics).
print("Mahalanobis upper bound: {}".format(adult_estimator.mahalanobis_bound()))
print("Bhattacharrya upper bound: {}".format(adult_estimator.bhattacharyya_bound()))
print("Nearest Neighbor upper bound: {}".format(adult_estimator.nn_bound()))


--- ALL INPUT FEATURES INCLUDING PROTECTED VARS ---
Mahalanobis upper bound: 0.2383977285329773
Bhattacharrya upper bound: (0.00018925250028173357, 0.01375560554002971)
Nearest Neighbor upper bound: (0.11550679563529465, 0.20432995159472184)


In [8]:
# Adult Income: BER bounds with the race and sex indicator columns removed
# from the training features.
# NOTE: pickle.load can execute arbitrary code -- only open trusted files.
with open('Data/adult_income/processed_data.pkl', 'rb') as pkl_file:
    # `with` guarantees the file handle is closed once the splits are loaded.
    X_train, y_train, X_test, y_test = pickle.load(pkl_file)
# Only X_train loses the protected columns; X_test is left as loaded and is
# not used by the estimator.
X_train = X_train.drop(labels=['race_Other', 'race_Black', 'race_White', 'sex_Male', 'sex_Female'], axis=1)
X_train, y_train, X_test, y_test = np.array(X_train), np.array(y_train), np.array(X_test), np.array(y_test)
adult_estimator = BEREstimator(X_train, y_train)
print("--- ALL INPUT FEATURES EXCEPT DEMOGRAHPICS ---")
print("Mahalanobis upper bound: {}".format(adult_estimator.mahalanobis_bound()))
print("Bhattacharrya upper bound: {}".format(adult_estimator.bhattacharyya_bound()))
print("Nearest Neighbor upper bound: {}".format(adult_estimator.nn_bound()))


--- ALL INPUT FEATURES EXCEPT DEMOGRAHPICS ---
Mahalanobis upper bound: 0.23958132853260622
Bhattacharrya upper bound: (0.006264390495551109, 0.07889960650897043)
Nearest Neighbor upper bound: (0.11406516967703556, 0.2021086134871693)


In [46]:
# Adult Income: BER bounds estimated separately on each demographic subgroup
# of the training split, with every protected column removed from the features.
ADULT_PROTECTED_COLS = ['race_Black', 'race_Other', 'race_White', 'sex_Male', 'sex_Female']

# Load the splits once; each subgroup is sliced from these untouched frames.
# NOTE: pickle.load can execute arbitrary code -- only open trusted files.
with open('Data/adult_income/processed_data.pkl', 'rb') as pkl_file:
    adult_splits = pickle.load(pkl_file)


def report_adult_group(label, column, value, show_shape=False):
    """Print the three BER bounds for the rows where ``column == value``.

    Args:
        label: Heading printed before the bounds (e.g. ``"BLACK"``).
        column: Indicator column used to select the subgroup.
        value: Value of ``column`` that marks membership.
        show_shape: When True, print the training feature/label shapes
            before the heading.

    Returns:
        ``(X_train, y_train, X_test, y_test, estimator)`` for the subgroup, as
        numpy arrays plus the fitted ``BEREstimator``. The test arrays are
        filtered by row only (protected columns kept) and are not used by the
        estimator.
    """
    raw_X_train, raw_y_train, raw_X_test, raw_y_test = adult_splits
    train_rows = raw_X_train[column] == value
    test_rows = raw_X_test[column] == value
    group_X_train = np.array(raw_X_train[train_rows].drop(labels=ADULT_PROTECTED_COLS, axis=1))
    group_y_train = np.array(raw_y_train[train_rows])
    group_X_test = np.array(raw_X_test[test_rows])
    group_y_test = np.array(raw_y_test[test_rows])
    estimator = BEREstimator(group_X_train, group_y_train)
    if show_shape:
        print(group_X_train.shape, group_y_train.shape)
    print(label)
    print("\tMahalanobis upper bound: {}".format(estimator.mahalanobis_bound()))
    print("\tBhattacharrya upper bound: {}".format(estimator.bhattacharyya_bound()))
    print("\tNearest Neighbor upper bound: {}".format(estimator.nn_bound()))
    return group_X_train, group_y_train, group_X_test, group_y_test, estimator


print("--- DECOUPLED, ADULT INCOME ---")
print("----Decoupled Race-----")
report_adult_group("BLACK", 'race_Black', 1, show_shape=True)
report_adult_group("WHITE", 'race_White', 1)

print()
print("-----Decoupled Sex-------")
# Both sexes are selected through 'sex_Male': 0 -> female, 1 -> male.
report_adult_group("FEMALE", 'sex_Male', 0)
# The last group's arrays/estimator are bound to the usual names so the cell
# leaves the same globals behind as the step-by-step version did.
X_train, y_train, X_test, y_test, adult_estimator = report_adult_group("MALE", 'sex_Male', 1)


--- DECOUPLED, ADULT INCOME ---
----Decoupled Race-----
(2817, 42) (2817,)
BLACK
	Mahalanobis upper bound: 0.14569061587948332
	Bhattacharrya upper bound: (0.0, 0.0)
	Nearest Neighbor upper bound: (0.06737175410286744, 0.12566560170394037)
WHITE
	Mahalanobis upper bound: 0.24947258980904832
	Bhattacharrya upper bound: (0.0, 1.8233875173788025e-11)
	Nearest Neighbor upper bound: (0.12015098579878503, 0.21142945282073033)

-----Decoupled Sex-------
FEMALE
	Mahalanobis upper bound: 0.12883439717256787
	Bhattacharrya upper bound: (0.0, 0.0)
	Nearest Neighbor upper bound: (0.061022917265815824, 0.11459824166837047)
MALE
	Mahalanobis upper bound: 0.28570593841859737
	Bhattacharrya upper bound: (0.0, 0.0)
	Nearest Neighbor upper bound: (0.1443709356600102, 0.2470559371933268)


## COMPAS Recidivism

In [71]:
# COMPAS (violent-crime pickle): BER bounds on every input feature,
# demographic columns included.
# NOTE: pickle.load can execute arbitrary code -- only open trusted files.
with open('Data/compas/processed_violent_nonsingular.pkl', 'rb') as pkl_file:
    # `with` guarantees the file handle is closed once the splits are loaded.
    X_train, y_train, X_test, y_test = pickle.load(pkl_file)
# Only the training arrays feed the estimator.
X_train, y_train, X_test, y_test = np.array(X_train), np.array(y_train), np.array(X_test), np.array(y_test)
violent_estimator = BEREstimator(X_train, y_train)
print("--- ALL INPUT FEATURES INCLUDING DEMOGRAPHICS: VIOLENT CRIME ---")
print("Mahalanobis upper bound: {}".format(violent_estimator.mahalanobis_bound()))
print("Bhattacharrya upper bound: {}".format(violent_estimator.bhattacharyya_bound()))
print("Nearest Neighbor upper bound: {}".format(violent_estimator.nn_bound()))

--- ALL INPUT FEATURES INCLUDING DEMOGRAPHICS: VIOLENT CRIME ---
Mahalanobis upper bound: 0.19586095536150863
Bhattacharrya upper bound: (0.00010508635518702292, 0.010250624958750676)
Nearest Neighbor upper bound: (0.0823570486352398, 0.15114873035066506)


In [70]:
# COMPAS (violent-crime pickle): BER bounds with the race and sex indicator
# columns removed from the training features.
# NOTE: pickle.load can execute arbitrary code -- only open trusted files.
with open('Data/compas/processed_violent_nonsingular.pkl', 'rb') as pkl_file:
    # `with` guarantees the file handle is closed once the splits are loaded.
    X_train, y_train, X_test, y_test = pickle.load(pkl_file)
# 'race_is_causasian' is spelled the way the column is referenced throughout
# this notebook; do not "correct" it without renaming the column in the data.
X_train = X_train.drop(columns=['race_is_causasian', 'race_is_african_american', 'race_is_hispanic', 'female'])
X_train, y_train, X_test, y_test = np.array(X_train), np.array(y_train), np.array(X_test), np.array(y_test)
violent_estimator = BEREstimator(X_train, y_train)
print("--- ALL INPUT FEATURES EXCEPT DEMOGRAPHICS: VIOLENT CRIME ---")
print("Mahalanobis upper bound: {}".format(violent_estimator.mahalanobis_bound()))
print("Bhattacharrya upper bound: {}".format(violent_estimator.bhattacharyya_bound()))
print("Nearest Neighbor upper bound: {}".format(violent_estimator.nn_bound()))

--- ALL INPUT FEATURES EXCEPT DEMOGRAPHICS: VIOLENT CRIME ---
Mahalanobis upper bound: 0.19647007198919228
Bhattacharrya upper bound: (0.0, 9.418429699515122e-10)
Nearest Neighbor upper bound: (0.06533241098704945, 0.12212817412333736)


In [43]:
# COMPAS (violent-crime pickle): BER bounds estimated separately on each
# demographic subgroup of the training split, protected columns removed.
# 'race_is_causasian' is spelled the way the column is referenced throughout
# this notebook; keep it in sync with the data.
COMPAS_VIOLENT_PROTECTED_COLS = ['race_is_causasian', 'race_is_african_american', 'race_is_hispanic', 'female']

# Load the splits once; each subgroup is sliced from these untouched frames.
# NOTE: pickle.load can execute arbitrary code -- only open trusted files.
with open('Data/compas/processed_violent_nonsingular.pkl', 'rb') as pkl_file:
    compas_violent_splits = pickle.load(pkl_file)


def report_compas_violent_group(label, column, value):
    """Print a blank line, ``label`` and the three BER bounds for one subgroup.

    Args:
        label: Heading printed before the bounds (e.g. ``"BLACK"``).
        column: Indicator column used to select the subgroup.
        value: Value of ``column`` that marks membership.

    Returns:
        ``(X_train, y_train, X_test, y_test, estimator)`` for the subgroup, as
        numpy arrays plus the fitted ``BEREstimator``. The test arrays are
        filtered by row only (protected columns kept) and are not used by the
        estimator.
    """
    raw_X_train, raw_y_train, raw_X_test, raw_y_test = compas_violent_splits
    train_rows = raw_X_train[column] == value
    test_rows = raw_X_test[column] == value
    group_X_train = np.array(raw_X_train[train_rows].drop(labels=COMPAS_VIOLENT_PROTECTED_COLS, axis=1))
    group_y_train = np.array(raw_y_train[train_rows])
    group_X_test = np.array(raw_X_test[test_rows])
    group_y_test = np.array(raw_y_test[test_rows])
    estimator = BEREstimator(group_X_train, group_y_train)
    print()
    print(label)
    print("\tMahalanobis upper bound: {}".format(estimator.mahalanobis_bound()))
    print("\tBhattacharrya upper bound: {}".format(estimator.bhattacharyya_bound()))
    print("\tNearest Neighbor upper bound: {}".format(estimator.nn_bound()))
    return group_X_train, group_y_train, group_X_test, group_y_test, estimator


print("----Decoupled Race-----")
report_compas_violent_group("BLACK", 'race_is_african_american', 1)
report_compas_violent_group("WHITE", 'race_is_causasian', 1)

print()
print("-----Decoupled Sex-------")
report_compas_violent_group("FEMALE", 'female', 1)
# The last group's arrays/estimator are bound to the usual names so the cell
# leaves the same globals behind as the step-by-step version did.
X_train, y_train, X_test, y_test, compas_estimator = report_compas_violent_group("MALE", 'female', 0)



----Decoupled Race-----

BLACK
	Mahalanobis upper bound: 0.23171405034927542
	Bhattacharrya upper bound: (0.020318962310274413, 0.14108898639124204)
	Nearest Neighbor upper bound: (0.08154451491172132, 0.14979001399906672)

WHITE
	Mahalanobis upper bound: 0.15323894932606172
	Bhattacharrya upper bound: (0.0, 0.0)
	Nearest Neighbor upper bound: (0.11003473331478347, 0.1958541815582559)

-----Decoupled Sex-------

FEMALE
	Mahalanobis upper bound: 0.13330424601679006
	Bhattacharrya upper bound: (nan, nan)
	Nearest Neighbor upper bound: (0.06909816364635402, 0.1286472148541114)

MALE
	Mahalanobis upper bound: 0.20918503644901543
	Bhattacharrya upper bound: (nan, nan)
	Nearest Neighbor upper bound: (0.13955973551084727, 0.2401656314699793)


In [72]:
# COMPAS (re-arrest pickle): BER bounds on every input feature, demographic
# columns included.
# NOTE: pickle.load can execute arbitrary code -- only open trusted files.
with open('Data/compas/processed_arrest_nonsingular.pkl', 'rb') as pkl_file:
    # `with` guarantees the file handle is closed once the splits are loaded.
    X_train, y_train, X_test, y_test = pickle.load(pkl_file)
# Only the training arrays feed the estimator.
X_train, y_train, X_test, y_test = np.array(X_train), np.array(y_train), np.array(X_test), np.array(y_test)
# NOTE: the variable keeps the name `violent_estimator` even though this cell
# loads the re-arrest data; it rebinds the name used by the violent-crime cells.
violent_estimator = BEREstimator(X_train, y_train)
print("--- ALL INPUT FEATURES INCLUDING DEMOGRAPHICS: RE-ARREST ---")
print("Mahalanobis upper bound: {}".format(violent_estimator.mahalanobis_bound()))
print("Bhattacharrya upper bound: {}".format(violent_estimator.bhattacharyya_bound()))
print("Nearest Neighbor upper bound: {}".format(violent_estimator.nn_bound()))

--- ALL INPUT FEATURES INCLUDING DEMOGRAPHICS: RE-ARREST ---
Mahalanobis upper bound: 0.4223692451173532
Bhattacharrya upper bound: (0.0, 0.0)
Nearest Neighbor upper bound: (0.3235442308898998, 0.437726723095526)


In [73]:
# COMPAS (re-arrest pickle): BER bounds with the race and sex indicator
# columns removed from the training features.
# NOTE: pickle.load can execute arbitrary code -- only open trusted files.
with open('Data/compas/processed_arrest_nonsingular.pkl', 'rb') as pkl_file:
    # `with` guarantees the file handle is closed once the splits are loaded.
    X_train, y_train, X_test, y_test = pickle.load(pkl_file)
# 'race_is_causasian' is spelled the way the column is referenced throughout
# this notebook; do not "correct" it without renaming the column in the data.
X_train = X_train.drop(columns=['race_is_causasian', 'race_is_african_american', 'race_is_hispanic', 'female'])
X_train, y_train, X_test, y_test = np.array(X_train), np.array(y_train), np.array(X_test), np.array(y_test)
# NOTE: the variable keeps the name `violent_estimator` even though this cell
# loads the re-arrest data.
violent_estimator = BEREstimator(X_train, y_train)
print("--- ALL INPUT FEATURES EXCLUDING DEMOGRAPHICS: RE-ARREST ---")
print("Mahalanobis upper bound: {}".format(violent_estimator.mahalanobis_bound()))
print("Bhattacharrya upper bound: {}".format(violent_estimator.bhattacharyya_bound()))
print("Nearest Neighbor upper bound: {}".format(violent_estimator.nn_bound()))

--- ALL INPUT FEATURES EXCLUDING DEMOGRAPHICS: RE-ARREST ---
Mahalanobis upper bound: 0.42518241798423584
Bhattacharrya upper bound: (nan, nan)
Nearest Neighbor upper bound: (0.28214522881031767, 0.40507859733978235)


In [44]:
# COMPAS (re-arrest pickle): BER bounds estimated separately on each
# demographic subgroup of the training split, protected columns removed.
# 'race_is_causasian' is spelled the way the column is referenced throughout
# this notebook; keep it in sync with the data.
COMPAS_ARREST_PROTECTED_COLS = ['race_is_causasian', 'race_is_african_american', 'race_is_hispanic', 'female']

# Load the splits once; each subgroup is sliced from these untouched frames.
# NOTE: pickle.load can execute arbitrary code -- only open trusted files.
with open('Data/compas/processed_arrest_nonsingular.pkl', 'rb') as pkl_file:
    compas_arrest_splits = pickle.load(pkl_file)


def report_compas_arrest_group(label, column, value):
    """Print a blank line, ``label`` and the three BER bounds for one subgroup.

    Args:
        label: Heading printed before the bounds (e.g. ``"BLACK"``).
        column: Indicator column used to select the subgroup.
        value: Value of ``column`` that marks membership.

    Returns:
        ``(X_train, y_train, X_test, y_test, estimator)`` for the subgroup, as
        numpy arrays plus the fitted ``BEREstimator``. The test arrays are
        filtered by row only (protected columns kept) and are not used by the
        estimator.
    """
    raw_X_train, raw_y_train, raw_X_test, raw_y_test = compas_arrest_splits
    train_rows = raw_X_train[column] == value
    test_rows = raw_X_test[column] == value
    group_X_train = np.array(raw_X_train[train_rows].drop(labels=COMPAS_ARREST_PROTECTED_COLS, axis=1))
    group_y_train = np.array(raw_y_train[train_rows])
    group_X_test = np.array(raw_X_test[test_rows])
    group_y_test = np.array(raw_y_test[test_rows])
    estimator = BEREstimator(group_X_train, group_y_train)
    print()
    print(label)
    print("\tMahalanobis upper bound: {}".format(estimator.mahalanobis_bound()))
    print("\tBhattacharrya upper bound: {}".format(estimator.bhattacharyya_bound()))
    print("\tNearest Neighbor upper bound: {}".format(estimator.nn_bound()))
    return group_X_train, group_y_train, group_X_test, group_y_test, estimator


print("----Decoupled Race-----")
report_compas_arrest_group("BLACK", 'race_is_african_american', 1)
report_compas_arrest_group("WHITE", 'race_is_causasian', 1)

print("-----Decoupled Sex-------")
report_compas_arrest_group("FEMALE", 'female', 1)
# The last group's arrays/estimator are bound to the usual names so the cell
# leaves the same globals behind as the step-by-step version did.
X_train, y_train, X_test, y_test, compas_estimator = report_compas_arrest_group("MALE", 'female', 0)



----Decoupled Race-----

BLACK
	Mahalanobis upper bound: 0.4286840162649842
	Bhattacharrya upper bound: (0.0018098295397597775, 0.04250357698825829)
	Nearest Neighbor upper bound: (0.4008563430739571, 0.48034107058266223)

WHITE
	Mahalanobis upper bound: 0.4233892364072617
	Bhattacharrya upper bound: (0.0, 0.0)
	Nearest Neighbor upper bound: (0.2877020430262899, 0.4098591549295775)
-----Decoupled Sex-------

FEMALE
	Mahalanobis upper bound: 0.3878514393076384
	Bhattacharrya upper bound: (0.0, 0.0)
	Nearest Neighbor upper bound: (0.25340151904196406, 0.3783783783783784)

MALE
	Mahalanobis upper bound: 0.42946490529994935
	Bhattacharrya upper bound: (0.0, 0.0)
	Nearest Neighbor upper bound: (0.34468901876110336, 0.4517569982132222)


## MIMIC-III

In [19]:
# Location of the MIMIC-III pickle; the MIMIC cells below read this `path`.
path = "../mimic_compressed.pkl"
# This pickle is unpacked as (X_train, X_test, y_train, y_test) -- a different
# order from the Adult/COMPAS pickles used elsewhere in this notebook.
# NOTE: pickle.load can execute arbitrary code -- only open trusted files.
with open(path, 'rb') as pkl_file:
    # `with` guarantees the file handle is closed once the splits are loaded.
    X_train, X_test, y_train, y_test = pickle.load(pkl_file)
# Inspect the column labels to find the demographic indicator columns.
print(X_train.columns)

Index([                 0,                  1,                  2,
                        3,                  4,                  5,
                        6,                  7,                  8,
                        9,                 10,                 11,
                       12,                 13,                 14,
                       15,                 16,                 17,
                       18,                 19,                 20,
                       21,                 22,                 23,
                       24,                 25,                 26,
                       27,                 28,                 29,
                       30,                 31,                 32,
                       33,                 34,                 35,
                       36,                 37,                 38,
                       39,         'IS_SEX_M',       'SUBJECT_ID',
          'IS_RACE_BLACK',    'IS_RACE_WHITE', 'IS_RACE_HISPAN

In [5]:
# MIMIC-III: BER bounds on every column of the training frame.
# Relies on `path` having been defined by an earlier cell.
# NOTE: pickle.load can execute arbitrary code -- only open trusted files.
with open(path, 'rb') as pkl_file:
    # `with` guarantees the file handle is closed once the splits are loaded.
    X_train, X_test, y_train, y_test = pickle.load(pkl_file)
# NOTE(review): the column listing printed earlier includes 'SUBJECT_ID';
# nothing is dropped here, so it is passed to the estimator as a feature --
# confirm that is intended.
X_train, y_train, X_test, y_test = np.array(X_train), np.array(y_train), np.array(X_test), np.array(y_test)
mimic_estimator = BEREstimator(X_train, y_train)
print("--- ALL INPUT FEATURES (MIMIC-III) ---")
print("Mahalanobis upper bound: {}".format(mimic_estimator.mahalanobis_bound()))
print("Bhattacharrya upper bound: {}".format(mimic_estimator.bhattacharyya_bound()))
print("Nearest Neighbor upper bound: {}".format(mimic_estimator.nn_bound()))


--- ALL INPUT FEATURES (MIMIC-III) ---
Mahalanobis upper bound: 3.2936672086581615e-06
Bhattacharrya upper bound: (5.6900736372267335e-05, 0.007543043064869066)
Nearest Neighbor upper bound: (0.02357718593308067, 0.04604260447311523)


In [4]:
# MIMIC-III: BER bounds with the sex and race indicator columns removed from
# the training features. Relies on `path` having been defined by an earlier cell.
# NOTE: pickle.load can execute arbitrary code -- only open trusted files.
with open(path, 'rb') as pkl_file:
    # `with` guarantees the file handle is closed once the splits are loaded.
    X_train, X_test, y_train, y_test = pickle.load(pkl_file)
# `columns=` already selects the axis, so no separate `axis=1` is needed.
# NOTE(review): 'SUBJECT_ID' (seen in the column listing) is not in this drop
# list and therefore stays in the features -- confirm that is intended.
X_train = X_train.drop(columns=['IS_SEX_M', 'IS_RACE_BLACK', 'IS_RACE_WHITE', 'IS_RACE_HISPANIC', 'IS_RACE_ASIAN', 'IS_SEX_F'])
X_train, y_train, X_test, y_test = np.array(X_train), np.array(y_train), np.array(X_test), np.array(y_test)
mimic_estimator = BEREstimator(X_train, y_train)
# The header now states that demographics are excluded; it used to repeat the
# with-demographics cell's label verbatim. Re-run the cell to refresh any
# saved output.
print("--- ALL INPUT FEATURES EXCEPT DEMOGRAPHICS (MIMIC-III) ---")
print("Mahalanobis upper bound: {}".format(mimic_estimator.mahalanobis_bound()))
print("Bhattacharrya upper bound: {}".format(mimic_estimator.bhattacharyya_bound()))
print("Nearest Neighbor upper bound: {}".format(mimic_estimator.nn_bound()))


--- ALL INPUT FEATURES (MIMIC-III) ---
Mahalanobis upper bound: 3.2998323778607902e-06
Bhattacharrya upper bound: (6.879598411496657e-05, 0.008294049145473577)
Nearest Neighbor upper bound: (0.014275311838182647, 0.02814305462021054)


In [6]:
# MIMIC-III: BER bounds estimated separately on each demographic subgroup of
# the training split, with the sex and race indicator columns removed.
# Relies on `path` having been defined by an earlier cell.
# NOTE(review): 'SUBJECT_ID' (seen in the column listing) is not in this list
# and therefore stays in the features -- confirm that is intended.
MIMIC_PROTECTED_COLS = ['IS_SEX_M', 'IS_RACE_BLACK', 'IS_RACE_WHITE', 'IS_RACE_HISPANIC', 'IS_RACE_ASIAN', 'IS_SEX_F']

# Load the splits once; each subgroup is sliced from these untouched frames.
# NOTE: pickle.load can execute arbitrary code -- only open trusted files.
with open(path, 'rb') as pkl_file:
    mimic_splits = pickle.load(pkl_file)


def report_mimic_group(label, column, value):
    """Print a blank line, ``label`` and the three BER bounds for one subgroup.

    Args:
        label: Heading printed before the bounds (e.g. ``"BLACK"``).
        column: Indicator column used to select the subgroup.
        value: Value of ``column`` that marks membership.

    Returns:
        ``(X_train, y_train, X_test, y_test, estimator)`` for the subgroup, as
        numpy arrays plus the fitted ``BEREstimator``. The test arrays are
        filtered by row only (protected columns kept) and are not used by the
        estimator.
    """
    # This pickle stores the splits as (X_train, X_test, y_train, y_test).
    raw_X_train, raw_X_test, raw_y_train, raw_y_test = mimic_splits
    train_rows = raw_X_train[column] == value
    test_rows = raw_X_test[column] == value
    group_X_train = np.array(raw_X_train[train_rows].drop(labels=MIMIC_PROTECTED_COLS, axis=1))
    group_y_train = np.array(raw_y_train[train_rows])
    group_X_test = np.array(raw_X_test[test_rows])
    group_y_test = np.array(raw_y_test[test_rows])
    estimator = BEREstimator(group_X_train, group_y_train)
    print()
    print(label)
    print("\tMahalanobis upper bound: {}".format(estimator.mahalanobis_bound()))
    print("\tBhattacharrya upper bound: {}".format(estimator.bhattacharyya_bound()))
    print("\tNearest Neighbor upper bound: {}".format(estimator.nn_bound()))
    return group_X_train, group_y_train, group_X_test, group_y_test, estimator


print("----Decoupled Race-----")
report_mimic_group("BLACK", 'IS_RACE_BLACK', 1)
report_mimic_group("WHITE", 'IS_RACE_WHITE', 1)

print("-----Decoupled Sex-------")
# Unlike the other datasets, each sex has its own indicator column here.
report_mimic_group("FEMALE", 'IS_SEX_F', 1)
# The last group's arrays/estimator are bound to the usual names so the cell
# leaves the same globals behind as the step-by-step version did.
X_train, y_train, X_test, y_test, mimic_estimator = report_mimic_group("MALE", 'IS_SEX_M', 1)



----Decoupled Race-----

BLACK
	Mahalanobis upper bound: 3.15407309387005e-06
	Bhattacharrya upper bound: (3.046635292675015e-05, 0.005519549322914704)
	Nearest Neighbor upper bound: (0.03252170221056294, 0.06292808219178082)

WHITE
	Mahalanobis upper bound: 3.440601729633301e-06
	Bhattacharrya upper bound: (nan, 4.81008361793429)


  lower_bound = 0.5 * (1 - np.sqrt(1 - 4 * p_0 * p_1 * np.exp(-2 * b_dist)))


	Nearest Neighbor upper bound: (0.014386493626438512, 0.02835904485514994)
-----Decoupled Sex-------

FEMALE
	Mahalanobis upper bound: 3.373440743699323e-06
	Bhattacharrya upper bound: (9.697311294515343e-05, 0.009847015241205678)
	Nearest Neighbor upper bound: (0.018117362749018096, 0.03557824783207715)

MALE
	Mahalanobis upper bound: 3.081586015114737e-06
	Bhattacharrya upper bound: (2.8027514198392112e-08, 0.00016741419718538654)
	Nearest Neighbor upper bound: (0.016200466973422645, 0.03187602368653143)


# Save estimates

In [14]:
# Persist the Adult Income bounds (all slices) to ber_bounds/adult_bounds.pkl.
save_estimates(
    load_path='Data/adult_income/processed_data.pkl',
    save_path='ber_bounds/adult_bounds.pkl',
    black_key='race_Black',
    white_key='race_White',
    female_key='sex_Female',
    drop_cols=['race_Other', 'race_Black', 'race_White', 'sex_Male', 'sex_Female'],
)

defaultdict(<class 'dict'>,
            {'bhatt': {'all_no_demographic': (0.006264390495551109,
                                              0.07889960650897043),
                       'all_with_demographic': (0.00018925250028173357,
                                                0.01375560554002971),
                       'black': (0.0, 0.0),
                       'female': (0.0, 0.0),
                       'male': (0.0, 0.0),
                       'white': (0.0, 1.8233875173788025e-11)},
             'maha': {'all_no_demographic': 0.23958132853260622,
                      'all_with_demographic': 0.2383977285329773,
                      'black': 0.14569061587948332,
                      'female': 0.12883439717256784,
                      'male': 0.28570593841859737,
                      'white': 0.24947258980904852},
             'nn': {'all_no_demographic': (0.11406516967703556,
                                           0.2021086134871693),
                    'all_with_

In [15]:
# Persist the COMPAS violent-crime bounds (all slices) to
# ber_bounds/compas_violent_bounds.pkl.
save_estimates(
    load_path='Data/compas/processed_violent_nonsingular.pkl',
    save_path='ber_bounds/compas_violent_bounds.pkl',
    black_key='race_is_african_american',
    white_key='race_is_causasian',
    female_key='female',
    drop_cols=['race_is_causasian', 'race_is_african_american', 'race_is_hispanic', 'female'],
)

defaultdict(<class 'dict'>,
            {'bhatt': {'all_no_demographic': (0.0, 9.418429699515122e-10),
                       'all_with_demographic': (0.00010508635518702292,
                                                0.010250624958750676),
                       'black': (0.020318962310274413, 0.14108898639124204),
                       'female': (nan, nan),
                       'male': (nan, nan),
                       'white': (0.0, 0.0)},
             'maha': {'all_no_demographic': 0.19647007198919228,
                      'all_with_demographic': 0.19586095536150863,
                      'black': 0.23171405034927542,
                      'female': 0.13330424601679006,
                      'male': 0.20918503644901543,
                      'white': 0.15323894932606172},
             'nn': {'all_no_demographic': (0.06533241098704945,
                                           0.12212817412333736),
                    'all_with_demographic': (0.0823570486352398,
         

  b_dist = first_term + second_term + third_term
  b_dist = first_term + second_term + third_term


In [16]:
# Persist the COMPAS re-arrest bounds (all slices) to
# ber_bounds/compas_arrest_bounds.pkl.
save_estimates(
    load_path='Data/compas/processed_arrest_nonsingular.pkl',
    save_path='ber_bounds/compas_arrest_bounds.pkl',
    black_key='race_is_african_american',
    white_key='race_is_causasian',
    female_key='female',
    drop_cols=['race_is_causasian', 'race_is_african_american', 'race_is_hispanic', 'female'],
)

defaultdict(<class 'dict'>,
            {'bhatt': {'all_no_demographic': (nan, nan),
                       'all_with_demographic': (0.0, 0.0),
                       'black': (0.0018098295397597775, 0.04250357698825829),
                       'female': (0.0, 0.0),
                       'male': (0.0, 0.0),
                       'white': (0.0, 0.0)},
             'maha': {'all_no_demographic': 0.42518241798423584,
                      'all_with_demographic': 0.4223692451173532,
                      'black': 0.4286840162649842,
                      'female': 0.3878514393076384,
                      'male': 0.42946490529994935,
                      'white': 0.4233892364072617},
             'nn': {'all_no_demographic': (0.28214522881031767,
                                           0.40507859733978235),
                    'all_with_demographic': (0.3235442308898998,
                                             0.437726723095526),
                    'black': (0.4008563430739571,

In [18]:
# Persist the MIMIC-III bounds (all slices) to ber_bounds/mimic_bounds.pkl.
# NOTE(review): save_estimates unpacks its pickle as
# (X_train, y_train, X_test, y_test), whereas the MIMIC-III cells unpack this
# same file as (X_train, X_test, y_train, y_test) -- confirm the stored order.
# NOTE(review): 'SUBJECT_ID' is not in drop_cols, so it stays in the features.
save_estimates(
    load_path="../mimic_compressed.pkl",
    save_path='ber_bounds/mimic_bounds.pkl',
    black_key='IS_RACE_BLACK',
    white_key='IS_RACE_WHITE',
    female_key='IS_SEX_F',
    drop_cols=['IS_SEX_M', 'IS_RACE_BLACK', 'IS_RACE_WHITE', 'IS_RACE_HISPANIC', 'IS_RACE_ASIAN', 'IS_SEX_F'],
)

  lower_bound = 0.5 * (1 - np.sqrt(1 - 4 * p_0 * p_1 * np.exp(-2 * b_dist)))


defaultdict(<class 'dict'>,
            {'bhatt': {'all_no_demographic': (6.879598411496657e-05,
                                              0.008294049145473577),
                       'all_with_demographic': (5.6900736372267335e-05,
                                                0.007543043064869066),
                       'black': (3.046635292675015e-05, 0.005519549322914704),
                       'female': (9.697311294515343e-05, 0.009847015241205678),
                       'male': (2.8027514198392112e-08, 0.00016741419718538654),
                       'white': (nan, 4.81008361793429)},
             'maha': {'all_no_demographic': 3.2998323778607902e-06,
                      'all_with_demographic': 3.2936672086581615e-06,
                      'black': 3.15407309387005e-06,
                      'female': 3.373440743699323e-06,
                      'male': 3.081586015114737e-06,
                      'white': 3.440601729633301e-06},
             'nn': {'all_no_demographic