In [1]:
import numpy as np
import pandas as pd

In [2]:
# Set up 4 groups - white low-risk (wl), white high-risk (wh), black low-risk (bl), black high-risk (bh) - with a 
# crime/recidivism predictor value that is correlated with crime propensity both directly and via correlation with race   
# (independent of propensity):

# Seed NumPy's global RNG before the first random draw. The later cells in this
# notebook that call np.random.* draw from the same global stream, so a fresh
# Restart & Run All now reproduces the same data (and the same downstream metrics).
np.random.seed(42)

# Predictor scores: one normal distribution (standard deviation 1) per group.
# The high-propensity means exceed the low-propensity means, and within each
# propensity level the black group's mean exceeds the white group's.
wl = np.random.normal(3.2, 1, 2000)  # white, low propensity
wh = np.random.normal(5.7, 1, 1000)  # white, high propensity
bl = np.random.normal(3.9, 1, 1000)  # black, low propensity
bh = np.random.normal(7.3, 1, 2000)  # black, high propensity

In [3]:
# Set up crime/recidivism outcomes: those in the low-propensity category commit a crime 25% of the time, while those 
# in the high-propensity category commit a crime 75% of the time (in whatever time frame is at issue)

# Binary outcome per individual (1 = crime, 0 = no crime). Each value starts as a
# uniform draw on [0, 1): a draw above the group's crime rate becomes 0, otherwise 1.

# White, low propensity: 25% crime rate.
wl_outcomes = np.where(np.random.uniform(0, 1, 2000) > 0.25, 0, 1)

# White, high propensity: 75% crime rate.
wh_outcomes = np.where(np.random.uniform(0, 1, 1000) > 0.75, 0, 1)

# Black, low propensity: 25% crime rate.
bl_outcomes = np.where(np.random.uniform(0, 1, 1000) > 0.25, 0, 1)

# Black, high propensity: 75% crime rate.
bh_outcomes = np.where(np.random.uniform(0, 1, 2000) > 0.75, 0, 1)

In [4]:
# Make a dataframe:

# One frame per race/propensity group. The scalar 'Race' and 'Propensity' values are
# broadcast to the length of that group's predictor and outcome arrays.
df_wl = pd.DataFrame({'Race':'W','Propensity':0,'Predictor':wl,'Outcome':wl_outcomes})
df_wh = pd.DataFrame({'Race':'W','Propensity':1,'Predictor':wh,'Outcome':wh_outcomes})
df_bl = pd.DataFrame({'Race':'B','Propensity':0,'Predictor':bl,'Outcome':bl_outcomes})
df_bh = pd.DataFrame({'Race':'B','Propensity':1,'Predictor':bh,'Outcome':bh_outcomes})

# Stack the groups into one frame. ignore_index=True renumbers the rows 0..n-1;
# without it each group keeps its own 0-based index, so labels repeat across groups
# and label-based lookups or index-aligned assignments on df become ambiguous.
df = pd.concat([df_wl,df_wh,df_bl,df_bh], ignore_index=True)

In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

In [6]:
# Predict outcomes using a logistic regression model:

# Target is the observed outcome; the single feature is kept as a one-column
# DataFrame (two-dimensional) for the estimator.
y = df['Outcome']
X = df[['Predictor']]

# Fit on the full data set, then predict for those same rows (no held-out split here).
clf = LogisticRegression()
clf.fit(X, y)
preds = clf.predict(X)

# Store the hard 0/1 predictions alongside the inputs for the later breakdowns.
df['Predictions'] = preds

In [7]:
# Compare predictions to outcomes:

confusion_matrix(y,preds)

array([[2192,  802],
       [ 908, 2098]])

In [8]:
accuracy_score(y,preds)

0.715

In [9]:
# Compare predictions to actual propensities:

confusion_matrix(df['Propensity'],preds)

array([[2815,  185],
       [ 285, 2715]])

In [10]:
accuracy_score(df['Propensity'],preds)

0.9216666666666666

In [11]:
# Note that the model predicts propensities very well, but the accuracy is substantially lower when actual outcomes
# are used because of the additional randomness/uncertainty.

In [12]:
# Break down results by race:

# Row subsets per race, selected with a boolean mask on the 'Race' column.
df_white = df.loc[df['Race'].eq('W')]
df_black = df.loc[df['Race'].eq('B')]

In [13]:
# Compare predictions to actual outcomes for whites only:

cmw1 = confusion_matrix(df_white['Outcome'],df_white['Predictions'])
cmw1

array([[1510,  232],
       [ 687,  571]])

In [14]:
# Compare predictions to actual outcomes for blacks only:

cmb1 = confusion_matrix(df_black['Outcome'],df_black['Predictions'])
cmb1

array([[ 682,  570],
       [ 221, 1527]])

In [15]:
# To bring out the differences in the confusion matrices, functions are defined for 'positive predictive value' and
# 'false positive rate':

def ppv(cm):
    """Return the positive predictive value (precision) of a 2x2 confusion matrix.

    Parameters
    ----------
    cm : array-like of shape (2, 2)
        Confusion matrix in the layout scikit-learn's ``confusion_matrix`` returns:
        rows are actual classes, columns are predicted classes, so ``cm[1, 1]`` is
        the true-positive count and ``cm[0, 1]`` the false-positive count. NumPy
        arrays and nested lists are both accepted.

    Returns
    -------
    float
        ``TP / (TP + FP)``, or ``nan`` when nothing was predicted positive.
    """
    # Coerce so that plain nested lists support the [row, col] indexing below.
    cm = np.asarray(cm)
    true_pos, false_pos = cm[1, 1], cm[0, 1]
    predicted_pos = true_pos + false_pos
    # Handle 0/0 explicitly instead of relying on NumPy's warning-and-nan behaviour.
    if predicted_pos == 0:
        return float('nan')
    return true_pos / predicted_pos

def fp_rate(cm):
    """Return the false positive rate of a 2x2 confusion matrix.

    Parameters
    ----------
    cm : array-like of shape (2, 2)
        Confusion matrix in the layout scikit-learn's ``confusion_matrix`` returns:
        rows are actual classes, columns are predicted classes, so ``cm[0, 1]`` is
        the false-positive count and ``cm[0, 0]`` the true-negative count. NumPy
        arrays and nested lists are both accepted.

    Returns
    -------
    float
        ``FP / (FP + TN)``, or ``nan`` when there are no actual negatives.
    """
    # Coerce so that plain nested lists support the [row, col] indexing below.
    cm = np.asarray(cm)
    false_pos, true_neg = cm[0, 1], cm[0, 0]
    actual_neg = false_pos + true_neg
    # Handle 0/0 explicitly instead of relying on NumPy's warning-and-nan behaviour.
    if actual_neg == 0:
        return float('nan')
    return false_pos / actual_neg

In [16]:
# Positive predictive value is nearly the same for both groups:

ppv(cmw1)

0.7110834371108343

In [17]:
ppv(cmb1)

0.7281831187410587

In [18]:
# False positive rate is substantially higher for blacks:

fp_rate(cmw1)

0.13318025258323765

In [19]:
fp_rate(cmb1)

0.45527156549520764

In [20]:
# Repeating the process but now for predictions compared to actual propensities:

cmw2 = confusion_matrix(df_white['Propensity'],df_white['Predictions'])
cmw2

array([[1941,   59],
       [ 256,  744]])

In [21]:
cmb2 = confusion_matrix(df_black['Propensity'],df_black['Predictions'])
cmb2

array([[ 874,  126],
       [  29, 1971]])

In [22]:
# As expected, positive predictive value is higher - and still roughly equal - for both groups:

ppv(cmw2)

0.9265255292652553

In [23]:
ppv(cmb2)

0.9399141630901288

In [24]:
# The false positive rate is still proportionally much higher for blacks, but the absolute rates - and 
# absolute difference - are now much lower:

fp_rate(cmw2)

0.0295

In [25]:
fp_rate(cmb2)

0.126

In [26]:
# Testing the impact of a higher threshold for crime/recidivism prediction:

# Probability of class 1 for every row (column index 1 of predict_proba's output).
pos_probs = clf.predict_proba(X)[:, 1]

# Label a row 1 only when that probability is above 0.6, otherwise 0.
adj_preds = (pos_probs > 0.6).astype(int)
df['Adjusted Predictions'] = adj_preds

# Compare adjusted predictions (with higher threshold) to actual propensities:

confusion_matrix(df['Propensity'],adj_preds)

array([[2981,   19],
       [ 770, 2230]])

In [27]:
# Overall accuracy declines slightly:

accuracy_score(df['Propensity'],adj_preds)

0.8685

In [28]:
# Breaking down results by race:

# Rebuild the per-race subsets from df so that they include the
# 'Adjusted Predictions' column used in the comparisons that follow.
df_white = df.loc[df['Race'].eq('W')]
df_black = df.loc[df['Race'].eq('B')]

In [29]:
# Comparing adjusted predictions to actual propensities for whites only:

cmw3 = confusion_matrix(df_white['Propensity'],df_white['Adjusted Predictions'])
cmw3

array([[1994,    6],
       [ 603,  397]])

In [30]:
# Comparing adjusted predictions to actual propensities for blacks only:

cmb3 = confusion_matrix(df_black['Propensity'],df_black['Adjusted Predictions'])
cmb3

array([[ 987,   13],
       [ 167, 1833]])

In [31]:
# The positive predictive value has increased in both groups:

ppv(cmw3)

0.9851116625310173

In [32]:
ppv(cmb3)

0.9929577464788732

In [33]:
# The false positive rate has declined dramatically in both groups, and the absolute difference is now very small. 
# (The errors have been shifted to false negatives)

fp_rate(cmw3)

0.003

In [34]:
fp_rate(cmb3)

0.013

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

In [85]:
# A slightly different approach: start with a perfectly fair predictor (i.e. with no independent association with race).
# Specifically, 70% of high-propensity individuals have this characteristic vs. only 30% of low-propensity individuals 

# Binary predictor flag per individual (1 = has the characteristic, 0 = does not),
# derived from a uniform draw on [0, 1) compared against 0.3.

# White, low propensity: draws above 0.3 map to 0, so roughly 30% are flagged.
wl2 = np.where(np.random.uniform(0, 1, 2000) > 0.3, 0, 1)

# White, high propensity: draws above 0.3 map to 1, so roughly 70% are flagged.
wh2 = np.where(np.random.uniform(0, 1, 1000) > 0.3, 1, 0)

# Black, low propensity: same rule as the white low-propensity group.
bl2 = np.where(np.random.uniform(0, 1, 1000) > 0.3, 0, 1)

# Black, high propensity: same rule as the white high-propensity group.
bh2 = np.where(np.random.uniform(0, 1, 2000) > 0.3, 1, 0)

In [86]:
# Make a dataframe (with no 'Outcome' column this time, as focus will be exclusively on predicting propensity):

# One frame per race/propensity group. The scalar 'Race' and 'Propensity' values are
# broadcast to the length of that group's binary predictor array.
df_wl2 = pd.DataFrame({'Race':'W','Propensity':0,'Predictor':wl2})
df_wh2 = pd.DataFrame({'Race':'W','Propensity':1,'Predictor':wh2})
df_bl2 = pd.DataFrame({'Race':'B','Propensity':0,'Predictor':bl2})
df_bh2 = pd.DataFrame({'Race':'B','Propensity':1,'Predictor':bh2})

# Stack the groups into one frame. ignore_index=True renumbers the rows 0..n-1;
# without it each group keeps its own 0-based index and the labels repeat across
# groups, which makes label-based lookups on df2 ambiguous.
df2 = pd.concat([df_wl2,df_wh2,df_bl2,df_bh2], ignore_index=True)

In [87]:
# Predict propensities using a logistic regression model:

# Target is the propensity label itself; the single binary feature is kept as a
# one-column DataFrame (two-dimensional) for the estimator.
y2 = df2['Propensity']
X2 = df2[['Predictor']]

# Fit on the full data set, then predict for those same rows.
clf2 = LogisticRegression()
clf2.fit(X2, y2)
preds2 = clf2.predict(X2)

# Store the hard 0/1 predictions alongside the inputs for the later breakdowns.
df2['Predictions'] = preds2

In [88]:
# Compare predictions to propensities:

confusion_matrix(y2,preds2)

array([[2096,  904],
       [ 904, 2096]])

In [89]:
accuracy_score(y2,preds2)

0.6986666666666667

In [90]:
ppv(confusion_matrix(y2,preds2))

0.6986666666666667

In [91]:
fp_rate(confusion_matrix(y2,preds2))

0.30133333333333334

In [92]:
# Break down results by race:

# Row subsets of df2 per race, selected with a boolean mask on the 'Race' column.
df2_white = df2.loc[df2['Race'].eq('W')]
df2_black = df2.loc[df2['Race'].eq('B')]

In [93]:
# Accuracy scores are essentially equal:

accuracy_score(df2_white['Propensity'],df2_white['Predictions'])

0.6986666666666667

In [94]:
accuracy_score(df2_black['Propensity'],df2_black['Predictions'])

0.6986666666666667

In [95]:
# Positive predictive value is NOT equal (due to base rate differences):

ppv(confusion_matrix(df2_white['Propensity'],df2_white['Predictions']))

0.536697247706422

In [96]:
ppv(confusion_matrix(df2_black['Propensity'],df2_black['Predictions']))

0.8238770685579196

In [97]:
# False positive rates are essentially equal (reflecting the fairness of the predictor):

fp_rate(confusion_matrix(df2_white['Propensity'],df2_white['Predictions']))

0.303

In [98]:
fp_rate(confusion_matrix(df2_black['Propensity'],df2_black['Predictions']))

0.298

In [99]:
# Now, add a predictor that is correlated WITH RACE ONLY (not propensity):

# Add a second feature to each group frame, drawn from a normal distribution (std 1)
# whose mean depends only on race: 2 for both white groups, 5 for both black groups.
# NOTE: this adds the column to the existing df_*2 frames in place.
df_wl2['Race Predictor'] = np.random.normal(2,1,2000)
df_wh2['Race Predictor'] = np.random.normal(2,1,1000)
df_bl2['Race Predictor'] = np.random.normal(5,1,1000)
df_bh2['Race Predictor'] = np.random.normal(5,1,2000)

# Stack the groups into one frame. ignore_index=True renumbers the rows 0..n-1;
# without it each group keeps its own 0-based index and the labels repeat across
# groups, which makes label-based lookups on df3 ambiguous.
df3 = pd.concat([df_wl2,df_wh2,df_bl2,df_bh2], ignore_index=True)

In [100]:
# Predict propensities using a logistic regression model with this new predictor included:

# Target is the propensity label; the features are the binary predictor plus the
# race-linked predictor.
y3 = df3['Propensity']
X3 = df3[['Predictor','Race Predictor']]

# Fit on the full data set, then predict for those same rows.
clf3 = LogisticRegression()
clf3.fit(X3, y3)
preds3 = clf3.predict(X3)

# Store the hard 0/1 predictions alongside the inputs for the later breakdowns.
df3['Predictions'] = preds3

In [101]:
# Compare predictions to propensities:

confusion_matrix(y3,preds3)

array([[2089,  911],
       [ 910, 2090]])

In [102]:
accuracy_score(y3,preds3)

0.6965

In [103]:
ppv(confusion_matrix(y3,preds3))

0.696434521826058

In [105]:
fp_rate(confusion_matrix(y3,preds3))

0.30366666666666664

In [106]:
# Break down results by race:

# Row subsets of df3 per race, selected with a boolean mask on the 'Race' column.
df3_white = df3.loc[df3['Race'].eq('W')]
df3_black = df3.loc[df3['Race'].eq('B')]

In [107]:
accuracy_score(df3_white['Propensity'],df3_white['Predictions'])

0.6946666666666667

In [108]:
accuracy_score(df3_black['Propensity'],df3_black['Predictions'])

0.6983333333333334

In [109]:
# Positive predictive values are now a bit closer together:

ppv(confusion_matrix(df3_white['Propensity'],df3_white['Predictions']))

0.5375

In [110]:
ppv(confusion_matrix(df3_black['Propensity'],df3_black['Predictions']))

0.7910685805422647

In [111]:
# False positive rates are starting to diverge (increasing for blacks, decreasing for whites):

fp_rate(confusion_matrix(df3_white['Propensity'],df3_white['Predictions']))

0.259

In [112]:
fp_rate(confusion_matrix(df3_black['Propensity'],df3_black['Predictions']))

0.393