In [2]:
import sys
# Install the pinned Aequitas release by running pip through the interpreter
# of the running kernel (sys.executable), so the package is installed for
# the same Python that executes this notebook.
!{sys.executable} -m pip install aequitas==0.42

Collecting aequitas==0.42
  Using cached aequitas-0.42.0-py3-none-any.whl (2.2 MB)
Collecting Flask==0.12.2
  Downloading Flask-0.12.2-py2.py3-none-any.whl (83 kB)
[K     |████████████████████████████████| 83 kB 2.4 MB/s eta 0:00:011
[?25hCollecting pandas>=0.24.1
  Using cached pandas-1.1.5-cp36-cp36m-macosx_10_9_x86_64.whl (10.2 MB)
Collecting altair==4.1.0
  Downloading altair-4.1.0-py3-none-any.whl (727 kB)
[K     |████████████████████████████████| 727 kB 8.3 MB/s eta 0:00:01
[?25hCollecting xhtml2pdf==0.2.2
  Downloading xhtml2pdf-0.2.2.tar.gz (97 kB)
[K     |████████████████████████████████| 97 kB 3.6 MB/s eta 0:00:011
[?25hCollecting millify==0.1.1
  Downloading millify-0.1.1.tar.gz (1.2 kB)
Collecting Flask-Bootstrap==3.3.7.1
  Downloading Flask-Bootstrap-3.3.7.1.tar.gz (456 kB)
[K     |████████████████████████████████| 456 kB 3.4 MB/s eta 0:00:01
[?25hCollecting markdown2==2.3.5
  Downloading markdown2-2.3.5.zip (161 kB)
[K     |████████████████████████████████| 161 k

In [3]:
import pandas as pd
from aequitas.group import Group
from aequitas.bias import Bias
from aequitas.fairness import Fairness
import aequitas.plot as ap

# Lift pandas' display truncation so that every column and every row
# of a dataframe is rendered.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [4]:
# Load the data and take an initial look at it.
# The dataset contains a recidivism score, i.e. a prediction of whether
# a convicted person is going to break the law again.
# NOTE(review): label_value is presumably the observed outcome -- confirm.
df = pd.read_csv("data/compas_for_aequitas.csv")
print(df.shape)
df.head()

(7214, 6)


Unnamed: 0,entity_id,score,label_value,race,sex,age_cat
0,1,0.0,0,Other,Male,Greater than 45
1,3,0.0,1,African-American,Male,25 - 45
2,4,0.0,1,African-American,Male,Less than 25
3,5,1.0,0,African-American,Male,Less than 25
4,6,0.0,0,Other,Male,25 - 45


In [5]:
# Race is the protected attribute we will be exploring;
# count how many rows each race group has.
df["race"].value_counts()

African-American    3696
Caucasian           2454
Hispanic             637
Other                377
Asian                 32
Native American       18
Name: race, dtype: int64

In [8]:
# Drop the race groups that have too few rows in this dataset
# (Asian: 32, Native American: 18).
df = df.loc[~df["race"].isin(["Asian", "Native American"])]

In [14]:
# Peek at the first rows of the filtered dataframe.
df.head()

Unnamed: 0,entity_id,score,label_value,race,sex,age_cat
0,1,0.0,0,Other,Male,Greater than 45
1,3,0.0,1,African-American,Male,25 - 45
2,4,0.0,1,African-American,Male,Less than 25
3,5,1.0,0,African-American,Male,Less than 25
4,6,0.0,0,Other,Male,25 - 45


In [15]:
# Row count after filtering: the 50 rows of the two small groups (32 + 18) are gone.
df.shape

(7164, 6)

### Create Crosstab

Create the crosstab that forms the basis for all the subsequent analyses.

In [16]:
# We instantiate a Group class and
# create a crosstab: the basis for all subsequent analyses.
group = Group()
# get_crosstabs returns two values; we keep the crosstab and ignore the
# second one (presumably an index of the attribute columns -- TODO confirm).
xtab, _ = group.get_crosstabs(df)

# The crosstab contains the data slicing statistics.
# For each categorical feature and each level/group in them
# we have a slice/group for which the statistics are computed.
# The abbreviations of the statistics are listed below.
# Transposed so that each slice/group is a column and each statistic a row.
xtab.T

Unnamed: 0,0,1,2,3,4,5,6,7,8
model_id,0,0,0,0,0,0,0,0,0
score_threshold,binary 0/1,binary 0/1,binary 0/1,binary 0/1,binary 0/1,binary 0/1,binary 0/1,binary 0/1,binary 0/1
k,3297,3297,3297,3297,3297,3297,3297,3297,3297
attribute_name,race,race,race,race,sex,sex,age_cat,age_cat,age_cat
attribute_value,African-American,Caucasian,Hispanic,Other,Female,Male,25 - 45,Greater than 45,Less than 25
tpr,0.720147,0.522774,0.443966,0.323308,0.607287,0.628196,0.625399,0.424797,0.738928
tnr,0.551532,0.765457,0.785185,0.852459,0.678212,0.67435,0.665607,0.830841,0.455371
for,0.34954,0.288125,0.288591,0.302013,0.242197,0.332029,0.324885,0.241468,0.426667
fdr,0.370285,0.408665,0.457895,0.455696,0.489796,0.365079,0.384736,0.464103,0.362173
fpr,0.448468,0.234543,0.214815,0.147541,0.321788,0.32565,0.334393,0.169159,0.544629


Abbrev. | Name
--- | ---
tpr | True Positive Rate
tnr | True Negative Rate
for | False Omission Rate
fdr | False Discovery Rate
fpr | False Positive Rate
fnr | False Negative Rate
npv | Negative Predictive Value
pp | Predicted Positive Count
pn | Predicted Negative Count
ppr | Predicted Positive Ratio_k
pprev | Predicted Positive Ratio_g
fp | False Positives
fn | False Negatives
tn | True Negatives
tp | True Positives
prev | Group Prevalence

### Compute Bias

We calculate the bias (disparity) of each group relative to a reference group that we set manually.

In [18]:
# We instantiate a Bias class
# and compute disparities of the slices
# wrt. a reference group.
# We specify the reference group of each attribute in ref_groups_dict:
# Caucasian for race, Male for sex and 25 - 45 for age_cat.
# (To use the majority group/slice as reference instead,
# there is get_disparity_major_group.)
# We get the same statistics as before + disparity statistics,
# i.e. each group's metrics relative to the reference group
# (NOTE(review): presumably ratios rather than differences -- confirm in the Aequitas docs).
bias = Bias()
bias_df = bias.get_disparity_predefined_groups(xtab,
                                               original_df=df,
                                               ref_groups_dict={"race": "Caucasian", "sex": "Male", "age_cat": "25 - 45"},
                                               alpha=0.05,
                                               mask_significance=True)
# Transposed so that each slice/group is a column and each statistic a row.
bias_df.T

get_disparity_predefined_group()


Unnamed: 0,0,1,2,3,4,5,6,7,8
model_id,0,0,0,0,0,0,0,0,0
score_threshold,binary 0/1,binary 0/1,binary 0/1,binary 0/1,binary 0/1,binary 0/1,binary 0/1,binary 0/1,binary 0/1
k,3297,3297,3297,3297,3297,3297,3297,3297,3297
attribute_name,race,race,race,race,sex,sex,age_cat,age_cat,age_cat
attribute_value,African-American,Caucasian,Hispanic,Other,Female,Male,25 - 45,Greater than 45,Less than 25
tpr,0.720147,0.522774,0.443966,0.323308,0.607287,0.628196,0.625399,0.424797,0.738928
tnr,0.551532,0.765457,0.785185,0.852459,0.678212,0.67435,0.665607,0.830841,0.455371
for,0.34954,0.288125,0.288591,0.302013,0.242197,0.332029,0.324885,0.241468,0.426667
fdr,0.370285,0.408665,0.457895,0.455696,0.489796,0.365079,0.384736,0.464103,0.362173
fpr,0.448468,0.234543,0.214815,0.147541,0.321788,0.32565,0.334393,0.169159,0.544629


Another common option is to use the majority group as the reference:

In [20]:
# If we don't specify any reference group, the majority
# group/slice is taken as reference -- presumably one per attribute:
# African-American (race), Male (sex) and 25 - 45 (age_cat); confirm in the output.
bias.get_disparity_major_group(xtab,
                               original_df=df,
                               alpha=0.05,
                               mask_significance=True).T

get_disparity_major_group()


Unnamed: 0,0,1,2,3,4,5,6,7,8
model_id,0,0,0,0,0,0,0,0,0
score_threshold,binary 0/1,binary 0/1,binary 0/1,binary 0/1,binary 0/1,binary 0/1,binary 0/1,binary 0/1,binary 0/1
k,3297,3297,3297,3297,3297,3297,3297,3297,3297
attribute_name,race,race,race,race,sex,sex,age_cat,age_cat,age_cat
attribute_value,African-American,Caucasian,Hispanic,Other,Female,Male,25 - 45,Greater than 45,Less than 25
tpr,0.720147,0.522774,0.443966,0.323308,0.607287,0.628196,0.625399,0.424797,0.738928
tnr,0.551532,0.765457,0.785185,0.852459,0.678212,0.67435,0.665607,0.830841,0.455371
for,0.34954,0.288125,0.288591,0.302013,0.242197,0.332029,0.324885,0.241468,0.426667
fdr,0.370285,0.408665,0.457895,0.455696,0.489796,0.365079,0.384736,0.464103,0.362173
fpr,0.448468,0.234543,0.214815,0.147541,0.321788,0.32565,0.334393,0.169159,0.544629


In [22]:
# The goal we have with Aequitas: assess how fairly the scores treat each group.
# We get the same statistics as before + fairness true/false parity values,
# computed from the disparities in bias_df.
fairness = Fairness()
fairness_df = fairness.get_group_value_fairness(bias_df)
# Transposed so that each slice/group is a column and each statistic a row.
fairness_df.T

Unnamed: 0,0,1,2,3,4,5,6,7,8
model_id,0,0,0,0,0,0,0,0,0
score_threshold,binary 0/1,binary 0/1,binary 0/1,binary 0/1,binary 0/1,binary 0/1,binary 0/1,binary 0/1,binary 0/1
k,3297,3297,3297,3297,3297,3297,3297,3297,3297
attribute_name,race,race,race,race,sex,sex,age_cat,age_cat,age_cat
attribute_value,African-American,Caucasian,Hispanic,Other,Female,Male,25 - 45,Greater than 45,Less than 25
tpr,0.720147,0.522774,0.443966,0.323308,0.607287,0.628196,0.625399,0.424797,0.738928
tnr,0.551532,0.765457,0.785185,0.852459,0.678212,0.67435,0.665607,0.830841,0.455371
for,0.34954,0.288125,0.288591,0.302013,0.242197,0.332029,0.324885,0.241468,0.426667
fdr,0.370285,0.408665,0.457895,0.455696,0.489796,0.365079,0.384736,0.464103,0.362173
fpr,0.448468,0.234543,0.214815,0.147541,0.321788,0.32565,0.334393,0.169159,0.544629


In [13]:
# We can get overall/summary values; each one is a single True/False flag.
# - Unsupervised fairness: composed of some of the parity metrics; if one is False, the value is False
#   (NOTE(review): presumably the parities that only need the predictions -- confirm in the Aequitas docs)
# - Supervised fairness: composed of some of the parity metrics; if one is False, the value is False
#   (NOTE(review): presumably the parities that compare predictions with label_value -- confirm)
# - Overall fairness: presumably False as soon as one of the two above is False
overall_fairness = fairness.get_overall_fairness(fairness_df)
print(overall_fairness)

{'Unsupervised Fairness': False, 'Supervised Fairness': False, 'Overall Fairness': False}


In [24]:
# We can also get summary value plots
# for selected metrics and a specified tolerance.
# Aequitas has many more plots; this is only one of them.
# See metric abbreviations above.
# Hover over the plot!
metrics = ['fpr', 'fnr', 'for']
disparity_tolerance = 1.25 # NOTE(review): presumably a disparity within [1/1.25, 1.25] of the reference group counts as fair -- confirm

ap.summary(bias_df, metrics, fairness_threshold=disparity_tolerance)