### Imports

In [1]:
# Import analysis libraries
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

# from notebooks/ directory
import sys, os

# ADDITIONS:
from facet.inspection import RAIDataBaisCheck
from facet.inspection import RAIFairnessScenarios

# DISCUSS: how to handle
import gilfoyle.model.set_data as set_data
import gilfoyle.model.train_model as train_model

### Input parameters

In [2]:
# ---- Bias-check parameters ----
model_input_path = "./adult.csv"   # CSV file read into `model_input`
dependent = "rich"                 # raw dependent (outcome) column that is measured
dependent_bin = "rich_outcome"     # column that receives the 0/1 encoding of `dependent`
outcome_advantage = " >50K"        # value of `dependent` that is encoded as 1
protected_group = "race"           # protected-group column whose outcomes are compared
pg_disadvantage = " Black"         # value of `protected_group` encoded as 1, i.e. the class tested for a disadvantage
# NOTE(review): thresh_min / thresh_max are not referenced by the cells visible in this notebook.
thresh_min = 0.6
thresh_max = 1

# ---- Extra parameters for the toy model [skip if you have your own model] ----
# target_rate = 1 - desired (target) positive rate, i.e. 1 minus the share of
# the population that should be classified as predicted outcome 1.
target_rate = 0.84
test_size = 0.4
random_state = 777
n_estimators = 400
bias_detect_thresh = 0.2
data_name = "adult_dataset"

### Read data

In [3]:
# Load the raw records from the configured CSV path
model_input = pd.read_csv(model_input_path)

# Number individuals: the row index doubles as an ID column
model_input['ID'] = model_input.index

# Keep a handle on the protected-group column as read from the file (before encoding).
# NOTE(review): `pg_array` is reassigned by the train_model call in the
# fairness-scenarios cell, so this raw copy is not used by the visible cells.
pg_array = model_input[protected_group]

# Select dependent and PG -- measure historical bias
# Binary outcome: 1 where the dependent variable equals the advantaged outcome, else 0.
# Vectorized comparison replaces the element-wise apply/lambda; the int64 cast keeps
# the same integer dtype the lambda-based encoding produced.
model_input[dependent_bin] = model_input[dependent].eq(outcome_advantage).astype('int64')

# Binary protected-group flag: 1 for the disadvantaged class, else 0.
# This overwrites the raw protected-group column in `model_input`.
model_input[protected_group] = model_input[protected_group].eq(pg_disadvantage).astype('int64')

##### Run functions to create dummies for categorical variables and normalize numeric
# 'race' is listed as numeric because it now holds the 0/1 flag created above.
num_vars = ['age', 'fnlwgt', 'education-num', 'race', 'capital-gain', 'capital-loss',
            'hours-per-week']

cat_vars = ['workclass', 'education', 'marital-status', 'occupation',
            'relationship', 'sex', 'native-country']

# This is an internal function which one-hot encodes all categorical and normalizes all numeric variables
model_data = set_data.set_data(model_input, dependent_bin, cat_vars, num_vars)

# Print shape of final machine-learning ready data frame
print("Model data shape: ", model_data.shape)

Model data shape:  (32561, 106)


In [4]:
# Preview the first rows of the frame returned by set_data (one-hot encoded
# categoricals followed by the normalized numeric columns).
model_data.head()

Unnamed: 0,rich_outcome,ID,workclass_ ?,workclass_ Federal-gov,workclass_ Local-gov,workclass_ Never-worked,workclass_ Private,workclass_ Self-emp-inc,workclass_ Self-emp-not-inc,workclass_ State-gov,...,native-country_ United-States,native-country_ Vietnam,native-country_ Yugoslavia,age,fnlwgt,education-num,race,capital-gain,capital-loss,hours-per-week
0,0,0,0,0,0,0,0,0,0,1,...,1,0,0,0.30137,0.044302,0.8,0.0,0.02174,0.0,0.397959
1,0,1,0,0,0,0,0,0,1,0,...,1,0,0,0.452055,0.048238,0.8,0.0,0.0,0.0,0.122449
2,0,2,0,0,0,0,1,0,0,0,...,1,0,0,0.287671,0.138113,0.533333,0.0,0.0,0.0,0.397959
3,0,3,0,0,0,0,1,0,0,0,...,1,0,0,0.493151,0.151068,0.4,1.0,0.0,0.0,0.397959
4,0,4,0,0,0,0,1,0,0,0,...,0,0,0,0.150685,0.221488,0.8,1.0,0.0,0.0,0.397959


### 01 - Bias Index Check

In [5]:
# Configure the bias check on the historical data.
# The column names come from the input-parameters cell (`protected_group`,
# `dependent_bin`) instead of being repeated as string literals, so changing the
# configuration cannot leave this check pointing at stale columns.
RAI_bias_checker = RAIDataBaisCheck(
    protected_group=protected_group,   # 0/1 protected-group column
    test_col=dependent_bin,            # 0/1 outcome column
    pvalue_threshold=0.1,
    test_type="z-test",
    is_2_sided=False,
)

In [6]:
# Run the configured bias test on the encoded input frame, then show the
# resulting flag together with its p-value.
bias_test_result = RAI_bias_checker.bias_test(model_input)
bias, p_value = bias_test_result
(bias, p_value)

(True, 1.1361393491156106e-58)

In [7]:
# Show the crosstab held by the checker: protected-group flag (rows) against the
# binary outcome (columns).
# NOTE(review): presumably populated by bias_test() and row-normalized -- confirm.
RAI_bias_checker.historic_crosstab

rich_outcome,0,1
race,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0.746781,0.253219
1,0.87612,0.12388


### 03 - Fairness Scenarios

In [8]:
### If running the model on your own, repeat the steps below to create the required binary arrays.
# `protected_group` and `dependent_bin` are taken from the input-parameters cell;
# they are deliberately not re-assigned here so the configuration has a single source.

### Required model inputs [update with your own model]
# outcome_array = binary y_true for your validation set -- what you are trying to predict
# pg_array = binary protected group array -- 1/0 indicator where 1 is the PG (ex. black, female)
# preds_proba = your model predicted scores for each record in your validation set = output from your model predict_proba() function from sklearn
# preds_naive = your model predictions at optimal threshold -- use the one selected for your baseline / champion model

# Code example -- where rf_model is your model
#     preds_proba_raw = rf_model.predict_proba(X_test)
#     preds_proba = np.array(preds_proba_raw)[:, 1]

# Train model - generate data frames for scenario analysis
outcome_array, pg_array, preds_proba, preds_naive = train_model.train_model(
    model_data,
    dependent_bin,
    protected_group,
    test_size,
    random_state,
    n_estimators,
)

# Pull out the inputs for the fairness scenarios, indexing by the configured
# column names rather than repeating them as string literals.
y_true = outcome_array[dependent_bin]
y_pred_proba = preds_proba
y_pred_naive = preds_naive
pg = pg_array[protected_group]

Seconds to run:  9.29560136795044
log-loss 0.3139295316819027


In [9]:
# Build the fairness-scenario simulator.
# bias_detect_thresh is taken from the input-parameters cell (same value, 0.2).
# NOTE(review): target_rate is hard-coded to 0.9 here while the parameters cell
# defines target_rate = .84. The literal is kept so the reported results do not
# change -- confirm which value is intended and use a single definition.
RAI_fairness_simulator = RAIFairnessScenarios(
    target_rate=0.9,
    bias_detect_thresh=bias_detect_thresh,
)

In [10]:
# Fit the simulator on the validation-set arrays, passed positionally as
# (true outcome, naive predictions, predicted scores, protected-group flag).
# The cell's output is the per-scenario comparison table.
RAI_fairness_simulator.fit(y_true, y_pred_naive, y_pred_proba, pg)

Unnamed: 0_level_0,Bias_Test,Bias_Index,Accuracy,TP,FN,TN,FP,Non_PG_Outcome_Rate,PG_Outcome_Rate
Scenario,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1 - Naive,Fail,0.4488,0.8618,0.7583,0.2417,0.8868,0.1132,0.2051,0.092
2 - Threshold Best,Fail,0.4009,0.8389,0.8891,0.1109,0.8334,0.1666,0.1056,0.0423
3 - Historic Parity,Fail,0.4934,0.8397,0.8893,0.1107,0.8341,0.1659,0.1056,0.0521
4 - Demographic Parity,Pass,1.0022,0.8383,0.8848,0.1152,0.8331,0.1669,0.0999,0.1001


In [11]:
# Inspect the stored `thresh_best` value.
# NOTE(review): presumably the single score cutoff behind scenario "2 - Threshold Best" -- confirm.
RAI_fairness_simulator.thresh_best

0.7325

In [12]:
# Inspect the stored `thresh_hist_pg` value.
# NOTE(review): presumably the protected-group cutoff for "3 - Historic Parity" -- confirm.
RAI_fairness_simulator.thresh_hist_pg

0.684258721844227

In [13]:
# Inspect the stored `thresh_hist_non_pg` value.
# NOTE(review): presumably the non-protected-group cutoff for "3 - Historic Parity" -- confirm.
RAI_fairness_simulator.thresh_hist_non_pg

0.735

In [14]:
# Inspect the stored `thresh_demog_pg` value.
# NOTE(review): presumably the protected-group cutoff for "4 - Demographic Parity" -- confirm.
RAI_fairness_simulator.thresh_demog_pg

0.47324999999999984

In [15]:
# Inspect the stored `thresh_demog_non_pg` value.
# NOTE(review): presumably the non-protected-group cutoff for "4 - Demographic Parity" -- confirm.
RAI_fairness_simulator.thresh_demog_non_pg

0.7475

In [16]:
# Inspect the predictions stored as `preds_naive`.
# NOTE(review): presumably the y_pred_naive array passed to fit() ("1 - Naive") -- confirm.
RAI_fairness_simulator.preds_naive

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [17]:
# Inspect the predictions stored as `preds_threshold`.
# NOTE(review): presumably the predictions for "2 - Threshold Best" -- confirm.
RAI_fairness_simulator.preds_threshold

array([0, 0, 0, ..., 0, 0, 0])

In [18]:
# Inspect the predictions stored as `preds_historic`.
# NOTE(review): presumably the predictions for "3 - Historic Parity" -- confirm.
RAI_fairness_simulator.preds_historic

array([0, 0, 0, ..., 0, 0, 0])

In [19]:
# Inspect the predictions stored as `preds_demographic`.
# NOTE(review): presumably the predictions for "4 - Demographic Parity" -- confirm.
RAI_fairness_simulator.preds_demographic

array([0, 0, 0, ..., 0, 0, 0])

In [20]:
# Steps Ahead:
# - unit tests
# - exception handling
# - feature suggestions