# Fairness Checking: COMPAS NN (Different Models Experiment)
Here, we create different models to evaluate the LP criterion with different f(X) functions. Each NN represents one f(X), and we experiment with different parameters to achieve different functions. The main NN is in compas_nn_risk_score.ipynb.

The preprocessing for this dataset is done in a separate file. In this file, we train each model, evaluate its test accuracy, and save its predictions and accuracy to CSV files.

In [0]:
# Mount Google Drive at /content/drive so that the dataset and output paths
# used below (under '/content/drive/My Drive/...') resolve. Requires Colab.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
from tensorflow import keras
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np

from sklearn.model_selection import train_test_split
from keras.wrappers.scikit_learn import KerasClassifier

Using TensorFlow backend.


In [0]:
! rm -r '/content/drive/My Drive/colab/fairness_checking/depth_models/'
! mkdir '/content/drive/My Drive/colab/fairness_checking/depth_models/'

## Import and Split the Dataset
Our preprocessed dataset includes information about sex, age, race, and past and future criminal history. Each entry has 10 features, and the label is the last column: risk_recid.

risk_recid can take two different values: 0 or 1. 0 translates to the COMPAS labels 'Low' and 'Medium' and 1 translates to the COMPAS label 'High.' Reasoning for this is in the preprocessing file. We have a binary classification problem.

In [0]:
# Load the preprocessed COMPAS table produced by the separate preprocessing file.
df = pd.read_csv('/content/drive/My Drive/colab/fairness_checking/preprocessed_compas_data.csv')

In [0]:
# Preview the first five rows (head() defaults to n=5).
df.head()

Unnamed: 0.1,Unnamed: 0,sex,age,race,juv_fel_count,juv_misd_count,juv_other_count,priors_count,two_year_recid,c_charge_degree_F,c_charge_degree_M,risk_recid
0,0,1,69,0,0,0,0,0,0,1,0,0
1,1,1,34,1,0,0,0,0,1,1,0,0
2,2,1,24,1,0,0,1,4,1,1,0,0
3,5,1,44,0,0,0,0,0,0,0,1,0
4,6,1,41,0,0,0,0,14,1,1,0,1


In [0]:
# Number of rows in the raw table.
df.shape[0]

6159

In [0]:
# Min-max scale every column: (value - column min) / (column max - column min).
normalized_df = df.sub(df.min()).div(df.max() - df.min())

In [0]:
# Preview the first five scaled rows (head() defaults to n=5).
normalized_df.head()

Unnamed: 0.1,Unnamed: 0,sex,age,race,juv_fel_count,juv_misd_count,juv_other_count,priors_count,two_year_recid,c_charge_degree_F,c_charge_degree_M,risk_recid
0,0.0,1.0,0.653846,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,0.000139,1.0,0.205128,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
2,0.000277,1.0,0.076923,1.0,0.0,0.0,0.111111,0.105263,1.0,1.0,0.0,0.0
3,0.000693,1.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,0.000832,1.0,0.294872,0.0,0.0,0.0,0.0,0.368421,1.0,1.0,0.0,1.0


In [0]:
# Scaling must not change the number of rows.
normalized_df.shape[0]

6159

In [0]:
# Train-Test Split for Dataframe
train = normalized_df.sample(frac=0.8,random_state=42)
test = normalized_df.drop(train.index)

In [0]:
# Select feature/label columns by name, with the names taken from `train`
# (which is never modified): the first column (`Unnamed: 0` in the preview) is
# skipped and the last column is the label. Reusing those names for `test`
# keeps this cell correct when it is re-run after the prediction columns have
# been appended to `test` further down; positional slicing would then treat
# `prediction` as the label and leak the true label into the features.
feature_columns = train.columns[1:-1]
label_column = train.columns[-1]

X_train = train[feature_columns].values
y_train = train[label_column].values

X_test = test[feature_columns].values
y_test = test[label_column].values

In [0]:
# Report the shape of each split array, in the order
# X_train, y_train, X_test, y_test.
for split_array in (X_train, y_train, X_test, y_test):
  print(split_array.shape)

(4927, 10)
(4927,)
(1232, 10)
(1232,)


In [0]:
# Peek at the training feature matrix (NumPy abbreviates the repr of large arrays).
X_train

array([[1.        , 0.03846154, 1.        , ..., 1.        , 1.        ,
        0.        ],
       [0.        , 0.12820513, 1.        , ..., 0.        , 1.        ,
        0.        ],
       [1.        , 0.05128205, 1.        , ..., 1.        , 1.        ,
        0.        ],
       ...,
       [0.        , 0.35897436, 0.        , ..., 1.        , 1.        ,
        0.        ],
       [1.        , 0.08974359, 1.        , ..., 0.        , 0.        ,
        1.        ],
       [1.        , 0.20512821, 1.        , ..., 0.        , 1.        ,
        0.        ]])

In [0]:
# Cast the 0.0/1.0 labels to integers.
y_train, y_test = y_train.astype(int), y_test.astype(int)

## Experiment 1: Vary NN Depth
We first experiment with different models of varying depths.

In [0]:
# Number of networks to build in the depth experiment; model i gets i + 1 hidden layers.
NUMBER_OF_MODELS = 20

In [0]:
# Build NUMBER_OF_MODELS fully connected binary classifiers. Model i has i + 1
# hidden layers of 200 ReLU units followed by a single sigmoid output unit.
depth_models = []

for i in range(NUMBER_OF_MODELS):
  model = keras.Sequential()
  model.add(keras.layers.Dense(200, input_dim=10, activation="relu"))
  for j in range(i):
    model.add(keras.layers.Dense(200, activation="relu"))
  model.add(keras.layers.Dense(1, activation='sigmoid'))
  # Give every model its own optimizer. A single shared Adam instance carries
  # its step counter (and therefore its bias correction) from one model's
  # training run into the next, so the models would not all be trained under
  # the same conditions.
  adam = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
  model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])
  depth_models.append(model)
  # Lightweight progress indicator.
  if(i % 10 == 0):
    print(i)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
0
10


In [0]:
# Sanity check
print(len(depth_models))
depth_models[0].summary()

20
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 200)               2200      
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 201       
Total params: 2,401
Trainable params: 2,401
Non-trainable params: 0
_________________________________________________________________


In [0]:
# Sanity check
# Model 3 should consist of four hidden layers plus the output layer.
depth_models[3].summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_9 (Dense)              (None, 200)               2200      
_________________________________________________________________
dense_10 (Dense)             (None, 200)               40200     
_________________________________________________________________
dense_11 (Dense)             (None, 200)               40200     
_________________________________________________________________
dense_12 (Dense)             (None, 200)               40200     
_________________________________________________________________
dense_13 (Dense)             (None, 1)                 201       
Total params: 123,001
Trainable params: 123,001
Non-trainable params: 0
_________________________________________________________________


### Train and Evaluate All Depth Models
Iterate over the depth models and train each of them, storing the history in a dict.

In [0]:
# Fit each model for 20 epochs, holding out 10% of the training rows for
# validation, and keep the object returned by fit() keyed by model index.
history = {}
for idx, net in enumerate(depth_models):
  history[idx] = net.fit(X_train, y_train, epochs=20, validation_split=0.1, verbose=False)
  # Lightweight progress indicator.
  if idx % 10 == 0:
    print(idx)

0
10


In [0]:
# Sanity check
# Spot-check models 5, 10 and 19 on the test set. Each result is printed
# explicitly because a notebook cell only displays the value of its last
# expression, so the first two results would otherwise be discarded.
for depth_index in (5, 10, 19):
  print(depth_index, depth_models[depth_index].evaluate(X_test, y_test))



[0.687052736034641, 0.5551948]

In [0]:
# Store the test accuracy of each of the models
# Collect the test-set accuracy of every model, in model order.
depth_model_accuracies = []
for net in depth_models:
  # evaluate() yields a (loss, accuracy) pair here; only the accuracy is kept.
  _, test_accuracy = net.evaluate(X_test, y_test)
  depth_model_accuracies.append(test_accuracy)



## Predict on the Test Set

In [0]:
# Predicted labels
def predict_labels(model, X_test):
  """Return the model's hard class predictions as a 1-D array.

  Args:
    model: a binary classifier exposing `predict_classes(X)` or, failing that,
      `predict(X)`; either must return one row per sample, with the value of
      interest in the first column.
    X_test: feature matrix passed straight through to the model.

  Returns:
    1-D NumPy array holding the first column of the model's class predictions.
  """
  if hasattr(model, "predict_classes"):
    pred = model.predict_classes(X_test)
  else:
    # Newer Keras releases no longer provide predict_classes; thresholding the
    # single-unit output at 0.5 is the documented replacement for binary models.
    pred = (np.asarray(model.predict(X_test)) > 0.5).astype("int32")
  # Take the first column in one step instead of looping row by row in Python.
  return np.asarray(pred)[:, 0]

In [0]:
# Predicted probabilities
def predict_probabilities(model, X_test):
  """Return the first output column of `model.predict(X_test)` as a 1-D array.

  Args:
    model: object exposing `predict(X)` that returns one row per sample.
    X_test: feature matrix passed straight through to `model.predict`.

  Returns:
    1-D NumPy array with the first value of every prediction row.
  """
  raw_output = model.predict(X_test)
  first_column = [row[0] for row in raw_output]
  return np.asarray(first_column)

In [0]:
# Predict the labels with all the models
# Hard class predictions on the test set, keyed by model index.
pred_labels = {}
for idx, net in enumerate(depth_models):
  # Lightweight progress indicator.
  if idx % 10 == 0:
    print(idx)
  pred_labels[idx] = predict_labels(net, X_test)

0
10


In [0]:
# Predict the probabilities (logits) with all the models
# Predicted probabilities (the sigmoid outputs) on the test set, keyed by model index.
pred_probs = {}
for idx, net in enumerate(depth_models):
  # Lightweight progress indicator.
  if idx % 10 == 0:
    print(idx)
  pred_probs[idx] = predict_probabilities(net, X_test)

0
10


In [0]:
# Both prediction arrays of model 0 should have one entry per test row.
for model0_output in (pred_probs[0], pred_labels[0]):
  print(len(model0_output))

1232
1232


In [0]:
# Sanity check to make sure we have different probabilities and labels
# Sanity check: among the first 20 test rows, print the positions where
# models 0 and 1 disagree -- first on probabilities, then on labels.
for pos, (prob_a, prob_b) in enumerate(zip(pred_probs[0], pred_probs[1])):
  if pos < 20 and prob_a != prob_b:
    print(pos)

for pos, (label_a, label_b) in enumerate(zip(pred_labels[0], pred_labels[1])):
  if pos < 20 and label_a != label_b:
    print(pos)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
8


In [0]:
# Sanity Check (make sure different)
# Sanity check: inspect model 1's test-set probabilities; they should not be
# identical to those of another model.
pred_probs[1]

array([0.04007128, 0.7106999 , 0.16918698, ..., 0.99504375, 0.46565092,
       0.43461204], dtype=float32)

In [0]:
# Sanity Check (make sure different)
# Sanity check: inspect model 2's test-set probabilities; they should not be
# identical to those of another model.
pred_probs[2]

array([0.03949991, 0.71844226, 0.16511649, ..., 0.99807984, 0.33052123,
       0.3587618 ], dtype=float32)

In [0]:
# Append final answers for each of the models and save as CSV
# Write one CSV per model: the test rows plus that model's probability and
# hard-label columns. The two columns on `test` are overwritten on every pass,
# so after the loop `test` holds the last model's predictions.
save_template = '/content/drive/My Drive/colab/fairness_checking/depth_models/diffmodel_{}.csv'
for model_idx in range(len(pred_probs)):
  test['prediction_probs'] = pred_probs[model_idx]
  test['prediction'] = pred_labels[model_idx]
  test.to_csv(save_template.format(model_idx))

In [0]:
# Preview the test rows together with the prediction columns currently stored on them.
test.head()

Unnamed: 0.1,Unnamed: 0,sex,age,race,juv_fel_count,juv_misd_count,juv_other_count,priors_count,two_year_recid,c_charge_degree_F,c_charge_degree_M,risk_recid,prediction_probs,prediction
3,0.000693,1.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.44713,0
4,0.000832,1.0,0.294872,0.0,0.0,0.0,0.0,0.368421,1.0,1.0,0.0,1.0,0.44713,0
5,0.00097,1.0,0.320513,0.0,0.0,0.0,0.0,0.078947,0.0,1.0,0.0,0.0,0.44713,0
9,0.001664,0.0,0.24359,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.44713,0
16,0.002634,1.0,0.166667,0.0,0.0,0.0,0.0,0.131579,1.0,1.0,0.0,0.0,0.44713,0


In [0]:
# Save a simple CSV with depth model accuracies
# Save a simple CSV with one row per model: its index and its test accuracy.
# NOTE: this rebinds `df`, which held the raw dataset until this point.
df = pd.DataFrame({'Model Depth':list(range(NUMBER_OF_MODELS)), 'Model Accuracy':depth_model_accuracies})
df.to_csv('/content/drive/My Drive/colab/fairness_checking/depth_models/depth_model_accuracies.csv')
# Preview last so the notebook actually renders it; a bare expression in the
# middle of a cell is evaluated and then discarded.
df.head(5)