This notebook trains a model to mimic the behavior of the COMPAS recidivism classifier. The specific binary classification task for this model is to determine if a person belongs in the "Low" risk class according to COMPAS (negative class), or the "Medium" or "High" risk class (positive class).

In [1]:
# Show which TensorFlow release this notebook is running against.
import tensorflow

tf_version = tensorflow.__version__
print(tf_version)

2.15.0


In [2]:
# Import libraries
import pandas as pd
import numpy as np
import tensorflow as tf
import os
from google.colab import drive
import pickle

from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

from sklearn.utils import shuffle
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.metrics import accuracy_score, precision_score, recall_score

In [3]:
# Load the COMPAS scores CSV into a DataFrame and summarise its columns.
csv_path = 'compas-scores-two-years.csv'
df = pd.read_csv(csv_path)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7214 entries, 0 to 7213
Data columns (total 53 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   id                       7214 non-null   int64  
 1   name                     7214 non-null   object 
 2   first                    7214 non-null   object 
 3   last                     7214 non-null   object 
 4   compas_screening_date    7214 non-null   object 
 5   sex                      7214 non-null   object 
 6   dob                      7214 non-null   object 
 7   age                      7214 non-null   int64  
 8   age_cat                  7214 non-null   object 
 9   race                     7214 non-null   object 
 10  juv_fel_count            7214 non-null   int64  
 11  decile_score             7214 non-null   int64  
 12  juv_misd_count           7214 non-null   int64  
 13  juv_other_count          7214 non-null   int64  
 14  priors_count            

In [4]:
#@title Preprocess the data

# Drop rows where either the recidivism flag or the COMPAS decile score is -1.
# The explicit .copy() makes the filtered frame independent of the frame it was
# sliced from, so the column assignments below do not trigger pandas'
# SettingWithCopyWarning.
has_outcome_and_score = (df['is_recid'] != -1) & (df['decile_score'] != -1)
df = df[has_outcome_and_score].copy()

# Expose the recidivism flag under a more descriptive column name.
df['recidivism_within_2_years'] = df['is_recid']

# Numeric COMPAS label for the model: 0 when score_text is 'Low', 1 otherwise.
# The column keeps its existing 'COMPASS_determination' spelling because other
# cells look it up by that exact name.
df['COMPASS_determination'] = np.where(df['score_text'] == 'Low', 0, 1)

# One-hot encode the categorical columns. dtype=int pins the indicator columns
# to 0/1 integers: pandas 2.x defaults to bool, and mixing bool with the integer
# count columns makes `.values` an object array that Keras cannot convert.
df = pd.get_dummies(df, columns=['sex', 'race'], dtype=int)

# Columns from the dataset that are used as model input.
input_features = [
    'sex_Female',
    'sex_Male',
    'age',
    'race_African-American',
    'race_Caucasian',
    'race_Hispanic',
    'race_Native American',
    'race_Other',
    'priors_count',
    'juv_fel_count',
    'juv_misd_count',
    'juv_other_count',
]
# NOTE(review): disabled alternative feature list with different column names,
# presumably for a variant of the dataset -- confirm whether it is still needed.
#input_features = ['income_Lower', 'income_Higher', 'age', 'social-class_SC1', 'social-class_SC2', 'social-class_SC3', 'social-class_SC4', 'social-class_Other', 'priors_count', 'juv_fel_count', 'juv_misd_count', 'juv_other_count']

# Model inputs plus the two label columns (evaluation label and training target).
to_keep = input_features + ['recidivism_within_2_years', 'COMPASS_determination']

# Fail fast if an expected column is absent; otherwise the model would silently
# be trained on fewer inputs than listed above.
missing_columns = [col for col in to_keep if col not in df.columns]
assert not missing_columns, f"Missing expected columns after preprocessing: {missing_columns}"

# Discard every other column of the raw dataset.
to_remove = [col for col in df.columns if col not in to_keep]
df = df.drop(columns=to_remove)

input_columns = df.columns.tolist()
labels = df['COMPASS_determination']
df.head()

Unnamed: 0,age,juv_fel_count,juv_misd_count,juv_other_count,priors_count,recidivism_within_2_years,COMPASS_determination,sex_Female,sex_Male,race_African-American,race_Caucasian,race_Hispanic,race_Native American,race_Other
0,69,0,0,0,0,0,0,0,1,0,0,0,0,1
1,34,0,0,0,0,1,0,0,1,1,0,0,0,0
2,24,0,0,1,4,1,0,0,1,1,0,0,0,0
3,23,0,1,0,1,0,1,0,1,1,0,0,0,0
4,43,0,0,0,2,0,0,0,1,0,0,0,0,1


In [5]:
#@title Create data structures needed for training and testing
# Neither the prediction target ('COMPASS_determination') nor the column used
# to evaluate the trained model ('recidivism_within_2_years') may be part of
# the model inputs, so both are removed from the feature frame.
df_for_training = df.drop(columns=['COMPASS_determination', 'recidivism_within_2_years'])

# The first 80% of rows, in their existing order, form the training split.
train_size = int(len(df_for_training) * 0.8)
train_data = df_for_training.iloc[:train_size]
train_labels = labels.iloc[:train_size]

# The remaining rows form the test split. One frame keeps the label columns;
# the feature-only frame is derived from it.
test_data_with_labels = df.iloc[train_size:]
test_data = test_data_with_labels.drop(columns=['COMPASS_determination', 'recidivism_within_2_years'])
test_labels = labels.iloc[train_size:]

In [6]:
#@title Regression model
# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created, so that weight initialisation (and other seeded randomness during
# training) repeats when the notebook is re-run from a fresh kernel.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Number of values fed into the model per example: one per training column.
input_size = train_data.shape[1]

# Three ReLU hidden layers followed by a single sigmoid unit, so every
# prediction lies between 0 and 1.
model = Sequential([
    Dense(200, input_shape=(input_size,), activation='relu'),
    Dense(50, activation='relu'),
    Dense(25, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.summary()

# Treat the 0/1 label as a regression target: minimise mean squared error.
model.compile(loss='mean_squared_error', optimizer='adam')


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 200)               2600      
                                                                 
 dense_1 (Dense)             (None, 50)                10050     
                                                                 
 dense_2 (Dense)             (None, 25)                1275      
                                                                 
 dense_3 (Dense)             (None, 1)                 26        
                                                                 
Total params: 13951 (54.50 KB)
Trainable params: 13951 (54.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [7]:
# Fit the model on the training split: 10 epochs, batches of 32 examples,
# with a 0.1 validation split taken from the training data by Keras.
model.fit(
    train_data.values,
    train_labels.values,
    epochs=10,
    batch_size=32,
    validation_split=0.1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7feefe485c90>

In [8]:
# Score the held-out test rows with the trained model
test_targets = test_labels.values
predictions = model.predict(test_data.values)

# Error metrics comparing the model outputs against the 0/1 test labels
mae = mean_absolute_error(test_targets, predictions)  # mean absolute error
mse = mean_squared_error(test_targets, predictions)   # mean squared error
r2 = r2_score(test_targets, predictions)              # coefficient of determination

# Report each metric on its own line
for metric_label, metric_value in (
    ("Mean Absolute Error:", mae),
    ("Mean Squared Error:", mse),
    ("R^2 Score:", r2),
):
    print(metric_label, metric_value)

Mean Absolute Error: 0.3400367973972569
Mean Squared Error: 0.17672059420720865
R^2 Score: 0.2899089547338609


In [9]:
#@title Binary Classification model
# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created, so that weight initialisation (and other seeded randomness during
# training) repeats when the notebook is re-run from a fresh kernel.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Number of values fed into the model per example: one per training column.
input_size = train_data.shape[1]

# Three ReLU hidden layers followed by a single sigmoid unit, so every
# prediction lies between 0 and 1.
model = Sequential([
    Dense(200, input_shape=(input_size,), activation='relu'),
    Dense(50, activation='relu'),
    Dense(25, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.summary()

# Compile the model with binary_crossentropy loss and accuracy metric
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 200)               2600      
                                                                 
 dense_5 (Dense)             (None, 50)                10050     
                                                                 
 dense_6 (Dense)             (None, 25)                1275      
                                                                 
 dense_7 (Dense)             (None, 1)                 26        
                                                                 
Total params: 13951 (54.50 KB)
Trainable params: 13951 (54.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [10]:
# Fit the classifier on the training split and keep the returned History
# object: 10 epochs, batches of 32 examples, 0.1 validation split.
history = model.fit(
    train_data.values,
    train_labels.values,
    epochs=10,
    batch_size=32,
    validation_split=0.1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [11]:
# Loss and accuracy on the test split, as reported by Keras
test_features = test_data.values
test_targets = test_labels.values
test_loss, test_accuracy = model.evaluate(test_features, test_targets)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)

# Round the model outputs on the test split to hard 0/1 predictions
test_predictions = model.predict(test_features)
test_predictions_rounded = np.round(test_predictions)

# Precision and recall on the test split
precision = precision_score(test_targets, test_predictions_rounded)
recall = recall_score(test_targets, test_predictions_rounded)

print("Test Precision:", precision)
print("Test Recall:", recall)

# Same metrics on the training split, for comparison with the test numbers
train_features = train_data.values
train_targets = train_labels.values
train_predictions = model.predict(train_features)
train_predictions_rounded = np.round(train_predictions)

train_accuracy = accuracy_score(train_targets, train_predictions_rounded)
train_precision = precision_score(train_targets, train_predictions_rounded)
train_recall = recall_score(train_targets, train_predictions_rounded)

print("Train Accuracy:", train_accuracy)
print("Train Precision:", train_precision)
print("Train Recall:", train_recall)

Test Loss: 0.5207697749137878
Test Accuracy: 0.7297297120094299
Test Precision: 0.6968011126564673
Test Recall: 0.7444279346210996
Train Accuracy: 0.7527291630566626
Train Precision: 0.7216757741347906
Train Recall: 0.7492435703479576
