# Couterfactual Explanations

In [25]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler

from sklearn.model_selection import StratifiedKFold, cross_val_predict, train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_curve, auc, precision_recall_curve, confusion_matrix

In [44]:
# Read data
df = pd.read_csv('compas-scores-two-years.csv')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7214 entries, 0 to 7213
Data columns (total 53 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   id                       7214 non-null   int64  
 1   name                     7214 non-null   object 
 2   first                    7214 non-null   object 
 3   last                     7214 non-null   object 
 4   compas_screening_date    7214 non-null   object 
 5   sex                      7214 non-null   object 
 6   dob                      7214 non-null   object 
 7   age                      7214 non-null   int64  
 8   age_cat                  7214 non-null   object 
 9   race                     7214 non-null   object 
 10  juv_fel_count            7214 non-null   int64  
 11  decile_score             7214 non-null   int64  
 12  juv_misd_count           7214 non-null   int64  
 13  juv_other_count          7214 non-null   int64  
 14  priors_count            

In [45]:
#@title Preprocess the data

# Read data
df = pd.read_csv('compas-scores-two-years.csv')

# Filter out entries with no indication of recidivism or no compass score
df = df[df['is_recid'] != -1]
df = df[df['days_b_screening_arrest'].notnull()]

# Rename recidivism column
df['recidivism_within_2_years'] = df['is_recid']

# Make the COMPASS label column numeric (0 and 1), for use in our model
df['COMPASS_determination'] = np.where(df['score_text'] == 'Low', 0, 1)

df = pd.get_dummies(df, columns=['sex', 'race', 'c_charge_degree'])
df['duration'] = df['end'] - df['start']

# Get list of all columns from the dataset we will use for model input or output.
input_features = ['sex_Male', 'age', 'c_charge_degree_F', 'race_African-American', 'race_Caucasian',
                  'race_Hispanic', 'race_Native American', 'race_Other', 'priors_count', 'juv_fel_count',
                  'juv_misd_count', 'juv_other_count', 'days_b_screening_arrest','c_days_from_compas', 'duration']

to_keep = input_features + ['recidivism_within_2_years', 'COMPASS_determination']

to_remove = [col for col in df.columns if col not in to_keep]
df = df.drop(columns=to_remove)

input_columns = df.columns.tolist()
labels = df['COMPASS_determination']
df.head(20)

Unnamed: 0,age,juv_fel_count,juv_misd_count,juv_other_count,priors_count,days_b_screening_arrest,c_days_from_compas,recidivism_within_2_years,COMPASS_determination,sex_Male,race_African-American,race_Caucasian,race_Hispanic,race_Native American,race_Other,c_charge_degree_F,duration
0,69,0,0,0,0,-1.0,1.0,0,0,1,0,0,0,0,1,1,327
1,34,0,0,0,0,-1.0,1.0,1,0,1,1,0,0,0,0,1,150
2,24,0,0,1,4,-1.0,1.0,1,0,1,1,0,0,0,0,1,63
5,44,0,0,0,0,0.0,0.0,0,0,1,0,0,0,0,1,0,852
6,41,0,0,0,14,-1.0,1.0,1,1,1,0,1,0,0,0,1,35
7,43,0,0,0,3,-1.0,1.0,0,0,1,0,0,0,0,1,1,265
8,39,0,0,0,0,-1.0,1.0,0,0,0,0,1,0,0,0,0,745
9,21,0,0,0,1,428.0,308.0,1,0,1,0,1,0,0,0,1,428
10,27,0,0,0,0,-1.0,1.0,0,0,1,0,1,0,0,0,1,857
11,23,0,0,0,3,0.0,0.0,1,1,1,1,0,0,0,0,0,124


In [46]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score

df = df.drop('recidivism_within_2_years', axis=1)

#@title Create data structures needed for training and testing
# The training data doesn't contain the column we are predicting,
# 'COMPASS_determination', or the column we are using for evaluation of our
# trained model, 'recidivism_within_2_years'.
X = df.drop(columns=['COMPASS_determination'])
y = df['COMPASS_determination']


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)


model = LogisticRegression()
model.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [47]:
# Evaluate the Logistic Regression model on test data
logistic_test_predictions = model.predict(X_test)
logistic_test_accuracy = accuracy_score(y_test, logistic_test_predictions)
logistic_precision = precision_score(y_test, logistic_test_predictions)
logistic_recall = recall_score(y_test, logistic_test_predictions)

# Print evaluation metrics for Logistic Regression model
print(f"Logistic Regression Test Accuracy: {logistic_test_accuracy:.4f}")
print(f"Logistic Regression Test Precision: {logistic_precision:.4f}")
print(f"Logistic Regression Test Recall: {logistic_recall:.4f}")

Logistic Regression Test Accuracy: 0.7402
Logistic Regression Test Precision: 0.7136
Logistic Regression Test Recall: 0.7079


In [16]:
!pip install dice_ml



In [34]:
import dice_ml

In [55]:
y_test

576     0
3644    0
4707    0
6727    1
3683    1
       ..
2157    1
5978    0
5121    0
577     0
3101    0
Name: COMPASS_determination, Length: 1382, dtype: int64

In [48]:
d = dice_ml.Data(dataframe=df, continuous_features=['age', 'duration'], outcome_name='COMPASS_determination')
m = dice_ml.Model(model=model, backend='sklearn')

exp = dice_ml.Dice(d, m, method='random')

In [58]:
e = exp.generate_counterfactuals(X_test[3:4], total_CFs=5, desired_class="opposite", features_to_vary=['race_African-American', 'race_Caucasian', 'race_Hispanic', 'race_Native American', 'race_Other'])
e.visualize_as_dataframe(show_only_changes=True)

100%|██████████| 1/1 [00:00<00:00,  4.77it/s]

Query instance (original outcome : 1)





Unnamed: 0,age,juv_fel_count,juv_misd_count,juv_other_count,priors_count,days_b_screening_arrest,c_days_from_compas,sex_Male,race_African-American,race_Caucasian,race_Hispanic,race_Native American,race_Other,c_charge_degree_F,duration,COMPASS_determination
0,54,0,0,0,11,-127.0,127.0,1,0,1,0,0,0,1,214,1



Diverse Counterfactual set (new outcome: 0)


Unnamed: 0,age,juv_fel_count,juv_misd_count,juv_other_count,priors_count,days_b_screening_arrest,c_days_from_compas,sex_Male,race_African-American,race_Caucasian,race_Hispanic,race_Native American,race_Other,c_charge_degree_F,duration,COMPASS_determination
0,-,-,-,-,-,-,-,-,-,-,-,-,1.0,-,-,0.0
1,-,-,-,-,-,-,-,-,-,-,1.0,-,1.0,-,-,0.0
2,-,-,-,-,-,-,-,-,-,0.0,-,-,-,-,-,0.0
3,-,-,-,-,-,-,-,-,-,0.0,-,-,-,-,-,0.0
4,-,-,-,-,-,-,-,-,-,0.0,-,-,-,-,-,0.0


In [59]:
e = exp.generate_counterfactuals(X_test[3:4], total_CFs=5, desired_class="opposite")
e.visualize_as_dataframe(show_only_changes=True)

100%|██████████| 1/1 [00:00<00:00,  1.70it/s]

Query instance (original outcome : 1)





Unnamed: 0,age,juv_fel_count,juv_misd_count,juv_other_count,priors_count,days_b_screening_arrest,c_days_from_compas,sex_Male,race_African-American,race_Caucasian,race_Hispanic,race_Native American,race_Other,c_charge_degree_F,duration,COMPASS_determination
0,54,0,0,0,11,-127.0,127.0,1,0,1,0,0,0,1,214,1



Diverse Counterfactual set (new outcome: 0)


Unnamed: 0,age,juv_fel_count,juv_misd_count,juv_other_count,priors_count,days_b_screening_arrest,c_days_from_compas,sex_Male,race_African-American,race_Caucasian,race_Hispanic,race_Native American,race_Other,c_charge_degree_F,duration,COMPASS_determination
0,-,-,-,-,-,-,-,-,-,-,-,-,-,-,1121.0,0.0
1,-,-,-,-,1.0,-,-,-,-,-,-,-,1.0,-,-,0.0
2,-,-,-,-,-,-,-,0.0,-,-,-,-,-,-,1182.0,0.0
3,-,-,-,-,-,-,-,-,-,-,-,-,-,-,1163.0,0.0
4,-,-,-,-,-,-,-,-,-,-,-,-,-,0.0,-,0.0
