In [1]:
import os
import pandas as pd
from sklearn.metrics import precision_score, recall_score, accuracy_score, precision_recall_curve
import json
import matplotlib.pyplot as plt

In [2]:
with open('dry_run.json') as json_file:
    data = json.load(json_file)

In [3]:
trial_df = pd.DataFrame(data["values"], columns = ['id', 'observation_id', 'observation', 'proba', 'true_class', 'predicted_class']) 
trial_df.head()

Unnamed: 0,id,observation_id,observation,proba,true_class,predicted_class
0,4,3,"{""id"": 3, ""observation"": {""Department Name"": ""...",0.324282,1.0,0
1,5,4,"{""id"": 4, ""observation"": {""Department Name"": ""...",0.150087,0.0,0
2,6,5,"{""id"": 5, ""observation"": {""Department Name"": ""...",0.150087,0.0,0
3,7,6,"{""id"": 6, ""observation"": {""Department Name"": ""...",0.259167,0.0,0
4,8,7,"{""id"": 7, ""observation"": {""Department Name"": ""...",0.259167,0.0,0


In [4]:
# Get only observations that received the true_class
trial_df = trial_df[(trial_df.true_class == 1) | (trial_df.true_class == 0)]

In [5]:
precision_score(trial_df.true_class, trial_df.predicted_class)

0.5238095238095238

In [6]:
recall_score(trial_df.true_class, trial_df.predicted_class)

0.5789473684210527

In [7]:
accuracy_score(trial_df.true_class, trial_df.predicted_class)

0.6814159292035398

In [8]:
obs_id = trial_df.observation.apply(lambda x: json.loads(x)['id'])
obs = trial_df.observation.apply(lambda x: json.loads(x)['observation']).apply(pd.Series).set_index(obs_id)
obs = obs.reset_index().rename(columns={"observation": "observation_id"})
obs.head()

Unnamed: 0,observation_id,Department Name,InterventionDateTime,InterventionLocationName,InterventionReasonCode,ReportingOfficerIdentificationID,ResidentIndicator,SearchAuthorizationCode,StatuteReason,SubjectAge,SubjectEthnicityCode,SubjectRaceCode,SubjectSexCode,TownResidentIndicator
0,3,Milford,05/16/2018 04:08:00 AM,MILFORD,E,473,True,I,Other,49.0,N,B,F,False
1,4,Norwalk,05/16/2018 09:38:00 AM,NORWALK,V,100000335,True,I,Defective Lights,35.0,N,W,M,True
2,5,Norwalk,05/16/2018 09:38:00 AM,NORWALK,V,100000335,True,I,Defective Lights,35.0,N,W,M,True
3,6,Naugatuck,05/16/2018 11:43:00 AM,NAUGATUCK,I,100296,True,I,Administrative Offense,23.0,H,W,M,False
4,7,Naugatuck,05/16/2018 11:43:00 AM,NAUGATUCK,I,100296,True,I,Administrative Offense,23.0,H,W,M,False


In [9]:
trial_df = trial_df.drop(columns=["observation"])
trial_df = trial_df.merge(obs, on='observation_id')
trial_df.head()

Unnamed: 0,id,observation_id,proba,true_class,predicted_class,Department Name,InterventionDateTime,InterventionLocationName,InterventionReasonCode,ReportingOfficerIdentificationID,ResidentIndicator,SearchAuthorizationCode,StatuteReason,SubjectAge,SubjectEthnicityCode,SubjectRaceCode,SubjectSexCode,TownResidentIndicator
0,4,3,0.324282,1.0,0,Milford,05/16/2018 04:08:00 AM,MILFORD,E,473,True,I,Other,49.0,N,B,F,False
1,5,4,0.150087,0.0,0,Norwalk,05/16/2018 09:38:00 AM,NORWALK,V,100000335,True,I,Defective Lights,35.0,N,W,M,True
2,6,5,0.150087,0.0,0,Norwalk,05/16/2018 09:38:00 AM,NORWALK,V,100000335,True,I,Defective Lights,35.0,N,W,M,True
3,7,6,0.259167,0.0,0,Naugatuck,05/16/2018 11:43:00 AM,NAUGATUCK,I,100296,True,I,Administrative Offense,23.0,H,W,M,False
4,8,7,0.259167,0.0,0,Naugatuck,05/16/2018 11:43:00 AM,NAUGATUCK,I,100296,True,I,Administrative Offense,23.0,H,W,M,False


In [10]:
#get the observations where we got the predicted class wrong
#trial_df = trial_df[trial_df.true_class != trial_df.predicted_class]
#trial_df.head()

In [11]:
precision_female = precision_score(trial_df[trial_df.SubjectSexCode == 'F'].true_class, trial_df[trial_df.SubjectSexCode == 'F'].predicted_class)
precision_male = precision_score(trial_df[trial_df.SubjectSexCode == 'M'].true_class, trial_df[trial_df.SubjectSexCode == 'M'].predicted_class)
precision_hispanic = precision_score(trial_df[trial_df.SubjectEthnicityCode == 'H'].true_class, trial_df[trial_df.SubjectEthnicityCode == 'H'].predicted_class)
precision_middle_eastern = precision_score(trial_df[trial_df.SubjectEthnicityCode == 'M'].true_class, trial_df[trial_df.SubjectEthnicityCode == 'M'].predicted_class)
precision_not_applicable_ethnicity = precision_score(trial_df[trial_df.SubjectEthnicityCode == 'N'].true_class, trial_df[trial_df.SubjectEthnicityCode == 'N'].predicted_class)
precision_white = precision_score(trial_df[trial_df.SubjectRaceCode == 'W'].true_class, trial_df[trial_df.SubjectRaceCode == 'W'].predicted_class)
precision_black = precision_score(trial_df[trial_df.SubjectRaceCode == 'B'].true_class, trial_df[trial_df.SubjectRaceCode == 'B'].predicted_class)
precision_indian = precision_score(trial_df[trial_df.SubjectRaceCode == 'I'].true_class, trial_df[trial_df.SubjectRaceCode == 'I'].predicted_class)
precision_asian = precision_score(trial_df[trial_df.SubjectRaceCode == 'A'].true_class, trial_df[trial_df.SubjectRaceCode == 'A'].predicted_class)
print(precision_female)
print(precision_male)
print(precision_hispanic)
print(precision_middle_eastern)
print(precision_not_applicable_ethnicity)
print(precision_white)
print(precision_black)
print(precision_indian)
print(precision_asian)

0.5
0.5294117647058824
0.25
0.0
0.5882352941176471
0.5454545454545454
0.5
0.0
0.0


  'precision', 'predicted', average, warn_for)


Investigate middle_eastern, indian and asian precisions:

In [13]:
trial_df[trial_df.SubjectEthnicityCode == 'M'].shape

(0, 18)

In [14]:
trial_df[trial_df.SubjectRaceCode == 'I'].shape

(2, 18)

In [15]:
trial_df[trial_df.SubjectRaceCode == 'A'].shape

(0, 18)

We will disregard the indians as 2 observations is very few

The precision in the sex code seems to be ok

Investigating hispanics

In [16]:
trial_df[trial_df.SubjectEthnicityCode == 'H'].shape

(21, 18)

We are having a very low precision for hispanics that is making the difference of precision to not_applicable_ethnicity drivers very high. Since we have very few observations overall, we will wait for more to make a judgement.

The difference of precision between white and black drivers is also low (<5%).