Author: Susan Hopper

## Create a test ('serving') dataset

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Pull the latest data from the Cook County website: https://datacatalog.cookcountyil.gov/Public-Safety/Medical-Examiner-Case-Archive/cjeq-bs86 <br>
Read in the original data (from 26 Oct 23) and the new data (from 6 Nov 23).


In [7]:
# Load both snapshots of the Medical Examiner Case Archive:
#   orig -> data from 26 Oct 23, new -> data from 6 Nov 23.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, so a column cannot end up with mixed types.
# NOTE(review): the saved output of this cell echoes both read_csv lines, which
# looks like a pandas warning (presumably DtypeWarning for mixed-type columns);
# confirm it no longer appears after re-running.
orig = pd.read_csv('../susan/data/Medical_Examiner_Case_Archive.csv', low_memory=False)
new = pd.read_csv('../susan/data/Medical_Examiner_Case_Archive_6Nov23.csv', low_memory=False)

  orig = pd.read_csv('../susan/data/Medical_Examiner_Case_Archive.csv')
  new = pd.read_csv('../susan/data/Medical_Examiner_Case_Archive_6Nov23.csv')


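Reformat column names so we can tell which dataset each column came from after the merge: lowercase for the original data, uppercase for the new data. Spaces are replaced with underscores in both.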

In [9]:
# Replace spaces with underscores in both headers and let the letter case
# encode provenance: lowercase -> orig, UPPERCASE -> new.
orig_headers = orig.columns.str.replace(' ', '_').str.lower()
new_headers = new.columns.str.replace(' ', '_').str.upper()
orig.columns = orig_headers
new.columns = new_headers

In [11]:
# Preview the first five rows of the original snapshot after renaming.
orig.head(n=5)

Unnamed: 0,case_number,date_of_incident,date_of_death,age,gender,race,latino,manner_of_death,primary_cause,primary_cause_line_a,...,incident_zip_code,longitude,latitude,location,residence_city,residence_zip,objectid,chicago_ward,chicago_community_area,covid_related
0,ME2023-06354,10/26/2023 02:34:00 AM,10/26/2023 02:01:00 AM,,Male,Black,False,,,,...,60661.0,-87.640339,41.885803,"(41.8858033, -87.64033894)",,,78295,34.0,NEAR WEST SIDE,False
1,ME2023-06353,10/25/2023 10:08:00 PM,10/25/2023 09:42:00 PM,20.0,Male,Black,False,,,,...,,-87.628879,41.600921,"(41.60092087, -87.62887866)",Glenwood,60425.0,78291,,,False
2,ME2023-06352,10/25/2023 09:28:00 PM,10/25/2023 09:04:00 PM,67.0,Male,White,False,,,,...,60804.0,-87.769649,41.827775,"(41.82777541, -87.76964878)",Cicero,60804.0,78290,,,False
3,ME2023-06351,10/25/2023 08:52:00 PM,10/25/2023 07:49:00 PM,48.0,Male,Black,False,,,,...,60707.0,-87.804452,41.912988,"(41.91298845, -87.8044522)",CHICAGO,60707.0,78292,29.0,AUSTIN,False
4,ME2023-06350,10/25/2023 07:21:00 PM,10/25/2023 06:47:00 PM,40.0,Male,White,False,,,,...,60458.0,-87.831108,41.75876,"(41.75876008, -87.83110831)",Justice,60458.0,78296,,,False


Select the cases whose manner of death had not been determined as of 26 Oct (recorded as UNDETERMINED, PENDING, or missing)

In [12]:
# A case counts as unresolved when its manner of death is one of these labels
# or is missing altogether.
unresolved_labels = ['UNDETERMINED', 'PENDING']
manner = orig['manner_of_death']
is_unresolved = manner.isin(unresolved_labels) | manner.isna()
orig_unk = orig[is_unresolved]

In [13]:
# Display the unresolved subset (the notebook shows a truncated view of long frames).
orig_unk

Unnamed: 0,case_number,date_of_incident,date_of_death,age,gender,race,latino,manner_of_death,primary_cause,primary_cause_line_a,...,incident_zip_code,longitude,latitude,location,residence_city,residence_zip,objectid,chicago_ward,chicago_community_area,covid_related
0,ME2023-06354,10/26/2023 02:34:00 AM,10/26/2023 02:01:00 AM,,Male,Black,False,,,,...,60661,-87.640339,41.885803,"(41.8858033, -87.64033894)",,,78295,34.0,NEAR WEST SIDE,False
1,ME2023-06353,10/25/2023 10:08:00 PM,10/25/2023 09:42:00 PM,20.0,Male,Black,False,,,,...,,-87.628879,41.600921,"(41.60092087, -87.62887866)",Glenwood,60425,78291,,,False
2,ME2023-06352,10/25/2023 09:28:00 PM,10/25/2023 09:04:00 PM,67.0,Male,White,False,,,,...,60804,-87.769649,41.827775,"(41.82777541, -87.76964878)",Cicero,60804,78290,,,False
3,ME2023-06351,10/25/2023 08:52:00 PM,10/25/2023 07:49:00 PM,48.0,Male,Black,False,,,,...,60707,-87.804452,41.912988,"(41.91298845, -87.8044522)",CHICAGO,60707,78292,29.0,AUSTIN,False
4,ME2023-06350,10/25/2023 07:21:00 PM,10/25/2023 06:47:00 PM,40.0,Male,White,False,,,,...,60458,-87.831108,41.758760,"(41.75876008, -87.83110831)",Justice,60458,78296,,,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77953,ME2014-00268,08/29/2014 05:57:00 PM,08/29/2014 06:00:00 PM,,,,False,,,,...,60612,-87.701808,41.880564,"(41.88056389, -87.70180768)",Chicago,60612,14044,28.0,EAST GARFIELD PARK,False
78070,ME2014-00151,08/21/2014 03:05:00 AM,08/21/2014 03:29:00 AM,50.0,Female,Black,False,UNDETERMINED,MULTIPLE INJURIES . FALL FROM HEIGHT,MULTIPLE INJURIES,...,60636,-87.663899,41.808544,"(41.80854417, -87.66389931)",Chicago,60636,22357,20.0,NEW CITY,False
78082,ME2014-00139,08/20/2014 02:20:00 AM,08/20/2014 03:27:00 AM,0.0,Female,Black,False,UNDETERMINED,UNDETERMINED,UNDETERMINED,...,60653,-87.619237,41.810031,"(41.81003097, -87.61923746)",Chicago,60653,26767,3.0,GRAND BOULEVARD,False
78172,ME2014-00049,08/14/2014 05:14:00 AM,08/14/2014 05:32:00 AM,32.0,Male,White,False,UNDETERMINED,MIXED DRUG INTOXICATION (PROPRANOLOL. LORAZEPA...,MIXED DRUG INTOXICATION (PROPRANOLOL. LORAZEPA...,...,60659,-87.705971,41.989193,"(41.98919257, -87.70597098)",Chicago,60618,27491,50.0,WEST RIDGE,False


In [14]:
# (rows, columns) of the original snapshot, then of the new snapshot.
(orig.shape, new.shape)

((78221, 30), (78476, 30))

Merge on case number to add the 6 Nov 23 manner of death

In [15]:
# Left-join the unresolved cases to the new snapshot's manner of death.
# Every row of orig_unk is kept; cases with no match in `new` get NaN in the
# two uppercase columns.
new_manner = new[['CASE_NUMBER', 'MANNER_OF_DEATH']]
df2 = orig_unk.merge(
    new_manner,
    how='left',
    left_on='case_number',
    right_on='CASE_NUMBER',
)

In [16]:
# Preview the merged frame; the uppercase columns come from the new snapshot.
df2.head(n=5)

Unnamed: 0,case_number,date_of_incident,date_of_death,age,gender,race,latino,manner_of_death,primary_cause,primary_cause_line_a,...,latitude,location,residence_city,residence_zip,objectid,chicago_ward,chicago_community_area,covid_related,CASE_NUMBER,MANNER_OF_DEATH
0,ME2023-06354,10/26/2023 02:34:00 AM,10/26/2023 02:01:00 AM,,Male,Black,False,,,,...,41.885803,"(41.8858033, -87.64033894)",,,78295,34.0,NEAR WEST SIDE,False,ME2023-06354,PENDING
1,ME2023-06353,10/25/2023 10:08:00 PM,10/25/2023 09:42:00 PM,20.0,Male,Black,False,,,,...,41.600921,"(41.60092087, -87.62887866)",Glenwood,60425.0,78291,,,False,ME2023-06353,ACCIDENT
2,ME2023-06352,10/25/2023 09:28:00 PM,10/25/2023 09:04:00 PM,67.0,Male,White,False,,,,...,41.827775,"(41.82777541, -87.76964878)",Cicero,60804.0,78290,,,False,ME2023-06352,SUICIDE
3,ME2023-06351,10/25/2023 08:52:00 PM,10/25/2023 07:49:00 PM,48.0,Male,Black,False,,,,...,41.912988,"(41.91298845, -87.8044522)",CHICAGO,60707.0,78292,29.0,AUSTIN,False,ME2023-06351,PENDING
4,ME2023-06350,10/25/2023 07:21:00 PM,10/25/2023 06:47:00 PM,40.0,Male,White,False,,,,...,41.75876,"(41.75876008, -87.83110831)",Justice,60458.0,78296,,,False,ME2023-06350,PENDING


Keep only the rows where the 6 Nov 23 manner of death (the 'MANNER_OF_DEATH' column) is one that we are tracking: ACCIDENT, HOMICIDE, or SUICIDE

In [29]:
# Keep only the cases whose manner of death in the new snapshot is one of the
# tracked values.
tracked_manners = ['ACCIDENT', 'HOMICIDE', 'SUICIDE']
is_tracked = df2['MANNER_OF_DEATH'].isin(tracked_manners)
serv_data = df2[is_tracked]

In [34]:
# Preview the serving set; row labels are carried over from df2.
serv_data.head(n=5)

Unnamed: 0,case_number,date_of_incident,date_of_death,age,gender,race,latino,manner_of_death,primary_cause,primary_cause_line_a,...,latitude,location,residence_city,residence_zip,objectid,chicago_ward,chicago_community_area,covid_related,CASE_NUMBER,MANNER_OF_DEATH
1,ME2023-06353,10/25/2023 10:08:00 PM,10/25/2023 09:42:00 PM,20.0,Male,Black,False,,,,...,41.600921,"(41.60092087, -87.62887866)",Glenwood,60425.0,78291,,,False,ME2023-06353,ACCIDENT
2,ME2023-06352,10/25/2023 09:28:00 PM,10/25/2023 09:04:00 PM,67.0,Male,White,False,,,,...,41.827775,"(41.82777541, -87.76964878)",Cicero,60804.0,78290,,,False,ME2023-06352,SUICIDE
7,ME2023-06347,10/25/2023 03:54:00 PM,10/25/2023 03:29:00 PM,,Male,Black,False,,,,...,41.786962,"(41.7869618, -87.65565789)",,,78293,16.0,WEST ENGLEWOOD,False,ME2023-06347,HOMICIDE
10,ME2023-06344,10/22/2023 03:01:00 PM,10/25/2023 02:48:00 PM,87.0,Female,Asian,False,,,,...,41.972926,"(41.97292598, -87.66487117)",Chicago,60640.0,78287,47.0,UPTOWN,False,ME2023-06344,ACCIDENT
11,ME2023-06343,10/18/2023 02:58:00 PM,10/25/2023 01:49:00 PM,91.0,Male,White,False,,,,...,41.719392,"(41.71939185, -87.83287906)",Hickory Hills,60457.0,78282,,,False,ME2023-06343,ACCIDENT


In [32]:
# Persist the serving set. index=True is the pandas default, spelled out here:
# the row labels are written as an unnamed first column of the CSV.
serv_data.to_csv('../data/serv_data.csv', index=True)

In [35]:
# Side-by-side check: original value (lowercase column) vs. new value
# (uppercase column) for every row in the serving set.
serv_data.loc[:, ['manner_of_death', 'MANNER_OF_DEATH']]

Unnamed: 0,manner_of_death,MANNER_OF_DEATH
1,,ACCIDENT
2,,SUICIDE
7,,HOMICIDE
10,,ACCIDENT
11,,ACCIDENT
...,...,...
778,PENDING,ACCIDENT
799,PENDING,HOMICIDE
861,PENDING,SUICIDE
888,PENDING,ACCIDENT
