In [8]:
import pandas as pd
pd.set_option("display.max_rows", 1000)
from datetime import datetime, date
import requests
import json

# Pull in felony intake data from the Cook County data catalog.
# ROW_LIMIT is the value sent as the $limit query parameter, i.e. the most rows
# this single request asks the API to return.
ROW_LIMIT = 1000000
url = f"https://datacatalog.cookcountyil.gov/resource/3k7z-hchi.json?$limit={ROW_LIMIT}"

# timeout is (connect, read) in seconds, so a stalled connection raises
# instead of hanging the kernel indefinitely.
request = requests.get(url, timeout=(10, 300))
# Stop on an HTTP error status (4xx/5xx) rather than building a DataFrame
# out of an error payload.
request.raise_for_status()
data_dict = request.json()

felony_intakes = pd.DataFrame(data_dict)

# Getting back exactly ROW_LIMIT rows means the result was probably cut off.
assert len(felony_intakes) < ROW_LIMIT, (
    f"Received {len(felony_intakes)} rows, which reaches the $limit of {ROW_LIMIT}; "
    "the dataset is likely truncated - raise ROW_LIMIT or page through the results"
)

felony_intakes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 429724 entries, 0 to 429723
Data columns (total 17 columns):
 #   Column                   Non-Null Count   Dtype 
---  ------                   --------------   ----- 
 0   case_id                  429724 non-null  object
 1   case_participant_id      429724 non-null  object
 2   received_date            429724 non-null  object
 3   offense_category         429724 non-null  object
 4   participant_status       407297 non-null  object
 5   age_at_incident          407695 non-null  object
 6   race                     416498 non-null  object
 7   gender                   418512 non-null  object
 8   incident_begin_date      411058 non-null  object
 9   law_enforcement_agency   412059 non-null  object
 10  arrest_date              389894 non-null  object
 11  felony_review_date       251224 non-null  object
 12  felony_review_result     251224 non-null  object
 13  update_offense_category  429724 non-null  object
 14  incident_city       

In [9]:
# The date columns come back from the API as plain objects rather than
# datetimes; parse every column whose name contains '_date'.
intake_cols = felony_intakes.columns
date_cols = [name for name in intake_cols if '_date' in name]

for col in date_cols:
    # errors='coerce': values that cannot be parsed become NaT instead of raising
    parsed = pd.to_datetime(felony_intakes[col], errors='coerce')
    felony_intakes[col] = parsed

felony_intakes

Unnamed: 0,case_id,case_participant_id,received_date,offense_category,participant_status,age_at_incident,race,gender,incident_begin_date,law_enforcement_agency,arrest_date,felony_review_date,felony_review_result,update_offense_category,incident_city,unit,incident_end_date
0,217923815445,209352968780,2011-05-24,PROMIS Conversion,Continued Investigation,25,Black,Male,2007-03-12,PROMIS Data Conversion,2007-04-03 00:00:00,2011-05-24,Approved,Homicide,,,NaT
1,217923815445,209353063152,2011-05-24,PROMIS Conversion,Approved,22,Black,Male,2007-03-12,PROMIS Data Conversion,2011-05-22 18:51:00,2011-05-24,Approved,Homicide,,,NaT
2,217923815445,209353157524,2011-05-24,PROMIS Conversion,Continued Investigation,19,Black,Male,2007-03-12,PROMIS Data Conversion,NaT,2011-05-24,Approved,Homicide,,,NaT
3,223435124424,219042164387,2012-01-27,PROMIS Conversion,Approved,,Black,Male,2009-06-25,CPD SPEC INV-YOUTH,2009-07-14 14:34:00,2009-07-14,Charge(S) Approved,Attempt Homicide,,,NaT
4,236914327334,218815387947,2011-01-31,PROMIS Conversion,Approved,37,HISPANIC,Male,2010-04-07,SPEC. OPERATIONS,2010-04-07 20:40:00,NaT,,Narcotics,,,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
429719,446279599359,907765702925,2021-03-31,UUW - Unlawful Use of Weapon,Approved,22,Black,Male,2021-03-31,CHICAGO PD,2021-03-31 19:57:00,NaT,,UUW - Unlawful Use of Weapon,Chicago,,NaT
429720,446279746213,907765986042,2021-03-31,UUW - Unlawful Use of Weapon,Approved,30,Black,Male,2021-03-31,COOK COUNTY SHERIFF (IL0160000),2021-03-31 21:05:00,2021-04-01,Approved,UUW - Unlawful Use of Weapon,Chicago,,NaT
429721,446279746213,907766080414,2021-03-31,UUW - Unlawful Use of Weapon,Approved,28,Black,Male,2021-03-31,COOK COUNTY SHERIFF (IL0160000),2021-03-31 21:05:00,2021-04-01,Approved,UUW - Unlawful Use of Weapon,Chicago,,NaT
429722,446279893068,907766552275,2021-03-31,UUW - Unlawful Use of Weapon,,29,Black,Male,2021-03-31,ISP DISTRICT CHICAGO,NaT,2021-04-01,Continued Investigation,UUW - Unlawful Use of Weapon,Chicago,,NaT


In [17]:
# Collect the distinct values of several categorical columns, one array per
# column, in the same order as the names on the left-hand side.
(
    unique_offense_cat,
    unique_participant_status,
    unique_enforcement_agency,
    unique_review_result,
    unique_updated_offense,
) = (
    felony_intakes[column].unique()
    for column in (
        'offense_category',
        'participant_status',
        'law_enforcement_agency',
        'felony_review_result',
        'update_offense_category',
    )
)

In [18]:
# Split the intakes by whether the incident city is exactly 'Chicago'.
# Rows with a missing incident_city fail the equality test, so they land in
# the non-Chicago frame.
in_chicago = felony_intakes['incident_city'].eq('Chicago')
chicago_felonies = felony_intakes.loc[in_chicago]
non_chicago_felonies = felony_intakes.loc[~in_chicago]

# Summary of the Chicago subset; swap the comment marker to inspect the other one
chicago_felonies.info()
#non_chicago_felonies.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 295042 entries, 7 to 429723
Data columns (total 17 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   case_id                  295042 non-null  object        
 1   case_participant_id      295042 non-null  object        
 2   received_date            295042 non-null  datetime64[ns]
 3   offense_category         295042 non-null  object        
 4   participant_status       282718 non-null  object        
 5   age_at_incident          292078 non-null  object        
 6   race                     288915 non-null  object        
 7   gender                   289602 non-null  object        
 8   incident_begin_date      294474 non-null  datetime64[ns]
 9   law_enforcement_agency   295042 non-null  object        
 10  arrest_date              273218 non-null  datetime64[ns]
 11  felony_review_date       157178 non-null  datetime64[ns]
 12  felony_review_re

In [19]:
# Within Chicago, count felony intake rows per 'unit' and rank the units from
# most to fewest rows.
felonies_per_unit = chicago_felonies.groupby('unit').size()
chicago_unit_felonies = felonies_per_unit.sort_values(ascending=False)
#chicago_unit_felonies

In [41]:
# Within Chicago, why are some of the 'unit' fields null?
# Keep only the rows with a missing unit, renumber them (the old index is kept
# as an 'index' column), and add the year taken from received_date.
null_units = (
    chicago_felonies[chicago_felonies['unit'].isna()]
    .reset_index()
    .assign(year=lambda frame: frame['received_date'].dt.year)
)

# Each pair of commented lines below is one breakdown of the null-unit rows;
# uncomment a pair to print it.

# Is it by date?
#print("Null Units by Year")
#print(null_units.groupby('year').agg('size'), "\n")

# Is it by status?
#print("Null Units by Status")
#print(null_units.groupby('participant_status').agg('size').sort_values(ascending=False), "\n")

# Is it by enforcement agency?
#print("Null Units by Enforcement Agency")
#print(null_units.groupby('law_enforcement_agency').agg('size').sort_values(ascending=False), "\n")

# Is it by review result?
#print("Null Units by Review Result")
#print(null_units.groupby('felony_review_result').agg('size').sort_values(ascending=False), "\n")

# Is it by updated offense category?
#print("Null Units by Updated Offense")
#print(null_units.groupby('update_offense_category').agg('size').sort_values(ascending=False), "\n")


### There are about 14,000 arrests that were made in Chicago BUT NOT by CPD. Therefore, unit likely does not apply to them.
### Still, the null unit rate is about 40% in Chicago arrests

Null Units by Enforcement Agency
law_enforcement_agency
CHICAGO PD                                                      120827
COOK COUNTY SHERIFF (IL0160000)                                   6446
ISP DISTRICT CHICAGO                                              1370
U OF I POLICE DEPARTMENT CHICAGO                                   767
ISP DISTRICT 03                                                    726
COOK COUNTY DEPT. OF CORRECTIONS                                   470
UNIVERSITY OF CHICAGO POLICE DEPT                                  297
CICERO PD                                                          227
AMTRAK NATIONAL RAILROAD PASSENGER CORP                            222
ISP DISTRICT 01                                                    201
METRA PD                                                           196
EVERGREEN PARK PD                                                  158
COOK COUNTY SHERIFF'S FUGITIVE-WARRANTS (IL0169Q00)                148
COOK COUNTY SHERIFF'S