# Cleaning various Chicago Open Data Portal datasets
#### Abhilash Biswas
#### 11/23/2021

This file merges Census tract id from ACS dataset to the geo-coordinates of 5 datasets obtained from the Chicago open data portal. Additionally, this file also attaches census tract ID to the 84 civilian killings that has happened in Chicago till date. 

The 5 datasets extracted from Chicago open data portal are:
1. Police station locations
2. Fire station locations
3. List of public schools (as of 2019) (2016-17 data also available)
4. List of Parks maintained by Chicago Park district
5. List of all licensed commercial establishments
6. List of violent, non-violent, property and other crimes from 2015 to 2019

In addition to these 5, the police killings dataset is obtained from the MPV csv file. 

### Process
1. Get the clean ACS dataset (containing boundary polygons and census ids (geo id)) and convert it into a geopandas dataframe
2. Obtain each of the above 6 predictor datasets and convert it into geopandas dataframe
3. Merge the geo ids to each of the predictor datasets
4. Aggregate the information for each predictor at a census tract level
5. Combine all the datasets into 1

In [109]:
#Import all packages
import pandas as pd
import censusdata
from tabulate import tabulate
import matplotlib.pyplot as plt
from sodapy import Socrata
import geopandas as gpd
from shapely import wkt
import json
import requests
from pyprojroot import here



# Census tract information

In [110]:
#Get the cleaned csv
acs = pd.read_csv(here('./data/CleanACSFile.csv'))

#Convert it into a geopandas dataframe
acs['geometry'] = acs['geometry'].apply(wkt.loads)
gdf_acs = gpd.GeoDataFrame(acs, crs = 'epsg:4326')

gdf_acs

Unnamed: 0,geo_id,B01001_001E,DP02_0002PE,DP02_0004PE,DP02_0006PE,DP02_0010PE,DP02_0014PE,DP02_0015PE,DP02_0016E,DP02_0017E,...,DP05_0018E,DP05_0019PE,DP05_0024PE,DP05_0037PE,DP05_0038PE,DP05_0044PE,DP05_0058PE,DP05_0071PE,geometry,GEOID10
0,1400000US17031010100,4599.0,23.8,2.5,39.5,34.2,21.5,8.7,1.89,3.05,...,35.6,19.9,6.0,46.7,45.2,1.0,3.4,11.4,"MULTIPOLYGON (((-87.67720 42.02294, -87.67007 ...",17031010100
1,1400000US17031010201,7455.0,33.7,7.2,28.3,30.8,28.2,14.9,2.65,3.50,...,34.8,25.6,6.8,46.4,33.8,4.0,8.0,22.4,"MULTIPOLYGON (((-87.68465 42.01949, -87.68045 ...",17031010201
2,1400000US17031010202,2896.0,23.1,13.6,23.0,40.4,26.9,17.9,2.27,3.31,...,35.0,20.3,13.1,46.7,33.9,5.4,1.4,26.0,"MULTIPOLYGON (((-87.67685 42.01941, -87.67339 ...",17031010202
3,1400000US17031010300,6485.0,25.3,7.4,25.2,42.1,17.0,18.7,1.80,2.79,...,42.2,14.5,18.5,59.6,30.9,1.0,4.2,16.9,"MULTIPOLYGON (((-87.67133 42.01937, -87.66950 ...",17031010300
4,1400000US17031010400,5213.0,17.4,5.7,36.4,40.5,12.5,10.7,1.82,2.93,...,25.2,10.7,5.0,70.8,21.3,4.6,1.8,7.5,"MULTIPOLYGON (((-87.66345 42.01283, -87.66133 ...",17031010400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
793,1400000US17031843500,10169.0,33.0,22.8,25.6,18.6,50.9,10.2,3.58,4.03,...,29.9,3.7,2.2,31.0,63.4,0.3,0.0,26.3,"MULTIPOLYGON (((-87.70504 41.84452, -87.70258 ...",17031843500
794,1400000US17031843600,2898.0,12.5,8.8,18.9,59.7,28.4,20.9,2.06,2.90,...,33.6,26.9,11.7,7.6,87.6,0.9,2.4,7.9,"MULTIPOLYGON (((-87.61150 41.81128, -87.60661 ...",17031843600
795,1400000US17031843700,2527.0,51.3,5.6,17.9,25.1,39.6,22.4,2.55,3.50,...,35.8,27.9,9.8,80.0,4.0,6.7,6.4,25.7,"MULTIPOLYGON (((-87.69676 41.95046, -87.69445 ...",17031843700
796,1400000US17031843800,1520.0,19.8,9.5,31.9,38.8,32.4,32.5,2.23,3.04,...,39.9,22.4,17.4,25.8,66.4,7.3,0.5,7.0,"MULTIPOLYGON (((-87.64554 41.80886, -87.64068 ...",17031843800


# Chicago police district station locations

In [111]:
# Unauthenticated client only works with public data sets. Note 'None'
# in place of application token, and no username or password:
client = Socrata("data.cityofchicago.org", None)

# Example authenticated client (needed for non-public datasets):
# client = Socrata(data.cityofchicago.org,
#                  MyAppToken,
#                  userame="user@example.com",
#                  password="AFakePassword")

# First 2000 results, returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get("z8bn-74gv", limit=2000)

# Convert to pandas DataFrame
police_stations = pd.DataFrame.from_records(results)



In [112]:
police_stations['lat'] = ''
police_stations['long'] = ''


for i in range(0,len(police_stations)):
    police_stations['lat'].iloc[i] = police_stations['location'].iloc[i]['latitude']
    police_stations['long'].iloc[i] = police_stations['location'].iloc[i]['longitude']
    

    
police_stations = police_stations[['district','district_name','zip','location','lat','long']]
police_stations

Unnamed: 0,district,district_name,zip,location,lat,long
0,Headquarters,Headquarters,60653,"{'latitude': '41.8307016873', 'longitude': '-8...",41.8307016873,-87.6233953459
1,18,Near North,60610,"{'latitude': '41.9032416531', 'longitude': '-8...",41.9032416531,-87.6433521393
2,19,Town Hall,60613,"{'latitude': '41.9474004564', 'longitude': '-8...",41.9474004564,-87.651512018
3,20,Lincoln,60625,"{'latitude': '41.9795495131', 'longitude': '-8...",41.9795495131,-87.6928445094
4,22,Morgan Park,60643,"{'latitude': '41.6914347795', 'longitude': '-8...",41.6914347795,-87.6685203937
5,24,Rogers Park,60626,"{'latitude': '41.9997634842', 'longitude': '-8...",41.9997634842,-87.6713242922
6,25,Grand Central,60639,"{'latitude': '41.9186088912', 'longitude': '-8...",41.9186088912,-87.765574479
7,1,Central,60616,"{'latitude': '41.8583725929', 'longitude': '-8...",41.8583725929,-87.627356171
8,2,Wentworth,60609,"{'latitude': '41.8018110912', 'longitude': '-8...",41.8018110912,-87.6305601801
9,3,Grand Crossing,60637,"{'latitude': '41.7664308925', 'longitude': '-8...",41.7664308925,-87.6057478606


In [113]:
#Attach geo ids
gdf_ps = gpd.GeoDataFrame(
    police_stations, geometry=gpd.points_from_xy(police_stations.long, police_stations.lat), crs = 'epsg:4326')

ps_acs = gpd.sjoin(gdf_ps, gdf_acs[['geo_id','geometry']], how='left' )

ps_acs

Unnamed: 0,district,district_name,zip,location,lat,long,geometry,index_right,geo_id
0,Headquarters,Headquarters,60653,"{'latitude': '41.8307016873', 'longitude': '-8...",41.8307016873,-87.6233953459,POINT (-87.62340 41.83070),399,1400000US17031351400
1,18,Near North,60610,"{'latitude': '41.9032416531', 'longitude': '-8...",41.9032416531,-87.6433521393,POINT (-87.64335 41.90324),781,1400000US17031842200
2,19,Town Hall,60613,"{'latitude': '41.9474004564', 'longitude': '-8...",41.9474004564,-87.651512018,POINT (-87.65151 41.94740),83,1400000US17031061000
3,20,Lincoln,60625,"{'latitude': '41.9795495131', 'longitude': '-8...",41.9795495131,-87.6928445094,POINT (-87.69284 41.97955),55,1400000US17031040202
4,22,Morgan Park,60643,"{'latitude': '41.6914347795', 'longitude': '-8...",41.6914347795,-87.6685203937,POINT (-87.66852 41.69143),678,1400000US17031750500
5,24,Rogers Park,60626,"{'latitude': '41.9997634842', 'longitude': '-8...",41.9997634842,-87.6713242922,POINT (-87.67132 41.99976),689,1400000US17031830600
6,25,Grand Central,60639,"{'latitude': '41.9186088912', 'longitude': '-8...",41.9186088912,-87.765574479,POINT (-87.76557 41.91861),230,1400000US17031191200
7,1,Central,60616,"{'latitude': '41.8583725929', 'longitude': '-8...",41.8583725929,-87.627356171,POINT (-87.62736 41.85837),390,1400000US17031330200
8,2,Wentworth,60609,"{'latitude': '41.8018110912', 'longitude': '-8...",41.8018110912,-87.6305601801,POINT (-87.63056 41.80181),728,1400000US17031835600
9,3,Grand Crossing,60637,"{'latitude': '41.7664308925', 'longitude': '-8...",41.7664308925,-87.6057478606,POINT (-87.60575 41.76643),440,1400000US17031421200


In [114]:
#Aggregate information at a census tract level
ps_acs['count'] = 1
ps_acs = ps_acs.groupby(['geo_id'],as_index = False)['count'].sum()
ps_acs.rename(columns = {"count":"police_stations"}, inplace = True)
ps_acs

Unnamed: 0,geo_id,police_stations
0,1400000US17031040202,1
1,1400000US17031061000,1
2,1400000US17031110400,1
3,1400000US17031140500,1
4,1400000US17031191200,1
5,1400000US17031221300,1
6,1400000US17031252101,1
7,1400000US17031330200,1
8,1400000US17031351400,1
9,1400000US17031421200,1


# Fire station locations

In [115]:
# Unauthenticated client only works with public data sets. Note 'None'
# in place of application token, and no username or password:
client = Socrata("data.cityofchicago.org", None)

# Example authenticated client (needed for non-public datasets):
# client = Socrata(data.cityofchicago.org,
#                  MyAppToken,
#                  userame="user@example.com",
#                  password="AFakePassword")

# First 2000 results, returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get("28km-gtjn", limit=2000)

# Convert to pandas DataFrame
fire_stations_locations = pd.DataFrame.from_records(results)




In [116]:
fire_stations_locations['lat'] = ''
fire_stations_locations['long'] = ''


for i in range(0,len(fire_stations_locations)):
    fire_stations_locations['lat'].iloc[i] = fire_stations_locations['location'].iloc[i]['latitude']
    fire_stations_locations['long'].iloc[i] = fire_stations_locations['location'].iloc[i]['longitude']
    

    
fire_stations_locations = fire_stations_locations[['name','address','zip','location','lat','long']]
fire_stations_locations

Unnamed: 0,name,address,zip,location,lat,long
0,E5,324 S DESPLAINES ST,60661,"{'latitude': '41.877028304420755', 'longitude'...",41.877028304420755,-87.64430865193455
1,E11,5343 N CUMBERLAND AVE,60656,"{'latitude': '41.97685625348317', 'longitude':...",41.97685625348317,-87.836495886321
2,E81,10458 S HOXIE AVE,60617,"{'latitude': '41.705334319654064', 'longitude'...",41.705334319654064,-87.56088524816063
3,E22,605 W ARMITAGE AVE,60614,"{'latitude': '41.91792047709303', 'longitude':...",41.91792047709303,-87.64396690956342
4,E50,5000 S UNION AVE,60609,"{'latitude': '41.80344788181221', 'longitude':...",41.80344788181221,-87.64299386409898
...,...,...,...,...,...,...
87,E19,3421 S CALUMET AVE,60616,"{'latitude': '41.83227804024279', 'longitude':...",41.83227804024279,-87.61779663851078
88,E26,10 N LEAVITT ST,60612,"{'latitude': '41.88151592134697', 'longitude':...",41.88151592134697,-87.68185534665783
89,E82,817 E 91ST ST,60619,"{'latitude': '41.72933327959225', 'longitude':...",41.72933327959225,-87.60425730151255
90,E34,4034 W 47TH ST,60632,"{'latitude': '41.80790024096418', 'longitude':...",41.80790024096418,-87.72485128276466


In [117]:
#Attach geo ids
gdf_fs = gpd.GeoDataFrame(
    fire_stations_locations, geometry=gpd.points_from_xy(fire_stations_locations.long, fire_stations_locations.lat), 
    crs = 'epsg:4326')

fs_acs = gpd.sjoin(gdf_fs, gdf_acs[['geo_id','geometry']], how='left' )

fs_acs

Unnamed: 0,name,address,zip,location,lat,long,geometry,index_right,geo_id
0,E5,324 S DESPLAINES ST,60661,"{'latitude': '41.877028304420755', 'longitude'...",41.877028304420755,-87.64430865193455,POINT (-87.64431 41.87703),353,1400000US17031281900
1,E11,5343 N CUMBERLAND AVE,60656,"{'latitude': '41.97685625348317', 'longitude':...",41.97685625348317,-87.836495886321,POINT (-87.83650 41.97686),150,1400000US17031100500
2,E81,10458 S HOXIE AVE,60617,"{'latitude': '41.705334319654064', 'longitude'...",41.705334319654064,-87.56088524816063,POINT (-87.56089 41.70533),753,1400000US17031838800
3,E22,605 W ARMITAGE AVE,60614,"{'latitude': '41.91792047709303', 'longitude':...",41.91792047709303,-87.64396690956342,POINT (-87.64397 41.91792),124,1400000US17031071800
4,E50,5000 S UNION AVE,60609,"{'latitude': '41.80344788181221', 'longitude':...",41.80344788181221,-87.64299386409898,POINT (-87.64299 41.80345),796,1400000US17031843800
...,...,...,...,...,...,...,...,...,...
87,E19,3421 S CALUMET AVE,60616,"{'latitude': '41.83227804024279', 'longitude':...",41.83227804024279,-87.61779663851078,POINT (-87.61780 41.83228),757,1400000US17031839500
88,E26,10 N LEAVITT ST,60612,"{'latitude': '41.88151592134697', 'longitude':...",41.88151592134697,-87.68185534665783,POINT (-87.68186 41.88152),746,1400000US17031837800
89,E82,817 E 91ST ST,60619,"{'latitude': '41.72933327959225', 'longitude':...",41.72933327959225,-87.60425730151255,POINT (-87.60426 41.72933),462,1400000US17031440800
90,E34,4034 W 47TH ST,60632,"{'latitude': '41.80790024096418', 'longitude':...",41.80790024096418,-87.72485128276466,POINT (-87.72485 41.80790),527,1400000US17031570200


In [118]:
#Aggregate information at a census tract level
fs_acs['count'] = 1
fs_acs = fs_acs.groupby(['geo_id'],as_index = False)['count'].sum()
fs_acs.rename(columns = {"count":"fire_stations"}, inplace = True)
fs_acs

Unnamed: 0,geo_id,fire_stations
0,1400000US17031010202,1
1,1400000US17031020802,1
2,1400000US17031030300,1
3,1400000US17031031700,1
4,1400000US17031040100,1
...,...,...
85,1400000US17031843300,1
86,1400000US17031843600,1
87,1400000US17031843700,1
88,1400000US17031843800,1


# Public Schools (as of 2019)

In [119]:
# Unauthenticated client only works with public data sets. Note 'None'
# in place of application token, and no username or password:
client = Socrata("data.cityofchicago.org", None)

# Example authenticated client (needed for non-public datasets):
# client = Socrata(data.cityofchicago.org,
#                  MyAppToken,
#                  userame="user@example.com",
#                  password="AFakePassword")

# First 2000 results, returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get("tz49-n8ze", limit=2000)

# Convert to pandas DataFrame
schools = pd.DataFrame.from_records(results)




In [120]:
schools = schools.rename(columns = {'x':'long','y':'lat'})
schools = schools[['school_id','school_nm','sch_type','lat','long']]
schools

Unnamed: 0,school_id,school_nm,sch_type,lat,long
0,610587,DYETT ARTS HS,Traditional,41.80120417,-87.61223911
1,400111,LEARN - PERKINS,Traditional,41.74312177,-87.66572106
2,610568,PATHWAYS - AVONDALE HS,Options,41.93943321,-87.70520632
3,610027,KIPLING,Traditional,41.72362691,-87.63952072
4,609712,HIRSCH HS,Traditional,41.75374796,-87.60172727
...,...,...,...,...,...
649,610200,THORP J,Traditional,41.73332424,-87.54427998
650,610139,PULLMAN,Traditional,41.68881935,-87.60943097
651,610026,KINZIE,Traditional,41.78996463,-87.7794826
652,609844,CARTER,Traditional,41.78982791,-87.62245275


In [121]:
#Attach geo ids
gdf_school = gpd.GeoDataFrame(
    schools, geometry=gpd.points_from_xy(schools.long, schools.lat), 
    crs = 'epsg:4326')

schools_acs = gpd.sjoin(gdf_school, gdf_acs[['geo_id','geometry']], how='left' )

schools_acs

Unnamed: 0,school_id,school_nm,sch_type,lat,long,geometry,index_right,geo_id
0,610587,DYETT ARTS HS,Traditional,41.80120417,-87.61223911,POINT (-87.61224 41.80120),733.0,1400000US17031836100
1,400111,LEARN - PERKINS,Traditional,41.74312177,-87.66572106,POINT (-87.66572 41.74312),651.0,1400000US17031711200
2,610568,PATHWAYS - AVONDALE HS,Options,41.93943321,-87.70520632,POINT (-87.70521 41.93943),238.0,1400000US17031210100
3,610027,KIPLING,Traditional,41.72362691,-87.63952072,POINT (-87.63952 41.72363),662.0,1400000US17031730100
4,609712,HIRSCH HS,Traditional,41.75374796,-87.60172727,POINT (-87.60173 41.75375),631.0,1400000US17031691500
...,...,...,...,...,...,...,...,...
649,610200,THORP J,Traditional,41.73332424,-87.54427998,POINT (-87.54428 41.73332),714.0,1400000US17031833900
650,610139,PULLMAN,Traditional,41.68881935,-87.60943097,POINT (-87.60943 41.68882),495.0,1400000US17031500300
651,610026,KINZIE,Traditional,41.78996463,-87.7794826,POINT (-87.77948 41.78996),525.0,1400000US17031561100
652,609844,CARTER,Traditional,41.78982791,-87.62245275,POINT (-87.62245 41.78983),420.0,1400000US17031400500


In [122]:
#Aggregate information at a census tract level
schools_acs['count'] = 1
schools_acs = schools_acs.groupby(['geo_id'],as_index = False)['count'].sum()
schools_acs.rename(columns = {"count":"public_schools"}, inplace = True)
schools_acs

Unnamed: 0,geo_id,public_schools
0,1400000US17031010100,1
1,1400000US17031010201,1
2,1400000US17031010202,1
3,1400000US17031010600,2
4,1400000US17031010702,1
...,...,...
445,1400000US17031843200,1
446,1400000US17031843300,2
447,1400000US17031843500,1
448,1400000US17031843700,1


# Parks (maintained by Chicago Park district)

In [123]:
# Unauthenticated client only works with public data sets. Note 'None'
# in place of application token, and no username or password:
client = Socrata("data.cityofchicago.org", None)

# Example authenticated client (needed for non-public datasets):
# client = Socrata(data.cityofchicago.org,
#                  MyAppToken,
#                  userame="user@example.com",
#                  password="AFakePassword")

# First 2000 results, returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get("y7qa-tvqx", limit=5000)

# Convert to pandas DataFrame
parks = pd.DataFrame.from_records(results)



In [124]:
parks['lat'] = ''
parks['long'] = ''


for i in range(0,len(parks)):
    parks['lat'].iloc[i] = parks['location'].iloc[i]['latitude']
    parks['long'].iloc[i] = parks['location'].iloc[i]['longitude']
    

    
parks = parks[['park','park_number','location','lat','long']]
parks = parks.drop_duplicates(subset = ['park_number'])

parks

Unnamed: 0,park,park_number,location,lat,long
0,ABBOTT (ROBERT),259,"{'latitude': '41.72096', 'longitude': '-87.621...",41.72096,-87.621351
10,ADA (SAWYER GARRETT),45,"{'latitude': '41.687785', 'longitude': '-87.65...",41.687785,-87.655389
26,ADAMS (GEORGE & ADELE),1019,"{'latitude': '41.91689', 'longitude': '-87.655...",41.91689,-87.655092
28,AIELLO (JOHN),1280,"{'latitude': '41.919151', 'longitude': '-87.77...",41.919151,-87.776356
29,ALGONQUIN,1161,"{'latitude': '41.935202', 'longitude': '-87.69...",41.935202,-87.694918
...,...,...,...,...,...
3997,ROWAN (WILLIAM),248,"{'latitude': '41.686061', 'longitude': '-87.53...",41.686061,-87.538167
4022,RUTHERFORD SAYRE,127,"{'latitude': '41.920557', 'longitude': '-87.79...",41.920557,-87.795929
4051,SCHAEFER (EDWARD),1148,"{'latitude': '41.925746', 'longitude': '-87.66...",41.925746,-87.669035
4060,SENECA,1242,"{'latitude': '41.897006', 'longitude': '-87.62...",41.897006,-87.622414


In [125]:
#Attach geo ids
gdf_parks = gpd.GeoDataFrame(
    parks, geometry=gpd.points_from_xy(parks.long, parks.lat), 
    crs = 'epsg:4326')

parks_acs = gpd.sjoin(gdf_parks, gdf_acs[['geo_id','geometry']], how='left' )

parks_acs

Unnamed: 0,park,park_number,location,lat,long,geometry,index_right,geo_id
0,ABBOTT (ROBERT),259,"{'latitude': '41.72096', 'longitude': '-87.621...",41.72096,-87.621351,POINT (-87.62135 41.72096),482.0,1400000US17031490500
10,ADA (SAWYER GARRETT),45,"{'latitude': '41.687785', 'longitude': '-87.65...",41.687785,-87.655389,POINT (-87.65539 41.68778),679.0,1400000US17031750600
26,ADAMS (GEORGE & ADELE),1019,"{'latitude': '41.91689', 'longitude': '-87.655...",41.91689,-87.655092,POINT (-87.65509 41.91689),709.0,1400000US17031832600
28,AIELLO (JOHN),1280,"{'latitude': '41.919151', 'longitude': '-87.77...",41.919151,-87.776356,POINT (-87.77636 41.91915),231.0,1400000US17031191301
29,ALGONQUIN,1161,"{'latitude': '41.935202', 'longitude': '-87.69...",41.935202,-87.694918,POINT (-87.69492 41.93520),246.0,1400000US17031210900
...,...,...,...,...,...,...,...,...
3997,ROWAN (WILLIAM),248,"{'latitude': '41.686061', 'longitude': '-87.53...",41.686061,-87.538167,POINT (-87.53817 41.68606),504.0,1400000US17031520600
4022,RUTHERFORD SAYRE,127,"{'latitude': '41.920557', 'longitude': '-87.79...",41.920557,-87.795929,POINT (-87.79593 41.92056),699.0,1400000US17031831600
4051,SCHAEFER (EDWARD),1148,"{'latitude': '41.925746', 'longitude': '-87.66...",41.925746,-87.669035,POINT (-87.66903 41.92575),115.0,1400000US17031070700
4060,SENECA,1242,"{'latitude': '41.897006', 'longitude': '-87.62...",41.897006,-87.622414,POINT (-87.62241 41.89701),134.0,1400000US17031081300


In [126]:
#Aggregate information at a census tract level
parks_acs['count'] = 1
parks_acs = parks_acs.groupby(['geo_id'],as_index = False)['count'].sum()
parks_acs.rename(columns = {"count":"parks"}, inplace = True)
parks_acs

Unnamed: 0,geo_id,parks
0,1400000US17031010100,3
1,1400000US17031010201,1
2,1400000US17031010202,2
3,1400000US17031010300,1
4,1400000US17031010400,3
...,...,...
388,1400000US17031843100,1
389,1400000US17031843600,1
390,1400000US17031843700,2
391,1400000US17031843800,1


# Commerical establishments

In [127]:
# Unauthenticated client only works with public data sets. Note 'None'
# in place of application token, and no username or password:
client = Socrata("data.cityofchicago.org", None)

# Example authenticated client (needed for non-public datasets):
# client = Socrata(data.cityofchicago.org,
#                  MyAppToken,
#                  userame="user@example.com",
#                  password="AFakePassword")

# First 2000 results, returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get("uupf-x98q", limit=60000)

# Convert to pandas DataFrame
comm_est = pd.DataFrame.from_records(results)



In [128]:
comm_est.dropna(subset = ['location'], inplace = True)

comm_est['lat'] = ''
comm_est['long'] = ''

#Loop takes some time to run
for i in range(0,len(comm_est)):
    comm_est['lat'].iloc[i] = comm_est['location'].iloc[i]['latitude']
    comm_est['long'].iloc[i] = comm_est['location'].iloc[i]['longitude']
    

    
comm_est = comm_est[['zip_code','license_description','license_id','police_district','location','lat','long']]
comm_est

Unnamed: 0,zip_code,license_description,license_id,police_district,location,lat,long
0,60618,Limited Business License,2791110,,"{'latitude': '41.93702315598332', 'human_addre...",41.93702315598332,-87.69508863011075
1,60640,Manufacturing Establishments,2787279,19,"{'latitude': '41.964276736933826', 'human_addr...",41.964276736933826,-87.67474581759035
2,60640,Motor Vehicle Services License,2788133,20,"{'latitude': '41.97237520416441', 'human_addre...",41.97237520416441,-87.66804145559702
3,60657,Regulated Business License,2797190,,"{'latitude': '41.939678060057524', 'human_addr...",41.939678060057524,-87.67329284574338
4,60611,Limited Business License,2769315,18,"{'latitude': '41.89497951109999', 'human_addre...",41.89497951109999,-87.62439997045513
...,...,...,...,...,...,...,...
54882,60611,Valet Parking Operator,2785820,18,"{'latitude': '41.89475436857259', 'human_addre...",41.89475436857259,-87.6243943466027
54883,60601,Valet Parking Operator,2791661,1,"{'latitude': '41.88637481521078', 'human_addre...",41.88637481521078,-87.6246754356595
54884,60608,Commercial Garage,2797818,12,"{'latitude': '41.85598353104698', 'human_addre...",41.85598353104698,-87.67314851164002
54885,60603,Valet Parking Operator,2802760,1,"{'latitude': '41.88066457839605', 'human_addre...",41.88066457839605,-87.6270893644417


In [129]:
#Attach geo ids
gdf_comm_est = gpd.GeoDataFrame(
    comm_est, geometry=gpd.points_from_xy(comm_est.long, comm_est.lat), 
    crs = 'epsg:4326')

comm_est_acs = gpd.sjoin(gdf_comm_est, gdf_acs[['geo_id','geometry']], how='left' )

comm_est_acs

Unnamed: 0,zip_code,license_description,license_id,police_district,location,lat,long,geometry,index_right,geo_id
0,60618,Limited Business License,2791110,,"{'latitude': '41.93702315598332', 'human_addre...",41.93702315598332,-87.69508863011075,POINT (-87.69509 41.93702),246.0,1400000US17031210900
1,60640,Manufacturing Establishments,2787279,19,"{'latitude': '41.964276736933826', 'human_addr...",41.964276736933826,-87.67474581759035,POINT (-87.67475 41.96428),691.0,1400000US17031830800
2,60640,Motor Vehicle Services License,2788133,20,"{'latitude': '41.97237520416441', 'human_addre...",41.97237520416441,-87.66804145559702,POINT (-87.66804 41.97238),42.0,1400000US17031031000
3,60657,Regulated Business License,2797190,,"{'latitude': '41.939678060057524', 'human_addr...",41.939678060057524,-87.67329284574338,POINT (-87.67329 41.93968),95.0,1400000US17031062500
4,60611,Limited Business License,2769315,18,"{'latitude': '41.89497951109999', 'human_addre...",41.89497951109999,-87.62439997045513,POINT (-87.62440 41.89498),138.0,1400000US17031081500
...,...,...,...,...,...,...,...,...,...,...
54882,60611,Valet Parking Operator,2785820,18,"{'latitude': '41.89475436857259', 'human_addre...",41.89475436857259,-87.6243943466027,POINT (-87.62439 41.89475),138.0,1400000US17031081500
54883,60601,Valet Parking Operator,2791661,1,"{'latitude': '41.88637481521078', 'human_addre...",41.88637481521078,-87.6246754356595,POINT (-87.62468 41.88637),386.0,1400000US17031320100
54884,60608,Commercial Garage,2797818,12,"{'latitude': '41.85598353104698', 'human_addre...",41.85598353104698,-87.67314851164002,POINT (-87.67315 41.85598),385.0,1400000US17031310900
54885,60603,Valet Parking Operator,2802760,1,"{'latitude': '41.88066457839605', 'human_addre...",41.88066457839605,-87.6270893644417,POINT (-87.62709 41.88066),387.0,1400000US17031320400


In [130]:
#Aggregate information at a census tract level
comm_est_acs['count'] = 1
comm_est_acs = comm_est_acs.groupby(['geo_id'],as_index = False)['count'].sum()
comm_est_acs.rename(columns = {"count":"commercial_establishments"}, inplace = True)
comm_est_acs

Unnamed: 0,geo_id,commercial_establishments
0,1400000US17031010100,24
1,1400000US17031010201,30
2,1400000US17031010202,58
3,1400000US17031010300,71
4,1400000US17031010400,41
...,...,...
788,1400000US17031843500,121
789,1400000US17031843600,42
790,1400000US17031843700,117
791,1400000US17031843800,31


# Police Killings

In [131]:
police_deaths = pd.read_csv(here("./data/raw/police_killings_MPV.csv"))

df_mask = police_deaths['City'] == "Chicago"
police_deaths = police_deaths[df_mask]

police_deaths = police_deaths[['Street Address of Incident','City','Zipcode','Agency responsible for death','Cause of death','MPV ID']]

police_deaths

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0,Street Address of Incident,City,Zipcode,Agency responsible for death,Cause of death,MPV ID
1,4900 South Lavergne Avenue,Chicago,60638.0,Chicago Police Department,Gunshot,8446
14,1300 block West 19th Street,Chicago,60608.0,Chicago Police Department,Gunshot,8438
663,3600 North Ashland Avenue,Chicago,60613.0,Chicago Police Department,Gunshot,7780
769,2660 East 79th Street,Chicago,60649.0,Chicago Police Department,"Gunshot, Taser",7676
772,2100 North McVicker Ave,Chicago,60639.0,Chicago Police Department,Gunshot,7670
...,...,...,...,...,...,...
7907,W 18th St & S Springfield Ave,Chicago,60623.0,Chicago Police Department,Gunshot,518
8061,1300 South Independence Boulevard,Chicago,60623.0,Chicago Police Department,Gunshot,370
8212,3300 West Wilson Avenue,Chicago,60625.0,Chicago Police Department,Gunshot,217
8221,200 North Homan Avenue,Chicago,60624.0,Chicago Police Department,Gunshot,208


In [132]:
#Getting geo-coordinates for the street addresses
google_apikey = 'AIzaSyDitOkTVs4g0ibg_Yt04DQqLaUYlxZ1o30'

#Defining the function that gets the lat long associated with a street address using google API

def getAddressCoords(input_address, api_key = google_apikey):
    params = {'key' : api_key,
              'address' : input_address}
    url = 'https://maps.googleapis.com/maps/api/geocode/json?'
    response = requests.get(url, params)
    result = json.loads(response.text)
    
    # Check these error codes again - there may be more
    if result['status'] not in ['INVALID_REQUEST', 'ZERO_RESULTS']:
                
        lat = result['results'][0]['geometry']['location']['lat']
        long = result['results'][0]['geometry']['location']['lng']
        place_id = result['results'][0]['place_id']

        return {"lat":lat, "long":long}
    
    # Flagging if there was an error
    else:
        return "Invalid address"
    
#Applying the function to police_deaths dataframe
police_deaths['place_coords'] = police_deaths[['Street Address of Incident']].apply(getAddressCoords, axis=1)

police_deaths

Unnamed: 0,Street Address of Incident,City,Zipcode,Agency responsible for death,Cause of death,MPV ID,place_coords
1,4900 South Lavergne Avenue,Chicago,60638.0,Chicago Police Department,Gunshot,8446,Invalid address
14,1300 block West 19th Street,Chicago,60608.0,Chicago Police Department,Gunshot,8438,"{'lat': 41.8561005, 'long': -87.65881279999999}"
663,3600 North Ashland Avenue,Chicago,60613.0,Chicago Police Department,Gunshot,7780,"{'lat': 41.9473431, 'long': -87.6693382}"
769,2660 East 79th Street,Chicago,60649.0,Chicago Police Department,"Gunshot, Taser",7676,"{'lat': 41.7521292, 'long': -87.5591115}"
772,2100 North McVicker Ave,Chicago,60639.0,Chicago Police Department,Gunshot,7670,"{'lat': 41.9180154, 'long': -87.77708539999999}"
...,...,...,...,...,...,...,...
7907,W 18th St & S Springfield Ave,Chicago,60623.0,Chicago Police Department,Gunshot,518,"{'lat': 41.8570763, 'long': -87.7223995}"
8061,1300 South Independence Boulevard,Chicago,60623.0,Chicago Police Department,Gunshot,370,"{'lat': 41.8642567, 'long': -87.7204673}"
8212,3300 West Wilson Avenue,Chicago,60625.0,Chicago Police Department,Gunshot,217,"{'lat': 41.96487219999999, 'long': -87.7110163..."
8221,200 North Homan Avenue,Chicago,60624.0,Chicago Police Department,Gunshot,208,"{'lat': 41.8847163, 'long': -87.71110279999999}"


In [133]:
#Creating a lat long column

police_deaths = police_deaths[police_deaths['place_coords'] != 'Invalid address']

police_deaths['lat'] = police_deaths['place_coords'].apply(lambda x: x.get('lat'))
police_deaths['long'] = police_deaths['place_coords'].apply(lambda x: x.get('long'))

police_deaths

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  police_deaths['lat'] = police_deaths['place_coords'].apply(lambda x: x.get('lat'))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  police_deaths['long'] = police_deaths['place_coords'].apply(lambda x: x.get('long'))


Unnamed: 0,Street Address of Incident,City,Zipcode,Agency responsible for death,Cause of death,MPV ID,place_coords,lat,long
14,1300 block West 19th Street,Chicago,60608.0,Chicago Police Department,Gunshot,8438,"{'lat': 41.8561005, 'long': -87.65881279999999}",41.856100,-87.658813
663,3600 North Ashland Avenue,Chicago,60613.0,Chicago Police Department,Gunshot,7780,"{'lat': 41.9473431, 'long': -87.6693382}",41.947343,-87.669338
769,2660 East 79th Street,Chicago,60649.0,Chicago Police Department,"Gunshot, Taser",7676,"{'lat': 41.7521292, 'long': -87.5591115}",41.752129,-87.559112
772,2100 North McVicker Ave,Chicago,60639.0,Chicago Police Department,Gunshot,7670,"{'lat': 41.9180154, 'long': -87.77708539999999}",41.918015,-87.777085
916,4318 W Irving Park Rd,Chicago,60641.0,Des Plaines Police Department,Gunshot,7526,"{'lat': 41.9537193, 'long': -87.7362994}",41.953719,-87.736299
...,...,...,...,...,...,...,...,...,...
7907,W 18th St & S Springfield Ave,Chicago,60623.0,Chicago Police Department,Gunshot,518,"{'lat': 41.8570763, 'long': -87.7223995}",41.857076,-87.722399
8061,1300 South Independence Boulevard,Chicago,60623.0,Chicago Police Department,Gunshot,370,"{'lat': 41.8642567, 'long': -87.7204673}",41.864257,-87.720467
8212,3300 West Wilson Avenue,Chicago,60625.0,Chicago Police Department,Gunshot,217,"{'lat': 41.96487219999999, 'long': -87.7110163...",41.964872,-87.711016
8221,200 North Homan Avenue,Chicago,60624.0,Chicago Police Department,Gunshot,208,"{'lat': 41.8847163, 'long': -87.71110279999999}",41.884716,-87.711103


In [134]:
#Attach geo ids
gdf_police_deaths = gpd.GeoDataFrame(
    police_deaths, geometry=gpd.points_from_xy(police_deaths.long, police_deaths.lat), 
    crs = 'epsg:4326')

police_deaths_acs = gpd.sjoin(gdf_police_deaths, gdf_acs[['geo_id','geometry']], how='left' )

police_deaths_acs

Unnamed: 0,Street Address of Incident,City,Zipcode,Agency responsible for death,Cause of death,MPV ID,place_coords,lat,long,geometry,index_right,geo_id
14,1300 block West 19th Street,Chicago,60608.0,Chicago Police Department,Gunshot,8438,"{'lat': 41.8561005, 'long': -87.65881279999999}",41.856100,-87.658813,POINT (-87.65881 41.85610),382.0,1400000US17031310600
663,3600 North Ashland Avenue,Chicago,60613.0,Chicago Police Department,Gunshot,7780,"{'lat': 41.9473431, 'long': -87.6693382}",41.947343,-87.669338,POINT (-87.66934 41.94734),78.0,1400000US17031060300
769,2660 East 79th Street,Chicago,60649.0,Chicago Police Department,"Gunshot, Taser",7676,"{'lat': 41.7521292, 'long': -87.5591115}",41.752129,-87.559112,POINT (-87.55911 41.75213),452.0,1400000US17031431301
772,2100 North McVicker Ave,Chicago,60639.0,Chicago Police Department,Gunshot,7670,"{'lat': 41.9180154, 'long': -87.77708539999999}",41.918015,-87.777085,POINT (-87.77709 41.91802),231.0,1400000US17031191301
916,4318 W Irving Park Rd,Chicago,60641.0,Des Plaines Police Department,Gunshot,7526,"{'lat': 41.9537193, 'long': -87.7362994}",41.953719,-87.736299,POINT (-87.73630 41.95372),191.0,1400000US17031160200
...,...,...,...,...,...,...,...,...,...,...,...,...
7907,W 18th St & S Springfield Ave,Chicago,60623.0,Chicago Police Department,Gunshot,518,"{'lat': 41.8570763, 'long': -87.7223995}",41.857076,-87.722399,POINT (-87.72240 41.85708),363.0,1400000US17031292400
8061,1300 South Independence Boulevard,Chicago,60623.0,Chicago Police Department,Gunshot,370,"{'lat': 41.8642567, 'long': -87.7204673}",41.864257,-87.720467,POINT (-87.72047 41.86426),752.0,1400000US17031838700
8212,3300 West Wilson Avenue,Chicago,60625.0,Chicago Police Department,Gunshot,217,"{'lat': 41.96487219999999, 'long': -87.7110163...",41.964872,-87.711016,POINT (-87.71102 41.96487),175.0,1400000US17031140702
8221,200 North Homan Avenue,Chicago,60624.0,Chicago Police Department,Gunshot,208,"{'lat': 41.8847163, 'long': -87.71110279999999}",41.884716,-87.711103,POINT (-87.71110 41.88472),740.0,1400000US17031836800


In [135]:
#Aggregate information at a census tract level
police_deaths_acs['count'] = 1
police_deaths_acs = police_deaths_acs.groupby(['geo_id'],as_index = False)['count'].sum()
police_deaths_acs.rename(columns = {"count":"number_of_police_killings"}, inplace = True)
police_deaths_acs

Unnamed: 0,geo_id,number_of_police_killings
0,1400000US17031020801,1
1,1400000US17031060300,1
2,1400000US17031071500,1
3,1400000US17031110400,1
4,1400000US17031140702,1
...,...,...
64,1400000US17031838700,2
65,1400000US17031839800,1
66,1400000US17031843000,2
67,1400000US17031843400,1


# Reported crimes (2015-2019)

### 2015

In [136]:
# Unauthenticated client only works with public data sets. Note 'None'
# in place of application token, and no username or password:
client = Socrata("data.cityofchicago.org", None)

# Example authenticated client (needed for non-public datasets):
# client = Socrata(data.cityofchicago.org,
#                  MyAppToken,
#                  userame="user@example.com",
#                  password="AFakePassword")

# First 2000 results, returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get("vwwp-7yr9", limit=270000)

# Convert to pandas DataFrame
crimes_2015 = pd.DataFrame.from_records(results)



In [137]:
#Categorising crimes into violent, non violent and property crimes (mutually exclusive)
crimes_2015['violent'] = 0
crimes_2015['non-violent'] = 0
crimes_2015['property'] = 0
crimes_2015['other_crimes'] = 0

for i in range(0,len(crimes_2015)):
    
    #VIOLENT CRIMES
    if crimes_2015['primary_type'].iloc[i] == "HOMICIDE":
        crimes_2015['violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "CRIM SEXUAL ASSAULT":
        crimes_2015['violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "ROBBERY":
        crimes_2015['violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "BATTERY":
        crimes_2015['violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "RITUALISM":
        crimes_2015['violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "ASSAULT":
        crimes_2015['violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "HUMAN TRAFFICKING":
        crimes_2015['violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "ASSAULT":
        crimes_2015['violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "CRIMINAL TRESPASS":
        crimes_2015['violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "ASSAULT":
        crimes_2015['violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "KIDNAPPING":
        crimes_2015['violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "INTIMIDATION":
        crimes_2015['violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "CRIMINAL ABORTION":
        crimes_2015['violent'].iloc[i] = 1
    
    #NON VIOLENT CRIMES
    if crimes_2015['primary_type'].iloc[i] == "PUBLIC PEACE VIOLATION":
        crimes_2015['non-violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "STALKING":
        crimes_2015['non-violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "DECEPTIVE PRACTICE":
        crimes_2015['non-violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "GAMBLING":
        crimes_2015['non-violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "WEAPONS VIOLATION":
        crimes_2015['non-violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "CONCEALED CARRY LICENSE VIOLATION":
        crimes_2015['non-violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "NON-CRIMINAL":
        crimes_2015['non-violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "PROSTITUTION":
        crimes_2015['non-violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "OBSCENITY":
        crimes_2015['non-violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "PUBLIC INDECENCY":
        crimes_2015['non-violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "OFFENSE INVOLVING CHILDREN":
        crimes_2015['non-violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "SEX OFFENSE":
        crimes_2015['non-violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "OFFENSE INVOLVING CHILDREN":
        crimes_2015['non-violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "NARCOTICS":
        crimes_2015['non-violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "OTHER NARCOTIC VIOLATION":
        crimes_2015['non-violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "LIQUOR LAW VIOLATION":
        crimes_2015['non-violent'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "INTERFERENCE WITH PUBLIC OFFICER":
        crimes_2015['non-violent'].iloc[i] = 1
    
    
    #PROPERTY CRIMES
    if crimes_2015['primary_type'].iloc[i] == "BURGLARY":
        crimes_2015['property'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "THEFT":
        crimes_2015['property'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "MOTOR VEHICLE THEFT":
        crimes_2015['property'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "ARSON":
        crimes_2015['property'].iloc[i] = 1
    if crimes_2015['primary_type'].iloc[i] == "CRIMINAL DAMAGE":
        crimes_2015['property'].iloc[i] = 1
    
    #OTHER OFFENSE
    if crimes_2015['primary_type'].iloc[i] == "OTHER OFFENSE":
        crimes_2015['other_crimes'].iloc[i] = 1


crimes_2015


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


Unnamed: 0,date,district,block,description,location_description,community_area,updated_on,iucr,ward,year,...,x_coordinate,longitude,:@computed_region_bdys_3d7i,:@computed_region_6mkv_f3dw,:@computed_region_vrxf_vc4k,:@computed_region_d3ds_rm58,violent,non-violent,property,other_crimes
0,2015-12-31T23:59:00.000,014,015XX N KEDZIE AVE,FROM BUILDING,RESIDENCE PORCH/HALLWAY,23,2018-02-09T15:44:29.000,0890,26,2015,...,,,,,,,0,0,1,0
1,2015-12-31T23:59:00.000,006,075XX S EMERALD AVE,TO VEHICLE,STREET,68,2018-02-10T15:50:01.000,1320,17,2015,...,1172605,-87.642992854,511,21554,66,229,0,0,1,0
2,2015-12-31T23:55:00.000,004,079XX S STONY ISLAND AVE,AGGRAVATED: OTHER DANG WEAPON,STREET,45,2018-02-10T15:50:01.000,0430,8,2015,...,1188223,-87.585822373,431,21202,41,224,1,0,0,0
3,2015-12-31T23:50:00.000,024,024XX W FARGO AVE,$500 AND UNDER,APARTMENT,2,2018-02-10T15:50:01.000,0820,50,2015,...,1158878,-87.690708662,359,22528,20,35,0,0,1,0
4,2015-12-31T23:50:00.000,019,037XX N CLARK ST,SIMPLE,SIDEWALK,6,2018-02-10T15:50:01.000,0460,44,2015,...,1167786,-87.658635101,691,21186,57,12,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
264604,2015-01-01T00:00:00.000,016,0000X W TERMINAL ST,$500 AND UNDER,AIRPORT TERMINAL UPPER LEVEL - SECURE AREA,76,2018-02-10T15:50:01.000,0820,41,2015,...,1100726,-87.904976266,668,16197,75,24,0,0,1,0
264605,2015-01-01T00:00:00.000,017,037XX N RICHMOND ST,CHILD PORNOGRAPHY,RESIDENCE,16,2018-02-10T15:50:01.000,1582,33,2015,...,1156053,-87.70177312,372,21538,16,23,0,1,0,0
264606,2015-01-01T00:00:00.000,006,078XX S MAY ST,SEX ASSLT OF CHILD BY FAM MBR,RESIDENCE,71,2018-02-10T15:50:01.000,1753,17,2015,...,1170011,-87.65255922,487,21554,70,230,0,1,0,0
264607,2015-01-01T00:00:00.000,022,112XX S WALLACE ST,PREDATORY,RESIDENCE,49,2016-05-26T15:51:03.000,0266,34,2015,...,,,,,,,1,0,0,0


In [138]:
crimes_2015 = crimes_2015.rename(columns = {"latitude":"lat","longitude":"long"})

crimes_2015 = crimes_2015[['lat','long','id','violent','non-violent','property','other_crimes']]

crimes_2015 = crimes_2015.dropna()
crimes_2015

Unnamed: 0,lat,long,id,violent,non-violent,property,other_crimes
1,41.757366519,-87.642992854,10365064,0,0,1,0
2,41.751270452,-87.585822373,10364662,1,0,0,0
3,42.016804165,-87.690708662,10364740,0,0,1,0
4,41.949837364,-87.658635101,10364683,1,0,0,0
5,41.888165132,-87.622937212,10365142,0,0,1,0
...,...,...,...,...,...,...,...
264603,41.921156927,-87.775623089,10327496,0,1,0,0
264604,41.9764212,-87.904976266,10083956,0,0,1,0
264605,41.949185407,-87.70177312,10328772,0,1,0,0
264606,41.751770335,-87.65255922,10024172,0,1,0,0


In [139]:
#Attach geo ids
gdf_crimes_2015 = gpd.GeoDataFrame(
    crimes_2015, geometry=gpd.points_from_xy(crimes_2015.long, crimes_2015.lat), 
    crs = 'epsg:4326')

crimes_2015_acs = gpd.sjoin(gdf_crimes_2015, gdf_acs[['geo_id','geometry']], how='left' )

crimes_2015_acs

Unnamed: 0,lat,long,id,violent,non-violent,property,other_crimes,geometry,index_right,geo_id
1,41.757366519,-87.642992854,10365064,0,0,1,0,POINT (-87.64299 41.75737),620.0,1400000US17031681300
2,41.751270452,-87.585822373,10364662,1,0,0,0,POINT (-87.58582 41.75127),464.0,1400000US17031450300
3,42.016804165,-87.690708662,10364740,0,0,1,0,POINT (-87.69071 42.01680),12.0,1400000US17031020200
4,41.949837364,-87.658635101,10364683,1,0,0,0,POINT (-87.65864 41.94984),84.0,1400000US17031061100
5,41.888165132,-87.622937212,10365142,0,0,1,0,POINT (-87.62294 41.88817),386.0,1400000US17031320100
...,...,...,...,...,...,...,...,...,...,...
264603,41.921156927,-87.775623089,10327496,0,1,0,0,POINT (-87.77562 41.92116),231.0,1400000US17031191301
264604,41.9764212,-87.904976266,10083956,0,0,1,0,POINT (-87.90498 41.97642),,
264605,41.949185407,-87.70177312,10328772,0,1,0,0,POINT (-87.70177 41.94919),198.0,1400000US17031160700
264606,41.751770335,-87.65255922,10024172,0,1,0,0,POINT (-87.65256 41.75177),647.0,1400000US17031710800


In [140]:
#Aggregate information at a census tract level
crimes_2015_acs = crimes_2015_acs.groupby(['geo_id'],as_index = False)['violent','non-violent','property','other_crimes'].sum()
crimes_2015_acs.rename(columns = {"violent":"violent_crimes_2015","non-violent":"non_violent_crimes_2015","property":"property_crimes_2015","other_crimes":"other_crimes_2015"}, inplace = True)
crimes_2015_acs

  crimes_2015_acs = crimes_2015_acs.groupby(['geo_id'],as_index = False)['violent','non-violent','property','other_crimes'].sum()


Unnamed: 0,geo_id,violent_crimes_2015,non_violent_crimes_2015,property_crimes_2015,other_crimes_2015
0,1400000US17031010100,215,111,200,45
1,1400000US17031010201,156,58,135,27
2,1400000US17031010202,139,48,184,18
3,1400000US17031010300,121,39,158,27
4,1400000US17031010400,78,29,105,21
...,...,...,...,...,...
793,1400000US17031843500,90,42,159,32
794,1400000US17031843600,124,45,123,22
795,1400000US17031843700,64,41,116,11
796,1400000US17031843800,142,75,123,30


### 2016

In [141]:
client = Socrata("data.cityofchicago.org", None)

# Example authenticated client (needed for non-public datasets):
# client = Socrata(data.cityofchicago.org,
#                  MyAppToken,
#                  userame="user@example.com",
#                  password="AFakePassword")

# First 2000 results, returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get("kf95-mnd6", limit=270000)

# Convert to pandas DataFrame
crimes_2016 = pd.DataFrame.from_records(results)



In [142]:
crimes_2016['violent'] = 0
crimes_2016['non-violent'] = 0
crimes_2016['property'] = 0
crimes_2016['other_crimes'] = 0

for i in range(0,len(crimes_2016)):
    
    #VIOLENT CRIMES
    if crimes_2016['primary_type'].iloc[i] == "HOMICIDE":
        crimes_2016['violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "CRIM SEXUAL ASSAULT":
        crimes_2016['violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "ROBBERY":
        crimes_2016['violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "BATTERY":
        crimes_2016['violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "RITUALISM":
        crimes_2016['violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "ASSAULT":
        crimes_2016['violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "HUMAN TRAFFICKING":
        crimes_2016['violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "ASSAULT":
        crimes_2016['violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "CRIMINAL TRESPASS":
        crimes_2016['violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "ASSAULT":
        crimes_2016['violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "KIDNAPPING":
        crimes_2016['violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "INTIMIDATION":
        crimes_2016['violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "CRIMINAL ABORTION":
        crimes_2016['violent'].iloc[i] = 1
    
    #NON VIOLENT CRIMES
    if crimes_2016['primary_type'].iloc[i] == "PUBLIC PEACE VIOLATION":
        crimes_2016['non-violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "STALKING":
        crimes_2016['non-violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "DECEPTIVE PRACTICE":
        crimes_2016['non-violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "GAMBLING":
        crimes_2016['non-violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "WEAPONS VIOLATION":
        crimes_2016['non-violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "CONCEALED CARRY LICENSE VIOLATION":
        crimes_2016['non-violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "NON-CRIMINAL":
        crimes_2016['non-violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "PROSTITUTION":
        crimes_2016['non-violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "OBSCENITY":
        crimes_2016['non-violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "PUBLIC INDECENCY":
        crimes_2016['non-violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "OFFENSE INVOLVING CHILDREN":
        crimes_2016['non-violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "SEX OFFENSE":
        crimes_2016['non-violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "OFFENSE INVOLVING CHILDREN":
        crimes_2016['non-violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "NARCOTICS":
        crimes_2016['non-violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "OTHER NARCOTIC VIOLATION":
        crimes_2016['non-violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "LIQUOR LAW VIOLATION":
        crimes_2016['non-violent'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "INTERFERENCE WITH PUBLIC OFFICER":
        crimes_2016['non-violent'].iloc[i] = 1
    
    
    #PROPERTY CRIMES
    if crimes_2016['primary_type'].iloc[i] == "BURGLARY":
        crimes_2016['property'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "THEFT":
        crimes_2016['property'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "MOTOR VEHICLE THEFT":
        crimes_2016['property'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "ARSON":
        crimes_2016['property'].iloc[i] = 1
    if crimes_2016['primary_type'].iloc[i] == "CRIMINAL DAMAGE":
        crimes_2016['property'].iloc[i] = 1
    
    #OTHER OFFENSE
    if crimes_2016['primary_type'].iloc[i] == "OTHER OFFENSE":
        crimes_2016['other_crimes'].iloc[i] = 1




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [143]:
crimes_2016 = crimes_2016.rename(columns = {"latitude":"lat","longitude":"long"})

crimes_2016 = crimes_2016[['lat','long','id','violent','non-violent','property','other_crimes']]

crimes_2016 = crimes_2016.dropna()
crimes_2016

Unnamed: 0,lat,long,id,violent,non-violent,property,other_crimes
0,41.976290414,-87.905227221,10819224,0,0,1,0
1,41.688033246,-87.623931468,10801137,1,0,0,0
2,41.936884881,-87.66476981,10801110,0,1,0,0
3,41.886814897,-87.625592678,10802006,1,0,0,0
4,41.752307019,-87.619797619,10801865,0,0,1,0
...,...,...,...,...,...,...,...
269634,41.915573243,-87.759637797,11093786,0,1,0,0
269635,41.765282432,-87.583654702,11009258,0,1,0,0
269638,41.889525629,-87.755401321,10501254,0,1,0,0
269639,41.898549887,-87.700450413,10755601,0,1,0,0


In [144]:
#Attach geo ids
gdf_crimes_2016 = gpd.GeoDataFrame(
    crimes_2016, geometry=gpd.points_from_xy(crimes_2016.long, crimes_2016.lat), 
    crs = 'epsg:4326')

crimes_2016_acs = gpd.sjoin(gdf_crimes_2016, gdf_acs[['geo_id','geometry']], how='left' )

crimes_2016_acs

Unnamed: 0,lat,long,id,violent,non-violent,property,other_crimes,geometry,index_right,geo_id
0,41.976290414,-87.905227221,10819224,0,0,1,0,POINT (-87.90523 41.97629),,
1,41.688033246,-87.623931468,10801137,1,0,0,0,POINT (-87.62393 41.68803),491.0,1400000US17031491300
2,41.936884881,-87.66476981,10801110,0,1,0,0,POINT (-87.66477 41.93688),97.0,1400000US17031062700
3,41.886814897,-87.625592678,10802006,1,0,0,0,POINT (-87.62559 41.88681),386.0,1400000US17031320100
4,41.752307019,-87.619797619,10801865,0,0,1,0,POINT (-87.61980 41.75231),629.0,1400000US17031691300
...,...,...,...,...,...,...,...,...,...,...
269634,41.915573243,-87.759637797,11093786,0,1,0,0,POINT (-87.75964 41.91557),312.0,1400000US17031250300
269635,41.765282432,-87.583654702,11009258,0,1,0,0,POINT (-87.58365 41.76528),446.0,1400000US17031430500
269638,41.889525629,-87.755401321,10501254,0,1,0,0,POINT (-87.75540 41.88953),323.0,1400000US17031251500
269639,41.898549887,-87.700450413,10755601,0,1,0,0,POINT (-87.70045 41.89855),302.0,1400000US17031242700


In [145]:
#Aggregate information at a census tract level
crimes_2016_acs = crimes_2016_acs.groupby(['geo_id'],as_index = False)['violent','non-violent','property','other_crimes'].sum()
crimes_2016_acs.rename(columns = {"violent":"violent_crimes_2016","non-violent":"non_violent_crimes_2016","property":"property_crimes_2016","other_crimes":"other_crimes_2016"}, inplace = True)
crimes_2016_acs

  crimes_2016_acs = crimes_2016_acs.groupby(['geo_id'],as_index = False)['violent','non-violent','property','other_crimes'].sum()


Unnamed: 0,geo_id,violent_crimes_2016,non_violent_crimes_2016,property_crimes_2016,other_crimes_2016
0,1400000US17031010100,191,79,187,39
1,1400000US17031010201,169,44,158,28
2,1400000US17031010202,150,45,177,12
3,1400000US17031010300,137,39,152,31
4,1400000US17031010400,105,47,148,22
...,...,...,...,...,...
793,1400000US17031843500,93,30,166,37
794,1400000US17031843600,125,40,133,21
795,1400000US17031843700,67,35,141,24
796,1400000US17031843800,136,50,125,27


### 2017

In [146]:
# Unauthenticated client only works with public data sets. Note 'None'
# in place of application token, and no username or password:
client = Socrata("data.cityofchicago.org", None)

# Example authenticated client (needed for non-public datasets):
# client = Socrata(data.cityofchicago.org,
#                  MyAppToken,
#                  userame="user@example.com",
#                  password="AFakePassword")

# First 2000 results, returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get("d62x-nvdr", limit=270000)

# Convert to pandas DataFrame
crimes_2017 = pd.DataFrame.from_records(results)



In [147]:
crimes_2017['violent'] = 0
crimes_2017['non-violent'] = 0
crimes_2017['property'] = 0
crimes_2017['other_crimes'] = 0

for i in range(0,len(crimes_2017)):
    
    #VIOLENT CRIMES
    if crimes_2017['primary_type'].iloc[i] == "HOMICIDE":
        crimes_2017['violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "CRIM SEXUAL ASSAULT":
        crimes_2017['violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "ROBBERY":
        crimes_2017['violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "BATTERY":
        crimes_2017['violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "RITUALISM":
        crimes_2017['violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "ASSAULT":
        crimes_2017['violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "HUMAN TRAFFICKING":
        crimes_2017['violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "ASSAULT":
        crimes_2017['violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "CRIMINAL TRESPASS":
        crimes_2017['violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "ASSAULT":
        crimes_2017['violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "KIDNAPPING":
        crimes_2017['violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "INTIMIDATION":
        crimes_2017['violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "CRIMINAL ABORTION":
        crimes_2017['violent'].iloc[i] = 1
    
    #NON VIOLENT CRIMES
    if crimes_2017['primary_type'].iloc[i] == "PUBLIC PEACE VIOLATION":
        crimes_2017['non-violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "STALKING":
        crimes_2017['non-violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "DECEPTIVE PRACTICE":
        crimes_2017['non-violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "GAMBLING":
        crimes_2017['non-violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "WEAPONS VIOLATION":
        crimes_2017['non-violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "CONCEALED CARRY LICENSE VIOLATION":
        crimes_2017['non-violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "NON-CRIMINAL":
        crimes_2017['non-violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "PROSTITUTION":
        crimes_2017['non-violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "OBSCENITY":
        crimes_2017['non-violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "PUBLIC INDECENCY":
        crimes_2017['non-violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "OFFENSE INVOLVING CHILDREN":
        crimes_2017['non-violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "SEX OFFENSE":
        crimes_2017['non-violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "OFFENSE INVOLVING CHILDREN":
        crimes_2017['non-violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "NARCOTICS":
        crimes_2017['non-violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "OTHER NARCOTIC VIOLATION":
        crimes_2017['non-violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "LIQUOR LAW VIOLATION":
        crimes_2017['non-violent'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "INTERFERENCE WITH PUBLIC OFFICER":
        crimes_2017['non-violent'].iloc[i] = 1
    
    
    #PROPERTY CRIMES
    if crimes_2017['primary_type'].iloc[i] == "BURGLARY":
        crimes_2017['property'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "THEFT":
        crimes_2017['property'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "MOTOR VEHICLE THEFT":
        crimes_2017['property'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "ARSON":
        crimes_2017['property'].iloc[i] = 1
    if crimes_2017['primary_type'].iloc[i] == "CRIMINAL DAMAGE":
        crimes_2017['property'].iloc[i] = 1
    
    #OTHER OFFENSE
    if crimes_2017['primary_type'].iloc[i] == "OTHER OFFENSE":
        crimes_2017['other_crimes'].iloc[i] = 1




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [148]:
crimes_2017 = crimes_2017.rename(columns = {"latitude":"lat","longitude":"long"})

crimes_2017 = crimes_2017[['lat','long','id','violent','non-violent','property','other_crimes']]

crimes_2017 = crimes_2017.dropna()
crimes_2017

Unnamed: 0,lat,long,id,violent,non-violent,property,other_crimes
0,41.965693651,-87.715726125,11192233,0,0,1,0
1,41.926558908,-87.631294073,11196379,1,0,0,0
2,41.895750913,-87.623495923,11192540,0,0,1,0
3,41.856426716,-87.638892854,11192239,0,0,1,0
4,41.683369303,-87.622829524,11192254,1,0,0,0
...,...,...,...,...,...,...,...
268866,41.763181833,-87.657790112,10902442,0,1,0,0
268870,41.889765267,-87.732580986,10874271,0,0,1,0
268871,41.882947966,-87.691879124,11092761,0,0,1,0
268873,41.947225755,-87.655523387,10801111,0,0,0,1


In [149]:
#Attach geo ids
gdf_crimes_2017 = gpd.GeoDataFrame(
    crimes_2017, geometry=gpd.points_from_xy(crimes_2017.long, crimes_2017.lat), 
    crs = 'epsg:4326')

crimes_2017_acs = gpd.sjoin(gdf_crimes_2017, gdf_acs[['geo_id','geometry']], how='left' )

crimes_2017_acs

Unnamed: 0,lat,long,id,violent,non-violent,property,other_crimes,geometry,index_right,geo_id
0,41.965693651,-87.715726125,11192233,0,0,1,0,POINT (-87.71573 41.96569),175.0,1400000US17031140702
1,41.926558908,-87.631294073,11196379,1,0,0,0,POINT (-87.63129 41.92656),107.0,1400000US17031070101
2,41.895750913,-87.623495923,11192540,0,0,1,0,POINT (-87.62350 41.89575),135.0,1400000US17031081401
3,41.856426716,-87.638892854,11192239,0,0,1,0,POINT (-87.63889 41.85643),790.0,1400000US17031843200
4,41.683369303,-87.622829524,11192254,1,0,0,0,POINT (-87.62283 41.68337),506.0,1400000US17031530200
...,...,...,...,...,...,...,...,...,...,...
268866,41.763181833,-87.657790112,10902442,0,1,0,0,POINT (-87.65779 41.76318),611.0,1400000US17031671800
268870,41.889765267,-87.732580986,10874271,0,0,1,0,POINT (-87.73258 41.88977),780.0,1400000US17031842100
268871,41.882947966,-87.691879124,11092761,0,0,1,0,POINT (-87.69188 41.88295),746.0,1400000US17031837800
268873,41.947225755,-87.655523387,10801111,0,0,0,1,POINT (-87.65552 41.94723),84.0,1400000US17031061100


In [150]:
#Aggregate information at a census tract level
crimes_2017_acs = crimes_2017_acs.groupby(['geo_id'],as_index = False)['violent','non-violent','property','other_crimes'].sum()
crimes_2017_acs.rename(columns = {"violent":"violent_crimes_2017","non-violent":"non_violent_crimes_2017","property":"property_crimes_2017","other_crimes":"other_crimes_2017"}, inplace = True)
crimes_2017_acs

  crimes_2017_acs = crimes_2017_acs.groupby(['geo_id'],as_index = False)['violent','non-violent','property','other_crimes'].sum()


Unnamed: 0,geo_id,violent_crimes_2017,non_violent_crimes_2017,property_crimes_2017,other_crimes_2017
0,1400000US17031010100,217,71,272,39
1,1400000US17031010201,166,40,272,32
2,1400000US17031010202,163,44,257,15
3,1400000US17031010300,147,36,219,16
4,1400000US17031010400,106,52,120,19
...,...,...,...,...,...
793,1400000US17031843500,106,48,224,28
794,1400000US17031843600,156,38,175,24
795,1400000US17031843700,58,41,155,17
796,1400000US17031843800,143,35,163,27


### 2018

In [151]:
client = Socrata("data.cityofchicago.org", None)

# Example authenticated client (needed for non-public datasets):
# client = Socrata(data.cityofchicago.org,
#                  MyAppToken,
#                  userame="user@example.com",
#                  password="AFakePassword")

# First 2000 results, returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get("3i3m-jwuy", limit=270000)

# Convert to pandas DataFrame
crimes_2018 = pd.DataFrame.from_records(results)



In [152]:
crimes_2018['violent'] = 0
crimes_2018['non-violent'] = 0
crimes_2018['property'] = 0
crimes_2018['other_crimes'] = 0

for i in range(0,len(crimes_2018)):
    
    #VIOLENT CRIMES
    if crimes_2018['primary_type'].iloc[i] == "HOMICIDE":
        crimes_2018['violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "CRIM SEXUAL ASSAULT":
        crimes_2018['violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "ROBBERY":
        crimes_2018['violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "BATTERY":
        crimes_2018['violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "RITUALISM":
        crimes_2018['violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "ASSAULT":
        crimes_2018['violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "HUMAN TRAFFICKING":
        crimes_2018['violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "ASSAULT":
        crimes_2018['violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "CRIMINAL TRESPASS":
        crimes_2018['violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "ASSAULT":
        crimes_2018['violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "KIDNAPPING":
        crimes_2018['violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "INTIMIDATION":
        crimes_2018['violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "CRIMINAL ABORTION":
        crimes_2018['violent'].iloc[i] = 1
    
    #NON VIOLENT CRIMES
    if crimes_2018['primary_type'].iloc[i] == "PUBLIC PEACE VIOLATION":
        crimes_2018['non-violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "STALKING":
        crimes_2018['non-violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "DECEPTIVE PRACTICE":
        crimes_2018['non-violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "GAMBLING":
        crimes_2018['non-violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "WEAPONS VIOLATION":
        crimes_2018['non-violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "CONCEALED CARRY LICENSE VIOLATION":
        crimes_2018['non-violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "NON-CRIMINAL":
        crimes_2018['non-violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "PROSTITUTION":
        crimes_2018['non-violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "OBSCENITY":
        crimes_2018['non-violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "PUBLIC INDECENCY":
        crimes_2018['non-violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "OFFENSE INVOLVING CHILDREN":
        crimes_2018['non-violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "SEX OFFENSE":
        crimes_2018['non-violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "OFFENSE INVOLVING CHILDREN":
        crimes_2018['non-violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "NARCOTICS":
        crimes_2018['non-violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "OTHER NARCOTIC VIOLATION":
        crimes_2018['non-violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "LIQUOR LAW VIOLATION":
        crimes_2018['non-violent'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "INTERFERENCE WITH PUBLIC OFFICER":
        crimes_2018['non-violent'].iloc[i] = 1
    
    
    #PROPERTY CRIMES
    if crimes_2018['primary_type'].iloc[i] == "BURGLARY":
        crimes_2018['property'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "THEFT":
        crimes_2018['property'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "MOTOR VEHICLE THEFT":
        crimes_2018['property'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "ARSON":
        crimes_2018['property'].iloc[i] = 1
    if crimes_2018['primary_type'].iloc[i] == "CRIMINAL DAMAGE":
        crimes_2018['property'].iloc[i] = 1
    
    #OTHER OFFENSE
    if crimes_2018['primary_type'].iloc[i] == "OTHER OFFENSE":
        crimes_2018['other_crimes'].iloc[i] = 1




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [153]:
crimes_2018 = crimes_2018.rename(columns = {"latitude":"lat","longitude":"long"})

crimes_2018 = crimes_2018[['lat','long','id','violent','non-violent','property','other_crimes']]

crimes_2018 = crimes_2018.dropna()
crimes_2018

Unnamed: 0,lat,long,id,violent,non-violent,property,other_crimes
0,41.689078832,-87.696064026,11556487,0,0,1,0
1,41.763181359,-87.657709477,11561837,0,1,0,0
2,41.740520866,-87.647390719,11552699,0,0,1,0
3,41.857068095,-87.657625201,11552724,1,0,0,0
4,41.75191443,-87.647716532,11552731,1,0,0,0
...,...,...,...,...,...,...,...
268479,41.895897782,-87.677740745,11267461,0,1,0,0
268481,41.780873262,-87.661824508,11315895,0,1,0,0
268483,41.922724629,-87.769594212,11262581,0,1,0,0
268486,41.756330614,-87.588307179,11369150,1,0,0,0


In [154]:
#Attach geo ids
gdf_crimes_2018 = gpd.GeoDataFrame(
    crimes_2018, geometry=gpd.points_from_xy(crimes_2018.long, crimes_2018.lat), 
    crs = 'epsg:4326')

crimes_2018_acs = gpd.sjoin(gdf_crimes_2018, gdf_acs[['geo_id','geometry']], how='left' )

crimes_2018_acs

Unnamed: 0,lat,long,id,violent,non-violent,property,other_crimes,geometry,index_right,geo_id
0,41.689078832,-87.696064026,11556487,0,0,1,0,POINT (-87.69606 41.68908),670.0,1400000US17031740100
1,41.763181359,-87.657709477,11561837,0,1,0,0,POINT (-87.65771 41.76318),611.0,1400000US17031671800
2,41.740520866,-87.647390719,11552699,0,0,1,0,POINT (-87.64739 41.74052),649.0,1400000US17031711000
3,41.857068095,-87.657625201,11552724,1,0,0,0,POINT (-87.65763 41.85707),382.0,1400000US17031310600
4,41.75191443,-87.647716532,11552731,1,0,0,0,POINT (-87.64772 41.75191),647.0,1400000US17031710800
...,...,...,...,...,...,...,...,...,...,...
268479,41.895897782,-87.677740745,11267461,0,1,0,0,POINT (-87.67774 41.89590),305.0,1400000US17031243000
268481,41.780873262,-87.661824508,11315895,0,1,0,0,POINT (-87.66182 41.78087),603.0,1400000US17031670800
268483,41.922724629,-87.769594212,11262581,0,1,0,0,POINT (-87.76959 41.92272),232.0,1400000US17031191302
268486,41.756330614,-87.588307179,11369150,1,0,0,0,POINT (-87.58831 41.75633),716.0,1400000US17031834200


In [155]:
#Aggregate information at a census tract level
crimes_2018_acs = crimes_2018_acs.groupby(['geo_id'],as_index = False)['violent','non-violent','property','other_crimes'].sum()
crimes_2018_acs.rename(columns = {"violent":"violent_crimes_2018","non-violent":"non_violent_crimes_2018","property":"property_crimes_2018","other_crimes":"other_crimes_2018"}, inplace = True)
crimes_2018_acs

  crimes_2018_acs = crimes_2018_acs.groupby(['geo_id'],as_index = False)['violent','non-violent','property','other_crimes'].sum()


Unnamed: 0,geo_id,violent_crimes_2018,non_violent_crimes_2018,property_crimes_2018,other_crimes_2018
0,1400000US17031010100,218,59,290,40
1,1400000US17031010201,154,45,189,32
2,1400000US17031010202,193,38,241,14
3,1400000US17031010300,129,47,146,18
4,1400000US17031010400,107,33,121,19
...,...,...,...,...,...
793,1400000US17031843500,112,51,99,20
794,1400000US17031843600,134,60,175,30
795,1400000US17031843700,59,41,96,7
796,1400000US17031843800,131,36,127,21


### 2019

In [156]:
client = Socrata("data.cityofchicago.org", None)

# Example authenticated client (needed for non-public datasets):
# client = Socrata(data.cityofchicago.org,
#                  MyAppToken,
#                  userame="user@example.com",
#                  password="AFakePassword")

# First 2000 results, returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get("w98m-zvie", limit=270000)

# Convert to pandas DataFrame
crimes_2019 = pd.DataFrame.from_records(results)



In [157]:
crimes_2019['violent'] = 0
crimes_2019['non-violent'] = 0
crimes_2019['property'] = 0
crimes_2019['other_crimes'] = 0

for i in range(0,len(crimes_2019)):
    
    #VIOLENT CRIMES
    if crimes_2019['primary_type'].iloc[i] == "HOMICIDE":
        crimes_2019['violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "CRIM SEXUAL ASSAULT":
        crimes_2019['violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "ROBBERY":
        crimes_2019['violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "BATTERY":
        crimes_2019['violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "RITUALISM":
        crimes_2019['violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "ASSAULT":
        crimes_2019['violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "HUMAN TRAFFICKING":
        crimes_2019['violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "ASSAULT":
        crimes_2019['violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "CRIMINAL TRESPASS":
        crimes_2019['violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "ASSAULT":
        crimes_2019['violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "KIDNAPPING":
        crimes_2019['violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "INTIMIDATION":
        crimes_2019['violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "CRIMINAL ABORTION":
        crimes_2019['violent'].iloc[i] = 1
    
    #NON VIOLENT CRIMES
    if crimes_2019['primary_type'].iloc[i] == "PUBLIC PEACE VIOLATION":
        crimes_2019['non-violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "STALKING":
        crimes_2019['non-violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "DECEPTIVE PRACTICE":
        crimes_2019['non-violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "GAMBLING":
        crimes_2019['non-violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "WEAPONS VIOLATION":
        crimes_2019['non-violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "CONCEALED CARRY LICENSE VIOLATION":
        crimes_2019['non-violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "NON-CRIMINAL":
        crimes_2019['non-violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "PROSTITUTION":
        crimes_2019['non-violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "OBSCENITY":
        crimes_2019['non-violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "PUBLIC INDECENCY":
        crimes_2019['non-violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "OFFENSE INVOLVING CHILDREN":
        crimes_2019['non-violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "SEX OFFENSE":
        crimes_2019['non-violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "OFFENSE INVOLVING CHILDREN":
        crimes_2019['non-violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "NARCOTICS":
        crimes_2019['non-violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "OTHER NARCOTIC VIOLATION":
        crimes_2019['non-violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "LIQUOR LAW VIOLATION":
        crimes_2019['non-violent'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "INTERFERENCE WITH PUBLIC OFFICER":
        crimes_2019['non-violent'].iloc[i] = 1
    
    
    #PROPERTY CRIMES
    if crimes_2019['primary_type'].iloc[i] == "BURGLARY":
        crimes_2019['property'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "THEFT":
        crimes_2019['property'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "MOTOR VEHICLE THEFT":
        crimes_2019['property'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "ARSON":
        crimes_2019['property'].iloc[i] = 1
    if crimes_2019['primary_type'].iloc[i] == "CRIMINAL DAMAGE":
        crimes_2019['property'].iloc[i] = 1
    
    #OTHER OFFENSE
    if crimes_2019['primary_type'].iloc[i] == "OTHER OFFENSE":
        crimes_2019['other_crimes'].iloc[i] = 1




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [158]:
crimes_2019 = crimes_2019.rename(columns = {"latitude":"lat","longitude":"long"})

crimes_2019 = crimes_2019[['lat','long','id','violent','non-violent','property','other_crimes']]

crimes_2019 = crimes_2019.dropna()
crimes_2019

Unnamed: 0,lat,long,id,violent,non-violent,property,other_crimes
0,41.769150218,-87.627136786,11938228,0,1,0,0
1,41.779173667,-87.653277703,11940078,1,0,0,0
3,41.874623951,-87.745052647,11938240,0,1,0,0
4,41.877268465,-87.711536692,11937967,0,1,0,0
5,41.714095115,-87.652806763,11938124,0,0,0,1
...,...,...,...,...,...,...,...
260853,41.907072136,-87.731331357,11992909,0,1,0,0
260855,41.896591951,-87.692793096,11947046,0,1,0,0
260856,41.65634477,-87.605129962,11739161,0,1,0,0
260857,41.876889551,-87.690074383,11589244,0,1,0,0


In [159]:
#Attach geo ids
gdf_crimes_2019 = gpd.GeoDataFrame(
    crimes_2019, geometry=gpd.points_from_xy(crimes_2019.long, crimes_2019.lat), 
    crs = 'epsg:4326')

crimes_2019_acs = gpd.sjoin(gdf_crimes_2019, gdf_acs[['geo_id','geometry']], how='left' )

crimes_2019_acs

Unnamed: 0,lat,long,id,violent,non-violent,property,other_crimes,geometry,index_right,geo_id
0,41.769150218,-87.627136786,11938228,0,1,0,0,POINT (-87.62714 41.76915),622.0,1400000US17031690300
1,41.779173667,-87.653277703,11940078,1,0,0,0,POINT (-87.65328 41.77917),617.0,1400000US17031681000
3,41.874623951,-87.745052647,11938240,0,1,0,0,POINT (-87.74505 41.87462),332.0,1400000US17031252202
4,41.877268465,-87.711536692,11937967,0,1,0,0,POINT (-87.71154 41.87727),346.0,1400000US17031271400
5,41.714095115,-87.652806763,11938124,0,0,0,1,POINT (-87.65281 41.71410),666.0,1400000US17031730400
...,...,...,...,...,...,...,...,...,...,...
260853,41.907072136,-87.731331357,11992909,0,1,0,0,POINT (-87.73133 41.90707),274.0,1400000US17031230600
260855,41.896591951,-87.692793096,11947046,0,1,0,0,POINT (-87.69279 41.89659),301.0,1400000US17031242600
260856,41.65634477,-87.605129962,11739161,0,1,0,0,POINT (-87.60513 41.65634),513.0,1400000US17031540101
260857,41.876889551,-87.690074383,11589244,0,1,0,0,POINT (-87.69007 41.87689),351.0,1400000US17031280800


In [160]:
#Aggregate information at a census tract level
crimes_2019_acs = crimes_2019_acs.groupby(['geo_id'],as_index = False)['violent','non-violent','property','other_crimes'].sum()
crimes_2019_acs.rename(columns = {"violent":"violent_crimes_2019","non-violent":"non_violent_crimes_2019","property":"property_crimes_2019","other_crimes":"other_crimes_2019"}, inplace = True)
crimes_2019_acs

  crimes_2019_acs = crimes_2019_acs.groupby(['geo_id'],as_index = False)['violent','non-violent','property','other_crimes'].sum()


Unnamed: 0,geo_id,violent_crimes_2019,non_violent_crimes_2019,property_crimes_2019,other_crimes_2019
0,1400000US17031010100,198,97,291,35
1,1400000US17031010201,156,38,188,19
2,1400000US17031010202,177,42,222,18
3,1400000US17031010300,129,48,216,37
4,1400000US17031010400,65,51,108,16
...,...,...,...,...,...
793,1400000US17031843500,127,41,115,16
794,1400000US17031843600,132,55,177,35
795,1400000US17031843700,48,38,100,17
796,1400000US17031843800,91,35,104,26


### Creating one total crimes dataset

In [161]:
#Use the acs dataset with just geo_ids as the starting point
crimes_total = pd.DataFrame(acs['geo_id'])

#Merging the 5 year wise datasets
crimes_total = crimes_total.merge(crimes_2015_acs, how = 'left', on = 'geo_id')
crimes_total = crimes_total.merge(crimes_2016_acs, how = 'left', on = 'geo_id')
crimes_total = crimes_total.merge(crimes_2017_acs, how = 'left', on = 'geo_id')
crimes_total = crimes_total.merge(crimes_2018_acs, how = 'left', on = 'geo_id')
crimes_total = crimes_total.merge(crimes_2019_acs, how = 'left', on = 'geo_id')

#Creating a total crimes column
crimes_total['total_violent_crimes'] = crimes_total['violent_crimes_2015'] + crimes_total['violent_crimes_2016'] + crimes_total['violent_crimes_2017'] + crimes_total['violent_crimes_2018'] + crimes_total['violent_crimes_2019']
crimes_total['total_non_violent_crimes'] = crimes_total['non_violent_crimes_2015'] + crimes_total['non_violent_crimes_2016'] + crimes_total['non_violent_crimes_2017'] + crimes_total['non_violent_crimes_2018'] + crimes_total['non_violent_crimes_2019']
crimes_total['total_property_crimes'] = crimes_total['property_crimes_2015'] + crimes_total['property_crimes_2016'] + crimes_total['property_crimes_2017'] + crimes_total['property_crimes_2018'] + crimes_total['property_crimes_2019']
crimes_total['total_other_crimes'] = crimes_total['other_crimes_2015'] + crimes_total['other_crimes_2016'] + crimes_total['other_crimes_2017'] + crimes_total['other_crimes_2018'] + crimes_total['other_crimes_2019']

crimes_total = crimes_total[['geo_id', 'total_violent_crimes','total_non_violent_crimes','total_property_crimes','total_other_crimes']]
crimes_total



Unnamed: 0,geo_id,total_violent_crimes,total_non_violent_crimes,total_property_crimes,total_other_crimes
0,1400000US17031010100,1039,417,1240,198
1,1400000US17031010201,801,225,942,138
2,1400000US17031010202,822,217,1081,77
3,1400000US17031010300,663,209,891,129
4,1400000US17031010400,461,212,602,97
...,...,...,...,...,...
793,1400000US17031843500,528,212,763,133
794,1400000US17031843600,671,238,783,132
795,1400000US17031843700,296,196,608,76
796,1400000US17031843800,643,231,642,131


## Merging all datasets

In [162]:
#Use the acs dataset with just geo_ids as the starting point
additional_predictors_final = pd.DataFrame(acs['geo_id'])

#Police stations
additional_predictors_final = additional_predictors_final.merge(ps_acs, how = 'left', on = 'geo_id')
additional_predictors_final['police_stations'] = additional_predictors_final['police_stations'].fillna(0)

#Fire stations
additional_predictors_final = additional_predictors_final.merge(fs_acs, how = 'left', on = 'geo_id')
additional_predictors_final['fire_stations'] = additional_predictors_final['fire_stations'].fillna(0)

#Schools
additional_predictors_final = additional_predictors_final.merge(schools_acs, how = 'left', on = 'geo_id')
additional_predictors_final['public_schools'] = additional_predictors_final['public_schools'].fillna(0)

#Parks
additional_predictors_final = additional_predictors_final.merge(parks_acs, how = 'left', on = 'geo_id')
additional_predictors_final['parks'] = additional_predictors_final['parks'].fillna(0)

#Commercial establishments
additional_predictors_final = additional_predictors_final.merge(comm_est_acs, how = 'left', on = 'geo_id')
additional_predictors_final['commercial_establishments'] = additional_predictors_final['commercial_establishments'].fillna(0)

#Police killings
additional_predictors_final = additional_predictors_final.merge(police_deaths_acs, how = 'left', on = 'geo_id')
additional_predictors_final['number_of_police_killings'] = additional_predictors_final['number_of_police_killings'].fillna(0)

#total_crimes
additional_predictors_final = additional_predictors_final.merge(crimes_total, how = 'left', on = 'geo_id')
additional_predictors_final['total_violent_crimes'] = additional_predictors_final['total_violent_crimes'].fillna(0)
additional_predictors_final['total_non_violent_crimes'] = additional_predictors_final['total_non_violent_crimes'].fillna(0)
additional_predictors_final['total_property_crimes'] = additional_predictors_final['total_property_crimes'].fillna(0)
additional_predictors_final['total_other_crimes'] = additional_predictors_final['total_other_crimes'].fillna(0)


#Display and export final dataset
additional_predictors_final.to_csv(here('./data/CleanOpenData.csv'), index = False)
additional_predictors_final

Unnamed: 0,geo_id,police_stations,fire_stations,public_schools,parks,commercial_establishments,number_of_police_killings,total_violent_crimes,total_non_violent_crimes,total_property_crimes,total_other_crimes
0,1400000US17031010100,0.0,0.0,1.0,3.0,24.0,0.0,1039,417,1240,198
1,1400000US17031010201,0.0,0.0,1.0,1.0,30.0,0.0,801,225,942,138
2,1400000US17031010202,0.0,1.0,1.0,2.0,58.0,0.0,822,217,1081,77
3,1400000US17031010300,0.0,0.0,0.0,1.0,71.0,0.0,663,209,891,129
4,1400000US17031010400,0.0,0.0,0.0,3.0,41.0,0.0,461,212,602,97
...,...,...,...,...,...,...,...,...,...,...,...
793,1400000US17031843500,0.0,0.0,1.0,0.0,121.0,0.0,528,212,763,133
794,1400000US17031843600,0.0,1.0,0.0,1.0,42.0,0.0,671,238,783,132
795,1400000US17031843700,0.0,1.0,1.0,2.0,117.0,1.0,296,196,608,76
796,1400000US17031843800,0.0,1.0,2.0,1.0,31.0,0.0,643,231,642,131
