In [111]:
import datetime
import json

import folium
import geopandas as gpd
import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopandas.tools import sjoin
from geopy.geocoders import Nominatim
from shapely.geometry import Point, Polygon

### Basic Map with Boundaries

In [4]:
## Geocode the city once; the resulting coordinates are used as the map centre.
geolocator = Nominatim(user_agent="NMKG")
chicago_location = geolocator.geocode("Chicago")
## geocode() returns None when the service finds no match; fail here with a clear
## message instead of an AttributeError on the attribute access below.
if chicago_location is None:
    raise RuntimeError('Nominatim returned no result for "Chicago"')
(chicago_location.latitude, chicago_location.longitude)

(41.8755616, -87.6244212)

In [16]:
## Base map centred on the geocoded Chicago coordinates.
chicago_center = [chicago_location.latitude, chicago_location.longitude]
m = folium.Map(location=chicago_center, zoom_start=11)

In [17]:
## Overlay the boundaries stored in the GeoJSON file on the base map.
## The context manager closes the file handle as soon as the JSON is parsed.
with open("data/chicago_boundaries.geojson") as geojson_file:
    geojson_data = json.load(geojson_file)

folium.GeoJson(
    geojson_data,
    name='geojson'
).add_to(m)

### Neighbourhood <-> Community Area mapping (may be useful later)

In [131]:
## Scrape the Wikipedia table that pairs each neighbourhood with its community area.
url = "https://en.wikipedia.org/wiki/List_of_neighborhoods_in_Chicago"
## requests has no default timeout, so set one to avoid hanging the notebook.
response = requests.get(url, timeout=30)
## Stop on an HTTP error page rather than parsing it as if it were the article.
response.raise_for_status()
html_soup = BeautifulSoup(response.text, 'html.parser')
## Rows of the first <tbody> on the page, minus the first row (presumably the header).
table_content = html_soup.find("tbody").find_all("tr")[1:]

neigh_rows = []
for entry in table_content:
    neigh_community = entry.find_all("td")
    ## Both a neighbourhood cell and a community-area cell are required.
    if len(neigh_community) < 2:
        continue
    ## get_text() returns the cell text with all markup (links included) removed,
    ## so no special handling of <a> tags is needed.
    neigh, community = (cell.get_text().strip() for cell in neigh_community[:2])
    neigh_rows.append([neigh, community])

map_list = pd.DataFrame(neigh_rows, columns=["neigh", "community"])
map_list.head()

Unnamed: 0,neigh,community
0,Albany Park,Albany Park
1,Altgeld Gardens,Riverdale
2,Andersonville,Edgewater
3,Archer Heights,Archer Heights
4,Armour Square,Armour Square


### Map with Inspections

In [79]:
## Load the food inspections and keep only the rows that have both coordinates;
## the coordinates are needed later to build one point per inspection.
inspections = pd.read_csv("data/food-inspections.csv").dropna(subset=["Latitude", "Longitude"])

In [80]:
## List the columns available in the inspections table.
inspections.columns

Index(['Inspection ID', 'DBA Name', 'AKA Name', 'License #', 'Facility Type',
       'Risk', 'Address', 'City', 'State', 'Zip', 'Inspection Date',
       'Inspection Type', 'Results', 'Violations', 'Latitude', 'Longitude',
       'Location', 'Historical Wards 2003-2015', 'Zip Codes',
       'Community Areas', 'Census Tracts', 'Wards'],
      dtype='object')

In [81]:
## Parse "Inspection Date" into datetimes so the rows can be filtered by year.
inspections["date"] = pd.to_datetime(inspections["Inspection Date"])

In [82]:
## Restrict the analysis to inspections dated 2019 to keep things manageable.
in_2019 = inspections["date"].dt.year.eq(2019)
inspections_2019 = inspections.loc[in_2019]
inspections_2019["date"].tail()

14130   2019-01-02
14131   2019-01-02
14132   2019-01-02
14133   2019-01-02
14134   2019-01-02
Name: date, dtype: datetime64[ns]

In [83]:
## Read the neighbourhood polygons and check that every row has a distinct primary name.
geo_df = gpd.read_file("data/chicago_boundaries.geojson")
distinct_names = geo_df["pri_neigh"].nunique()
print(distinct_names == len(geo_df))
geo_df.sort_values(by="pri_neigh").head()

True


Unnamed: 0,pri_neigh,sec_neigh,shape_area,shape_len,geometry
52,Albany Park,"NORTH PARK,ALBANY PARK",53542230.819,39339.016439,(POLYGON ((-87.70403771340104 41.9735515838182...
42,Andersonville,ANDERSONVILLE,9584592.89906,12534.092625,(POLYGON ((-87.66114249176968 41.9763032707800...
78,Archer Heights,"ARCHER HEIGHTS,WEST ELSDON",55922505.7212,31880.02103,(POLYGON ((-87.71436934735939 41.8260405636423...
8,Armour Square,"ARMOUR SQUARE,CHINATOWN",17141468.6356,24359.189625,(POLYGON ((-87.62920071904188 41.8471270613852...
21,Ashburn,ASHBURN,135460337.208,54818.154632,(POLYGON ((-87.71254775561138 41.7573373338274...


In [92]:
## Build a GeoDataFrame holding one point per 2019 inspection.
## Points are (x, y) = (longitude, latitude).
geometry = gpd.points_from_xy(inspections_2019.Longitude, inspections_2019.Latitude)
## The {'init': 'epsg:4326'} dict form is deprecated; pass the authority string instead.
crs = "EPSG:4326"
inspections_to_join = gpd.GeoDataFrame(
    ## Copy so the geometry column is never written into a slice of inspections_2019.
    inspections_2019[["Inspection ID", "Latitude", "Longitude"]].copy(),
    crs=crs,
    geometry=geometry,
)
inspections_to_join.head()

Unnamed: 0,Inspection ID,Latitude,Longitude,geometry
0,2320315,41.714168,-87.655291,POINT (-87.65529116028439 41.7141680989703)
1,2320342,41.913588,-87.682203,POINT (-87.6822028354253 41.9135877900482)
2,2320328,41.808025,-87.720037,POINT (-87.72003743037237 41.80802515275297)
3,2320319,41.808025,-87.720037,POINT (-87.72003743037237 41.80802515275297)
4,2320228,41.807662,-87.73148,POINT (-87.7314802731113 41.8076619936005)


In [94]:
## Spatially join every inspection point to the neighbourhood polygon it falls in.
## how='left' keeps points that match no polygon; their neighbourhood columns are NaN.
## (sjoin is imported with the other imports at the top of the notebook.)
points_to_neigh = sjoin(inspections_to_join, geo_df, how='left')

## Attach the neighbourhood columns to the full inspection records.
## Latitude/Longitude exist on both sides, so pandas suffixes them with _x / _y.
## NOTE: this rebinds inspections_2019, so the cell is not safe to run twice in a row.
inspections_2019 = inspections_2019.merge(points_to_neigh, on="Inspection ID")
inspections_2019.head()

Unnamed: 0,Inspection ID,DBA Name,AKA Name,License #,Facility Type,Risk,Address,City,State,Zip,...,Wards,date,Latitude_y,Longitude_y,geometry,index_right,pri_neigh,sec_neigh,shape_area,shape_len
0,2320315,SERENDIPITY CHILDCARE,SERENDIPITY CHILDCARE,2216009.0,Daycare Above and Under 2 Years,Risk 1 (High),1300 W 99TH ST,CHICAGO,IL,60643.0,...,,2019-10-23,41.714168,-87.655291,POINT (-87.65529116028439 41.7141680989703),72.0,Washington Heights,"WASHINGTON HEIGHTS,ROSELAND",79635752.8769,42222.598163
1,2320342,YOLK TEST KITCHEN,YOLK TEST KITCHEN,2589655.0,Restaurant,Risk 1 (High),1767 N MILWAUKEE AVE,CHICAGO,IL,60647.0,...,,2019-10-23,41.913588,-87.682203,POINT (-87.6822028354253 41.9135877900482),88.0,Wicker Park,"WICKER PARK,WEST TOWN",26853193.0926,21992.660946
2,2320328,LAS ASADAS MEXICAN GRILL,LAS ASADAS MEXICAN GRILL,2583309.0,Restaurant,Risk 1 (High),3834 W 47TH ST,CHICAGO,IL,60632.0,...,,2019-10-23,41.808025,-87.720037,POINT (-87.72003743037237 41.80802515275297),78.0,Archer Heights,"ARCHER HEIGHTS,WEST ELSDON",55922505.7212,31880.02103
3,2320319,LA PALAPITA,LA PALAPITA,2694702.0,Restaurant,Risk 1 (High),3834 W 47TH ST,CHICAGO,IL,60632.0,...,,2019-10-23,41.808025,-87.720037,POINT (-87.72003743037237 41.80802515275297),78.0,Archer Heights,"ARCHER HEIGHTS,WEST ELSDON",55922505.7212,31880.02103
4,2320228,47TH ST CANTINA,47TH ST CANTINA,2678250.0,Liquor,Risk 3 (Low),4311 W 47TH ST,CHICAGO,IL,60632.0,...,,2019-10-22,41.807662,-87.73148,POINT (-87.7314802731113 41.8076619936005),78.0,Archer Heights,"ARCHER HEIGHTS,WEST ELSDON",55922505.7212,31880.02103


In [104]:
## Number of 2019 inspections per neighbourhood.
## Select the column before aggregating so only "Inspection ID" is counted,
## instead of counting every column and discarding all but one.
inspection_per_neigh = inspections_2019.groupby("pri_neigh")["Inspection ID"].count()
inspection_per_neigh.head()

pri_neigh
Albany Park       283
Andersonville      59
Archer Heights     95
Armour Square      31
Ashburn           153
Name: Inspection ID, dtype: int64

In [179]:
## Fresh map centred on Chicago, coloured by the number of inspections per neighbourhood.
chicago_center = [chicago_location.latitude, chicago_location.longitude]
m = folium.Map(location=chicago_center, zoom_start=11)

## Polygons are matched to the counts through their pri_neigh property.
inspections_layer = folium.Choropleth(
    geo_data=geo_df,
    name='choropleth',
    data=inspection_per_neigh,
    columns=["pri_neigh", "Inspection ID"],
    key_on='feature.properties.pri_neigh',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
)
inspections_layer.add_to(m)

<folium.features.Choropleth at 0x1de0bb0ac08>

In [158]:
## Neighbourhoods present in the boundary file that had no inspection in 2019.
boundary_neighs = set(geo_df["pri_neigh"].unique())
inspected_neighs = set(inspections_2019["pri_neigh"].unique())
boundary_neighs.difference(inspected_neighs)

{'Burnside', 'Grant Park'}

In [183]:
## Neighbourhood labels found on inspections but absent from the boundary file.
## NaN appears here for inspections located outside the boundaries given by the city.
inspected_neighs = set(inspections_2019["pri_neigh"].unique())
boundary_neighs = set(geo_df["pri_neigh"].unique())
inspected_neighs.difference(boundary_neighs)

{nan}

In [180]:
## Put a marker on every inspection that was not matched to any neighbourhood polygon.
not_in_map = inspections_2019[inspections_2019["pri_neigh"].isna()]
for row in not_in_map.itertuples():
    ## The popup shows the row's index label in inspections_2019.
    folium.Marker([row.Latitude_x, row.Longitude_x], popup=row.Index).add_to(m)
## Number of markers added; len() replaces the hand-maintained loop counter.
print(len(not_in_map))

263


In [181]:
## Write the inspections map (choropleth plus markers) to an HTML file.
m.save("map.html")

In [182]:
## Distinct latitude and longitude values among the unmatched inspections.
## NOTE(review): each column is deduplicated on its own, so the two arrays are not
## guaranteed to line up as (lat, lon) pairs.
unmatched_lats = not_in_map["Latitude_y"].unique()
unmatched_lons = not_in_map["Longitude_y"].unique()
(unmatched_lats, unmatched_lons)

(array([42.0085364 , 41.89224916, 41.89233781]),
 array([-87.91442844, -87.60951805, -87.60404476]))

#### If you check this map, you will see two things that look odd
1) When we intersect the polygons provided by the government with the inspection points, 3 locations (263 inspections in total) fall outside every polygon. This is caused by very small errors in the measured coordinates, so it is quite easy to assign these points to a neighbourhood by hand. Have a look at the map.
2) The neighbourhoods drawn in black did not actually have any inspections in 2019.

### Map with Income

In [217]:
## Load the economic indicators table and preview its first rows.
eco_info = pd.read_csv("data/economic-info.csv")
eco_info.head()

Unnamed: 0,area_number,area_name,housing_crowded,household_below_poverty,unemployment_16,wo_hs_25,aged_18_64,per_capita_income,hardship_index
0,1,Rogers Park,7.7,23.6,8.7,18.2,27.5,23939,39
1,2,West Ridge,7.8,17.2,8.8,20.8,38.5,23040,46
2,3,Uptown,3.8,24.0,8.9,11.8,22.2,35787,20
3,4,Lincoln Square,3.4,10.9,8.2,13.4,25.5,37524,17
4,5,North Center,0.3,7.5,5.2,4.5,26.2,57123,6


In [218]:
## Fresh map centred on Chicago, coloured by per-capita income.
chicago_center = [chicago_location.latitude, chicago_location.longitude]
m = folium.Map(location=chicago_center, zoom_start=11)

## area_name values from the economic table are matched against the polygons' pri_neigh property.
income_layer = folium.Choropleth(
    geo_data=geo_df,
    name='choropleth',
    data=eco_info,
    columns=["area_name", "per_capita_income"],
    key_on='feature.properties.pri_neigh',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
)
income_layer.add_to(m)

<folium.features.Choropleth at 0x1de3489c988>

In [219]:
## Save the income map under its own file name: writing to "map.html" again would
## overwrite the inspections map saved earlier in the notebook.
m.save("map_income.html")

In [220]:
## Area names in the economic data that have no identically named polygon.
eco_area_names = set(eco_info["area_name"])
polygon_names = set(geo_df["pri_neigh"].unique())
eco_area_names.difference(polygon_names)

{'East Garfield Park',
 'Forest Glen',
 'Greater Grand Crossing',
 'Humboldt park',
 'McKinley Park',
 'Montclaire',
 'Near North Side',
 'Near West Side',
 'South Lawndale',
 'Washington Height',
 'West Englewood',
 'West Garfield Park'}

In [221]:
## Polygon names that have no identically named area in the economic data.
polygon_names = set(geo_df["pri_neigh"].unique())
eco_area_names = set(eco_info["area_name"])
polygon_names.difference(eco_area_names)

{'Andersonville',
 'Boystown',
 'Bucktown',
 'Chinatown',
 'East Village',
 'Galewood',
 'Garfield Park',
 'Gold Coast',
 'Grand Crossing',
 'Grant Park',
 'Greektown',
 'Humboldt Park',
 'Jackson Park',
 'Little Italy, UIC',
 'Little Village',
 'Magnificent Mile',
 'Mckinley Park',
 'Millenium Park',
 'Montclare',
 'Museum Campus',
 'Old Town',
 'Printers Row',
 'River North',
 'Rush & Division',
 'Sauganash,Forest Glen',
 'Sheffield & DePaul',
 'Streeterville',
 'Ukrainian Village',
 'United Center',
 'Washington Heights',
 'West Loop',
 'Wicker Park',
 'Wrigleyville'}

In [224]:
## Look up one of the unmatched polygon names to see what kind of place it is.
geolocator = Nominatim(user_agent="NMKG")
geolocator.geocode("Jackson Park Chicago")

Location(Jackson Park, Woodlawn, Chicago, Cook County, Illinois, United States of America, (41.78323175, -87.5804432439724, 0.0))

In [228]:
## Same lookup for another unmatched polygon name.
## NOTE(review): the recorded result is North Chicago in Lake County, so this
## free-text query appears to be ambiguous for the geocoder.
geolocator = Nominatim(user_agent="NMKG")
geolocator.geocode("River North Chicago")

Location(North Chicago, Lake County, Illinois, USA, (42.325578, -87.8411818, 0.0))

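Note that some of these names are not neighbourhoods as such (Jackson Park is a park inside Woodlawn), and some geocode to a different region altogether (the query "River North Chicago" returns North Chicago in Lake County). The names need to be cleaned before the two datasets can be matched.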

### Map with Crime