In [1]:
import numpy as np
import pandas as pd
import matplotlib as plt
import geopandas as gpd # conda install geopandas
import shapely
import folium # conda install -c conda-forge folium
import json
import pickle
import geopy
from geopy.extra.rate_limiter import RateLimiter

### Import data
We import the food-inspections dataset and drop the columns that are irrelevant or null.

In [65]:
# Load the raw inspections and discard the columns this analysis does not use.
data = pd.read_csv('data/food-inspections.csv').drop(
    columns=[
        'AKA Name',
        'City',
        'State',
        "Historical Wards 2003-2015",
        "Zip Codes",
        "Community Areas",
        "Census Tracts",
        "Wards",
        "Location",
    ]
)
data.head()

Unnamed: 0,Inspection ID,DBA Name,License #,Facility Type,Risk,Address,Zip,Inspection Date,Inspection Type,Results,Violations,Latitude,Longitude
0,2320509,TAQUERIA EL DORADO,2694960.0,Restaurant,Risk 1 (High),2114 W LAWRENCE AVE,60625.0,2019-10-25T00:00:00.000,License Re-Inspection,Fail,"3. MANAGEMENT, FOOD EMPLOYEE AND CONDITIONAL E...",41.96882,-87.682292
1,2320519,SALAM RESTAURANT,2002822.0,Restaurant,Risk 1 (High),4634-4636 N KEDZIE AVE,60625.0,2019-10-25T00:00:00.000,Complaint Re-Inspection,Pass,,41.965719,-87.708538
2,2320421,THE NEW GRACE RESTAURANT,2698310.0,Restaurant,Risk 1 (High),4409 N BROADWAY,60640.0,2019-10-24T00:00:00.000,License,Not Ready,,41.962104,-87.655204
3,2320368,GADS HILL CHILD CARE CENTER,2215799.0,Children's Services Facility,Risk 1 (High),2653 W OGDEN AVE,60608.0,2019-10-24T00:00:00.000,License,Pass,10. ADEQUATE HANDWASHING SINKS PROPERLY SUPPLI...,41.862273,-87.692703
4,2320389,"ALL THINGS ARE POSSIBLE FOR KIDS, INC.",2215965.0,Children's Services Facility,Risk 1 (High),4014 W CHICAGO AVE,60651.0,2019-10-24T00:00:00.000,License,Pass,54. GARBAGE & REFUSE PROPERLY DISPOSED; FACILI...,41.895468,-87.726665


## Complete the dataset

In [66]:
# Report, for every column, how many rows have no value in it.
for col in data.columns:
    n_missing = data[col].isnull().sum()
    print("Number of entries without " + col + ":" + str(n_missing))

Number of entries without Inspection ID:0
Number of entries without DBA Name:0
Number of entries without License #:17
Number of entries without Facility Type:4774
Number of entries without Risk:72
Number of entries without Address:0
Number of entries without Zip:50
Number of entries without Inspection Date:0
Number of entries without Inspection Type:1
Number of entries without Results:0
Number of entries without Violations:51682
Number of entries without Latitude:683
Number of entries without Longitude:683


### License number

In [67]:
# Random sample of 10 inspections that have no license number.
data.loc[data['License #'].isnull()].sample(10)

Unnamed: 0,Inspection ID,DBA Name,License #,Facility Type,Risk,Address,Zip,Inspection Date,Inspection Type,Results,Violations,Latitude,Longitude
113188,1152076,ARGENTINA FOODS,,Grocery Store,Risk 2 (Medium),4500 S WOOD ST,60609.0,2014-04-10T00:00:00.000,Canvass,Out of Business,,41.812105,-87.670072
187278,229453,OLD ST. PATRICK'S CHURCH,,Church,Risk 2 (Medium),700 W ADAMS ST,60661.0,2010-06-08T00:00:00.000,Special Events (Festivals),Pass,33. FOOD AND NON-FOOD CONTACT EQUIPMENT UTENSI...,41.879367,-87.644105
165344,521659,ST. DEMETRIOS GREEK ORTHODOX CHURCH,,Special Event,Risk 2 (Medium),2727 W WINONA ST,60625.0,2011-08-10T00:00:00.000,Canvass,Pass,,41.974653,-87.697529
156097,670328,GOD'S BATTLE AXE PRAYER ACADEMY,,CHURCH/DAY CARE,Risk 1 (High),6969 N WESTERN AVE,60645.0,2012-02-09T00:00:00.000,Canvass,Fail,"11. ADEQUATE NUMBER, CONVENIENT, ACCESSIBLE, D...",42.008303,-87.690005
154371,1214242,GOD'S BATTLE AXE PRAYER ACADEMY,,CHURCH/DAY CARE,Risk 1 (High),6969 N WESTERN AVE,60645.0,2012-03-19T00:00:00.000,Canvass Re-Inspection,Pass,,42.008303,-87.690005
39837,2071910,ST. DEMETRIOS GREEK ORTHODOX CHURCH,,Special Event,Risk 2 (Medium),2727 W WINONA ST,60625.0,2017-08-04T00:00:00.000,Canvass,Pass,32. FOOD AND NON-FOOD CONTACT SURFACES PROPERL...,41.974653,-87.697529
22844,2181316,ST. DEMETRIOS GREEK ORTHODOX CHURCH,,Special Event,Risk 2 (Medium),2727 W WINONA ST,60625.0,2018-06-13T00:00:00.000,Canvass,Pass,32. FOOD AND NON-FOOD CONTACT SURFACES PROPERL...,41.974653,-87.697529
151045,1184330,OLD ST. PATRICK'S CHURCH,,Church,Risk 2 (Medium),700 W ADAMS ST,60661.0,2012-05-22T00:00:00.000,Special Events (Festivals),Pass,"34. FLOORS: CONSTRUCTED PER CODE, CLEANED, GOO...",41.879367,-87.644105
6403,2290863,ST. DEMETRIOS GREEK ORTHODOX CHURCH,,Special Event,Risk 2 (Medium),2727 W WINONA ST,60625.0,2019-06-04T00:00:00.000,Canvass,Pass w/ Conditions,"3. MANAGEMENT, FOOD EMPLOYEE AND CONDITIONAL E...",41.974653,-87.697529
154583,679787,GOD'S BATTLE AXE PRAYER ACADEMY,,CHURCH/DAY CARE,Risk 1 (High),6969 N WESTERN AVE,60645.0,2012-03-14T00:00:00.000,Canvass Re-Inspection,Fail,"9. WATER SOURCE: SAFE, HOT & COLD UNDER CITY P...",42.008303,-87.690005


### Facility Type

In [68]:
# Random sample of 10 inspections that have no facility type.
data.loc[data['Facility Type'].isnull()].sample(10)

Unnamed: 0,Inspection ID,DBA Name,License #,Facility Type,Risk,Address,Zip,Inspection Date,Inspection Type,Results,Violations,Latitude,Longitude
181413,413970,BONEY-BONES BBQ,1717032.0,,Risk 3 (Low),5620 S ASHLAND AVE,60636.0,2010-09-28T00:00:00.000,Canvass,Out of Business,,41.791549,-87.664668
174271,88692,PROVECHO,1868064.0,,Risk 3 (Low),5527-5531 N MILWAUKEE AVE,60630.0,2011-02-25T00:00:00.000,Canvass,Out of Business,,41.98235,-87.77366
188054,250230,ALAN'S GROCERY,1937255.0,,Risk 2 (Medium),202 N CALIFORNIA AVE,60612.0,2010-05-24T00:00:00.000,Out of Business,Pass,,41.884369,-87.696526
189714,63320,SPARRER SAUSAGE CO,4040.0,,Risk 3 (Low),4325 W OGDEN AVE,60623.0,2010-04-22T00:00:00.000,Canvass,Fail,,41.848356,-87.732128
179036,456367,LITTLE CAESAR PIZZA #1726,1915444.0,,Risk 2 (Medium),609 E 79TH ST,60619.0,2010-11-08T00:00:00.000,Canvass,Out of Business,,41.751097,-87.609683
181126,414077,EL COQUI RESTAURANT,1771894.0,,Risk 1 (High),4612 S ASHLAND AVE,60609.0,2010-10-01T00:00:00.000,Canvass,Out of Business,,41.810015,-87.665157
142412,1286155,STUDIO 69 CAFE,1847813.0,,Risk 1 (High),2366-2370 N NEVA AVE,60707.0,2012-10-26T00:00:00.000,Canvass,Out of Business,,41.92301,-87.804536
119773,1391290,MINI MART,1576046.0,,Risk 3 (Low),414 N CLARK ST,60610.0,2013-11-27T00:00:00.000,Canvass,Out of Business,,41.88976,-87.631221
76905,1591435,YOU-IN-WIRELESS,1517083.0,,Risk 3 (Low),328 E 43RD ST,60653.0,2015-12-09T00:00:00.000,Canvass,Out of Business,,41.816718,-87.618511
100852,1447538,I DREAM OF FALAFEL,2032750.0,,Risk 1 (High),329 S FRANKLIN ST,,2014-10-23T00:00:00.000,Canvass,Business Not Located,,41.877397,-87.635043


### Risk

In [69]:
# Random sample of 10 inspections that have no risk level.
data.loc[data['Risk'].isnull()].sample(10)

Unnamed: 0,Inspection ID,DBA Name,License #,Facility Type,Risk,Address,Zip,Inspection Date,Inspection Type,Results,Violations,Latitude,Longitude
155833,670575,RESPECT FOR LIFE,0.0,Restaurant,,725 W 79TH ST,60620.0,2012-02-16T00:00:00.000,Pre-License Consultation,Fail,,41.750553,-87.642674
159299,537746,SOUTH SHORE TOURISUM CENTER,0.0,,,1813 E 71ST ST,60649.0,2011-12-02T00:00:00.000,Complaint,Business Not Located,,41.76592,-87.580767
69313,1763609,THE INDIAN GARDEN RESTAURANT,2458737.0,,,700 E GRAND AVE,60611.0,2016-04-22T00:00:00.000,License,Not Ready,,41.892249,-87.609518
177961,469702,NICOLES SWEET SWEET TOUTH,0.0,,,11946 S HALSTED ST,60628.0,2010-12-02T00:00:00.000,Complaint,Fail,,41.676421,-87.642008
158082,659843,LAS BRISAS DEL SUR INC,2134071.0,Mobile Food Dispenser,,2637 S THROOP ST FL,60608.0,2012-01-03T00:00:00.000,License,Fail,,41.844671,-87.654962
17831,2229372,RICE THAI CAFE,2589016.0,,,211`7 W FARRAGUT AVE,60625.0,2018-10-04T00:00:00.000,License,Business Not Located,,,
1990,2311745,MOJO 33 NORTH LASALLE LLC,2689550.0,Restaurant,,33 N LA SALLE ST,60602.0,2019-09-04T00:00:00.000,License,Not Ready,,41.882798,-87.632242
192096,58348,UNIVERSITY OF AESTHETICS/TOUCH CLINIC,0.0,,,1357 N MILWAUKEE AVE,60622.0,2010-03-04T00:00:00.000,Short Form Complaint,Pass,,41.906596,-87.671323
7935,2288271,TAVERN ON THE POINT,2652668.0,,,6722-26 N NORTHWEST HWY,60631.0,2019-05-06T00:00:00.000,License,Not Ready,,42.003187,-87.817026
179474,428391,BEST SUBS,0.0,,,2653 N WASTENAW AVE BLDG,60647.0,2010-10-29T00:00:00.000,Complaint,Business Not Located,,,


### Zip

In [70]:
# Random sample of 10 inspections that have no zip code.
data.loc[data['Zip'].isnull()].sample(10)

Unnamed: 0,Inspection ID,DBA Name,License #,Facility Type,Risk,Address,Zip,Inspection Date,Inspection Type,Results,Violations,Latitude,Longitude
84837,1562130,PARKWEST LIQUORS AND SMOKE SHOP,2354157.0,Liquor,Risk 3 (Low),2570 N LINCOLN AVE,,2015-08-12T00:00:00.000,License,Fail,12. HAND WASHING FACILITIES: WITH SOAP AND SAN...,41.928597,-87.653388
174650,525229,GOLDEN HOUSE RESTAURANT,1443483.0,Restaurant,Risk 1 (High),4742-44 N RACINE AVE,,2011-02-17T00:00:00.000,Canvass Re-Inspection,Pass,"30. FOOD IN ORIGINAL CONTAINER, PROPERLY LABEL...",41.968491,-87.659816
113626,1464217,DUNKIN DONUTS,1515116.0,Restaurant,Risk 2 (Medium),7545 N PAULINA ST,,2014-04-02T00:00:00.000,Canvass,Out of Business,,42.019032,-87.673459
30229,2135962,PERSPECTIVES CHARTER SCHOOL,2225546.0,School,Risk 1 (High),1930 S ARCHER AVE,,2018-01-25T00:00:00.000,Canvass,Out of Business,,41.856033,-87.628528
182422,413369,I DREAM OF FALAFEL,2032750.0,,Risk 1 (High),329 S FRANKLIN ST,,2010-09-13T00:00:00.000,License,Business Not Located,,41.877397,-87.635043
139613,1279336,UIC COLLEGE PREP,2225460.0,School,Risk 1 (High),1231 S DAMEN,,2012-12-18T00:00:00.000,Canvass,Pass,33. FOOD AND NON-FOOD CONTACT EQUIPMENT UTENSI...,41.8658,-87.676074
100852,1447538,I DREAM OF FALAFEL,2032750.0,,Risk 1 (High),329 S FRANKLIN ST,,2014-10-23T00:00:00.000,Canvass,Business Not Located,,41.877397,-87.635043
174875,525226,THE LUNCH MACHINE INC,2079077.0,Mobile Food Dispenser,Risk 2 (Medium),1319 W WILSON AVE,,2011-02-15T00:00:00.000,License,Fail,,41.965212,-87.66326
124446,1115029,SPORT SERVICE SOLDIER FIELD,1354323.0,Restaurant,Risk 2 (Medium),1410 S MUSEUM CAMPUS DR.,,2013-09-19T00:00:00.000,Canvass,Out of Business,,41.862628,-87.615031
174129,525238,THE LUNCH MACHINE INC,2079077.0,Mobile Food Dispenser,Risk 2 (Medium),1319 W WILSON AVE,,2011-03-01T00:00:00.000,License Re-Inspection,Fail,,41.965212,-87.66326


### Inspection Type

In [71]:
# Every inspection that has no inspection type (shown in full, not sampled).
data.loc[data['Inspection Type'].isnull()]

Unnamed: 0,Inspection ID,DBA Name,License #,Facility Type,Risk,Address,Zip,Inspection Date,Inspection Type,Results,Violations,Latitude,Longitude
63843,1946612,ARAMARL-FRESHII,2470318.0,Restaurant,Risk 1 (High),2650 N CLARK ST,60614.0,2016-07-14T00:00:00.000,,Pass,,41.930833,-87.644135


### Violations

In [72]:
# Random sample of 10 inspections that have no recorded violations.
data.loc[data['Violations'].isnull()].sample(10)

Unnamed: 0,Inspection ID,DBA Name,License #,Facility Type,Risk,Address,Zip,Inspection Date,Inspection Type,Results,Violations,Latitude,Longitude
1656,2312118,ROTI MODERN MEDITERRANEAN,2550110.0,Restaurant,Risk 1 (High),624 W LAKE ST,60661.0,2019-09-11T00:00:00.000,Canvass,No Entry,,41.88581,-87.643907
44331,2050952,M.W. PRINCE HALL GRAND LODGE,2424043.0,Restaurant,Risk 3 (Low),809 E 42ND PL,60653.0,2017-05-23T00:00:00.000,License,Fail,,41.817842,-87.606302
40206,2071628,KYUS,2451473.0,Restaurant,Risk 1 (High),939 N ASHLAND AVE,60622.0,2017-07-31T00:00:00.000,Canvass,Out of Business,,41.89908,-87.667228
162578,521786,Roscoe Village Market,1249271.0,,Risk 3 (Low),3533 N WESTERN AVE,60618.0,2011-10-03T00:00:00.000,Canvass,Out of Business,,41.945912,-87.688088
144045,1134598,La Fette,1332302.0,,Risk 3 (Low),163 W NORTH AVE,60610.0,2012-09-26T00:00:00.000,Canvass,Out of Business,,41.911052,-87.634194
179583,420197,NIGHTWOOD,1869813.0,Restaurant,Risk 1 (High),2119 S HALSTED ST,60608.0,2010-10-28T00:00:00.000,Complaint,Fail,,41.853544,-87.646372
42003,2065093,LIL EINSTEIN'S INSTITUTE FOR SCHOLARS,2216110.0,Children's Services Facility,Risk 1 (High),1854 S RACINE AVE,60608.0,2017-06-27T00:00:00.000,License Re-Inspection,Pass,,41.856321,-87.656524
56656,1970346,PORTILLO'S HOT DOGS,2433064.0,Restaurant,Risk 1 (High),520 W TAYLOR ST,60607.0,2016-10-27T00:00:00.000,Short Form Complaint,Pass,,41.869856,-87.64027
33375,1956015,ANNETTE'S ITALIAN ICE,2088056.0,Restaurant,Risk 2 (Medium),2009 N BISSELL ST,60614.0,2017-11-20T00:00:00.000,Canvass,No Entry,,41.918442,-87.652015
164728,614427,PROTEIN BAR,2092960.0,Restaurant,Risk 1 (High),10 S CLINTON ST,60661.0,2011-08-22T00:00:00.000,License Re-Inspection,Pass,,41.881559,-87.641331


### Latitude and longitude
Some entries are missing coordinate data, which is needed to place them on the map.
All entries have an address, so we fill in the missing coordinates with Geopy, which queries a geocoding service to convert addresses into coordinates. Through testing, we found that the ArcGIS geocoder gives sufficiently good results.

In [76]:
# Fill in missing coordinates by geocoding the address of every inspection
# that lacks them.
#
# Lookups are keyed by address rather than by "DBA Name": a name can be shared
# by several establishments at different addresses, and keying on the name
# would overwrite the valid coordinates of every other row with that name.
# Only rows that are actually missing coordinates are written to, so a failed
# lookup can never erase existing data.
locator = geopy.geocoders.ArcGIS(user_agent='myGeocoder')
# Throttle requests to at most one per second.
geocode = RateLimiter(locator.geocode, min_delay_seconds=1)

missing_coords = data['Latitude'].isnull() | data['Longitude'].isnull()
data_wo_latlong = data[missing_coords]

lat_by_address = {}
lon_by_address = {}
# Each distinct address is looked up once, however many inspections share it.
for address in data_wo_latlong['Address'].dropna().unique():
    if address.strip() == "":
        # Nothing to geocode for a blank address.
        continue
    location = geocode(address + ", Chicago, United States")
    if location is not None:
        lat_by_address[address] = location.latitude
        lon_by_address[address] = location.longitude
    else:
        print('Did not find location for address: ' + address)

# Write the results back only where coordinates were missing AND a location
# was found; every other row keeps its original values.
fillable = missing_coords & data['Address'].isin(list(lat_by_address))
data.loc[fillable, 'Latitude'] = data.loc[fillable, 'Address'].map(lat_by_address)
data.loc[fillable, 'Longitude'] = data.loc[fillable, 'Address'].map(lon_by_address)


RateLimiter caught an error, retrying (0/2 tries). Called with (*('9513 S RIDGELAND AVE STE 3E, Chicago, United States',), **{}).
Traceback (most recent call last):
  File "/home/widmark/Anaconda3/lib/python3.7/site-packages/geopy/geocoders/base.py", line 355, in _call_geocoder
    page = requester(req, timeout=timeout, **kwargs)
  File "/home/widmark/Anaconda3/lib/python3.7/urllib/request.py", line 525, in open
    response = self._open(req, data)
  File "/home/widmark/Anaconda3/lib/python3.7/urllib/request.py", line 543, in _open
    '_open', req)
  File "/home/widmark/Anaconda3/lib/python3.7/urllib/request.py", line 503, in _call_chain
    result = func(*args)
  File "/home/widmark/Anaconda3/lib/python3.7/urllib/request.py", line 1360, in https_open
    context=self._context, check_hostname=self._check_hostname)
  File "/home/widmark/Anaconda3/lib/python3.7/urllib/request.py", line 1320, in do_open
    r = h.getresponse()
  File "/home/widmark/Anaconda3/lib/python3.7/http/client.py"

Did not find location for address: 9513 S RIDGELAND AVE STE 3E


In [77]:
# Count the rows that still have no latitude after geocoding.
print("Number of entries without coordinate data: " + str(data["Latitude"].isnull().sum()))

Number of entries without coordinate data: 1


## Projecting food inspections on a map
We want to be able to visualize our dataset on a map based on coordinates. This is done by using Folium.

In [84]:
# Collapse the inspections to one row per establishment name, taking the
# first entry of each column within the group, then show how many names exist.
data_names = data.groupby(by='DBA Name').first()
print(data_names.shape[0])

27491


In [87]:
# Draw a Folium map with one marker per establishment.
# Centre the map on the median coordinates of the establishments.
middle_lat = data_names.Latitude.median()
middle_lon = data_names.Longitude.median()
ch_map = folium.Map(location=[middle_lat, middle_lon], zoom_start=10)

# Markers are collected in this feature group, which is then attached to the
# map as a single layer.
locations = folium.map.FeatureGroup()
lats = data_names.Latitude
lons = data_names.Longitude
names = data_names.index

# Only the first `max_markers` establishments are considered.
# NOTE(review): presumably this keeps the rendered map light - confirm.
max_markers = 100

# Slice before zipping so the full list of rows is never materialised.
for lat, lon, name in zip(lats.iloc[:max_markers], lons.iloc[:max_markers], names[:max_markers]):
    # Establishments without coordinates cannot be placed on the map.
    if np.isnan(lat) or np.isnan(lon):
        continue
    folium.Marker(
        location=[lat, lon],
        tooltip=name,
        icon=folium.Icon(color='red', icon='info-sign'),
    ).add_to(locations)

ch_map.add_child(locations)
ch_map

In [59]:
# Wrap the inspections in a GeoDataFrame whose geometry is one shapely point
# per row, built as (x=Longitude, y=Latitude).
g_data = gpd.GeoDataFrame(data).set_geometry(
    [
        shapely.geometry.Point(lon, lat)
        for lon, lat in zip(data.Longitude, data.Latitude)
    ]
)
g_data.head()

In [None]:
# chicago_map = gpd.read_file('data/chicago_zip.shp') # https://www.kaggle.com/threadid/chicago-shape-files

In [None]:
# ax = chicago_map.plot()
# g_data.plot(ax=ax, color='red')