In [1]:
import numpy as np
import pandas as pd
import matplotlib as plt
import geopandas as gpd # conda install geopandas
import shapely
import folium # conda install -c conda-forge folium
import json
import pickle
import geopy
from geopy.extra.rate_limiter import RateLimiter
import re
import requests

### Import data
We import the dataset containing the food inspection and drop all irrelevant and null columns.

In [2]:
data = pd.read_csv('data/food-inspections.csv').drop(['AKA Name', 'City', 'State', "Historical Wards 2003-2015", "Zip Codes", "Community Areas", "Census Tracts", "Wards", "Location"], axis=1)
data.head()

Unnamed: 0,Inspection ID,DBA Name,License #,Facility Type,Risk,Address,Zip,Inspection Date,Inspection Type,Results,Violations,Latitude,Longitude
0,2320509,TAQUERIA EL DORADO,2694960.0,Restaurant,Risk 1 (High),2114 W LAWRENCE AVE,60625.0,2019-10-25T00:00:00.000,License Re-Inspection,Fail,"3. MANAGEMENT, FOOD EMPLOYEE AND CONDITIONAL E...",41.96882,-87.682292
1,2320519,SALAM RESTAURANT,2002822.0,Restaurant,Risk 1 (High),4634-4636 N KEDZIE AVE,60625.0,2019-10-25T00:00:00.000,Complaint Re-Inspection,Pass,,41.965719,-87.708538
2,2320421,THE NEW GRACE RESTAURANT,2698310.0,Restaurant,Risk 1 (High),4409 N BROADWAY,60640.0,2019-10-24T00:00:00.000,License,Not Ready,,41.962104,-87.655204
3,2320368,GADS HILL CHILD CARE CENTER,2215799.0,Children's Services Facility,Risk 1 (High),2653 W OGDEN AVE,60608.0,2019-10-24T00:00:00.000,License,Pass,10. ADEQUATE HANDWASHING SINKS PROPERLY SUPPLI...,41.862273,-87.692703
4,2320389,"ALL THINGS ARE POSSIBLE FOR KIDS, INC.",2215965.0,Children's Services Facility,Risk 1 (High),4014 W CHICAGO AVE,60651.0,2019-10-24T00:00:00.000,License,Pass,54. GARBAGE & REFUSE PROPERLY DISPOSED; FACILI...,41.895468,-87.726665


# Complete the dataset

## Duplicates
Entries must be identified by their Inspection ID. Duplicates can simply be removed.

In [3]:
print("Number of duplicate rows: " + str(len(data[data.duplicated("Inspection ID")])))
data = data.drop_duplicates("Inspection ID")

Number of duplicate rows: 196


## Deal with missing values

In [4]:
for col in data.columns:
    print("Number of entries without " + col + ":" + str(len(data[data[col].apply(pd.isnull)])))

Number of entries without Inspection ID:0
Number of entries without DBA Name:0
Number of entries without License #:17
Number of entries without Facility Type:4770
Number of entries without Risk:72
Number of entries without Address:0
Number of entries without Zip:50
Number of entries without Inspection Date:0
Number of entries without Inspection Type:1
Number of entries without Results:0
Number of entries without Violations:51636
Number of entries without Latitude:682
Number of entries without Longitude:682


### License number

In [5]:
data[data['License #'].apply(pd.isnull)].sample(10)

Unnamed: 0,Inspection ID,DBA Name,License #,Facility Type,Risk,Address,Zip,Inspection Date,Inspection Type,Results,Violations,Latitude,Longitude
22844,2181316,ST. DEMETRIOS GREEK ORTHODOX CHURCH,,Special Event,Risk 2 (Medium),2727 W WINONA ST,60625.0,2018-06-13T00:00:00.000,Canvass,Pass,32. FOOD AND NON-FOOD CONTACT SURFACES PROPERL...,41.974653,-87.697529
154583,679787,GOD'S BATTLE AXE PRAYER ACADEMY,,CHURCH/DAY CARE,Risk 1 (High),6969 N WESTERN AVE,60645.0,2012-03-14T00:00:00.000,Canvass Re-Inspection,Fail,"9. WATER SOURCE: SAFE, HOT & COLD UNDER CITY P...",42.008303,-87.690005
154371,1214242,GOD'S BATTLE AXE PRAYER ACADEMY,,CHURCH/DAY CARE,Risk 1 (High),6969 N WESTERN AVE,60645.0,2012-03-19T00:00:00.000,Canvass Re-Inspection,Pass,,42.008303,-87.690005
65373,1933084,ST. DEMETRIOS GREEK ORTHODOX CHURCH,,Special Event,Risk 2 (Medium),2727 W WINONA ST,60625.0,2016-06-20T00:00:00.000,Canvass,Pass,38. VENTILATION: ROOMS AND EQUIPMENT VENTED AS...,41.974653,-87.697529
85255,1561809,ST. DEMETRIOS GREEK ORTHODOX CHURCH,,Special Event,Risk 2 (Medium),2727 W WINONA ST,60625.0,2015-08-04T00:00:00.000,Canvass,Pass,,41.974653,-87.697529
184679,339207,ST DEMETRIOS CHURCH,,Special Event,Risk 1 (High),2727 W WINONA ST,60625.0,2010-07-30T00:00:00.000,Special Events (Festivals),Pass,32. FOOD AND NON-FOOD CONTACT SURFACES PROPERL...,41.974653,-87.697529
113188,1152076,ARGENTINA FOODS,,Grocery Store,Risk 2 (Medium),4500 S WOOD ST,60609.0,2014-04-10T00:00:00.000,Canvass,Out of Business,,41.812105,-87.670072
165344,521659,ST. DEMETRIOS GREEK ORTHODOX CHURCH,,Special Event,Risk 2 (Medium),2727 W WINONA ST,60625.0,2011-08-10T00:00:00.000,Canvass,Pass,,41.974653,-87.697529
187278,229453,OLD ST. PATRICK'S CHURCH,,Church,Risk 2 (Medium),700 W ADAMS ST,60661.0,2010-06-08T00:00:00.000,Special Events (Festivals),Pass,33. FOOD AND NON-FOOD CONTACT EQUIPMENT UTENSI...,41.879367,-87.644105
110647,1459918,ST. DEMETRIOS GREEK ORTHODOX CHURCH,,Special Event,Risk 2 (Medium),2727 W WINONA ST,60625.0,2014-05-20T00:00:00.000,Canvass,Pass,33. FOOD AND NON-FOOD CONTACT EQUIPMENT UTENSI...,41.974653,-87.697529


### Facility Type

In [6]:
data[data['Facility Type'].apply(pd.isnull)].sample(10)

Unnamed: 0,Inspection ID,DBA Name,License #,Facility Type,Risk,Address,Zip,Inspection Date,Inspection Type,Results,Violations,Latitude,Longitude
180829,120618,MID WEST IMPORTS,4024.0,,Risk 3 (Low),1121 S CLINTON ST,60607.0,2010-10-07T00:00:00.000,Canvass,Out of Business,,41.868362,-87.64061
177915,470131,STANS DRIVE-IN,3587.0,,Risk 3 (Low),3001 S ARCHER AVE,60608.0,2010-12-03T00:00:00.000,Canvass,Out of Business,,41.83996,-87.661181
139165,1322307,NOHANA RESTAURANT,31170.0,,Risk 3 (Low),3136 N BROADWAY,60657.0,2013-01-03T00:00:00.000,Canvass,Out of Business,,41.938779,-87.644503
193417,197246,MCDONALD'S,1821171.0,,Risk 2 (Medium),36 W RANDOLPH ST,60601.0,2010-02-03T00:00:00.000,Out of Business,Fail,,41.884591,-87.629236
148809,1154435,"PRIMARY DAY CARE,INC",80877.0,,Risk 3 (Low),11439 S MICHIGAN AVE,60628.0,2012-06-29T00:00:00.000,Canvass,Out of Business,,41.685962,-87.62068
131552,1335657,MARATHON MIDWAY GAS,1879314.0,,Risk 3 (Low),4951 S CICERO AVE,60632.0,2013-05-20T00:00:00.000,Canvass,Out of Business,,41.802469,-87.742865
132491,1335331,SANA ATIQ TRADING INC.,1718425.0,,Risk 3 (Low),5200 N OTTO AVE,60656.0,2013-05-03T00:00:00.000,Canvass,Out of Business,,41.973228,-87.866958
149372,1229722,SUBWAY SANDWICHES & SALADS,61199.0,,Risk 3 (Low),5324 N BROADWAY,60640.0,2012-06-20T00:00:00.000,Canvass,Business Not Located,,41.978914,-87.660087
126055,920374,UNCLE'S JOE'S INC.,1379729.0,,Risk 3 (Low),8211 S COTTAGE GROVE AVE,60619.0,2013-08-26T00:00:00.000,Canvass,Out of Business,,41.745425,-87.604927
80640,1578836,THE REVIVAL,2423633.0,,Risk 3 (Low),1160 E 55TH ST,60615.0,2015-10-13T00:00:00.000,License,Fail,,41.79517,-87.597183


### Risk

In [7]:
data[data['Risk'].apply(pd.isnull)].sample(10)

Unnamed: 0,Inspection ID,DBA Name,License #,Facility Type,Risk,Address,Zip,Inspection Date,Inspection Type,Results,Violations,Latitude,Longitude
175968,491298,KANELA CAFE,2078723.0,,,3231 N CLARK ST,60657.0,2011-01-20T00:00:00.000,License,Fail,,41.940826,-87.651211
158082,659843,LAS BRISAS DEL SUR INC,2134071.0,Mobile Food Dispenser,,2637 S THROOP ST FL,60608.0,2012-01-03T00:00:00.000,License,Fail,,41.844671,-87.654962
7935,2288271,TAVERN ON THE POINT,2652668.0,,,6722-26 N NORTHWEST HWY,60631.0,2019-05-06T00:00:00.000,License,Not Ready,,42.003187,-87.817026
178059,469353,CHICAGO O'HARE GARDEN HOTEL,1380014.0,unlicensed facility,,8201 W HIGGINS RD,60631.0,2010-12-01T00:00:00.000,Consultation,Fail,,41.985652,-87.832152
187122,250904,FARMER'S MARKET,0.0,FARMER'S MARKET,,3252 N BROADWAY,60657.0,2010-06-10T00:00:00.000,Complaint,Fail,,41.94163,-87.644598
135810,1322185,CONGRESS THEATRE,2196827.0,,,2135 N MILWAUKEE AVE,60647.0,2013-03-07T00:00:00.000,License,Fail,,41.920055,-87.692462
176992,487855,DUNKIN DONUTS,0.0,,,2640 W DIVERSEY AVE,60647.0,2010-12-23T00:00:00.000,Complaint,Business Not Located,,41.932226,-87.694274
57724,1966438,BAMBU DESSERTS AND DRINKS,2458787.0,,,5010 N BROADWAY,60640.0,2016-10-13T00:00:00.000,License,Not Ready,,41.973068,-87.659926
110571,1459985,GREENLINE COFFEE,2333150.0,Restaurant,,501 E 61ST ST,60637.0,2014-05-21T00:00:00.000,License,Fail,,41.783819,-87.613189
186189,74520,TAMMY'S SWICHES & SALADS,2041764.0,,,1854 W 18TH ST BLDG,60618.0,2010-06-28T00:00:00.000,License,Fail,,41.857806,-87.673279


### Zip

In [8]:
data[data['Zip'].apply(pd.isnull)].sample(10)

Unnamed: 0,Inspection ID,DBA Name,License #,Facility Type,Risk,Address,Zip,Inspection Date,Inspection Type,Results,Violations,Latitude,Longitude
1287,2312540,CHICAGO COLLEGIATE CHARTER,3846104.0,School,Risk 1 (High),10909 S COTTAGE GROVE AVE,,2019-09-19T00:00:00.000,Canvass Re-Inspection,Fail,"3. MANAGEMENT, FOOD EMPLOYEE AND CONDITIONAL E...",41.696087,-87.608945
124406,1115030,SPORT SERVICE SOLDIER FIELD /SUITE PANTRY C,1354323.0,Restaurant,Risk 2 (Medium),1410 S MUSEUM CAMPUS,,2013-09-19T00:00:00.000,Canvass,Out of Business,,41.862628,-87.615031
179803,335363,TISH'S TINY TOWN FOR TOTS CHI,1887746.0,,Risk 1 (High),2301 E 79TH ST,,2010-10-25T00:00:00.000,License,Fail,,41.751592,-87.568606
89695,1538026,FOSTER GROCERY AND DOLLAR PLUS,1992096.0,Grocery Store,Risk 3 (Low),3358 W FOSTER AVE,,2015-05-14T00:00:00.000,Canvass,Out of Business,,41.975801,-87.713499
116370,1396156,THINK SIMPLE FOODS,2308286.0,Restaurant,Risk 2 (Medium),141 W CHICAGO AVE,,2014-02-11T00:00:00.000,License Re-Inspection,Pass,,41.896504,-87.632784
123931,1106878,FILIPINO AMERICAN COUNCIL,2225427.0,Golden Diner,Risk 1 (High),1332 W DRIVING PARK ROAD BSMT,,2013-09-26T00:00:00.000,Canvass,Out of Business,,,
18421,2222692,LEO'S FOOD AND LIQUOR,2535068.0,Liquor,Risk 3 (Low),4471 W LAWRENCE AVE,,2018-09-21T00:00:00.000,License,Pass w/ Conditions,"3. MANAGEMENT, FOOD EMPLOYEE AND CONDITIONAL E...",41.967993,-87.740726
20750,2200428,LEO'S FOOD AND LIQUOR,2535068.0,Liquor,Risk 3 (Low),4471 W LAWRENCE AVE,,2018-08-02T00:00:00.000,License,Not Ready,,41.967993,-87.740726
139613,1279336,UIC COLLEGE PREP,2225460.0,School,Risk 1 (High),1231 S DAMEN,,2012-12-18T00:00:00.000,Canvass,Pass,33. FOOD AND NON-FOOD CONTACT EQUIPMENT UTENSI...,41.8658,-87.676074
177274,483133,JOHNSON COLLEGE PREP HIGH SCHOOL,2353843.0,School,Risk 1 (High),6330 S STEWART ST,,2010-12-16T00:00:00.000,License,Pass,,41.778963,-87.635169


### Inspection Type

In [9]:
data[data['Inspection Type'].apply(pd.isnull)]

Unnamed: 0,Inspection ID,DBA Name,License #,Facility Type,Risk,Address,Zip,Inspection Date,Inspection Type,Results,Violations,Latitude,Longitude
63843,1946612,ARAMARL-FRESHII,2470318.0,Restaurant,Risk 1 (High),2650 N CLARK ST,60614.0,2016-07-14T00:00:00.000,,Pass,,41.930833,-87.644135


### Violations

In [10]:
data[data['Violations'].apply(pd.isnull)].sample(10)
data['Violations'] = data['Violations'].fillna('No data')

### Latitude and longitude
There are entries that are missing coordinate data, which is needed to put them on the map.
We see that all entries have address data, so we try to fill in the null values for the coordinates by using Geopy that uses geocoders to get coordinates based on addresses. Through testing we found that using ArcGIS as a geocoder gives us a perfect result.

In [55]:
locator = geopy.geocoders.ArcGIS(user_agent='myGeocoder')
data_wo_latlong = data[data['Latitude'].apply(np.isnan)].groupby("DBA Name").first()
geocode = RateLimiter(locator.geocode, min_delay_seconds=1)
lats = pd.Series(index=data_wo_latlong.index)
longs = pd.Series(index=data_wo_latlong.index)
for name, address in list(zip(data_wo_latlong.index, data_wo_latlong['Address'])):
    if address == "":
        continue
    location = geocode(address + ", Chicago, United States")
    if location is not None:
        lats[name] = location.latitude
        longs[name] = location.longitude
    else:
        print('Did not find location for address: ' + address)

for name, lat, long in zip(lats.index, lats, longs):
    data['Latitude'] = np.where(data['DBA Name'] == name, lat, data['Latitude'])
    data['Longitude'] = np.where(data['DBA Name'] == name, long, data['Longitude'])

In [56]:
print("Number of entries without coordinate data: " + str(len(data[data["Latitude"].apply(pd.isnull)])))
data[data["Latitude"].apply(pd.isnull)]

Number of entries without coordinate data: 0


Unnamed: 0,Inspection ID,DBA Name,License #,Facility Type,Risk,Address,Zip,Inspection Date,Inspection Type,Results,Violations,Latitude,Longitude


# Projecting food inspections on a map
We want to be able to visualize our dataset on a map based on coordinates. This is done by using Folium.

In [11]:
data_names = data.groupby('DBA Name').first()
print(len(data_names))

27491


In [12]:
middle_lat = data_names.Latitude.median()
middle_lon = data_names.Longitude.median()
ch_map = folium.Map(location = [middle_lat, middle_lon], zoom_start=10)
locations = folium.map.FeatureGroup()
lats = data_names.Latitude
lons = data_names.Longitude
names = data_names.index


for lat, lon, name in list(zip(lats, lons, names))[0:100]:
    if np.isnan(lat) or np.isnan(lon):
        continue
    folium.Marker(
        location =  [lat, lon],
        tooltip = name,
        icon = folium.Icon(color='green', icon='info-sign')
    ).add_to(ch_map)
    
ch_map.add_child(locations)
ch_map

# Finding useful data in the Violations column
Violations seem to have the structure:
(&lt;violation number>. &lt;violation title> - &lt;violation comments> - | )+

According to https://data.cityofchicago.org/api/assets/BAD5301B-681A-4202-9D25-51B2CAE672FF, critical violations are numbered 1-14, and serious violations are numbered 15-29. If these are found and fixed during the inspection, the inspection will pass with conditions. Otherwise it will fail.

In [41]:
data['Violations'][4]

'54. GARBAGE & REFUSE PROPERLY DISPOSED; FACILITIES MAINTAINED - Comments: OBSERVED THE OUTSIDE ENCLOSED REFUSE AREA NOT MAINTAIN. INSTRUCTED MANAGER TO REMOVE AND CLEAN AREA TO PREVENT HARBORAGE. | 55. PHYSICAL FACILITIES INSTALLED, MAINTAINED & CLEAN - Comments: NOTED WATER STAINED CEILING TILES IN THE KIDS WASHROOM. MUST REPLACE AND MAINTAIN. | 56. ADEQUATE VENTILATION & LIGHTING; DESIGNATED AREAS USED - Comments: MUST CLEAN THE HOOD ABOVE THE COOKING EQUIPMENT TO REMOVE DUST.'

In [28]:
re_num = re.compile(r'([0-9]+)\.')
violation_numbers = data['Violations'].apply(re_num.findall).apply(lambda xs: list(map(lambda x: int(x), xs)))
critical_violations = range(1,14+1)
serious_violations = range(15, 29+1)
violation_numbers
# Maybe we can actually connect violation numbers to result "Out of business"
# See under "Under what conditions will a food establishment get shut down?" https://dchealth.dc.gov/service/understanding-food-establishment-inspections
# While this is for Washington DC, I think it could apply to us too
# Also see "Results" in https://data.cityofchicago.org/api/assets/BAD5301B-681A-4202-9D25-51B2CAE672FF

0                   [3, 5, 25, 50, 74, 53, 53, 56, 58]
1                                                   []
2                                                   []
3                                         [10, 55, 56]
4                                         [54, 55, 56]
                              ...                     
194899                                              []
194900                            [18, 32, 34, 35, 36]
194901                                              []
194902    [18, 19, 32, 33, 34, 35, 36, 37, 38, 39, 41]
194903                                              []
Name: Violations, Length: 194708, dtype: object

In [29]:
violations_severity = violation_numbers.apply(lambda xs: list(map(lambda x: 'critical' if x in critical_violations else 'serious' if x in serious_violations else 'non-serious', xs)))
violations_severity

0         [critical, critical, serious, non-serious, non...
1                                                        []
2                                                        []
3                      [critical, non-serious, non-serious]
4                   [non-serious, non-serious, non-serious]
                                ...                        
194899                                                   []
194900    [serious, non-serious, non-serious, non-seriou...
194901                                                   []
194902    [serious, serious, non-serious, non-serious, n...
194903                                                   []
Name: Violations, Length: 194708, dtype: object

# Supplementing the data

## Income by zip code (actually, anything by zip code if we want)
https://towardsdatascience.com/getting-census-data-in-5-easy-steps-a08eeb63995d

Attribution statement: This product uses the Census Bureau Data API but is not endorsed or certified by the Census Bureau.

Zip code 60666 is the zip code for O'Hare International Airport.
Zip code 60627 is the zip code for Golden Gate Park.
These do not have a household median income since nobody lives there.

In [None]:
income_by_zip_address = "https://api.census.gov/data/%s/acs/acs5?key=%s&get=%s&for=zip%%20code%%20tabulation%%20area:%s" 
year = "2017" # 2009-2017
api_key = "" # Request here: https://www.census.gov/developers/
income_variable = "B19013_001E"
zip_codes = data['Zip'].drop_duplicates()

for zip_code in zip_codes:
    if pd.isnull(zip_code):
        continue
        
    called_api = income_by_zip_address % (year, api_key, income_variable, int(zip_code))
    response = requests.get(called_api)
    if response.status_code == 200:
        formatted_response = json.loads(response.text)
        print(formatted_response)
    else:
        print('Could not find income for zip code: ' + str(int(zip_code)))
    # add to some list or something, also exception handling
    

# Information about columns

## Facility Type
From https://data.cityofchicago.org/api/assets/BAD5301B-681A-4202-9D25-51B2CAE672FF:

Type of facility: Each establishment is described by one of the following: bakery, banquet hall, candy store, caterer, coffee shop, day care center (for ages less than 2), day care center (for ages 2 – 6), day care center (combo, for ages less than 2 and 2 – 6 combined), gas station, Golden Diner, grocery store, hospital, long term care center(nursing home), liquor store, mobile food dispenser, restaurant, paleteria, school, shelter, tavern, social club, wholesaler, or Wrigley Field Rooftop. 

In [244]:
facility_types = data['Facility Type'].drop_duplicates().reset_index(drop=True).apply(str)
valid_facility_types = ['bakery', 'banquet hall', 'candy', 'caterer', 'coffee shop', 'daycare', 'gas station', 'golden diner', 'grocery store', 'hospital', 'long term', 'liquor store', 'mobile food', 'restaurant', 'paleteria', 'school', 'shelter', 'tavern', 'social club', 'wholesaler', 'wrigley field rooftop']
print(facility_types[facility_types.apply(str.lower).map(lambda x: any([y in x for y in valid_facility_types]))])
print(facility_types[facility_types.apply(str.lower).map(lambda x: not any([y in x for y in valid_facility_types]))])
# What to do? There are sooo many different facilities.

0                           Restaurant
2                        Grocery Store
3      Daycare Above and Under 2 Years
4                Daycare (2 - 6 Years)
6                 Mobile Food Preparer
                    ...               
481                       candy/gelato
482                        CANDY MAKER
484                      TAVERN/LIQUOR
485                              Candy
487                   WHOLESALE BAKERY
Name: Facility Type, Length: 145, dtype: object
1      Children's Services Facility
5                          Catering
7                            Liquor
10      CHILDRENS SERVICES FACILITY
11                              nan
                   ...             
479                    SOUP KITCHEN
480                    hooka lounge
483                       religious
486                       incubator
488                      Kids Cafe'
Name: Facility Type, Length: 344, dtype: object


## Inspection types
From https://data.cityofchicago.org/api/assets/BAD5301B-681A-4202-9D25-51B2CAE672FF:

Inspection type: An inspection can be one of the following types: 
* canvass, the most common type of inspection performed at a frequency relative to the risk of the establishment; 
* consultation, when the inspection is done at the request of the owner prior to the opening of the establishment; complaint, when  the inspection is done in response to a complaint against the establishment; 
* license, when the inspection is done as a requirement for the establishment to receive its license to operate; 
* suspect food poisoning, when the inspection is done in response to one or more persons claiming to have gotten ill as a result of eating at the establishment (a specific type of complaint-based inspection); 
* task-force inspection, when an inspection of a bar or tavern is done. 
Re-inspections can occur for most types of these inspections and are indicated as such.

In [226]:
inspection_types = data['Inspection Type'].drop_duplicates().reset_index(drop=True).apply(str)
valid_inspection_types = ['canvass', 'consultation', 'license', 'complaint', 'food poisoning', 'task']
pd.set_option('display.max_rows', 108)
print(inspection_types[inspection_types.apply(str.lower).map(lambda x: any([y in x for y in valid_inspection_types]))])
print(inspection_types[inspection_types.apply(str.lower).map(lambda x: not any([y in x for y in valid_inspection_types]))])
pd.reset_option('display.max_rows')

# What to do? We could manually annotate them since there are not too many. 
# We also need to figure out what a combination of types mean, for example "License-Task Force"
# Also, there are a few things that are not correctly classified using only keywords, for example "OWNER SUSPENDED OPERATION/LICENSE" will be classified as license

0                          License Re-Inspection
1                        Complaint Re-Inspection
2                                        License
3                                        Canvass
4                          Canvass Re-Inspection
6                                      Complaint
7                           Short Form Complaint
8                       Suspected Food Poisoning
9         Suspected Food Poisoning Re-inspection
11                                  Consultation
15                                fire complaint
19                            License-Task Force
20                  Complaint-Fire Re-inspection
21                                Complaint-Fire
22                     Short Form Fire-Complaint
26                     LICENSE CANCELED BY OWNER
27             OWNER SUSPENDED OPERATION/LICENSE
28                          LICENSE CONSULTATION
29                          License consultation
30                        Task Force Liquor 1475
34                  

## Results
From https://data.cityofchicago.org/api/assets/BAD5301B-681A-4202-9D25-51B2CAE672FF:

An inspection can pass, pass with conditions or fail. Establishments receiving a ‘pass’ were found to have no critical or serious violations (violation number 1-14 and 15-29, respectively). Establishments receiving a ‘pass with conditions’ were found to have critical or serious violations, but these were corrected during the inspection. Establishments receiving a ‘fail’ were found to have critical or serious violations that were not correctable during the inspection. An establishment receiving a ‘fail’ does not necessarily mean the establishment’s licensed is suspended. Establishments found to be out of business or not located are indicated as such. 

In [49]:
results = data['Results'].drop_duplicates().reset_index(drop=True).apply(str)
valid_results = ['pass','pass w/ conditions','fail','out of business', 'business not located']
print(results[results.apply(str.lower).map(lambda x: any([y == x for y in valid_results]))])
print(results[results.apply(str.lower).map(lambda x: not any([y == x for y in valid_results]))])
# Easy to add some more categories

0                    Fail
1                    Pass
3      Pass w/ Conditions
5         Out of Business
6    Business Not Located
Name: Results, dtype: object
2    Not Ready
4     No Entry
Name: Results, dtype: object


In [169]:
# chicago_map = gpd.read_file('data/chicago_zip.shp') # https://www.kaggle.com/threadid/chicago-shape-files

In [170]:
# ax = chicago_map.plot()
# g_data.plot(ax=ax, color='red')