In [1]:
import csv
from collections import Counter, defaultdict

In [4]:
food = list(csv.DictReader(open('data/food-inspections.csv')))

In [5]:
food[0]

{'Inspection ID': '2352683',
 'DBA Name': 'CHIPOTLE MEXICAN GRILL',
 'AKA Name': 'CHIPOTLE MEXICAN GRILL',
 'License #': '2670642',
 'Facility Type': 'Restaurant',
 'Risk': 'Risk 1 (High)',
 'Address': '1025 W ADDISON ST ',
 'City': 'CHICAGO',
 'State': 'IL',
 'Zip': '60613',
 'Inspection Date': '2019-12-04T00:00:00.000',
 'Inspection Type': 'License Re-Inspection',
 'Results': 'Pass w/ Conditions',
 'Violations': '3. MANAGEMENT, FOOD EMPLOYEE AND CONDITIONAL EMPLOYEE; KNOWLEDGE, RESPONSIBILITIES AND REPORTING - Comments: FOUND NO EMPLOYEE HEALTH POLICY AT PREMISES. PRIORITY FOUNDATION. NO CITATION ISSUED. 7-38-010. MUST PROVIDE.',
 'Latitude': '41.94711774835235',
 'Longitude': '-87.6553811622124',
 'Location': "{'latitude': '-87.6553811622124', 'longitude': '41.94711774835235'}",
 'Historical Wards 2003-2015': '',
 'Zip Codes': '',
 'Community Areas': '',
 'Census Tracts': '',
 'Wards': ''}

In [6]:
len(food)

196825

In [7]:
# unique values of the 'Results' attribute
{ row['Results'] for row in food }

{'Business Not Located',
 'Fail',
 'No Entry',
 'Not Ready',
 'Out of Business',
 'Pass',
 'Pass w/ Conditions'}

In [8]:
fail = [ row for row in food if row['Results'] == 'Fail' ]

In [9]:
len(fail)

38087

In [10]:
worst = Counter(row['DBA Name'] for row in fail)
worst.most_common(5)

[('SUBWAY', 342),
 ('DUNKIN DONUTS', 209),
 ("MCDONALD'S", 112),
 ('7-ELEVEN', 61),
 ('MCDONALDS', 59)]

In [11]:
fail = [ { **row, 'DBA Name': row['DBA Name'].replace("'", '').upper() }
          for row in fail ]

In [12]:
worst = Counter(row['DBA Name'] for row in fail)
worst.most_common(5)

[('SUBWAY', 370),
 ('DUNKIN DONUTS', 228),
 ('MCDONALDS', 223),
 ('7-ELEVEN', 67),
 ('CHIPOTLE MEXICAN GRILL', 64)]

In [13]:
bad = Counter(row['Address'] for row in fail)
bad.most_common(5)

[('11601 W TOUHY AVE ', 340),
 ('2300 S THROOP ST ', 103),
 ('324 N LEAVITT ST ', 88),
 ('500 W MADISON ST ', 76),
 ('5700 S CICERO AVE ', 63)]

In [14]:
by_year = defaultdict(Counter)
for row in fail:
    by_year[row['Inspection Date'][:4]][row['Address']] += 1

In [15]:
by_year["2015"].most_common(5)

[('11601 W TOUHY AVE ', 39),
 ('500 W MADISON ST ', 12),
 ('307 S KEDZIE AVE ', 9),
 ('324 N LEAVITT ST ', 9),
 ('12 S MICHIGAN AVE ', 8)]

In [16]:
by_year["2019"].most_common(5)

[('11601 W TOUHY AVE ', 37),
 ('2300 S THROOP ST ', 24),
 ('2002 S WENTWORTH AVE ', 6),
 ('311 W MONROE ST ', 5),
 ('131 N CLINTON ST ', 5)]

In [17]:
airport = [ row for row in fail if row['Address'].startswith('11601 W TOUHY')]

In [18]:
{row['Address'] for row in airport }

{'11601 W TOUHY AVE ', '11601 W TOUHY AVE T2 F12'}

In [19]:
c = Counter(row['AKA Name'] for row in airport)
c.most_common(5)

[('MACARONI GRILL (T3-K2)', 12),
 ('ARGO TEA  (T3 ROTUNDA)', 9),
 ('TOCCO (T5 M-07)', 8),
 ('ARGO TEA (T2/E5)', 7),
 ("CHILI'S  TOO (T3-H2)", 7)]

In [27]:
violations = airport[1]["Violations"].split("|")
violations

['6. PROPER EATING, TASTING, DRINKING, OR TOBACCO USE - Comments: OBSERVED MALE FOOD EMPLOYEE DRINKING BEVERAGE OUT OF AN OPEN CUP WHILE ACTIVELY PREPPING FOODS IN REAR FOOD PREP AREA.FOOD EMPLOYEE BEVERAGE MUST BE CONSUMED IN A DESIGNATED AREA AND TO MAINTAIN. ',
 ' 37. FOOD PROPERLY LABELED; ORIGINAL CONTAINER - Comments: LARGE BULK CONTAINER OF FLOUR IN THE PIZZA PREP AREA IS NOT LABELED.MUST LABEL TO IDENTIFY FOOD STORED AND TO MAINTAIN. ',
 ' 38. INSECTS, RODENTS, & ANIMALS NOT PRESENT - Comments: OBSERVED LIVE FRUIT FLIES IN THE REAR PREP/DISH WASH AREA AND THE BAR AREA. APPROXIMATELY 50 LIVE FRUIT FLIES WERE OBSERVED ON THE WALLS, CEILING AND EQUIPMENT IN THESE AREAS.INSTRUCTED MANAGER TO REMOVE, TO CLEAN AND SANITIZE ALL AFFECTED AREAS IN DETAIL AND RECCOMEND TO CALL LICENSED PEST CONTROL COMPANY FOR SERVICE.PRIORITY FOUNDATION VIOLATION 7-38-020 (A), CITATION ISSUED.  ',
 ' 47. FOOD & NON-FOOD CONTACT SURFACES CLEANABLE, PROPERLY DESIGNED, CONSTRUCTED & USED - Comments: THE SA

In [39]:
[v[:v.find("- Comments")].strip() for v in violations]

['6. PROPER EATING, TASTING, DRINKING, OR TOBACCO USE',
 '37. FOOD PROPERLY LABELED; ORIGINAL CONTAINER',
 '38. INSECTS, RODENTS, & ANIMALS NOT PRESENT',
 '47. FOOD & NON-FOOD CONTACT SURFACES CLEANABLE, PROPERLY DESIGNED, CONSTRUCTED & USED',
 '49. NON-FOOD/FOOD CONTACT SURFACES CLEAN',
 '52. SEWAGE & WASTE WATER PROPERLY DISPOSED',
 '55. PHYSICAL FACILITIES INSTALLED, MAINTAINED & CLEAN']

In [44]:
all_violations = [row["Violations"].split("|") for row in airport]
c = Counter()
for violation in all_violations:
    for v in violations:
        c[v[:v.find("- Comments")].strip()] += 1
c

Counter({'6. PROPER EATING, TASTING, DRINKING, OR TOBACCO USE': 341,
         '37. FOOD PROPERLY LABELED; ORIGINAL CONTAINER': 341,
         '38. INSECTS, RODENTS, & ANIMALS NOT PRESENT': 341,
         '47. FOOD & NON-FOOD CONTACT SURFACES CLEANABLE, PROPERLY DESIGNED, CONSTRUCTED & USED': 341,
         '49. NON-FOOD/FOOD CONTACT SURFACES CLEAN': 341,
         '52. SEWAGE & WASTE WATER PROPERLY DISPOSED': 341,
         '55. PHYSICAL FACILITIES INSTALLED, MAINTAINED & CLEAN': 341})

In [50]:
c.most_common(5)

[('6. PROPER EATING, TASTING, DRINKING, OR TOBACCO USE', 341),
 ('37. FOOD PROPERLY LABELED; ORIGINAL CONTAINER', 341),
 ('38. INSECTS, RODENTS, & ANIMALS NOT PRESENT', 341),
 ('47. FOOD & NON-FOOD CONTACT SURFACES CLEANABLE, PROPERLY DESIGNED, CONSTRUCTED & USED',
  341),
 ('49. NON-FOOD/FOOD CONTACT SURFACES CLEAN', 341)]