# Minneapolis Food Inspections - ETL

In [1]:
# Dependencies

import pandas as pd
import numpy as np
from sqlalchemy import create_engine 
from config import (user, password, host, port, database)

In [2]:
# Load the inspection export; the relative path is resolved against the
# notebook's current working directory.
food_insp_df = pd.read_csv(filepath_or_buffer="Food_Inspections.csv")

In [3]:
# Columns treated as unnecessary for this ETL; they are removed in one pass.
columns_to_drop = [
    'X', 'Y', 'HealthFacilityIDNumber',
    'AddrNum', 'AddrNumSuffix', 'StreetName', 'StreetType', 'PostDir', 'Unit',
    'City', 'State', 'County', 'ZipCode',
    'APN', 'Ward', 'X_WebMercator', 'Y_WebMercator',
]

# drop() returns a new frame, so food_insp_df itself is left as loaded.
food_insp_df_clean = food_insp_df.drop(columns_to_drop, axis="columns")
food_insp_df_clean.head()

Unnamed: 0,OBJECTID,FacilityCategory,BusinessName,RiskLevel,FullAddress,InspectionType,InspectionResult,DateOfInspection,InspectionIDNumber,FoodCodeText,...,ViolationPriority,FoodCodeItem,YearOfInspection,ViolationStatus,ViolationPoints,InspectionScore,ViolationIDNumber,Neighborhood,Latitude,Longitude
0,1,RESTAURANT,SOTAROL UPTOWN,1,2935 GIRARD AVE S,Routine,Complete,2018/07/18 13:50:00+00,204578,Develop written procedures prior to using tim...,...,Priority2,MN Rule 4626.0408A,2018,Observed,2,92,625815,Lowry Hill East,44.94885,-93.29663
1,2,RESTAURANT,SOTAROL UPTOWN,1,2935 GIRARD AVE S,Routine,Complete,2018/07/18 13:50:00+00,204578,When using time only as a public health contro...,...,Priority1,MN Rule 4626.0408B,2018,Observed,4,92,625817,Lowry Hill East,44.94885,-93.29663
2,3,RESTAURANT,SOTAROL UPTOWN,1,2935 GIRARD AVE S,Routine,Complete,2018/07/18 13:50:00+00,204578,Employ a certified food protection manager (CF...,...,Priority3,MN Rule 4626.0033A,2018,Observed,0,92,681981,Lowry Hill East,44.94885,-93.29663
3,4,RESTAURANT,SOTAROL UPTOWN,1,2935 GIRARD AVE S,Routine,Complete,2018/07/18 13:50:00+00,204578,Food packages must be in good condition and mu...,...,Priority2,MN Rule 4626.0190,2018,Observed,2,92,681984,Lowry Hill East,44.94885,-93.29663
4,5,RESTAURANT,SOTAROL UPTOWN,1,2935 GIRARD AVE S,Routine,Complete,2018/07/18 13:50:00+00,204578,Thaw TCS food by one of the following methods:...,...,Priority3,MN Rule 4626.0380ABC,2018,Observed,0,92,682062,Lowry Hill East,44.94885,-93.29663


In [4]:
# Drop every row that has a missing value in ANY of the remaining columns.
# The result is rebound to the same name instead of using inplace=True, so the
# frame object built (and displayed) by the previous cell is not mutated.
# NOTE(review): index 0 is a RESTAURANT row in the first preview but is absent
# from the restaurant preview further down, so this step appears to remove rows
# whose only NA may sit in a column that is dropped later - confirm whether
# dropna should be limited to the columns that are actually kept.
food_insp_df_clean = food_insp_df_clean.dropna(how="any")

In [9]:
# Keep only the rows whose FacilityCategory is exactly "RESTAURANT".
is_restaurant = food_insp_df_clean["FacilityCategory"].eq("RESTAURANT")
restaurants_df = food_insp_df_clean[is_restaurant]
restaurants_df.head(20)

Unnamed: 0,OBJECTID,FacilityCategory,BusinessName,RiskLevel,FullAddress,InspectionType,InspectionResult,DateOfInspection,InspectionIDNumber,FoodCodeText,...,ViolationPriority,FoodCodeItem,YearOfInspection,ViolationStatus,ViolationPoints,InspectionScore,ViolationIDNumber,Neighborhood,Latitude,Longitude
1,2,RESTAURANT,SOTAROL UPTOWN,1,2935 GIRARD AVE S,Routine,Complete,2018/07/18 13:50:00+00,204578,When using time only as a public health contro...,...,Priority1,MN Rule 4626.0408B,2018,Observed,4,92,625817,Lowry Hill East,44.94885,-93.29663
2,3,RESTAURANT,SOTAROL UPTOWN,1,2935 GIRARD AVE S,Routine,Complete,2018/07/18 13:50:00+00,204578,Employ a certified food protection manager (CF...,...,Priority3,MN Rule 4626.0033A,2018,Observed,0,92,681981,Lowry Hill East,44.94885,-93.29663
3,4,RESTAURANT,SOTAROL UPTOWN,1,2935 GIRARD AVE S,Routine,Complete,2018/07/18 13:50:00+00,204578,Food packages must be in good condition and mu...,...,Priority2,MN Rule 4626.0190,2018,Observed,2,92,681984,Lowry Hill East,44.94885,-93.29663
4,5,RESTAURANT,SOTAROL UPTOWN,1,2935 GIRARD AVE S,Routine,Complete,2018/07/18 13:50:00+00,204578,Thaw TCS food by one of the following methods:...,...,Priority3,MN Rule 4626.0380ABC,2018,Observed,0,92,682062,Lowry Hill East,44.94885,-93.29663
5,6,RESTAURANT,SOTAROL UPTOWN,1,2935 GIRARD AVE S,Routine,Complete,2019/07/18 11:45:00+00,254817,Provide a sign or poster at all handwashing si...,...,Priority3,MN Rule 4626.1457,2019,Corrected on-site,0,100,730395,Lowry Hill East,44.94885,-93.29663
6,7,RESTAURANT,SOTAROL UPTOWN,1,2935 GIRARD AVE S,Routine,Complete,2019/07/18 11:45:00+00,254817,The handwashing sink must be accessible at all...,...,Priority2,MN Rule 4626.1110AB,2019,Corrected on-site,2,100,730396,Lowry Hill East,44.94885,-93.29663
7,8,RESTAURANT,SOTAROL UPTOWN,1,2935 GIRARD AVE S,Routine,Complete,2019/07/18 11:45:00+00,254817,Provide and maintain a supply of individual di...,...,Priority2,MN Rule 4626.1445,2019,Corrected on-site,2,100,730398,Lowry Hill East,44.94885,-93.29663
8,9,RESTAURANT,SOTAROL UPTOWN,1,2935 GIRARD AVE S,Routine,Complete,2019/07/18 11:45:00+00,254817,"Mark the refrigerated, ready-to-eat, TCS food ...",...,Priority2,MN Rule 4626.0400A,2019,Corrected on-site,2,100,730399,Lowry Hill East,44.94885,-93.29663
9,10,RESTAURANT,SOTAROL UPTOWN,1,2935 GIRARD AVE S,Routine,Complete,2019/07/18 11:45:00+00,254817,"Separate raw animal foods during storage, prep...",...,Priority1,MN Rule 4626.0235A(1),2019,Corrected on-site,4,100,730400,Lowry Hill East,44.94885,-93.29663
10,11,RESTAURANT,SOTAROL UPTOWN,1,2935 GIRARD AVE S,Routine,Complete,2019/07/18 11:45:00+00,254817,Properly label all working containers holding ...,...,Priority3,MN Rule 4626.0240,2019,Corrected on-site,0,100,730401,Lowry Hill East,44.94885,-93.29663


In [10]:
# Preview one row per inspection: select the inspection-level columns and keep
# the first row encountered for each InspectionIDNumber.
(
    restaurants_df
    .loc[:, [
        "InspectionIDNumber",
        "DateOfInspection",
        "BusinessName",
        "YearOfInspection",
        "InspectionType",
        "InspectionScore",
    ]]
    .drop_duplicates(subset="InspectionIDNumber")
    .head(20)
)

Unnamed: 0,InspectionIDNumber,DateOfInspection,BusinessName,YearOfInspection,InspectionType,InspectionScore
1,204578,2018/07/18 13:50:00+00,SOTAROL UPTOWN,2018,Routine,92
5,254817,2019/07/18 11:45:00+00,SOTAROL UPTOWN,2019,Routine,100
15,279484,2019/11/08 12:00:00+00,CRISP & GREEN,2019,Routine,84
25,228096,2018/10/11 10:55:00+00,LUCY BAR & RESTAURANT LIMITED LIABILITY COMPANY,2018,Routine,82
38,275485,2019/10/11 13:35:00+00,LUCY BAR & RESTAURANT LIMITED LIABILITY COMPANY,2019,Routine,80
53,275486,2018/12/07 11:55:00+00,LUCY BAR & RESTAURANT LIMITED LIABILITY COMPANY,2018,Follow-Up,96
57,332891,2019/12/06 12:05:00+00,LUCY BAR & RESTAURANT LIMITED LIABILITY COMPANY,2019,Follow-Up,98
60,230069,2018/02/12 15:50:00+00,"DOUBLE DANGER, LLC",2018,Follow-Up,100
61,230070,2018/10/05 11:30:00+00,"DOUBLE DANGER, LLC",2018,Routine,94
65,242722,2018/04/17 12:00:00+00,"DOUBLE DANGER, LLC",2018,Follow-Up,100


In [11]:
# Show the column labels currently present in restaurants_df.
restaurants_df.columns

Index(['OBJECTID', 'FacilityCategory', 'BusinessName', 'RiskLevel',
       'FullAddress', 'InspectionType', 'InspectionResult', 'DateOfInspection',
       'InspectionIDNumber', 'FoodCodeText', 'InspectorComments',
       'ViolationPriority', 'FoodCodeItem', 'YearOfInspection',
       'ViolationStatus', 'ViolationPoints', 'InspectionScore',
       'ViolationIDNumber', 'Neighborhood', 'Latitude', 'Longitude'],
      dtype='object')

In [13]:
# Inspection- and violation-detail columns that are not needed going forward.
inspection_details = [
    'FoodCodeText', 'RiskLevel', 'InspectorComments',
    'ViolationPriority', 'FoodCodeItem', 'YearOfInspection',
    'ViolationStatus', 'ViolationPoints', 'ViolationIDNumber',
]

# drop() builds a new frame; restaurants_df keeps its detail columns.
restaurants_df_clean = restaurants_df.drop(inspection_details, axis="columns")
restaurants_df_clean.head(10)

Unnamed: 0,OBJECTID,FacilityCategory,BusinessName,FullAddress,InspectionType,InspectionResult,DateOfInspection,InspectionIDNumber,InspectionScore,Neighborhood,Latitude,Longitude
1,2,RESTAURANT,SOTAROL UPTOWN,2935 GIRARD AVE S,Routine,Complete,2018/07/18 13:50:00+00,204578,92,Lowry Hill East,44.94885,-93.29663
2,3,RESTAURANT,SOTAROL UPTOWN,2935 GIRARD AVE S,Routine,Complete,2018/07/18 13:50:00+00,204578,92,Lowry Hill East,44.94885,-93.29663
3,4,RESTAURANT,SOTAROL UPTOWN,2935 GIRARD AVE S,Routine,Complete,2018/07/18 13:50:00+00,204578,92,Lowry Hill East,44.94885,-93.29663
4,5,RESTAURANT,SOTAROL UPTOWN,2935 GIRARD AVE S,Routine,Complete,2018/07/18 13:50:00+00,204578,92,Lowry Hill East,44.94885,-93.29663
5,6,RESTAURANT,SOTAROL UPTOWN,2935 GIRARD AVE S,Routine,Complete,2019/07/18 11:45:00+00,254817,100,Lowry Hill East,44.94885,-93.29663
6,7,RESTAURANT,SOTAROL UPTOWN,2935 GIRARD AVE S,Routine,Complete,2019/07/18 11:45:00+00,254817,100,Lowry Hill East,44.94885,-93.29663
7,8,RESTAURANT,SOTAROL UPTOWN,2935 GIRARD AVE S,Routine,Complete,2019/07/18 11:45:00+00,254817,100,Lowry Hill East,44.94885,-93.29663
8,9,RESTAURANT,SOTAROL UPTOWN,2935 GIRARD AVE S,Routine,Complete,2019/07/18 11:45:00+00,254817,100,Lowry Hill East,44.94885,-93.29663
9,10,RESTAURANT,SOTAROL UPTOWN,2935 GIRARD AVE S,Routine,Complete,2019/07/18 11:45:00+00,254817,100,Lowry Hill East,44.94885,-93.29663
10,11,RESTAURANT,SOTAROL UPTOWN,2935 GIRARD AVE S,Routine,Complete,2019/07/18 11:45:00+00,254817,100,Lowry Hill East,44.94885,-93.29663


In [15]:
# Reduce to a single row per inspection (each inspection has a unique ID),
# keeping the first row that appears for every InspectionIDNumber.
restaurants_df_unique = restaurants_df_clean.drop_duplicates(
    subset=["InspectionIDNumber"],
    keep="first",
)
restaurants_df_unique.head(20)

Unnamed: 0,OBJECTID,FacilityCategory,BusinessName,FullAddress,InspectionType,InspectionResult,DateOfInspection,InspectionIDNumber,InspectionScore,Neighborhood,Latitude,Longitude
1,2,RESTAURANT,SOTAROL UPTOWN,2935 GIRARD AVE S,Routine,Complete,2018/07/18 13:50:00+00,204578,92,Lowry Hill East,44.94885,-93.29663
5,6,RESTAURANT,SOTAROL UPTOWN,2935 GIRARD AVE S,Routine,Complete,2019/07/18 11:45:00+00,254817,100,Lowry Hill East,44.94885,-93.29663
15,16,RESTAURANT,CRISP & GREEN,428 WASHINGTON AVE N,Routine,Complete,2019/11/08 12:00:00+00,279484,84,North Loop,44.98619,-93.27411
25,26,RESTAURANT,LUCY BAR & RESTAURANT LIMITED LIABILITY COMPANY,3915 HIAWATHA AVE,Routine,Complete,2018/10/11 10:55:00+00,228096,82,Howe,44.93168,-93.22639
38,39,RESTAURANT,LUCY BAR & RESTAURANT LIMITED LIABILITY COMPANY,3915 HIAWATHA AVE,Routine,Complete,2019/10/11 13:35:00+00,275485,80,Howe,44.93168,-93.22639
53,54,RESTAURANT,LUCY BAR & RESTAURANT LIMITED LIABILITY COMPANY,3915 HIAWATHA AVE,Follow-Up,Complete,2018/12/07 11:55:00+00,275486,96,Howe,44.93168,-93.22639
57,58,RESTAURANT,LUCY BAR & RESTAURANT LIMITED LIABILITY COMPANY,3915 HIAWATHA AVE,Follow-Up,Complete,2019/12/06 12:05:00+00,332891,98,Howe,44.93168,-93.22639
60,61,RESTAURANT,"DOUBLE DANGER, LLC",113 26TH ST E,Follow-Up,Complete,2018/02/12 15:50:00+00,230069,100,Whittier,44.95511,-93.27569
61,62,RESTAURANT,"DOUBLE DANGER, LLC",113 26TH ST E,Routine,Complete,2018/10/05 11:30:00+00,230070,94,Whittier,44.95511,-93.27569
65,66,RESTAURANT,"DOUBLE DANGER, LLC",113 26TH ST E,Follow-Up,Complete,2018/04/17 12:00:00+00,242722,100,Whittier,44.95511,-93.27569
