# Minneapolis Food Inspections - ETL

In [23]:
# Dependencies

import pandas as pd
import numpy as np
from sqlalchemy import create_engine 
from config import (user, password, host, port, database)

In [24]:
# Read CSV 

food_insp_df = pd.read_csv("Food_Inspections.csv")

In [25]:
# Drop unneeded columns
columns_to_drop = ['X', 'Y', 'HealthFacilityIDNumber',  'AddrNum', 'AddrNumSuffix',
       'StreetName', 'StreetType', 'PostDir', 'Unit', 'City', 'State',
       'County', 'ZipCode', 'APN', 'Ward', 'X_WebMercator', 'Y_WebMercator']

food_insp_df_clean = food_insp_df.drop(columns = columns_to_drop)

In [26]:
# Drop any existing NA's
food_insp_df_clean.dropna(how = 'any', inplace = True)

In [27]:
# Drop non-restaurant facilities
restaurants_df = food_insp_df_clean.loc[food_insp_df_clean["FacilityCategory"] == "RESTAURANT"]
restaurants_df.head(20)

Unnamed: 0,OBJECTID,FacilityCategory,BusinessName,RiskLevel,FullAddress,InspectionType,InspectionResult,DateOfInspection,InspectionIDNumber,FoodCodeText,...,ViolationPriority,FoodCodeItem,YearOfInspection,ViolationStatus,ViolationPoints,InspectionScore,ViolationIDNumber,Neighborhood,Latitude,Longitude
1,2,RESTAURANT,SOTAROL UPTOWN,1,2935 GIRARD AVE S,Routine,Complete,2018/07/18 13:50:00+00,204578,When using time only as a public health contro...,...,Priority1,MN Rule 4626.0408B,2018,Observed,4,92,625817,Lowry Hill East,44.94885,-93.29663
2,3,RESTAURANT,SOTAROL UPTOWN,1,2935 GIRARD AVE S,Routine,Complete,2018/07/18 13:50:00+00,204578,Employ a certified food protection manager (CF...,...,Priority3,MN Rule 4626.0033A,2018,Observed,0,92,681981,Lowry Hill East,44.94885,-93.29663
3,4,RESTAURANT,SOTAROL UPTOWN,1,2935 GIRARD AVE S,Routine,Complete,2018/07/18 13:50:00+00,204578,Food packages must be in good condition and mu...,...,Priority2,MN Rule 4626.0190,2018,Observed,2,92,681984,Lowry Hill East,44.94885,-93.29663
4,5,RESTAURANT,SOTAROL UPTOWN,1,2935 GIRARD AVE S,Routine,Complete,2018/07/18 13:50:00+00,204578,Thaw TCS food by one of the following methods:...,...,Priority3,MN Rule 4626.0380ABC,2018,Observed,0,92,682062,Lowry Hill East,44.94885,-93.29663
5,6,RESTAURANT,SOTAROL UPTOWN,1,2935 GIRARD AVE S,Routine,Complete,2019/07/18 11:45:00+00,254817,Provide a sign or poster at all handwashing si...,...,Priority3,MN Rule 4626.1457,2019,Corrected on-site,0,100,730395,Lowry Hill East,44.94885,-93.29663
6,7,RESTAURANT,SOTAROL UPTOWN,1,2935 GIRARD AVE S,Routine,Complete,2019/07/18 11:45:00+00,254817,The handwashing sink must be accessible at all...,...,Priority2,MN Rule 4626.1110AB,2019,Corrected on-site,2,100,730396,Lowry Hill East,44.94885,-93.29663
7,8,RESTAURANT,SOTAROL UPTOWN,1,2935 GIRARD AVE S,Routine,Complete,2019/07/18 11:45:00+00,254817,Provide and maintain a supply of individual di...,...,Priority2,MN Rule 4626.1445,2019,Corrected on-site,2,100,730398,Lowry Hill East,44.94885,-93.29663
8,9,RESTAURANT,SOTAROL UPTOWN,1,2935 GIRARD AVE S,Routine,Complete,2019/07/18 11:45:00+00,254817,"Mark the refrigerated, ready-to-eat, TCS food ...",...,Priority2,MN Rule 4626.0400A,2019,Corrected on-site,2,100,730399,Lowry Hill East,44.94885,-93.29663
9,10,RESTAURANT,SOTAROL UPTOWN,1,2935 GIRARD AVE S,Routine,Complete,2019/07/18 11:45:00+00,254817,"Separate raw animal foods during storage, prep...",...,Priority1,MN Rule 4626.0235A(1),2019,Corrected on-site,4,100,730400,Lowry Hill East,44.94885,-93.29663
10,11,RESTAURANT,SOTAROL UPTOWN,1,2935 GIRARD AVE S,Routine,Complete,2019/07/18 11:45:00+00,254817,Properly label all working containers holding ...,...,Priority3,MN Rule 4626.0240,2019,Corrected on-site,0,100,730401,Lowry Hill East,44.94885,-93.29663


In [28]:
# Drop details of inspections and violations, won't be needed, as well as Restaurant Category
inspection_details = ['FoodCodeText', 'FacilityCategory', 'RiskLevel', 'InspectorComments',
       'InspectionResult', 'ViolationPriority', 'FoodCodeItem', 'YearOfInspection',
       'ViolationStatus', 'ViolationPoints', 'ViolationIDNumber']

restaurants_df_clean = restaurants_df.drop(columns = inspection_details)
restaurants_df_clean.head(10)

Unnamed: 0,OBJECTID,BusinessName,FullAddress,InspectionType,DateOfInspection,InspectionIDNumber,InspectionScore,Neighborhood,Latitude,Longitude
1,2,SOTAROL UPTOWN,2935 GIRARD AVE S,Routine,2018/07/18 13:50:00+00,204578,92,Lowry Hill East,44.94885,-93.29663
2,3,SOTAROL UPTOWN,2935 GIRARD AVE S,Routine,2018/07/18 13:50:00+00,204578,92,Lowry Hill East,44.94885,-93.29663
3,4,SOTAROL UPTOWN,2935 GIRARD AVE S,Routine,2018/07/18 13:50:00+00,204578,92,Lowry Hill East,44.94885,-93.29663
4,5,SOTAROL UPTOWN,2935 GIRARD AVE S,Routine,2018/07/18 13:50:00+00,204578,92,Lowry Hill East,44.94885,-93.29663
5,6,SOTAROL UPTOWN,2935 GIRARD AVE S,Routine,2019/07/18 11:45:00+00,254817,100,Lowry Hill East,44.94885,-93.29663
6,7,SOTAROL UPTOWN,2935 GIRARD AVE S,Routine,2019/07/18 11:45:00+00,254817,100,Lowry Hill East,44.94885,-93.29663
7,8,SOTAROL UPTOWN,2935 GIRARD AVE S,Routine,2019/07/18 11:45:00+00,254817,100,Lowry Hill East,44.94885,-93.29663
8,9,SOTAROL UPTOWN,2935 GIRARD AVE S,Routine,2019/07/18 11:45:00+00,254817,100,Lowry Hill East,44.94885,-93.29663
9,10,SOTAROL UPTOWN,2935 GIRARD AVE S,Routine,2019/07/18 11:45:00+00,254817,100,Lowry Hill East,44.94885,-93.29663
10,11,SOTAROL UPTOWN,2935 GIRARD AVE S,Routine,2019/07/18 11:45:00+00,254817,100,Lowry Hill East,44.94885,-93.29663


In [29]:
# Drop duplicate inspection ID numbers (each inspection has a unique ID)
restaurants_df_unique = restaurants_df_clean.drop_duplicates("InspectionIDNumber")
restaurants_df_unique.head(20)

Unnamed: 0,OBJECTID,BusinessName,FullAddress,InspectionType,DateOfInspection,InspectionIDNumber,InspectionScore,Neighborhood,Latitude,Longitude
1,2,SOTAROL UPTOWN,2935 GIRARD AVE S,Routine,2018/07/18 13:50:00+00,204578,92,Lowry Hill East,44.94885,-93.29663
5,6,SOTAROL UPTOWN,2935 GIRARD AVE S,Routine,2019/07/18 11:45:00+00,254817,100,Lowry Hill East,44.94885,-93.29663
15,16,CRISP & GREEN,428 WASHINGTON AVE N,Routine,2019/11/08 12:00:00+00,279484,84,North Loop,44.98619,-93.27411
25,26,LUCY BAR & RESTAURANT LIMITED LIABILITY COMPANY,3915 HIAWATHA AVE,Routine,2018/10/11 10:55:00+00,228096,82,Howe,44.93168,-93.22639
38,39,LUCY BAR & RESTAURANT LIMITED LIABILITY COMPANY,3915 HIAWATHA AVE,Routine,2019/10/11 13:35:00+00,275485,80,Howe,44.93168,-93.22639
53,54,LUCY BAR & RESTAURANT LIMITED LIABILITY COMPANY,3915 HIAWATHA AVE,Follow-Up,2018/12/07 11:55:00+00,275486,96,Howe,44.93168,-93.22639
57,58,LUCY BAR & RESTAURANT LIMITED LIABILITY COMPANY,3915 HIAWATHA AVE,Follow-Up,2019/12/06 12:05:00+00,332891,98,Howe,44.93168,-93.22639
60,61,"DOUBLE DANGER, LLC",113 26TH ST E,Follow-Up,2018/02/12 15:50:00+00,230069,100,Whittier,44.95511,-93.27569
61,62,"DOUBLE DANGER, LLC",113 26TH ST E,Routine,2018/10/05 11:30:00+00,230070,94,Whittier,44.95511,-93.27569
65,66,"DOUBLE DANGER, LLC",113 26TH ST E,Follow-Up,2018/04/17 12:00:00+00,242722,100,Whittier,44.95511,-93.27569


In [30]:
# Rename and rearrange columns (OBJECTID is going to be our primary key column)
## Verify that OBJECTID has unique values
print(restaurants_df_unique.shape) # 4540 rows x 10 columns
print(restaurants_df_unique["OBJECTID"].nunique()) # Number of unique values in the OBJECTID column 

(4540, 10)
4540


In [32]:
## Rename the OBJECTID to id and rename rest of columns to snake case 
new_column_names = {"OBJECTID": "id", 
                    "BusinessName": "business_name",
                    "FullAddress": "address",
                    "InspectionType": "inspection_type",
                    "DateOfInspection": "inspection_date",
                    "InspectionScore": "inspection_score",
                    "Neighborhood": "neighborhood",
                    "Latitude": "latitude",
                    "Longitude": "longitude"}

restaurants_df_lower = restaurants_df_unique.rename(columns = new_column_names).drop(columns = "InspectionIDNumber")
restaurants_df_lower.head()

Unnamed: 0,id,business_name,address,inspection_type,inspection_date,inspection_score,neighborhood,latitude,longitude
1,2,SOTAROL UPTOWN,2935 GIRARD AVE S,Routine,2018/07/18 13:50:00+00,92,Lowry Hill East,44.94885,-93.29663
5,6,SOTAROL UPTOWN,2935 GIRARD AVE S,Routine,2019/07/18 11:45:00+00,100,Lowry Hill East,44.94885,-93.29663
15,16,CRISP & GREEN,428 WASHINGTON AVE N,Routine,2019/11/08 12:00:00+00,84,North Loop,44.98619,-93.27411
25,26,LUCY BAR & RESTAURANT LIMITED LIABILITY COMPANY,3915 HIAWATHA AVE,Routine,2018/10/11 10:55:00+00,82,Howe,44.93168,-93.22639
38,39,LUCY BAR & RESTAURANT LIMITED LIABILITY COMPANY,3915 HIAWATHA AVE,Routine,2019/10/11 13:35:00+00,80,Howe,44.93168,-93.22639


In [17]:
# Transform the inspection_date column to a date object
pd.to_datetime(inspection_date)

array(['Routine', 'Follow-Up'], dtype=object)