In [113]:
from IPython import display
import requests
import json
from sodapy import Socrata
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import warnings
import time
warnings.filterwarnings("ignore")
pd.set_option('display.max_colwidth',100)
pd.set_option('display.max_columns',50)

from sklearn.preprocessing import OneHotEncoder


# Introduction

According to the CDC, more than 48 million Americans per year become sick from food, and an estimated 75% of the outbreaks came from food prepared by caterers, delis, and restaurants. In most cities, health inspections are generally random, which can increase time spent on spot checks at clean restaurants that have been following the rules closely — and missed opportunities to improve health and hygiene at places with more pressing food safety issues.

The goal for this project is to leverage public citizen generated data from social media to narrow the search for critical health and safety violations in New York City. As the City of New York manages  an open data portal, everyone can access historical hygiene inspections and violation records. By combine these two data source this project aims to determine which words, phrases, ratings, and patterns among restaurants lead to critical health and safety violations. This model can assist city health inspectors do their job better by prioritizing the kitchens most likely to be in violation of code.

# Obtain

This project requires data pulled from two different sources, the City of New York and Yelp. To obtain the data we will call the API keys.

## NYC Open Data Portal

In [114]:
doh = pd.read_csv('data/DOHMH_New_York_City_Restaurant_Inspection_Results.csv')
doh

Unnamed: 0,CAMIS,DBA,BORO,BUILDING,STREET,ZIPCODE,PHONE,CUISINE DESCRIPTION,INSPECTION DATE,ACTION,VIOLATION CODE,VIOLATION DESCRIPTION,CRITICAL FLAG,SCORE,GRADE,GRADE DATE,RECORD DATE,INSPECTION TYPE,Latitude,Longitude,Community Board,Council District,Census Tract,BIN,BBL,NTA
0,50008319,POULETTE,Manhattan,790,9 AVENUE,10019.0,2129569488,Chicken,01/28/2019,Violations were cited in the following area(s).,04D,"Food worker does not wash hands thoroughly after using the toilet, coughing, sneezing, smoking, ...",Critical,22,,,03/24/2022,Cycle Inspection / Initial Inspection,40.765040,-73.987795,104.0,3.0,13300.0,1025235.0,1.010430e+09,MN15
1,41678734,TINY'S DINER,Bronx,3603,RIVERDALE AVENUE,10463.0,7187087600,American,09/23/2019,Violations were cited in the following area(s).,04M,Live roaches present in facility's food and/or non-food areas.,Critical,24,,,03/24/2022,Cycle Inspection / Initial Inspection,40.886691,-73.907056,208.0,11.0,29500.0,2084186.0,2.057960e+09,BX29
2,40795021,TAO RESTAURANT,Manhattan,42,EAST 58 STREET,10022.0,2128882288,Asian/Asian Fusion,03/11/2020,Violations were cited in the following area(s).,10F,Non-food contact surface improperly constructed. Unacceptable material used. Non-food contact su...,Not Critical,9,,,03/24/2022,Cycle Inspection / Initial Inspection,40.762786,-73.971486,105.0,4.0,11202.0,1036073.0,1.012930e+09,MN17
3,40795021,TAO RESTAURANT,Manhattan,42,EAST 58 STREET,10022.0,2128882288,Asian/Asian Fusion,03/11/2020,Violations were cited in the following area(s).,10F,Non-food contact surface improperly constructed. Unacceptable material used. Non-food contact su...,Not Critical,9,,,03/24/2022,Cycle Inspection / Initial Inspection,40.762786,-73.971486,105.0,4.0,11202.0,1036073.0,1.012930e+09,MN17
4,40538234,"DUNKIN',' BASKIN ROBBINS",Queens,9925,HORACE HARDING EXPRESSWAY,11368.0,7182719222,Donuts,07/29/2021,Violations were cited in the following area(s).,06E,"Sanitized equipment or utensil, including in-use food dispensing utensil, improperly used or sto...",Critical,10,A,07/29/2021,03/24/2022,Cycle Inspection / Initial Inspection,40.736185,-73.858052,404.0,21.0,43702.0,4047915.0,4.019480e+09,QN25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
186222,41556790,FIVE GUYS FAMOUS BURGERS AND FRIES,Manhattan,2847,BROADFWAY,,2126787701,Hamburgers,03/01/2019,Violations were cited in the following area(s).,10F,Non-food contact surface improperly constructed. Unacceptable material used. Non-food contact su...,Not Critical,3,A,03/01/2019,03/24/2022,Cycle Inspection / Initial Inspection,0.000000,0.000000,,,,,1.000000e+00,
186223,50032876,TANNER SMITH'S,Manhattan,204,WEST 55 STREET,10019.0,9175172283,American,06/07/2019,Violations were cited in the following area(s).,08A,Facility not vermin proof. Harborage or conditions conducive to attracting vermin to the premise...,Not Critical,12,A,06/07/2019,03/24/2022,Cycle Inspection / Initial Inspection,40.764364,-73.981362,105.0,4.0,13700.0,1024857.0,1.010260e+09,MN17
186224,50003842,T- 45,Manhattan,135,WEST 45 STREET,10036.0,6466403775,American,07/26/2018,Violations were cited in the following area(s).,10B,Plumbing not properly installed or maintained; anti-siphonage or backflow prevention device not ...,Not Critical,49,,,03/24/2022,Cycle Inspection / Initial Inspection,40.757244,-73.983815,105.0,4.0,11900.0,1089796.0,1.009980e+09,MN17
186225,41640824,RICURAS ECUADORIAN BAKERY,Bronx,1576,WATSON AVENUE,10472.0,7184508363,Spanish,07/20/2018,Violations were cited in the following area(s).,06F,Wiping cloths soiled or not stored in sanitizing solution.,Critical,12,A,07/20/2018,03/24/2022,Cycle Inspection / Initial Inspection,40.826352,-73.876066,209.0,18.0,5001.0,2023533.0,2.037160e+09,BX55


In [115]:
doh.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 186227 entries, 0 to 186226
Data columns (total 26 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   CAMIS                  186227 non-null  int64  
 1   DBA                    186200 non-null  object 
 2   BORO                   186227 non-null  object 
 3   BUILDING               185799 non-null  object 
 4   STREET                 186227 non-null  object 
 5   ZIPCODE                183146 non-null  float64
 6   PHONE                  186214 non-null  object 
 7   CUISINE DESCRIPTION    186226 non-null  object 
 8   INSPECTION DATE        186227 non-null  object 
 9   ACTION                 186227 non-null  object 
 10  VIOLATION CODE         185413 non-null  object 
 11  VIOLATION DESCRIPTION  185863 non-null  object 
 12  CRITICAL FLAG          186227 non-null  object 
 13  SCORE                  186227 non-null  int64  
 14  GRADE                  67933 non-nul

In [123]:
doh.dropna(subset=['PHONE'],inplace=True)

In [124]:
doh.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 186214 entries, 0 to 186226
Data columns (total 26 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   CAMIS                  186214 non-null  int64  
 1   DBA                    186187 non-null  object 
 2   BORO                   186214 non-null  object 
 3   BUILDING               185786 non-null  object 
 4   STREET                 186214 non-null  object 
 5   ZIPCODE                183133 non-null  float64
 6   PHONE                  186214 non-null  object 
 7   CUISINE DESCRIPTION    186213 non-null  object 
 8   INSPECTION DATE        186214 non-null  object 
 9   ACTION                 186214 non-null  object 
 10  VIOLATION CODE         185400 non-null  object 
 11  VIOLATION DESCRIPTION  185850 non-null  object 
 12  CRITICAL FLAG          186214 non-null  object 
 13  SCORE                  186214 non-null  int64  
 14  GRADE                  67924 non-nul

In [125]:
# How many unique restaurants are in this dataset?
doh['CAMIS'].nunique()

19790

Health code violations found during an inspections carries a point value, and a restaurant’s score corresponds to a letter grade. A lower point score, leads to a better letter grade:

"A" grade: 0 to 13 points for sanitary violations
"B" grade: 14 to 27 points for sanitary violations
"C" grade: 28 or more points for sanitary violations

In [126]:
doh['A'] = doh['SCORE'] < 14
doh['B'] = (doh['SCORE'] > 13) & (doh['SCORE'] < 28)
doh['C'] = doh['SCORE'] > 27

In [127]:
doh

Unnamed: 0,CAMIS,DBA,BORO,BUILDING,STREET,ZIPCODE,PHONE,CUISINE DESCRIPTION,INSPECTION DATE,ACTION,VIOLATION CODE,VIOLATION DESCRIPTION,CRITICAL FLAG,SCORE,GRADE,GRADE DATE,RECORD DATE,INSPECTION TYPE,Latitude,Longitude,Community Board,Council District,Census Tract,BIN,BBL,NTA,A,B,C
0,50008319,POULETTE,Manhattan,790,9 AVENUE,10019.0,2129569488,Chicken,01/28/2019,Violations were cited in the following area(s).,04D,"Food worker does not wash hands thoroughly after using the toilet, coughing, sneezing, smoking, ...",Critical,22,,,03/24/2022,Cycle Inspection / Initial Inspection,40.765040,-73.987795,104.0,3.0,13300.0,1025235.0,1.010430e+09,MN15,False,True,False
1,41678734,TINY'S DINER,Bronx,3603,RIVERDALE AVENUE,10463.0,7187087600,American,09/23/2019,Violations were cited in the following area(s).,04M,Live roaches present in facility's food and/or non-food areas.,Critical,24,,,03/24/2022,Cycle Inspection / Initial Inspection,40.886691,-73.907056,208.0,11.0,29500.0,2084186.0,2.057960e+09,BX29,False,True,False
2,40795021,TAO RESTAURANT,Manhattan,42,EAST 58 STREET,10022.0,2128882288,Asian/Asian Fusion,03/11/2020,Violations were cited in the following area(s).,10F,Non-food contact surface improperly constructed. Unacceptable material used. Non-food contact su...,Not Critical,9,,,03/24/2022,Cycle Inspection / Initial Inspection,40.762786,-73.971486,105.0,4.0,11202.0,1036073.0,1.012930e+09,MN17,True,False,False
3,40795021,TAO RESTAURANT,Manhattan,42,EAST 58 STREET,10022.0,2128882288,Asian/Asian Fusion,03/11/2020,Violations were cited in the following area(s).,10F,Non-food contact surface improperly constructed. Unacceptable material used. Non-food contact su...,Not Critical,9,,,03/24/2022,Cycle Inspection / Initial Inspection,40.762786,-73.971486,105.0,4.0,11202.0,1036073.0,1.012930e+09,MN17,True,False,False
4,40538234,"DUNKIN',' BASKIN ROBBINS",Queens,9925,HORACE HARDING EXPRESSWAY,11368.0,7182719222,Donuts,07/29/2021,Violations were cited in the following area(s).,06E,"Sanitized equipment or utensil, including in-use food dispensing utensil, improperly used or sto...",Critical,10,A,07/29/2021,03/24/2022,Cycle Inspection / Initial Inspection,40.736185,-73.858052,404.0,21.0,43702.0,4047915.0,4.019480e+09,QN25,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
186222,41556790,FIVE GUYS FAMOUS BURGERS AND FRIES,Manhattan,2847,BROADFWAY,,2126787701,Hamburgers,03/01/2019,Violations were cited in the following area(s).,10F,Non-food contact surface improperly constructed. Unacceptable material used. Non-food contact su...,Not Critical,3,A,03/01/2019,03/24/2022,Cycle Inspection / Initial Inspection,0.000000,0.000000,,,,,1.000000e+00,,True,False,False
186223,50032876,TANNER SMITH'S,Manhattan,204,WEST 55 STREET,10019.0,9175172283,American,06/07/2019,Violations were cited in the following area(s).,08A,Facility not vermin proof. Harborage or conditions conducive to attracting vermin to the premise...,Not Critical,12,A,06/07/2019,03/24/2022,Cycle Inspection / Initial Inspection,40.764364,-73.981362,105.0,4.0,13700.0,1024857.0,1.010260e+09,MN17,True,False,False
186224,50003842,T- 45,Manhattan,135,WEST 45 STREET,10036.0,6466403775,American,07/26/2018,Violations were cited in the following area(s).,10B,Plumbing not properly installed or maintained; anti-siphonage or backflow prevention device not ...,Not Critical,49,,,03/24/2022,Cycle Inspection / Initial Inspection,40.757244,-73.983815,105.0,4.0,11900.0,1089796.0,1.009980e+09,MN17,False,False,True
186225,41640824,RICURAS ECUADORIAN BAKERY,Bronx,1576,WATSON AVENUE,10472.0,7184508363,Spanish,07/20/2018,Violations were cited in the following area(s).,06F,Wiping cloths soiled or not stored in sanitizing solution.,Critical,12,A,07/20/2018,03/24/2022,Cycle Inspection / Initial Inspection,40.826352,-73.876066,209.0,18.0,5001.0,2023533.0,2.037160e+09,BX55,True,False,False


In [128]:
doh['PHONE'] = '+1'+doh['PHONE']

Of the 19,792 unique restaurants, 12,221 did not pass the intitial inspections at least once.

In [129]:
doh_grades = doh.groupby(['CAMIS'])['PHONE','BORO','BUILDING','STREET','ZIPCODE',
                                    'PHONE','CUISINE DESCRIPTION',
                                    'Latitude','Longitude','Community Board',
                                    'Council District','Census Tract',
                                    'A', 'B','C'].sum()

In [130]:
doh_grades

Unnamed: 0_level_0,ZIPCODE,Latitude,Longitude,Community Board,Council District,Census Tract,A,B,C
CAMIS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
30075445,83696.0,326.785850,-590.847775,1688.0,104.0,201600.0,1,7,0
30112340,56125.0,203.313258,-369.810407,1545.0,200.0,163500.0,5,0,0
30191841,50095.0,203.836629,-369.921552,520.0,15.0,69500.0,5,0,0
40356018,44896.0,162.319681,-295.928361,1252.0,188.0,139200.0,4,0,0
40356483,101106.0,365.581004,-665.162905,2862.0,414.0,630000.0,5,4,0
...,...,...,...,...,...,...,...,...,...
50115169,10002.0,40.714841,-73.991700,103.0,1.0,1600.0,1,0,0
50116155,33699.0,122.051758,-221.752145,948.0,123.0,111900.0,3,0,0
50117350,50090.0,203.762660,-369.947575,525.0,15.0,54500.0,5,0,0
50117434,79604.0,285.289075,-517.196049,2821.0,175.0,228900.0,0,0,7


In [133]:
doh_grades[doh_grades['B'] > 0]

Unnamed: 0_level_0,ZIPCODE,Latitude,Longitude,Community Board,Council District,Census Tract,A,B,C
CAMIS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
30075445,83696.0,326.785850,-590.847775,1688.0,104.0,201600.0,1,7,0
40356483,101106.0,365.581004,-665.162905,2862.0,414.0,630000.0,5,4,0
40362264,70175.0,285.548344,-517.773967,749.0,42.0,126700.0,3,4,0
40362274,70084.0,285.080205,-517.982347,714.0,7.0,38507.0,1,6,0
40362715,230115.0,936.235858,-1702.198016,2323.0,23.0,16100.0,7,11,5
...,...,...,...,...,...,...,...,...,...
50111805,30042.0,122.195939,-222.011203,306.0,9.0,20100.0,0,3,0
50113198,40008.0,162.854183,-295.984590,412.0,4.0,3200.0,0,4,0
50113951,41820.0,163.249463,-295.613108,808.0,32.0,33200.0,0,4,0
50113984,30081.0,122.429129,-221.880270,327.0,21.0,60900.0,0,3,0


Of the 19,792 unique restaurants, 9,978 failed an initial cycle inspection at least once.

In [132]:
doh_grades[doh_grades['C'] > 0]

Unnamed: 0_level_0,ZIPCODE,Latitude,Longitude,Community Board,Council District,Census Tract,A,B,C
CAMIS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
40360076,112180.0,406.433297,-739.781887,3120.0,390.0,496000.0,4,0,6
40362715,230115.0,936.235858,-1702.198016,2323.0,23.0,16100.0,7,11,5
40363630,200560.0,815.505622,-1479.131518,2160.0,100.0,280000.0,2,11,7
40364179,190475.0,775.226054,-1405.243039,2033.0,133.0,366700.0,3,5,11
40364362,120228.0,489.124894,-887.733038,1260.0,48.0,124800.0,3,3,6
...,...,...,...,...,...,...,...,...,...
50110216,79604.0,285.233461,-517.186091,2828.0,147.0,188307.0,0,0,7
50112974,70049.0,285.004311,-518.044896,707.0,7.0,14700.0,0,0,7
50113139,100926.0,365.433842,-665.981428,2799.0,423.0,259200.0,0,0,9
50113991,60162.0,244.858259,-443.760539,654.0,42.0,121800.0,0,0,6


Of the 19,792 unique restaurants, 5,648 severly failed an initial cycle inspection at least once and are at risk of being closed by the DOHMH.

In [134]:
doh.duplicated().sum()

11502

In [135]:
doh.drop_duplicates(inplace=True)

In [136]:
doh.shape

(174712, 29)

In [137]:
doh['SCORE'].isna().sum()

0

In [None]:
doh['SCORE'].hist(bins=113, figsize=(12,8));

In [None]:
doh['SCORE'].value_counts()

In [None]:
doh['SCORE'].mean()

In [None]:
doh['SCORE'].median()

In [None]:
doh['SCORE'].mode()

In [None]:
doh['GRADE'].isna().sum()

In [None]:
doh['GRADE'].value_counts()

In [None]:
doh.info()

In [None]:
doh['INSPECTION DATE'] =  pd.to_datetime(doh['INSPECTION DATE'])

In [None]:
# doh.pivot(columns=['CAMIS','INSPECTION DATE'],values='SCORE')

The New York Health Department inspects the approximately 27,000 restaurants within the city to monitor their compliance with food safety regulations. Inspectors observe how food is prepared, served and stored and whether restaurant workers are practicing good hygiene. They check food temperatures, equipment maintenance and pest control measures.

Calling API

In [None]:
dohmh_df['score'].hist(bins='auto', figsize=(12,8));

In [None]:
# # Take a look at duplicated records
# duplicates = df1.duplicated(subset=['camis'], keep=False)
# df1.loc[duplicates.loc[duplicates==True].index].sort_values(by='camis')

## Yelp API

In [8]:
with open('/Users/Rob/.secret/yelp_api.json') as f:
    creds = json.load(f)

In [9]:
creds.keys()

dict_keys(['api_key'])

In [10]:
#Business Search      
# url = 'https://api.yelp.com/v3/businesses/search'

#Business Match       
# url = 'https://api.yelp.com/v3/businesses/matches'

#Phone Search         
url = 'https://api.yelp.com/v3/businesses/search/phone'

#Business Details     
# url = 'https://api.yelp.com/v3/businesses/{id}'

#Business Reviews     
# url = 'https://api.yelp.com/v3/businesses/{id}/reviews'

In [None]:
# #Business Search  
# headers = {
#     'Authorization': 'Bearer ' + creds['api_key']
#           }
# location = 'New York NY'
# SEARCH_LIMIT = 50

# url_params = {
#     'limit': SEARCH_LIMIT,
#     'location' : location.replace(' ','+'),
#     'offset': 0
#              }


# response = requests.get(url, headers=headers, params=url_params)
# print(response.status_code)

In [140]:
phones = list(top_doh['PHONE'])
phones

['+12129569488',
 '+17187087600',
 '+12128882288',
 '+12128882288',
 '+17182719222']

In [73]:
phone_numbers = set(doh['PHONE'])

In [74]:
len(phone_numbers)

18938

In [154]:
phone_numbers = list(phone_numbers)

In [155]:
phone_numbers1 = phone_numbers[1:1000]

In [199]:
phone_numbers2 = phone_numbers[1000:2000]

In [235]:
phone_numbers3 = phone_numbers[2000:2500]

In [236]:
phone_numbers4 = phone_numbers[2500:3500]

In [174]:
# Initialize empty list
biz_list1 = []

In [200]:
# Initialize empty list
biz_list2 = []

In [210]:
# Initialize empty list
biz_list3 = []

In [233]:
# Initialize empty list
biz_list4 = []

In [177]:
#Phone Search 
for number in phone_numbers1:
    url = 'https://api.yelp.com/v3/businesses/search/phone'
    headers = {'Authorization': 'Bearer ' + creds['api_key']}
    url_params = {'phone': number}
    response = requests.get(url, headers=headers, params=url_params)
    response_json = response.json()
    biz_list1.extend(response_json.get('businesses','U'))
    

In [201]:
#Phone Search 
for number in phone_numbers2:
    url = 'https://api.yelp.com/v3/businesses/search/phone'
    headers = {'Authorization': 'Bearer ' + creds['api_key']}
    url_params = {'phone': number}
    response = requests.get(url, headers=headers, params=url_params)
    response_json = response.json()
    biz_list2.extend(response_json.get('businesses','U'))
    

In [211]:
#Phone Search 
for number in phone_numbers3:
    url = 'https://api.yelp.com/v3/businesses/search/phone'
    headers = {'Authorization': 'Bearer ' + creds['api_key']}
    url_params = {'phone': number}
    response = requests.get(url, headers=headers, params=url_params)
    response_json = response.json()
    biz_list3.extend(response_json.get('businesses','U'))
    

In [237]:
#Phone Search 
for number in phone_numbers4:
    url = 'https://api.yelp.com/v3/businesses/search/phone'
    headers = {'Authorization': 'Bearer ' + creds['api_key']}
    url_params = {'phone': number}
    response = requests.get(url, headers=headers, params=url_params)
    response_json = response.json()
    biz_list4.extend(response_json.get('businesses','U'))
    

In [238]:
len(biz_list4)

868

In [252]:
biz_list4.remove('U')

ValueError: list.remove(x): x not in list

In [253]:
biz_list4_df = pd.DataFrame(biz_list4)
biz_list4_df.to_csv('yelp_phone4.csv',index=False)
df = pd.read_csv('yelp_phone4.csv')
df

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone
0,QTQrXWuowKHNFGNssXT_TA,forest-thai-cuisine-staten-island,Forest Thai Cuisine,https://s3-media0.fl.yelpcdn.com/bphoto/48HvL8gQx1l4FJayVD7lPQ/o.jpg,False,https://www.yelp.com/biz/forest-thai-cuisine-staten-island?adjust_creative=82uXkAt1Tiw7u9_h33zr1...,130,"[{'alias': 'thai', 'title': 'Thai'}]",3.5,"{'latitude': 40.626607, 'longitude': -74.130334}",['delivery'],$$,"{'address1': '1211 Forest Ave', 'address2': '', 'address3': '', 'city': 'Staten Island', 'zip_co...",17188769888,(718) 876-9888
1,pk89jzL1RsE9U0thf_z6tw,locanda-verde-new-york,Locanda Verde,https://s3-media0.fl.yelpcdn.com/bphoto/a9PziJGwrYRYEARFXa5HUA/o.jpg,False,https://www.yelp.com/biz/locanda-verde-new-york?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&utm_campa...,1735,"[{'alias': 'italian', 'title': 'Italian'}, {'alias': 'cocktailbars', 'title': 'Cocktail Bars'}, ...",4.0,"{'latitude': 40.7198601300801, 'longitude': -74.010039072422}","['pickup', 'delivery']",$$$,"{'address1': '377 Greenwich St', 'address2': '', 'address3': '', 'city': 'New York', 'zip_code':...",12129253797,(212) 925-3797
2,JuwfLhj7PNUr61EbwEkvFQ,ago-restaurant-new-york,Ago Restaurant,,True,https://www.yelp.com/biz/ago-restaurant-new-york?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&utm_camp...,9,"[{'alias': 'italian', 'title': 'Italian'}]",3.0,"{'latitude': 40.719845, 'longitude': -74.009857}",[],$$$$,"{'address1': '377 Greenwich St', 'address2': '', 'address3': '', 'city': 'New York', 'zip_code':...",12129253797,(212) 925-3797
3,NzHiK8Csmypx4f6jYhSqiQ,lennys-bagels-new-york-2,Lenny's Bagels,https://s3-media0.fl.yelpcdn.com/bphoto/GMsGjAXyU7mcqV6jgMgVJw/o.jpg,False,https://www.yelp.com/biz/lennys-bagels-new-york-2?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&utm_cam...,168,"[{'alias': 'delis', 'title': 'Delis'}, {'alias': 'salad', 'title': 'Salad'}, {'alias': 'sandwich...",4.0,"{'latitude': 40.7961904, 'longitude': -73.9709623}",['delivery'],$,"{'address1': '2601 Broadway', 'address2': '', 'address3': '', 'city': 'New York', 'zip_code': '1...",12122220410,(212) 222-0410
4,6K3Hk5wDcXchuDlyd2ufEQ,sparks-deli-long-island-city,Sparks Deli,https://s3-media0.fl.yelpcdn.com/bphoto/xYlfGfJEMyj7SomhquCLIA/o.jpg,False,https://www.yelp.com/biz/sparks-deli-long-island-city?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&utm...,21,"[{'alias': 'delis', 'title': 'Delis'}, {'alias': 'sandwiches', 'title': 'Sandwiches'}]",3.0,"{'latitude': 40.738837, 'longitude': -73.940858}",['delivery'],$,"{'address1': '2831 Borden Ave', 'address2': '', 'address3': '', 'city': 'Long Island City', 'zip...",17183920757,(718) 392-0757
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
850,e1jwvzY8hU642HSBZZATcw,hub-thai-new-york,Hub Thai,https://s3-media0.fl.yelpcdn.com/bphoto/rUP93-QUXWjW9_0PFd1Vhg/o.jpg,False,https://www.yelp.com/biz/hub-thai-new-york?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&utm_campaign=y...,142,"[{'alias': 'thai', 'title': 'Thai'}]",4.0,"{'latitude': 40.7238123, 'longitude': -73.9847935}","['delivery', 'pickup']",$$,"{'address1': '50 Avenue A', 'address2': '', 'address3': None, 'city': 'New York', 'zip_code': '1...",12122288221,(212) 228-8221
851,zJcxZQbTSAP3q4Fy-6WAyw,shalom-japan-brooklyn,Shalom Japan,https://s3-media0.fl.yelpcdn.com/bphoto/F0f37eyiRrEpmzLiwcAMZg/o.jpg,False,https://www.yelp.com/biz/shalom-japan-brooklyn?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&utm_campai...,312,"[{'alias': 'japanese', 'title': 'Japanese'}]",4.0,"{'latitude': 40.7091679, 'longitude': -73.9558028}","['pickup', 'delivery']",$$$,"{'address1': '310 S 4th St', 'address2': '', 'address3': '', 'city': 'Brooklyn', 'zip_code': '11...",17183884012,(718) 388-4012
852,wrCLnbaWcrhu7MsDmEBf6A,ginos-pizzeria-brooklyn-5,Ginos Pizzeria,https://s3-media0.fl.yelpcdn.com/bphoto/LNL9Xtijplcaioj84Pf0pw/o.jpg,False,https://www.yelp.com/biz/ginos-pizzeria-brooklyn-5?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&utm_ca...,81,"[{'alias': 'pizza', 'title': 'Pizza'}]",3.5,"{'latitude': 40.68103, 'longitude': -73.97548}","['delivery', 'pickup']",$,"{'address1': '218 Flatbush Ave', 'address2': '', 'address3': '', 'city': 'Brooklyn', 'zip_code':...",17182303932,(718) 230-3932
853,iw6l5ohNUJK4ozNEjkvbvw,dunkin-bronx-49,Dunkin',https://s3-media0.fl.yelpcdn.com/bphoto/VEaK_zlF3v9TWlSisaIEfg/o.jpg,False,https://www.yelp.com/biz/dunkin-bronx-49?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&utm_campaign=yel...,5,"[{'alias': 'donuts', 'title': 'Donuts'}, {'alias': 'coffee', 'title': 'Coffee & Tea'}]",1.5,"{'latitude': 40.824436, 'longitude': -73.909206}",['delivery'],$,"{'address1': '511 E. 163rd St', 'address2': '', 'address3': '', 'city': 'Bronx', 'zip_code': '10...",17184026700,(718) 402-6700


In [221]:
df_1 = pd.read_csv('yelp_phone1.csv')

In [226]:
df_1

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone
0,fEsgUESZxOQtd4YEmAUuow,ii-caffe-latte-2-new-york-4,II Caffe Latte 2,https://s3-media0.fl.yelpcdn.com/bphoto/rL-1bjIrEvnG6zV8ofrZpg/o.jpg,False,https://www.yelp.com/biz/ii-caffe-latte-2-new-york-4?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&utm_...,127,"[{'alias': 'breakfast_brunch', 'title': 'Breakfast & Brunch'}, {'alias': 'desserts', 'title': 'D...",3.5,"{'latitude': 40.8246899, 'longitude': -73.94704}","['delivery', 'pickup']",$$,"{'address1': '458 W 145th St', 'address2': '', 'address3': '', 'city': 'New York', 'zip_code': '...",12122340202,(212) 234-0202
1,zm_ngfMxyU6d85AaTIgI7Q,chloes-restaurant-and-lounge-brooklyn,Chloe's Restaurant & Lounge,https://s3-media0.fl.yelpcdn.com/bphoto/AFD_nPVpU1rhI7VMWvbltA/o.jpg,False,https://www.yelp.com/biz/chloes-restaurant-and-lounge-brooklyn?adjust_creative=82uXkAt1Tiw7u9_h3...,192,"[{'alias': 'haitian', 'title': 'Haitian'}, {'alias': 'salad', 'title': 'Salad'}, {'alias': 'seaf...",2.5,"{'latitude': 40.63755, 'longitude': -73.89731}","['delivery', 'pickup']",$$,"{'address1': '9413 Ave L', 'address2': None, 'address3': '', 'city': 'Brooklyn', 'zip_code': '11...",13477709051,(347) 770-9051
2,LbGAKVX3cwZj4fUC2XOLxg,jahns-jackson-heights,Jahn's,https://s3-media0.fl.yelpcdn.com/bphoto/9Oa1n8Dyo_n2N9YIhJxsgw/o.jpg,False,https://www.yelp.com/biz/jahns-jackson-heights?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&utm_campai...,172,"[{'alias': 'diners', 'title': 'Diners'}]",3.5,"{'latitude': 40.74941, 'longitude': -73.88497}","['delivery', 'pickup']",$$,"{'address1': '81-04 37th Ave', 'address2': '', 'address3': '', 'city': 'Jackson Heights', 'zip_c...",17186510700,(718) 651-0700
3,i8ejDDR4COtukAAA1Ls5fw,soba-ya-new-york,Soba-ya,https://s3-media0.fl.yelpcdn.com/bphoto/LnjY25INn-aM1eAyiav8XA/o.jpg,False,https://www.yelp.com/biz/soba-ya-new-york?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&utm_campaign=ye...,1496,"[{'alias': 'japanese', 'title': 'Japanese'}, {'alias': 'noodles', 'title': 'Noodles'}]",4.0,"{'latitude': 40.72965, 'longitude': -73.9878899}","['delivery', 'pickup']",$$,"{'address1': '229 E 9th St', 'address2': '', 'address3': '', 'city': 'New York', 'zip_code': '10...",12125336966,(212) 533-6966
4,YV0VaJgV2zotzI7XNUaMCQ,mcdonalds-new-york-100,McDonald's,https://s3-media0.fl.yelpcdn.com/bphoto/X710tzNcQt5xzpJMVIpZHw/o.jpg,False,https://www.yelp.com/biz/mcdonalds-new-york-100?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&utm_campa...,85,"[{'alias': 'hotdogs', 'title': 'Fast Food'}, {'alias': 'burgers', 'title': 'Burgers'}, {'alias':...",2.5,"{'latitude': 40.71852, 'longitude': -74.00115}",['delivery'],$,"{'address1': '262 Canal St', 'address2': None, 'address3': '', 'city': 'New York', 'zip_code': '...",12129415823,(212) 941-5823
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
854,2BDC9WjKYP5eHtkZvGlboQ,park-delicatessen-brooklyn,Park Delicatessen,https://s3-media0.fl.yelpcdn.com/bphoto/CtYyfhpPdQRndzm7d-ZT8g/o.jpg,False,https://www.yelp.com/biz/park-delicatessen-brooklyn?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&utm_c...,23,"[{'alias': 'delis', 'title': 'Delis'}, {'alias': 'meats', 'title': 'Meat Shops'}]",4.5,"{'latitude': 40.7257351177142, 'longitude': -73.94427729785}","['pickup', 'delivery']",$,"{'address1': '209 Nassau Ave', 'address2': None, 'address3': '', 'city': 'Brooklyn', 'zip_code':...",17183832080,(718) 383-2080
855,V6xlMNLFTsxB4W0VVcRK_Q,230-fifth-rooftop-bar-new-york,230 Fifth Rooftop Bar,https://s3-media0.fl.yelpcdn.com/bphoto/Ygy2PfrN8nuIGMttdzRsxg/o.jpg,False,https://www.yelp.com/biz/230-fifth-rooftop-bar-new-york?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&u...,3943,"[{'alias': 'cocktailbars', 'title': 'Cocktail Bars'}]",3.0,"{'latitude': 40.7440395, 'longitude': -73.9881258}","['delivery', 'pickup']",$$$,"{'address1': '230 5th Ave', 'address2': '(corner 27th)', 'address3': '', 'city': 'New York', 'zi...",12127254300,(212) 725-4300
856,8VTSpsJCyB-wgtCFkSTszg,spa-castle-college-point,Spa Castle,https://s3-media0.fl.yelpcdn.com/bphoto/i8or0Z0fwUI1yyKZvrZ-9w/o.jpg,False,https://www.yelp.com/biz/spa-castle-college-point?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&utm_cam...,1954,"[{'alias': 'massage', 'title': 'Massage'}, {'alias': 'reflexology', 'title': 'Reflexology'}, {'a...",3.0,"{'latitude': 40.78747054911353, 'longitude': -73.8369738}",[],$$$,"{'address1': '13110 11th Ave', 'address2': '', 'address3': '', 'city': 'College Point', 'zip_cod...",17189396300,(718) 939-6300
857,SyBFnqsPEge00N0_bHs2fQ,madera-cuban-grill-and-steakhouse-long-island-city,Madera Cuban Grill & Steakhouse,https://s3-media0.fl.yelpcdn.com/bphoto/F3z2VTAQ35s9etnhmZWK1A/o.jpg,False,https://www.yelp.com/biz/madera-cuban-grill-and-steakhouse-long-island-city?adjust_creative=82uX...,448,"[{'alias': 'cuban', 'title': 'Cuban'}, {'alias': 'steak', 'title': 'Steakhouses'}]",3.5,"{'latitude': 40.744598, 'longitude': -73.953336}","['delivery', 'restaurant_reservation', 'pickup']",$$,"{'address1': '47-29 Vernon Blvd', 'address2': '', 'address3': '', 'city': 'Long Island City', 'z...",17186061236,(718) 606-1236


In [223]:
df_2 = pd.read_csv('yelp_phone2.csv')

In [229]:
df_3 = pd.read_csv('yelp_phone3.csv')

In [224]:
df_2

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone
0,OM5nz6TYlI0vxjckv1in8Q,dunkin-new-york-105,Dunkin',https://s3-media0.fl.yelpcdn.com/bphoto/HmZzfmHFicBiAtyiNcCxnw/o.jpg,False,https://www.yelp.com/biz/dunkin-new-york-105?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&utm_campaign...,20,"[{'alias': 'donuts', 'title': 'Donuts'}, {'alias': 'coffee', 'title': 'Coffee & Tea'}]",2.0,"{'latitude': 40.79867, 'longitude': -73.94132}",['delivery'],$,"{'address1': '147 E 116th Street', 'address2': '', 'address3': '', 'city': 'New York', 'zip_code...",16465979456,(646) 597-9456
1,R2Zrr3m-UyrSW2GGgDYzLQ,life-health-fitness-whitestone-whitestone,Life Health Fitness - Whitestone,https://s3-media0.fl.yelpcdn.com/bphoto/szqbPMFThvXs_smOt35REQ/o.jpg,False,https://www.yelp.com/biz/life-health-fitness-whitestone-whitestone?adjust_creative=82uXkAt1Tiw7u...,52,"[{'alias': 'gyms', 'title': 'Gyms'}, {'alias': 'healthtrainers', 'title': 'Trainers'}, {'alias':...",4.0,"{'latitude': 40.79266, 'longitude': -73.80745}",[],,"{'address1': '10-24 154th St', 'address2': None, 'address3': '', 'city': 'Whitestone', 'zip_code...",17183570300,(718) 357-0300
2,KJXi9G1ODDEcgGfNJlpKaw,golden-krust-caribbean-restaurant-brooklyn-11,Golden Krust Caribbean Restaurant,https://s3-media0.fl.yelpcdn.com/bphoto/PveOYe6WT-SNlfiBtS1JQA/o.jpg,False,https://www.yelp.com/biz/golden-krust-caribbean-restaurant-brooklyn-11?adjust_creative=82uXkAt1T...,10,"[{'alias': 'caribbean', 'title': 'Caribbean'}, {'alias': 'bakeries', 'title': 'Bakeries'}, {'ali...",1.5,"{'latitude': 40.636501, 'longitude': -73.892392}",['delivery'],$$$,"{'address1': '1887 Rockaway Pkwy', 'address2': '', 'address3': None, 'city': 'Brooklyn', 'zip_co...",17182094242,(718) 209-4242
3,5YFgQpN3AnlLFG4m5fCvzA,gulshan-terrace-woodside,Gulshan Terrace,https://s3-media0.fl.yelpcdn.com/bphoto/EA1PIc8jCZZvZMYuGUFyng/o.jpg,False,https://www.yelp.com/biz/gulshan-terrace-woodside?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&utm_cam...,3,"[{'alias': 'indpak', 'title': 'Indian'}, {'alias': 'bangladeshi', 'title': 'Bangladeshi'}]",4.0,"{'latitude': 40.7498753, 'longitude': -73.9027518}",[],,"{'address1': '5915 37th Ave', 'address2': '', 'address3': '', 'city': 'Woodside', 'zip_code': '1...",17184574577,(718) 457-4577
4,8mZ6fE9n_mK59mJ_MYtrSQ,mimis-hummus-brooklyn,Mimi's Hummus,https://s3-media0.fl.yelpcdn.com/bphoto/ati49nUSXFk6cdW6Of12gw/o.jpg,False,https://www.yelp.com/biz/mimis-hummus-brooklyn?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&utm_campai...,362,"[{'alias': 'mediterranean', 'title': 'Mediterranean'}, {'alias': 'mideastern', 'title': 'Middle ...",4.0,"{'latitude': 40.64035, 'longitude': -73.9667}","['delivery', 'pickup']",$$,"{'address1': '1209 Cortelyou Rd', 'address2': '', 'address3': '', 'city': 'Brooklyn', 'zip_code'...",17182844444,(718) 284-4444
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
872,fZuIszXbPoyzYMDSYd3oOA,ghang-thai-kitchen-brooklyn,Ghang Thai Kitchen,https://s3-media0.fl.yelpcdn.com/bphoto/0EIKS1bwgUM8eztCGiD1NA/o.jpg,False,https://www.yelp.com/biz/ghang-thai-kitchen-brooklyn?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&utm_...,199,"[{'alias': 'thai', 'title': 'Thai'}]",3.5,"{'latitude': 40.6861227, 'longitude': -73.9938877}","['delivery', 'pickup']",$$,"{'address1': '229 Court St', 'address2': None, 'address3': '', 'city': 'Brooklyn', 'zip_code': '...",17188751369,(718) 875-1369
873,0bopi1hdK_ASDmvPSvwqFg,best-pizza-and-chicken-brooklyn,Best Pizza & Chicken,https://s3-media0.fl.yelpcdn.com/bphoto/TX4_E_1vS64MS3h8QHCh5g/o.jpg,False,https://www.yelp.com/biz/best-pizza-and-chicken-brooklyn?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&...,9,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias': 'burgers', 'title': 'Burgers'}]",3.5,"{'latitude': 40.61006, 'longitude': -73.9843}","['delivery', 'pickup']",,"{'address1': '24 Avenue O', 'address2': '', 'address3': None, 'city': 'Brooklyn', 'zip_code': '1...",17185133220,(718) 513-3220
874,W8vkegdl47HAIqJd6M3CZQ,ave-o-pizza-brooklyn,Ave O Pizza,,True,https://www.yelp.com/biz/ave-o-pizza-brooklyn?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&utm_campaig...,4,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.5,"{'latitude': 40.609936, 'longitude': -73.984268}","['delivery', 'pickup']",$,"{'address1': '24 Avenue O', 'address2': None, 'address3': None, 'city': 'Brooklyn', 'zip_code': ...",17185133220,(718) 513-3220
875,u2aUQmMIYvdaTNhZGcMU3g,best-pizza-bensonhurst,Best Pizza,https://s3-media0.fl.yelpcdn.com/bphoto/80v6_fevo1HY_5o6T2615g/o.jpg,True,https://www.yelp.com/biz/best-pizza-bensonhurst?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&utm_campa...,5,"[{'alias': 'pizza', 'title': 'Pizza'}]",3.0,"{'latitude': 40.61006, 'longitude': -73.9843}","['delivery', 'pickup']",,"{'address1': '24 Ave O', 'address2': '', 'address3': '', 'city': 'Bensonhurst', 'zip_code': '112...",17185133220,(718) 513-3220


In [230]:
yelp_df = pd.concat([df_1,df_2,df_3],ignore_index=True)

In [231]:
yelp_df

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone
0,fEsgUESZxOQtd4YEmAUuow,ii-caffe-latte-2-new-york-4,II Caffe Latte 2,https://s3-media0.fl.yelpcdn.com/bphoto/rL-1bjIrEvnG6zV8ofrZpg/o.jpg,False,https://www.yelp.com/biz/ii-caffe-latte-2-new-york-4?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&utm_...,127,"[{'alias': 'breakfast_brunch', 'title': 'Breakfast & Brunch'}, {'alias': 'desserts', 'title': 'D...",3.5,"{'latitude': 40.8246899, 'longitude': -73.94704}","['delivery', 'pickup']",$$,"{'address1': '458 W 145th St', 'address2': '', 'address3': '', 'city': 'New York', 'zip_code': '...",12122340202,(212) 234-0202
1,zm_ngfMxyU6d85AaTIgI7Q,chloes-restaurant-and-lounge-brooklyn,Chloe's Restaurant & Lounge,https://s3-media0.fl.yelpcdn.com/bphoto/AFD_nPVpU1rhI7VMWvbltA/o.jpg,False,https://www.yelp.com/biz/chloes-restaurant-and-lounge-brooklyn?adjust_creative=82uXkAt1Tiw7u9_h3...,192,"[{'alias': 'haitian', 'title': 'Haitian'}, {'alias': 'salad', 'title': 'Salad'}, {'alias': 'seaf...",2.5,"{'latitude': 40.63755, 'longitude': -73.89731}","['delivery', 'pickup']",$$,"{'address1': '9413 Ave L', 'address2': None, 'address3': '', 'city': 'Brooklyn', 'zip_code': '11...",13477709051,(347) 770-9051
2,LbGAKVX3cwZj4fUC2XOLxg,jahns-jackson-heights,Jahn's,https://s3-media0.fl.yelpcdn.com/bphoto/9Oa1n8Dyo_n2N9YIhJxsgw/o.jpg,False,https://www.yelp.com/biz/jahns-jackson-heights?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&utm_campai...,172,"[{'alias': 'diners', 'title': 'Diners'}]",3.5,"{'latitude': 40.74941, 'longitude': -73.88497}","['delivery', 'pickup']",$$,"{'address1': '81-04 37th Ave', 'address2': '', 'address3': '', 'city': 'Jackson Heights', 'zip_c...",17186510700,(718) 651-0700
3,i8ejDDR4COtukAAA1Ls5fw,soba-ya-new-york,Soba-ya,https://s3-media0.fl.yelpcdn.com/bphoto/LnjY25INn-aM1eAyiav8XA/o.jpg,False,https://www.yelp.com/biz/soba-ya-new-york?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&utm_campaign=ye...,1496,"[{'alias': 'japanese', 'title': 'Japanese'}, {'alias': 'noodles', 'title': 'Noodles'}]",4.0,"{'latitude': 40.72965, 'longitude': -73.9878899}","['delivery', 'pickup']",$$,"{'address1': '229 E 9th St', 'address2': '', 'address3': '', 'city': 'New York', 'zip_code': '10...",12125336966,(212) 533-6966
4,YV0VaJgV2zotzI7XNUaMCQ,mcdonalds-new-york-100,McDonald's,https://s3-media0.fl.yelpcdn.com/bphoto/X710tzNcQt5xzpJMVIpZHw/o.jpg,False,https://www.yelp.com/biz/mcdonalds-new-york-100?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&utm_campa...,85,"[{'alias': 'hotdogs', 'title': 'Fast Food'}, {'alias': 'burgers', 'title': 'Burgers'}, {'alias':...",2.5,"{'latitude': 40.71852, 'longitude': -74.00115}",['delivery'],$,"{'address1': '262 Canal St', 'address2': None, 'address3': '', 'city': 'New York', 'zip_code': '...",12129415823,(212) 941-5823
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2159,aIRhO0WZ3KprUphL3oYsBg,applebees-grill-bar-staten-island-4,Applebee's Grill + Bar,https://s3-media0.fl.yelpcdn.com/bphoto/oIR3BQHVVgdXBWCS7bAbJg/o.jpg,False,https://www.yelp.com/biz/applebees-grill-bar-staten-island-4?adjust_creative=82uXkAt1Tiw7u9_h33z...,67,"[{'alias': 'tradamerican', 'title': 'American (Traditional)'}, {'alias': 'sportsbars', 'title': ...",2.5,"{'latitude': 40.530092, 'longitude': -74.229161}","['delivery', 'pickup']",$$,"{'address1': ""2720 Veteran's Rd W"", 'address2': None, 'address3': None, 'city': 'Staten Island',...",17189431200,(718) 943-1200
2160,-wGvjd9NmPqrT0tm_pq1QQ,the-black-whale-bronx,The Black Whale,https://s3-media0.fl.yelpcdn.com/bphoto/X1ThAksqctFuD5kBszAz7w/o.jpg,False,https://www.yelp.com/biz/the-black-whale-bronx?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&utm_campai...,184,"[{'alias': 'newamerican', 'title': 'American (New)'}, {'alias': 'diners', 'title': 'Diners'}, {'...",3.5,"{'latitude': 40.846302, 'longitude': -73.786432}",[],$$,"{'address1': '279 City Island Ave', 'address2': '', 'address3': '', 'city': 'Bronx', 'zip_code':...",17188853657,(718) 885-3657
2161,fCxQXdLdS1HLHk7bQOsy3A,kajitsu-new-york,Kajitsu,https://s3-media0.fl.yelpcdn.com/bphoto/ixv3qiJXKEqSPHZRW8EMVw/o.jpg,True,https://www.yelp.com/biz/kajitsu-new-york?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&utm_campaign=ye...,136,"[{'alias': 'japanese', 'title': 'Japanese'}, {'alias': 'vegetarian', 'title': 'Vegetarian'}, {'a...",4.0,"{'latitude': 40.7276955, 'longitude': -73.9840698}",[],$$$$,"{'address1': '414 E 9th St', 'address2': '', 'address3': '', 'city': 'New York', 'zip_code': '10...",12122284873,(212) 228-4873
2162,rn6SmSqAYx2kLFIAxytMMQ,kajitsu-new-york-3,Kajitsu,https://s3-media0.fl.yelpcdn.com/bphoto/rbJC-rNk-wuW1AmtNUl67Q/o.jpg,False,https://www.yelp.com/biz/kajitsu-new-york-3?adjust_creative=82uXkAt1Tiw7u9_h33zr1A&utm_campaign=...,280,"[{'alias': 'japanese', 'title': 'Japanese'}, {'alias': 'vegetarian', 'title': 'Vegetarian'}, {'a...",4.0,"{'latitude': 40.7498614, 'longitude': -73.9777215}","['delivery', 'pickup']",$$$$,"{'address1': '125 E 39th St', 'address2': '', 'address3': '', 'city': 'New York', 'zip_code': '1...",12122284873,(212) 228-4873


In [None]:
# def prepare_data(data_list):
#     """
#     This function takes in a list of dictionaries and prepares it
#     for analysis
#     """
    
#     # Make a new list to hold results
#     results = []
    
#     for business_data in data_list:
    
#         # Make a new dictionary to hold prepared data for this business
#         prepared_data = {}
        
#         # Extract name, review_count, rating, and price key-value pairs
#         # from business_data and add to prepared_data
#         # If a key is not present in business_data, add it to prepared_data
#         # with an associated value of None
#         for key in ("name", "review_count", "rating", "price"):
#             prepared_data[key] = business_data.get(key, None)
    
#         # Parse and add latitude and longitude columns
#         coordinates = business_data["coordinates"]
#         prepared_data["latitude"] = coordinates["latitude"]
#         prepared_data["longitude"] = coordinates["longitude"]
        
#         # Add to list if all values are present
#         if all(prepared_data.values()):
#             results.append(prepared_data)
    
#     return results
    
# # Test out function
# prepared_businesses = prepare_data(businesses)
# prepared_businesses[:5]

In [None]:
# full_dataset = []

# for offset in get_offsets(total):
    
#     url_params['offset'] = offset
    
#     response = requests.get(url, headers=headers, params=url_params)
    
#     time.sleep(1)
    
#     response_json = response.json()
    
#     businesses = response_json.get('businesses')
# #    
#     prepared_business = prepare_data(businesses)
    
#     full_dataset.extend(prepared_businesses)

# len(full_dataset)

In [None]:
# # Loop to retrieve all the results from the original request for all NYC businesses
# offset=0
# response = get_results('New York,NY',offset=offset)

# n_per_page = len(response['businesses'])
# total_result = response['total']

# results = response_json.get('businesses',{})

# while offset < total_result:
#     offset+=n_per_page
#     res = get_results('New York,NY',offset=offset)
#     results.extend(res['businesses'])
# results[:5]

In [None]:
# lat = []
# long = []

# for _,business in yelp_df.iterrows():
#     lat.append(business['coordinates']['latitude'])
#     long.append(business['coordinates']['longitude'])

# yelp_df['lat'] = lat
# yelp_df['long'] = long

# Data Understanding

For this project there will be two sources and types of data used:

* Historical health and hygiene inspections recorded by New York City Department of Health and Mental Hygiene (DOHMH) public health inspectors
* User generated Yelp business ratings and reviews

## Understanding NYC DOHMH Data

This dataset contains over 330,000 records, let's explore its contents.

In [None]:
dohmh_df['inspection_date'] =  pd.to_datetime(dohmh_inspections['inspection_date'])

In [None]:
dohmh_df['inspection_date'].min()

In [None]:
dohmh_df['inspection_date'].max()

Inspections in this dataset range from May 2008 up to present day.

In [None]:
# dohmh_df.set_index('inspection_date')

In [None]:
dohmh_df['boro'].value_counts()

In [None]:
dohmh_df['score'].value_counts()

In [None]:
dohmh_df['score'].isna().sum()

In [None]:
dohmh_df['grade'].value_counts()

In [None]:
dohmh_df['grade'].value_counts(normalize=True)

In [None]:
dohmh_df['grade'].isna().sum()

In [None]:
dohmh_df['critical_flag'].value_counts()

Critical violations are those most likely to contribute to foodborne illness

In [None]:
dohmh_df['critical_flag'].value_counts(normalize=True)

In [None]:
dohmh_df['inspection_type'].value_counts()

In [None]:
dohmh_inspections = dohmh_df[(dohmh_df['inspection_type'] ==
                              'Cycle Inspection / Initial Inspection') | (dohmh_df['inspection_type'] ==
                              'Cycle Inspection / Re-inspection')]

In [None]:
dohmh_inspections['critical_flag'].value_counts()

In [None]:
# Drop records with NA critical_flag

dohmh_ohe = dohmh_inspections[dohmh_inspections['critical_flag'] != 'Not Applicable']

In [None]:
dohmh_ohe = pd.get_dummies(dohmh_inspections,columns=['critical_flag'])

In [None]:
dohmh_ohe.head(2)

In [None]:
critical_flags = dohmh_ohe.groupby(['camis'])['critical_flag_Critical'].count()

In [None]:
critical_flags[critical_flagss]

In [None]:
critical_flags['critical_flag_Not Critical'].sum()

In [None]:
non_critical_flags = dohmh_ohe.groupby(['camis'])

In [None]:
dohmh_inspections.duplicated().sum()

In [None]:
dohmh_inspections.loc[dohmh_inspections.duplicated(keep='first'),:]

In [None]:
dohmh_inspections.drop_duplicates().shape

In [None]:
dohmh_inspections.duplicated(subset=['camis']).sum()

In [None]:
dohmh_inspections['camis'].nunique()

There are approximately 28,000 restaurants that have been inspected by the DOHMH. Let's check out how many have ever been flagged for a critical violation.

## Mapping with Follium