### NOTES:
This currently only iterates through 4 zip codes. If we feed it the ~70 zip codes in Chicago, it will take a long time to run and will use a large share of the daily API limit of 5,000 calls. The API returns results in groups of 50 (up to a total of 1,000 per search), so each zip code is requested up to 20 times, once per group of 50; 70 zip codes would therefore require up to approximately 1,400 API calls (70 × 20). There is probably a way to optimize this process. Alternatively, once we agree on the code, we can generate a CSV once and use it for all of our data cleaning, so the API doesn't need to be called repeatedly.

In [25]:
# Import dependencies
from pprint import pprint
import pandas as pd 
import requests
import numpy as np

import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
from scipy.stats import linregress
from sklearn import datasets

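# NOTE: `yelp_api_key` must be assigned before the request cell below is run; the
# key is intentionally left out of the notebook. Load it from an environment
# variable or a git-ignored config file instead of hardcoding it here.
#yelp_api_key = 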

In [26]:
# Page offsets for the search requests: 0, 50, 100, ..., 950
# (20 pages of 50 results each, which reaches the 1000-result ceiling)
offsets = np.arange(20) * 50
offsets

array([  0,  50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600,
       650, 700, 750, 800, 850, 900, 950])

In [27]:
# Chicago zip codes to query, kept as strings (currently a small sample of four)
chi_zips = [
    "60622",
    "60657",
    "60645",
    "60603",
]

In [28]:
# Accumulator for the total number of search results reported per zip code
totals = list()

In [29]:
# One accumulator list per restaurant attribute. The generator creates a fresh
# list for every name, so the nine lists are independent of each other.
(name, rating, price, review_count, address,
 latitude, longitude, zip_code, restaurant_id) = ([] for _ in range(9))

In [30]:
# Yelp Fusion API info here: https://www.yelp.com/developers/documentation/v3/business_search
base_url = "https://api.yelp.com/v3/businesses/search"

# Number of results requested per call (matches the step used to build `offsets`)
page_size = 50

# Headers contain the API key; they are the same for every request, so build them once
headers = {'Authorization': 'Bearer {}'.format(yelp_api_key)}


def _field(business, *keys):
    """Return the (possibly nested) value stored under `keys` in one business record.

    Parameters
    ----------
    business : dict
        One element of the response's 'businesses' list.
    *keys : str
        Key path to follow, e.g. ('location', 'zip_code').

    Returns
    -------
    The value found, or np.nan when a key is absent or an intermediate value
    cannot be indexed (for example it is None). Returning a placeholder rather
    than raising keeps every column list at exactly one entry per business.
    """
    value = business
    for key in keys:
        try:
            value = value[key]
        except (KeyError, TypeError):
            return np.nan
    return value


# Loop through Chicago zip codes list
for zips in chi_zips:

    location = f'Chicago, {zips}'

    # Total number of results reported for this zip code (None until a response arrives)
    zip_total = None

    # Loop through offsets to get each group of 50 results
    for offset in offsets:
        params = {'term' : 'restaurant',
                  'location' : location,
                  'limit' : page_size,
                  'offset' : offset}

        # Place request; the timeout keeps a stalled connection from hanging the cell
        response = requests.get(base_url, headers=headers, params=params, timeout=30)

        # Stop with the HTTP error (bad key, rate limit, ...) instead of failing
        # later with an unrelated KeyError on the missing 'businesses' entry
        response.raise_for_status()

        # JSON-ify data
        data = response.json()
        businesses = data.get('businesses', [])

        # Append one value per business to every column list (np.nan when missing)
        for business in businesses:
            name.append(_field(business, 'name'))
            rating.append(_field(business, 'rating'))
            price.append(_field(business, 'price'))
            review_count.append(_field(business, 'review_count'))
            address.append(_field(business, 'location', 'display_address'))
            latitude.append(_field(business, 'coordinates', 'latitude'))
            longitude.append(_field(business, 'coordinates', 'longitude'))
            zip_code.append(_field(business, 'location', 'zip_code'))
            restaurant_id.append(_field(business, 'id'))

        zip_total = data.get('total', zip_total)

        # A short or empty page means the results for this zip code are exhausted;
        # requesting the remaining offsets would add no rows and only spend API calls
        if len(businesses) < page_size:
            break

    # Record the result count once per zip code
    if zip_total is not None:
        totals.append(zip_total)
    

In [31]:
# Reduce the collected result counts to their distinct values, in first-seen order.
# NOTE(review): two zip codes that report the same count collapse into one entry here.
# NOTE(review): this rebinds `totals` from a list to a NumPy array, so the
# `totals = []` cell must be re-run before the collection loop is run again.
totals = pd.unique(pd.Series(totals))
totals

array([494, 640, 245, 597])

In [32]:
# Build the dataframe from the collected lists, one column per attribute
column_labels = ['name', 'rating', 'price', 'review_count', 'address',
                 'latitude', 'longitude', 'zip code', 'restaurant_id']
column_values = [name, rating, price, review_count, address,
                 latitude, longitude, zip_code, restaurant_id]

# Labels and value lists are paired positionally, so column order follows column_labels
restaurants_df = pd.DataFrame(dict(zip(column_labels, column_values)))

In [33]:
# View dataframe (the notebook renders a truncated preview: leading and trailing rows)
restaurants_df

Unnamed: 0,name,rating,price,review_count,address,latitude,longitude,zip code,restaurant_id
0,The Perch,4.5,$$,267,"[1932 W Division, Chicago, IL 60622]",41.903480,-87.676221,60622,okaqMJEoHfHblpKz9Q-CMA
1,etta - Bucktown,4.5,$$,846,"[1840 W North Ave, Chicago, IL 60622]",41.910730,-87.674170,60622,W2QV6SILHer3qB_-CZ1z1A
2,Boeufhaus,4.5,$$$,377,"[1012 N Western Ave, Chicago, IL 60622]",41.899831,-87.687487,60622,UKs05XWhbP0riI8O0Kv36w
3,Forbidden Root,4.5,$$,516,"[1746 W Chicago Ave, Chicago, IL 60622]",41.896280,-87.671551,60622,iOMvXQYFl6_mtw0wX6ZUXw
4,Split-Rail,4.5,$$,241,"[2500 W Chicago Ave, Chicago, IL 60622]",41.895928,-87.689582,60622,8XNGM8f0QLSOqDEactfigA
...,...,...,...,...,...,...,...,...,...
1971,The Link@Sheraton Cafe,2.5,$,4,"[301 East North Water St, Sheraton Chicago Hot...",41.889111,-87.619438,60611,wVIhRjcZ8A8J_JwGoM4gIQ
1972,Au Bon Pain,3.0,$,19,"[350 N Wells St, Chicago, IL 60654]",41.888673,-87.634115,60654,fE-I4GYjVeH6E_RweMyk0Q
1973,Typhoon Cafe,3.0,$$,5,"[211 N Stetson Ave, Chicago, IL 60601]",41.886120,-87.621950,60601,l-BMWMnj56OxQ55DioQO-Q
1974,Habanero Baja Grill,3.0,$,87,"[222 Merchandise Mart Plz, Chicago, IL 60654]",41.888512,-87.635298,60654,FZOIiNJKHAe0ghP1yeJgNw


In [34]:
# Count distinct restaurant IDs; a result below the row count means some
# businesses were returned more than once. dropna=False counts a missing ID
# as its own value.
restaurants_df['restaurant_id'].nunique(dropna=False)

1962

In [35]:
# Non-null entries per column. Fields missing from a business record were stored
# as NaN during collection, so a lower count (e.g. for price) shows where data is absent.
restaurants_df.count()

name             1976
rating           1976
price            1508
review_count     1976
address          1976
latitude         1976
longitude        1976
zip code         1976
restaurant_id    1976
dtype: int64

In [36]:
# Inspect data to see how many zip codes we have in the existing dataframe.
# The zip code comes from each business's own location record, so the result
# can include zip codes beyond the ones listed in chi_zips (see the output).
restaurants_df['zip code'].value_counts()

60657      283
60622      248
60614      234
60647      149
60601      131
60613      126
60606      109
60659       99
60603       87
60654       70
60605       69
60602       59
60626       56
60604       45
60645       44
60642       41
60651       19
60611       18
60612       16
60660       15
60202       13
60712       11
60618        9
60607        6
60610        5
60076        4
             2
08830        1
L31 2HB      1
60617        1
60670        1
60686        1
60624        1
60661        1
60625        1
Name: zip code, dtype: int64

In [37]:
# Load the food inspection records from CSV and preview the first five rows
inspection = pd.read_csv(filepath_or_buffer='inspection_5years.csv')
inspection.head()

Unnamed: 0.1,Unnamed: 0,AKA Name,Facility Type,Risk,Address,Zip,Year,Inspection Type,Results,Violations,Latitude,Longitude
0,0,RED MANGO,Restaurant,Risk 2 (Medium),1950 W POLK ST,60612.0,2022,License,Pass,,41.871596,-87.675821
1,1649,WENDY'S,Restaurant,Risk 1 (High),4412 N PULASKI RD,60630.0,2022,Complaint Re-Inspection,Pass,,41.961366,-87.727987
2,1663,GIORDANO'S PIZZA,Restaurant,Risk 1 (High),1401 W 18TH ST,60608.0,2022,Canvass,Pass,"55. PHYSICAL FACILITIES INSTALLED, MAINTAINED ...",41.857765,-87.661385
3,1664,ALEX LA MICHOACANA,Restaurant,Risk 1 (High),5420 W FULLERTON AVE,60639.0,2022,Canvass Re-Inspection,Pass,,41.924141,-87.761921
4,1667,KYO MATCHA,Restaurant,Risk 2 (Medium),2167 S CHINA PL,60616.0,2022,License,Pass,,41.853556,-87.635198


In [38]:
# Number of inspection records per zip code, most frequent first
zipcode = inspection.loc[:, 'Zip'].value_counts()
zipcode

60611.0    82
60607.0    71
60622.0    67
60647.0    66
60614.0    63
60654.0    62
60638.0    55
60618.0    54
60639.0    51
60616.0    50
60628.0    49
60609.0    46
60640.0    46
60612.0    40
60657.0    37
60642.0    37
60620.0    37
60608.0    36
60610.0    35
60626.0    33
60632.0    33
60613.0    32
60617.0    32
60629.0    31
60625.0    30
60660.0    28
60601.0    28
60604.0    27
60661.0    23
60619.0    21
60659.0    21
60644.0    20
60637.0    20
60641.0    19
60630.0    18
60631.0    17
60605.0    16
60646.0    16
60649.0    16
60606.0    16
60652.0    15
60643.0    13
60615.0    12
60636.0    12
60634.0    10
60707.0    10
60666.0     9
60623.0     9
60651.0     9
60653.0     8
60645.0     7
60621.0     6
60602.0     6
60603.0     6
60827.0     5
60655.0     4
60624.0     4
60656.0     1
Name: Zip, dtype: int64

# Testing Out Merging the 2 DataFrames using lat/long

In [39]:
# Copy of the inspection data with the coordinate columns renamed to lower case,
# matching the 'latitude'/'longitude' column names of restaurants_df
Resturant_PF = inspection.rename({'Latitude': 'latitude', 'Longitude': 'longitude'}, axis='columns')

In [40]:
# Restrict the inspection data to one zip code to try out the merge
# ('Zip' holds floats in this frame, hence the float literal)
ziptest = Resturant_PF.loc[Resturant_PF["Zip"].eq(60622.0)]
ziptest

Unnamed: 0.1,Unnamed: 0,AKA Name,Facility Type,Risk,Address,Zip,Year,Inspection Type,Results,Violations,latitude,longitude
10,1678,TAKIE OUTIT,Restaurant,Risk 1 (High),2132 W CHICAGO AVE,60622.0,2022,License Re-Inspection,Pass,57. ALL FOOD EMPLOYEES HAVE FOOD HANDLER TRAIN...,41.895956,-87.680842
22,1704,SUBWAY,Restaurant,Risk 1 (High),1300 N ASHLAND AVE,60622.0,2022,Canvass,Pass,57. ALL FOOD EMPLOYEES HAVE FOOD HANDLER TRAIN...,41.905132,-87.667689
159,1941,SOULE,Restaurant,Risk 1 (High),1931 W CHICAGO AVE,60622.0,2022,Canvass,Pass,39. CONTAMINATION PREVENTED DURING FOOD PREPAR...,41.895821,-87.675890
187,1983,JUNEBUG CAFE,Restaurant,Risk 1 (High),851 N ASHLAND AVE,60622.0,2022,Canvass,Pass,40. PERSONAL CLEANLINESS - Comments: OBSERVED ...,41.897580,-87.667186
201,2006,KASAMA,Restaurant,Risk 1 (High),1001 N WINCHESTER AVE,60622.0,2022,Short Form Complaint,Pass,"38. INSECTS, RODENTS, & ANIMALS NOT PRESENT - ...",41.899664,-87.675787
...,...,...,...,...,...,...,...,...,...,...,...,...
1557,3997,IRON AGE CHICAGO,Restaurant,Risk 1 (High),1265 N MILWAUKEE AVE,60622.0,2022,Complaint Re-Inspection,Pass,"55. PHYSICAL FACILITIES INSTALLED, MAINTAINED ...",41.904857,-87.668590
1559,4002,Big Star,Restaurant,Risk 1 (High),1531 N DAMEN AVE,60622.0,2022,Canvass,Pass,16. FOOD-CONTACT SURFACES: CLEANED & SANITIZED...,41.909192,-87.677283
1570,4020,PAULIE GEE'S WICKER PARK,Restaurant,Risk 1 (High),1566 N DAMEN AVE,60622.0,2022,Canvass,Pass,47. FOOD & NON-FOOD CONTACT SURFACES CLEANABLE...,41.910180,-87.677592
1604,4074,SABROSO,Restaurant,Risk 1 (High),624 N ASHLAND AVE,60622.0,2022,Canvass Re-Inspection,Pass,"55. PHYSICAL FACILITIES INSTALLED, MAINTAINED ...",41.893250,-87.667356


In [41]:
# Yelp rows for the same zip code; here the zip code is compared as a string
rest_zip = restaurants_df.loc[restaurants_df["zip code"].eq("60622")]
rest_zip.head()

Unnamed: 0,name,rating,price,review_count,address,latitude,longitude,zip code,restaurant_id
0,The Perch,4.5,$$,267,"[1932 W Division, Chicago, IL 60622]",41.90348,-87.676221,60622,okaqMJEoHfHblpKz9Q-CMA
1,etta - Bucktown,4.5,$$,846,"[1840 W North Ave, Chicago, IL 60622]",41.91073,-87.67417,60622,W2QV6SILHer3qB_-CZ1z1A
2,Boeufhaus,4.5,$$$,377,"[1012 N Western Ave, Chicago, IL 60622]",41.899831,-87.687487,60622,UKs05XWhbP0riI8O0Kv36w
3,Forbidden Root,4.5,$$,516,"[1746 W Chicago Ave, Chicago, IL 60622]",41.89628,-87.671551,60622,iOMvXQYFl6_mtw0wX6ZUXw
4,Split-Rail,4.5,$$,241,"[2500 W Chicago Ave, Chicago, IL 60622]",41.895928,-87.689582,60622,8XNGM8f0QLSOqDEactfigA


In [42]:
# Trial merge: left-join the inspection rows for this zip code onto the Yelp rows,
# matching on exactly equal (latitude, longitude) pairs. how="left" keeps every
# Yelp row whether or not an inspection row matches.
# NOTE(review): in the rows displayed below every inspection column is empty, i.e.
# none of those rows found a match. Exact float equality between coordinates from
# two independent sources is unlikely to line up (compare the Subway entries at
# 1300 N Ashland Ave in the two frames). Matching on a normalized address/name, or
# on nearest coordinates within a tolerance, may be needed — TODO confirm approach.
inspection_Yelp = pd.merge(rest_zip, ziptest, how="left", on=["latitude", "longitude"])
inspection_Yelp

Unnamed: 0.1,name,rating,price,review_count,address,latitude,longitude,zip code,restaurant_id,Unnamed: 0,AKA Name,Facility Type,Risk,Address,Zip,Year,Inspection Type,Results,Violations
0,The Perch,4.5,$$,267,"[1932 W Division, Chicago, IL 60622]",41.903480,-87.676221,60622,okaqMJEoHfHblpKz9Q-CMA,,,,,,,,,,
1,etta - Bucktown,4.5,$$,846,"[1840 W North Ave, Chicago, IL 60622]",41.910730,-87.674170,60622,W2QV6SILHer3qB_-CZ1z1A,,,,,,,,,,
2,Boeufhaus,4.5,$$$,377,"[1012 N Western Ave, Chicago, IL 60622]",41.899831,-87.687487,60622,UKs05XWhbP0riI8O0Kv36w,,,,,,,,,,
3,Forbidden Root,4.5,$$,516,"[1746 W Chicago Ave, Chicago, IL 60622]",41.896280,-87.671551,60622,iOMvXQYFl6_mtw0wX6ZUXw,,,,,,,,,,
4,Split-Rail,4.5,$$,241,"[2500 W Chicago Ave, Chicago, IL 60622]",41.895928,-87.689582,60622,8XNGM8f0QLSOqDEactfigA,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
243,Subway,3.0,$,14,"[1300 N Ashland Ave, Ste P, Space 124, Chicago...",41.905505,-87.668887,60622,AFSmLmOaKqhzj9dQiB873g,,,,,,,,,,
244,Ramirez Fast Food,3.5,$,32,"[1521 W Grand Ave, Chicago, IL 60622]",41.890740,-87.666390,60622,N3Ek7CNZak52s7zwSu25eg,,,,,,,,,,
245,Jewel Sushi,3.5,,3,"[1341 N Paulina St, Chicago, IL 60622]",41.906560,-87.669390,60622,NiQu3bWVVd1ta4sMZwhm-w,,,,,,,,,,
246,Subway,3.5,$,4,"[1768 W Armitage, Shell Gas Station C-Store, C...",41.918357,-87.672553,60622,7p7rBQVFOyWkm48J9bvQ3w,,,,,,,,,,
