### NOTES:
This currently iterates through only 4 zip codes. If we feed it the ~70 zip codes in Chicago, it will take a long time to run and will use a large share of the daily API limit of 5,000 calls. Running 70 zip codes results in approximately 1,400 API calls: the API returns results in groups of 50 (up to a total limit of 1,000 per search), so each zip code is queried 20 times (70 × 20 = 1,400). There is probably a way to optimize this process. Alternatively, once we agree on the code, we can generate a CSV of the results once and use it for all of our data cleaning, so this API doesn't need to be run repeatedly.

In [None]:
# Import dependencies
from pprint import pprint
import pandas as pd 
import requests
import numpy as np

from config import yelp_api_key

In [None]:
# Page offsets for the search: 20 pages of 50 results each (0, 50, ..., 950),
# which walks through results beyond the first 50, up to 1000 in total
offsets = 50 * np.arange(20)
offsets

In [None]:
# Chicago zip codes to search, kept as strings
chi_zips = [
    "60622",
    "60657",
    "60645",
    "60603",
]

In [None]:
# Accumulator for the total number of search results reported by the API
totals = list()

In [None]:
# One empty accumulator list per restaurant attribute. The generator creates a
# separate list object for every name, so appending to one never affects another.
(name, category, category_title, rating, price, review_count,
 address, latitude, longitude, zip_code, restaurant_id) = ([] for _ in range(11))

In [None]:
# Yelp Fusion API info here: https://www.yelp.com/developers/documentation/v3/business_search
base_url = "https://api.yelp.com/v3/businesses/search"

# Headers contain the API key; they are identical for every request, so build them once
headers = {'Authorization': 'Bearer {}'.format(yelp_api_key)}

# Number of businesses requested per call
page_size = 50


def _lookup(record, *path):
    """Return the value found by following `path` through `record`.

    Each element of `path` is a dict key or list index applied in turn.
    Returns np.nan when any step cannot be resolved (absent key, empty list,
    or a non-container value along the way), so that every output list still
    receives exactly one entry per business and the lists stay aligned.
    """
    value = record
    for step in path:
        try:
            value = value[step]
        except (KeyError, IndexError, TypeError):
            return np.nan
    return value


# Loop through Chicago zip codes list
for zips in chi_zips:

    location = f'Chicago, {zips}'

    # Loop through offsets to get each group of results
    for offset in offsets:
        params = {'term': 'restaurant',
                  'location': location,
                  'limit': page_size,
                  'offset': int(offset)}

        # Place request; the timeout keeps a stalled connection from hanging the notebook
        response = requests.get(base_url, headers=headers, params=params, timeout=30)

        # Don't try to parse an error response as search results:
        # report it and move on to the next zip code
        if not response.ok:
            print(f'Yelp request failed for {location} (offset {offset}): '
                  f'HTTP {response.status_code}')
            break

        # JSON-ify data
        data = response.json()
        businesses = data.get('businesses', [])

        # Append one value per attribute for every business on this page
        for business in businesses:
            name.append(_lookup(business, 'name'))
            rating.append(_lookup(business, 'rating'))
            price.append(_lookup(business, 'price'))
            review_count.append(_lookup(business, 'review_count'))
            address.append(_lookup(business, 'location', 'display_address'))
            latitude.append(_lookup(business, 'coordinates', 'latitude'))
            longitude.append(_lookup(business, 'coordinates', 'longitude'))
            zip_code.append(_lookup(business, 'location', 'zip_code'))
            restaurant_id.append(_lookup(business, 'id'))
            # Only the first listed category is kept
            category.append(_lookup(business, 'categories', 0, 'alias'))
            category_title.append(_lookup(business, 'categories', 0, 'title'))

        total = data.get('total')
        if total is not None:
            totals.append(total)

        # Stop paging once the results are exhausted (empty page, or this page
        # reached the reported total) instead of spending API calls on empty pages
        if not businesses or (total is not None and offset + page_size >= total):
            break
    

In [None]:
# Sanity check: pretty-print the raw JSON of the most recent API response held in `data`
pprint(data)

In [None]:
# `totals` received one entry per API request, so reduce it to its distinct values.
# NOTE: zip codes that happen to report the same total appear only once here.
totals = pd.Series(totals)
totals = totals.unique()
totals

In [None]:
# Build dataframe from the accumulator lists: one column per list, one row per restaurant
restaurants_df = pd.DataFrame(
    {
        'name': name,
        'category': category,
        'category_title': category_title,
        'rating': rating,
        'price': price,
        'review_count': review_count,
        'address': address,
        'latitude': latitude,
        'longitude': longitude,
        'zip code': zip_code,
        'restaurant_id': restaurant_id,
    }
)

In [None]:
# View dataframe (as the last expression in the cell, the notebook renders it)
restaurants_df

In [None]:
# Number of distinct restaurant IDs (a missing ID counts as one distinct value);
# a result smaller than the row count means some restaurants appear more than once
restaurants_df['restaurant_id'].nunique(dropna=False)

In [None]:
# Count the non-null entries in each column; a column with fewer entries than
# the others holds missing values for some restaurants
restaurants_df.count()

In [None]:
# Count how many restaurants (rows) fall under each zip code in the existing dataframe;
# the length of this result is the number of distinct zip codes present
restaurants_df['zip code'].value_counts()