### NOTES:
This currently iterates through only 4 zip codes. If we feed it the ~70 zip codes in Chicago, it will take a long time to run and will use a large share of the daily API limit of 5,000 calls. The API returns results in groups of 50 (up to a total limit of 1,000), so each zip code is requested 20 times; running 70 zip codes will therefore make approximately 1,400 API calls. There is probably a way to optimize this process. Alternatively, once we agree on the code, we can generate a CSV of the results once and use it for all of our data cleaning, so the API doesn't need to be called repeatedly.

In [2]:
# Import dependencies
from pathlib import Path
from pprint import pprint

import gmaps
import numpy as np
import pandas as pd
import requests

from config import g_key
from config import yelp_key

In [3]:
# Yelp returns results 50 at a time, up to 1000 in total, so build the
# 20 page offsets needed to walk through them: 0, 50, ..., 950
offsets = 50 * np.arange(1000 // 50)
offsets

array([  0,  50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600,
       650, 700, 750, 800, 850, 900, 950])

In [4]:
# Chicago zip codes to search, kept as strings
chi_zips = [
    "60622",
    "60657",
    "60645",
    "60603",
]

In [5]:
# Accumulator for the total number of search results reported by the API
totals = list()

In [6]:
# One independent empty list per restaurant field; each gets one entry per
# business returned by the API
(name, rating, price, review_count, address,
 latitude, longitude, zip_code, restaurant_id) = ([] for _ in range(9))

In [7]:
# Yelp Fusion API info here: https://www.yelp.com/developers/documentation/v3/business_search
base_url = "https://api.yelp.com/v3/businesses/search"

# Headers contain the API key; they are the same for every request, so build them once
headers = {'Authorization': 'Bearer {}'.format(yelp_key)}

# Loop through Chicago zip codes list
for zips in chi_zips:

    location = f'Chicago, {zips}'

    # Loop through offsets to get each group of 50 results
    for offset in offsets:
        params = {'term' : 'restaurant',
                  'location' : location,
                  'limit' : 50,
                  'offset' : offset}

        # Place request (with a timeout so a stalled connection cannot hang the notebook)
        response = requests.get(base_url, headers=headers, params=params, timeout=30)

        # Fail loudly on an API error (bad key, rate limit, ...) instead of
        # hitting an opaque KeyError on the missing 'businesses' field below
        response.raise_for_status()

        # JSON-ify data
        data = response.json()
        businesses = data.get('businesses', [])

        # Append one value per business to each list; a missing field becomes NaN
        # so that all lists stay the same length
        for business in businesses:
            # Nested objects may be absent or null; fall back to an empty dict
            business_location = business.get('location') or {}
            coordinates = business.get('coordinates') or {}

            name.append(business.get('name', np.nan))
            rating.append(business.get('rating', np.nan))
            price.append(business.get('price', np.nan))
            review_count.append(business.get('review_count', np.nan))
            address.append(business_location.get('display_address', np.nan))
            latitude.append(coordinates.get('latitude', np.nan))
            longitude.append(coordinates.get('longitude', np.nan))
            zip_code.append(business_location.get('zip_code', np.nan))
            restaurant_id.append(business.get('id', np.nan))

        totals.append(data['total'])

        # An empty page means this zip code has no more results; skip the
        # remaining offsets rather than spend API calls on empty responses
        if not businesses:
            break
    

In [8]:
# Collapse the per-request totals into the distinct values seen, in first-seen order.
# NOTE: two zip codes reporting the same total would show up as a single entry here.
totals = pd.unique(pd.Series(totals))
totals

array([494, 640, 245, 597], dtype=int64)

In [9]:
# Build dataframe from the collected lists, one column per restaurant field
restaurant_columns = {
    'name': name,
    'rating': rating,
    'price': price,
    'review_count': review_count,
    'address': address,
    'latitude': latitude,
    'longitude': longitude,
    'zip code': zip_code,
    'restaurant_id': restaurant_id,
}
restaurants_df = pd.DataFrame(restaurant_columns)

In [10]:
# View dataframe (the notebook renders a truncated preview of the first and last rows)
restaurants_df

Unnamed: 0,name,rating,price,review_count,address,latitude,longitude,zip code,restaurant_id
0,The Perch,4.5,$$,267,"[1932 W Division, Chicago, IL 60622]",41.903480,-87.676221,60622,okaqMJEoHfHblpKz9Q-CMA
1,etta - Bucktown,4.5,$$,846,"[1840 W North Ave, Chicago, IL 60622]",41.910730,-87.674170,60622,W2QV6SILHer3qB_-CZ1z1A
2,Amaru,5.0,$$$,242,"[1904 W North Ave, Chicago, IL 60622]",41.910710,-87.675360,60622,faOpq9ORL0FlsMrou2NYug
3,Boeufhaus,4.5,$$$,377,"[1012 N Western Ave, Chicago, IL 60622]",41.899831,-87.687487,60622,UKs05XWhbP0riI8O0Kv36w
4,Machine: Engineered Dining & Drink,4.0,$$,527,"[1846 W Division St, Chicago, IL 60622]",41.903460,-87.674110,60622,WUK_G_6ECozMtnJX3YVdIg
...,...,...,...,...,...,...,...,...,...
1971,Subway,2.0,$,18,"[630 S Clark St, Space 0110, Chicago, IL 60605]",41.873911,-87.630953,60605,9cssnMaTbjZm-D_yM6YdZA
1972,Typhoon Cafe,3.0,$$,5,"[211 N Stetson Ave, Chicago, IL 60601]",41.886120,-87.621950,60601,l-BMWMnj56OxQ55DioQO-Q
1973,Joe & The Juice,3.0,,2,"[233 S Wacker Dr, Chicago, IL 60606]",41.878700,-87.635980,60606,-pX1TbuTj3gOtYSXVaeyCg
1974,Habanero Baja Grill,3.0,$,87,"[222 Merchandise Mart Plz, Chicago, IL 60654]",41.888512,-87.635298,60654,FZOIiNJKHAe0ghP1yeJgNw


In [14]:
# Count distinct restaurant IDs; a value below the row count means some
# restaurants appear more than once. dropna=False counts NaN as a value,
# matching len(...unique())
restaurants_df['restaurant_id'].nunique(dropna=False)

1963

In [15]:
# Non-null count per column; a column with a lower count than the others has missing values
restaurants_df.count()

name             1977
rating           1977
price            1508
review_count     1977
address          1977
latitude         1977
longitude        1977
zip code         1977
restaurant_id    1977
dtype: int64

In [17]:
# Inspect data to see how many rows each zip code has in the existing dataframe.
# The results include zip codes beyond the ones listed in chi_zips.
restaurants_df['zip code'].value_counts()

60657      283
60622      249
60614      234
60647      149
60601      131
60613      126
60606      109
60659       99
60603       87
60654       70
60605       69
60602       59
60626       56
60604       45
60645       44
60642       41
60651       19
60611       18
60612       16
60660       15
60202       13
60712       11
60618        9
60607        6
60610        5
60076        4
             2
60686        1
60625        1
L31 2HB      1
60661        1
60670        1
60624        1
60617        1
08830        1
Name: zip code, dtype: int64

In [11]:
# Save the full dataset to CSV, then pull out a single zip code for a closer look
csv_path = Path("CSV_Output/Restaurant_Data.csv")
# to_csv does not create missing folders, so make sure the output folder exists
csv_path.parent.mkdir(parents=True, exist_ok=True)
restaurants_df.to_csv(csv_path)

# Zip codes are stored as strings, so compare against a string
restaurants_df_60657 = restaurants_df.loc[restaurants_df["zip code"] == "60657"]
restaurants_df_60657

Unnamed: 0,name,rating,price,review_count,address,latitude,longitude,zip code,restaurant_id
494,Figo Wine Bar,4.5,$$,215,"[3207 N Sheffield Ave, Chicago, IL 60657]",41.940150,-87.653860,60657,iyilEJb1NwUeZcd5JWXTKw
495,Farm Bar Lakeview,4.0,$$,355,"[1300 W Wellington Ave, Chicago, IL 60657]",41.936430,-87.661410,60657,sJOPkTGLi53eB46ykvDeRg
496,Cheesie's Pub and Grub,4.0,$$,1236,"[958 W Belmont Ave, Chicago, IL 60657]",41.940005,-87.653814,60657,-W8rtYk_mBsgoQHYVxAh2g
497,Wood,4.0,$$,595,"[3335 N Halsted St, Chicago, IL 60657]",41.942859,-87.649159,60657,tCBKjclvCuiuSkiNz1fOjw
498,Krung Thep Thai Cuisine,4.5,$$,298,"[3205 N Halsted St, Chicago, IL 60657]",41.940154,-87.649061,60657,1PMp9MqVvqmZ5n3kN_TO2w
...,...,...,...,...,...,...,...,...,...
1118,Subway,3.0,$,11,"[3167 N Lincoln Ave, Chicago, IL 60657]",41.939461,-87.668073,60657,FCKdIs-Na3ocJqH59XMJkw
1119,Bangkok Restaurant,3.0,,1,"[3542 N Halsted St, Chicago, IL 60657]",41.946710,-87.649780,60657,xZDkGEQ0Y1s0dwM9GbhVRQ
1124,Subway,3.0,$,14,"[2825 N Ashland Ave, Chicago, IL 60657]",41.933205,-87.667806,60657,hFfMA1pDFaiSDIlQTQI1LA
1303,Top One Thai,3.0,,1,"[2739 W Touhy Ave, Chicago, IL 60657]",42.011829,-87.698982,60657,vsoANcTNT2-khJ9a-zkqJQ


In [12]:
# Configure gmaps with the Google API key
gmaps.configure(api_key=g_key)

# Integer versions of the two metrics for this zip code.
# NOTE: these names rebind the `rating` / `review_count` lists used to build
# restaurants_df, so re-run the collection cells before rebuilding that dataframe.
rating = restaurants_df_60657["rating"].astype(int)
# Take review counts from the review_count column (previously read from "rating" by mistake)
review_count = restaurants_df_60657["review_count"].astype(int)

# Keep restaurants rated above 4 stars ...
new_rating_df = restaurants_df_60657.loc[restaurants_df_60657["rating"] > 4]
# ... that also have more than 100 reviews
new_ratings = new_rating_df.loc[new_rating_df["review_count"] > 100]
new_ratings

Unnamed: 0,name,rating,price,review_count,address,latitude,longitude,zip code,restaurant_id
494,Figo Wine Bar,4.5,$$,215,"[3207 N Sheffield Ave, Chicago, IL 60657]",41.94015,-87.65386,60657,iyilEJb1NwUeZcd5JWXTKw
498,Krung Thep Thai Cuisine,4.5,$$,298,"[3205 N Halsted St, Chicago, IL 60657]",41.940154,-87.649061,60657,1PMp9MqVvqmZ5n3kN_TO2w
500,Ella Elli,4.5,$$,333,"[1349 W Cornelia Ave, Chicago, IL 60657]",41.945115,-87.663023,60657,AqYFeg1GBwHe4ERL3q_LYg
504,Kubo Chicago,4.5,$$,189,"[1232 W Belmont Ave, Chicago, IL 60657]",41.94008,-87.66026,60657,majGt6w0H0i-9TtVwvu2Qg
505,Lowcountry Lakeview,4.5,$$,670,"[3343 N Clark St, Chicago, IL 60657]",41.942883,-87.652833,60657,ufxKkcPhjlF4x2C1Or8ckA
509,Bolivar & Lincoln Venezuelan Cafe,5.0,,137,"[3349 N Sheffield Ave, Chicago, IL 60657]",41.943218,-87.65401,60657,Yc_VAHPXcl_h2CeEr_XfkQ
513,Sal's Trattoria,4.5,$$,250,"[2834 N Southport Ave, Chicago, IL 60657]",41.9335,-87.66393,60657,Xc7KC3XLxYUhM9Xzh-eDvA
514,Bluebird,4.5,$$,327,"[2863 N Clark St, Chicago, IL 60657]",41.934417,-87.646039,60657,lbIIyJa7lG1W0PMYf2MLUQ
518,Crisp,4.5,$$,3351,"[2940 N Broadway Ave, Chicago, IL 60657]",41.936121,-87.644421,60657,WBU0yq9J8qiYQfI_fh2P1Q
521,Rice'N Bread,4.5,$$,367,"[3435 N Sheffield Ave, Chicago, IL 60657]",41.944599,-87.65411,60657,xiMMZSi06MXtvUoKd63iFQ


In [13]:
# Plot the filtered restaurants as markers on a Google map
location_pairs = new_ratings.loc[:, ["latitude", "longitude"]]

# Review counts for the same restaurants (not used by the marker layer below)
review_count = new_ratings.loc[:, "review_count"]

# Build the figure and attach one marker per (latitude, longitude) pair
fig = gmaps.figure()
marker = gmaps.marker_layer(location_pairs)
fig.add_layer(marker)

fig

Figure(layout=FigureLayout(height='420px'))