# Yelp API Restaurant Calls By GPS Coordinates 2.0

See calls_by_coord.ipynb for detailed info on code logic

Version 2 here uses a tighter grid of coordinates, and smaller search radius in effort to retrieve maximum possible values from API. Because Yelp limits the results per query to 1k, the wide grid & radius of the previous version resulted in any coordinate with >1k restaurants to ignore potential data required for analysis.

Write data to restaurant_data_coord.csv


In [1]:
# Dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import json
import csv

# Import API key
from api_keys import api_key

### Perform API calls

- Dataframe from new list, grid_coordinates2.csv
- Build function that calls and writes restaurant data to new csv, given pair of coordinates
- Run list of coordinates through function

In [2]:
coords_df = pd.read_csv("Resources/grid_coordinates2.csv", 
                        names=["Lat", "Lng"])
coords_df

Unnamed: 0,Lat,Lng
0,38.842665,-123.072800
1,38.842665,-123.017222
2,38.842665,-122.961644
3,38.842665,-122.906066
4,38.842665,-122.850488
5,38.842665,-122.794910
6,38.842665,-122.739332
7,38.842665,-122.683754
8,38.842665,-122.628176
9,38.842665,-122.572598


In [19]:
def get_restaurants(lat, lng, api_key):
    url = "https://api.yelp.com/v3/businesses/search"
    headers = {"Authorization": "Bearer %s" % api_key}
    restaurant_data = []
    yelp_data = []
    count = 0
    
    for offset in range(0, 1000, 50):

        # Set parameters and pass into API calls, radius 8046 meters = 5 miles
        params = {"term":"restaurants", "latitude":lat, "longitude":lng, "radius":3412, 
                  "limit":50, "offset":offset}
        req = requests.get(url, params=params, headers=headers).json()
        count += 1
        print(f'Now processing set {count} of max 20')
            
        if req["businesses"] == []:
            break
        else:
            for business in req['businesses']:
                
                business_dic = {}
                business_dic['Query ID'] = str(lat) + str(lng)
                business_dic['Query Lat'] = lat
                business_dic['Query Lng'] = lng
                
                if 'name' in business:

                    business_dic['Name'] = business['name']
                else:
                    business_dic['Name'] = "NAN"

                business_dic['Category'] = []

                for category in business['categories']:
                    if 'title' in category:
                        business_dic['Category'].append(category['title'])
                        
                if 'coordinates' in business: 
                    if 'latitude' in business['coordinates']:
                        business_dic['Biz Lat'] = business['coordinates']['latitude']
                    else:
                        business_dic['Biz Lat'] = "NAN"
                    if 'longitude' in business['coordinates']:
                        business_dic['Biz Lng'] = business['coordinates']['longitude']
                    else:
                        business_dic['Biz Lng'] = 'NAN'
                        
                if 'location' in business:
                    if 'city' in business['location']:
                        business_dic['Biz City'] = business['location']['city']
                    else:
                        business_dic['Biz City'] = "NAN"
                    if 'address1' in business['location']:
                        business_dic['Biz Address'] = business['location']['address1']
                    else:
                        business_dic['Biz Address'] = "NAN"
                    if 'zip_code' in business['location']:
                        business_dic['Biz Zip'] = business['location']['zip_code']
                    else:
                        business_dic['Biz Zip'] = "NAN"
                        
                if 'price' in business:
                    business_dic['Price'] = business['price']
                else:
                    business_dic['Price'] = "NAN"
                if 'id' in business:
                    business_dic['Yelp ID'] = business['id']
                else:
                    business_dic['Yelp ID'] = "NAN"
                if 'rating' in business:
                    business_dic['Rating'] = business['rating']
                else:
                    business_dic['Rating'] = "NAN"
                if 'review_count' in business:
                    business_dic['Review Count'] = business['review_count']
                else:
                    business_dic['Review Count'] = "NAN"
                
                yelp_data.append(business_dic)

    df = pd.DataFrame(yelp_data)

    df.to_csv("Resources/restaurant_data_coords3.csv", mode="a", header=False)
    
    return req["total"]

In [20]:
# Track # of coordinates processed
count = 0

# List to track total restaurants found in coordinate
totals_count = []

print("LOG HISTORY OF API CALLS:")
print("---------------------------")

# Loop thru list of coordinates
lat_coords = [lat for lat in coords_df["Lat"]]
lng_coords = [lng for lng in coords_df["Lng"]]

for index in range(len(lat_coords)):
                   
    #  Call get_restaurants fn and append to total_count list
    totals_count.append(get_restaurants(lat_coords[index], lng_coords[index] , api_key))
    
    # Print log history
    count += 1
    rem = len(lat_coords) - count
    print("-----------------------------------------")
    if count == len(lat_coords):
        print("Full list of coordinates processed!")
    elif count == len(lat_coords) - 1:
        print("Now getting results for final coordinates. Almost there!")
        print("-----------------------------------------")
    else:
        print(f"Data retrieval for coordinates {count} complete")
        print(f"Getting results for next coordinates.. there are {rem} left")
        print("-----------------------------------------")

coords_df["Total # of Restaurants"] = totals_count

LOG HISTORY OF API CALLS:
---------------------------
Now processing set 1 of max 20
Now processing set 2 of max 20
Now processing set 3 of max 20
Now processing set 4 of max 20
Now processing set 5 of max 20
Now processing set 6 of max 20
Now processing set 7 of max 20
Now processing set 8 of max 20
Now processing set 9 of max 20
Now processing set 10 of max 20
Now processing set 11 of max 20
Now processing set 12 of max 20
Now processing set 13 of max 20
Now processing set 14 of max 20
Now processing set 15 of max 20
Now processing set 16 of max 20
Now processing set 17 of max 20
Now processing set 18 of max 20
Now processing set 19 of max 20
Now processing set 20 of max 20
-----------------------------------------
Data retrieval for coordinates 1 complete
Getting results for next coordinates.. there are 1304 left
-----------------------------------------
Now processing set 1 of max 20
Now processing set 2 of max 20
Now processing set 3 of max 20
Now processing set 4 of max 20
Now pr

Now processing set 7 of max 20
Now processing set 8 of max 20
Now processing set 9 of max 20
Now processing set 10 of max 20
Now processing set 11 of max 20
Now processing set 12 of max 20
-----------------------------------------
Data retrieval for coordinates 12 complete
Getting results for next coordinates.. there are 1293 left
-----------------------------------------
Now processing set 1 of max 20


JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [7]:
# Show coords_df with updated total # of restaurants
coords_df.head()

Unnamed: 0,Lat,Lng,Total # of Restaurants
0,38.842665,-123.0728,0
1,38.842665,-123.017222,11
2,38.842665,-122.961644,0
3,38.842665,-122.906066,0
4,38.842665,-122.850488,0


### RESULT

    Only 2 coordinates have >1k restaurants, evident in sorted list below.
    Data should contain significantly more unique restaurants than previous version
    Should also contain significantly less duplicate information

In [8]:
coords_df = coords_df.sort_values("Total # of Restaurants", ascending=False)
coords_df

Unnamed: 0,Lat,Lng,Total # of Restaurants
708,37.798713,-122.405864,2700
737,37.755215,-122.405864,2600
736,37.755215,-122.461442,1000
711,37.798713,-122.239130,864
682,37.842211,-122.239130,835
707,37.798713,-122.461442,830
681,37.842211,-122.294708,783
1036,37.320235,-121.905662,680
1007,37.363733,-121.905662,638
710,37.798713,-122.294708,628


In [15]:
restaurants_df = pd.read_csv("Resources/restaurant_data_coords3.csv", encoding = "ISO-8859-1", 
                             names=["misc", "Biz Address", "Biz City", "Biz Lat", "Biz Lng", "Biz Zip", 
                                    "Category", "Name", "Price", "Query ID", "Query Lat", "Query Lng", 
                                    "Rating", "Review Count", "Yelp ID"])

In [17]:
restaurants_df = restaurants_df[["Query ID", "Query Lat", "Query Lng", "Name", "Category", 
         "Biz Address", "Biz City", "Biz Zip", "Biz Lat", "Biz Lng",
         "Rating", "Review Count", "Price", "Yelp ID", "misc"]]
del restaurants_df["misc"]
restaurants_df.head()

Unnamed: 0,Query ID,Query Lat,Query Lng,Name,Category,Biz Address,Biz City,Biz Zip,Biz Lat,Biz Lng,Rating,Review Count,Price,Yelp ID
0,38.842665000000004-123.017222,38.842665,-123.017222,Railroad Station Bar and Grill,"['American (New)', 'Southern', 'Pubs']",236 S Cloverdale Blvd,Cloverdale,95425,38.803078,-123.015378,4.0,223,$$,slg-wyyA57sZSMWmNshl-Q
1,38.842665000000004-123.017222,38.842665,-123.017222,Hamburger Ranch & Bar-B-Que,"['Barbeque', 'Burgers']",31195 N Redwood Hwy,Cloverdale,95425,38.817818,-123.023628,4.0,381,$$,0Dj4fW3J3DJzrI51PLadRA
2,38.842665000000004-123.017222,38.842665,-123.017222,Trading Post,"['Bakeries', 'American (New)', 'Bars']",102 S Cloverdale Blvd,Cloverdale,95425,38.805104,-123.016866,4.0,130,$$,6o26tulocbwnkZx89EVHkA
3,38.842665000000004-123.017222,38.842665,-123.017222,Cloverdale Ale Company,"['Pubs', 'Beer Bar', 'American (Traditional)']",131 E 1st St,Cloverdale,95425,38.805637,-123.015944,4.0,22,$$,wAbWk-do6kAUt3V9z8f9Wg
4,38.842665000000004-123.017222,38.842665,-123.017222,Piacere Italiano Steak and Seafood,"['Italian', 'Steakhouses', 'Seafood']",504 N Cloverdale Blvd,Cloverdale,95425,38.810692,-123.020622,3.5,123,$$,wcRhJxjChvs7Q4nBft_c8A


In [18]:
restaurants_df.to_csv("Resources/restaurant_data_coords3.csv")

# Difference of 3300, below, is attributed to missing 1700 values from one coordinate and 1600 from another

In [22]:
coords_df["Total # of Restaurants"].sum()

48682

In [23]:
len(restaurants_df)

45382