# Yelp API Restaurant Calls By GPS Coordinates

For each coordinate pair listed in grid_coordinates.csv, retrieve up to the first 1,000 restaurants found via the Yelp API and
extract the following data:

- name
- address & zip
- coordinates
- rating
- review count
- price level
- category
- yelp id

Write the data to Resources/restaurant_data_coords.csv

In [15]:
# Dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import json
import csv

# Import API key
from api_keys import api_key

### Perform API calls

- Build a dataframe from grid_coordinates.csv
- Build a function that, given a pair of coordinates, calls the Yelp API and appends the restaurant data to a CSV file
- Run each coordinate pair in the list through the function

In [16]:
# Load the grid coordinates into a dataframe, labelling the two columns explicitly
coords_df = pd.read_csv(
    filepath_or_buffer="Resources/grid_coordinates.csv",
    names=["Lat", "Lng"],
)
coords_df

Unnamed: 0,Lat,Lng
0,38.871663,-123.109852
1,38.871663,-123.017222
2,38.871663,-122.924592
3,38.871663,-122.831962


In [17]:
def get_restaurants(lat, lng, api_key, session=None,
                    output_path="Resources/restaurant_data_coords.csv"):
    """Fetch up to 1000 Yelp restaurant listings around a coordinate and append them to a CSV.

    The Yelp business-search endpoint is queried in pages of 50 results
    (offsets 0, 50, ..., 950) for the term "restaurants" within 8046 meters
    (5 miles) of the given point. Paging stops at the first empty page or the
    first non-200 response. One row per business is appended to
    ``output_path`` with the columns: query lat, query lng, name, business
    latitude, business longitude, address, zip code, rating, review count,
    price level, first category title, Yelp id. A missing price level or an
    empty category list is written as an empty string.

    Args:
        lat: Latitude of the search centre.
        lng: Longitude of the search centre.
        api_key: Yelp API key, sent as a Bearer token.
        session: Optional object with a ``get(url, params=..., headers=...)``
            method (for example a ``requests.Session``) used to issue the
            requests. Defaults to the module-level ``requests`` API.
        output_path: CSV file the rows are appended to.

    Returns:
        The "total" value reported by the last successful response, or 0 if
        no request succeeded.
    """
    url = "https://api.yelp.com/v3/businesses/search"
    headers = {"Authorization": f"Bearer {api_key}"}
    http = requests if session is None else session
    restaurant_data = []
    total = 0
    count = 0

    # Page through the results 50 at a time
    for offset in range(0, 1000, 50):

        # Set parameters and pass into API calls, radius 8046 meters = 5 miles
        params = {"term": "restaurants", "latitude": lat, "longitude": lng,
                  "radius": 8046, "limit": 50, "offset": offset}
        req = http.get(url, params=params, headers=headers)

        # Any failed request ends paging for this coordinate; rows gathered
        # so far are still written below.
        if req.status_code != 200:
            print(f"Request failed with status code {req.status_code}; "
                  "stopping search for this coordinate")
            break

        response = req.json()
        total = response.get("total", total)

        # Log history
        count += 1
        print(f"Now processing query set {count} of max 20")

        # An empty page means there are no further entries for this query
        businesses = response.get("businesses")
        if not businesses:
            break

        for biz in businesses:
            # Price and category are optional and may both be absent on the
            # same business, so each one is defaulted independently.
            categories = biz.get("categories") or []
            restaurant_data.append([
                lat,
                lng,
                biz["name"],
                biz["coordinates"]["latitude"],
                biz["coordinates"]["longitude"],
                biz["location"]["address1"],
                biz["location"]["zip_code"],
                biz["rating"],
                biz["review_count"],
                biz.get("price", ""),
                categories[0]["title"] if categories else "",
                biz["id"],
            ])

    # newline="" lets the csv module control line endings itself (otherwise
    # blank rows appear on Windows); the with-block closes the file.
    with open(output_path, "a", encoding="utf-8", newline="") as csv_file:
        csv.writer(csv_file).writerows(restaurant_data)

    # Total count of restaurants Yelp reports for these coordinates
    return total


In [21]:
# Running tally of processed coordinates and the restaurant total found for each
count = 0
totals_count = []

print("LOG HISTORY OF API CALLS:")
print("---------------------------")

# Pull the coordinate columns out of the dataframe as plain lists
lat_coords = list(coords_df["Lat"])
lng_coords = list(coords_df["Lng"])
n_coords = len(lat_coords)

for index, (lat, lng) in enumerate(zip(lat_coords, lng_coords)):

    # Query Yelp for this coordinate and record the total it reports
    totals_count.append(get_restaurants(lat, lng, api_key))

    # Print log history
    count = index + 1
    rem = n_coords - count
    print("-----------------------------------------")

    # Nothing left: announce completion and skip the trailing separator
    if rem == 0:
        print("Full list of cities processed!")
        continue

    if rem == 1:
        print("Now getting results for final city. Almost there!")
    else:
        print("Data retrieval for coordinate complete")
        print(f"Getting results for next coordinate.. there are {rem} coordinates left")
    print("-----------------------------------------")

# Attach the per-coordinate totals to the dataframe as a new column
coords_df["Total # of Restaurants"] = totals_count

LOG HISTORY OF API CALLS:
---------------------------
Now processing query set 1 of max 20
-----------------------------------------
Data retrieval for coordinate complete
Getting results for next coordinate.. there are 3 coordinates left
-----------------------------------------
Now processing query set 1 of max 20
Now processing query set 2 of max 20
-----------------------------------------
Data retrieval for coordinate complete
Getting results for next coordinate.. there are 2 coordinates left
-----------------------------------------
Now processing query set 1 of max 20
-----------------------------------------
Data retrieval for coordinate complete
Getting results for next coordinate.. there are 1 coordinates left
-----------------------------------------
Now processing query set 1 of max 20
-----------------------------------------
Full list of cities processed!


In [22]:
# Show coords_df with the added "Total # of Restaurants" column
coords_df.head()

Unnamed: 0,Lat,Lng,Total # of Restaurants
0,38.871663,-123.109852,0
1,38.871663,-123.017222,23
2,38.871663,-122.924592,0
3,38.871663,-122.831962,0


### Create Dataframe

- Read csv file
- Clean data

In [158]:
# Read the csv file appended to by get_restaurants. The path and encoding
# match what that function writes (Resources/ folder, UTF-8), so non-ASCII
# restaurant names are decoded correctly. The file has no header row, so the
# column names are supplied here.
restaurants_df = pd.read_csv(
    "Resources/restaurant_data_coords.csv",
    encoding="utf-8",
    names=["Latitude", "Longitude", "Name", "Lat", "Lng", "Address", "Zip",
           "Rating", "# of Reviews", "Price Level", "Category", "Yelp ID"],
)

# Replace NaN entries with blank string
restaurants_df = restaurants_df.fillna('')
restaurants_df.head()

Unnamed: 0,City,Name,Lat,Lng,Address,Zip,Rating,# of Reviews,Price Level,Category,Yelp ID
0,Alameda,Ohgane Korean BBQ,37.7575,-122.252,2211 South Shore Ctr,94501,4.5,44,,Barbeque,zTPJr9tZ9sU2gRxEQXlWLQ
1,Alameda,Angela's Kitchen,37.7675,-122.24,1640 Park St,94501,4.5,203,$$,Mediterranean,byiUp9IdXSRZk5v9ACfDPg
2,Alameda,Made Pho You,37.7621,-122.245,1245 Park St,94501,4.5,17,,Vietnamese,gs-RAfcWguIsQG8CsS9dGw
3,Alameda,Trabocco Kitchen & Cocktails,37.7573,-122.252,2213 S Shore Ctr,94501,4.0,1152,$$,Italian,17Zv2e4Mh5I-wbp_x7WrpQ
4,Alameda,Speisekammer,37.7661,-122.24,2424 Lincoln Ave,94501,4.0,1310,$$,German,hzcbj-gz-A8yTc4tp0G69g


In [159]:
### Clean data, how to treat missing address, zip, price level, food trucks/mobile, low review counts
### Groupby on df