# Yelp API Restaurant Calls By City and Top Ratings

- Similar to city_calls code but sorts by top rated restaurants 
- Effort to target and avoid skipping desired data (due to 1k query limit per search)
- See city_calls.ipynb for more detailed explanation on api call function

#   

In [19]:
# Dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import json
import csv

# Import API key
from api_keys import api_key

### Perform API calls

- Dataframe from cities.csv
- Build function that calls and writes restaurant data to new csv, given a city
- Run list of cities through function

In [22]:
# Create dataframe from csv file
cities_df = pd.read_csv("Resources/cities.csv", names=["City", "County", "Population", "Area (sq. mi)"])
cities_df.head()

Unnamed: 0,City,County,Population,Area (sq. mi)
Alameda,Alameda,73812,10.61,5.305
Albany,Alameda,18539,1.79,0.895
American Canyon,Napa,19454,4.84,2.42
Antioch,Contra Costa,102372,28.35,14.175
Atherton,San Mateo,6914,5.02,2.51


In [36]:
# Function returns up to 1000 restaurant listings for input city sorted by Yelp-adjusted best rating
def get_restaurants_rating(city, api_key):
    
    url = "https://api.yelp.com/v3/businesses/search"
    headers = {"Authorization": "Bearer %s" % api_key}
    restaurant_data = []
    count = 0
    
    # Increases search return limit
    for offset in range(0, 1000, 50):
        
        # Set parameters and pass into API calls
        params = {"term": "restaurants", "sort_by": "rating", "location":f"{city}, CA", "limit":50, 
                  "offset":offset}
        req = requests.get(url, params=params, headers=headers)
        
        # Breaks if error occurs with search
        if req.status_code == 400:
            break
        elif req.status_code == 200:
            
            # Convert to json
            response = req.json()
            
            # Log history
            count += 1
            print(f"Now processing query set {count} of max 20 for {city}")
            
            # Breaks if no further entries in query
            if response["businesses"] == []:
                break
            
            else:
                # Iterate through business results and extract data
                for biz in response["businesses"]:
                    
                    # Logic to replace missing price level data with NaN
                    if "price" not in biz:
                        restaurant_data.append([city, biz["name"], biz["coordinates"]["latitude"], 
                                                biz["coordinates"]["longitude"], biz["location"]["address1"], 
                                                biz["location"]["zip_code"], biz["rating"], 
                                                biz["review_count"], "", biz["categories"][0]["title"], 
                                                biz["id"]])

                    # Replace missing category data with empty string
                    elif biz["categories"] == []:
                        restaurant_data.append([city, biz["name"], biz["coordinates"]["latitude"], 
                                                biz["coordinates"]["longitude"], biz["location"]["address1"], 
                                                biz["location"]["zip_code"], biz["rating"], 
                                                biz["review_count"], biz["price"], "", biz["id"]])

                    else:
                        restaurant_data.append([city, biz["name"], biz["coordinates"]["latitude"], 
                                                biz["coordinates"]["longitude"], biz["location"]["address1"], 
                                                biz["location"]["zip_code"], biz["rating"], 
                                                biz["review_count"], biz["price"], 
                                                biz["categories"][0]["title"], biz["id"]])

    # Write to csv
    with open('Resources/restaurant_data_rating_2.csv', 'a', encoding="utf-8") as csvFile:
        writer = csv.writer(csvFile)
        writer.writerows(restaurant_data)
    csvFile.close()
    
    # Returns total count of restaurants in city
    # note there is a problem with this return response code
    return response["count"]


In [37]:
# Track # of cities processed
count = 0

# List to track total restaurants found in city
totals_count = []

print("LOG HISTORY OF API CALLS:")
print("---------------------------")

# Loop thru list of cities in cities_df
for city in cities_df["City"]:
    
    #  Call get_restaurants fn and append to total_count list
    totals_count.append(get_restaurants_rating(city, api_key))
    
    # Print log history
    count += 1
    rem = len(cities_df["City"]) - count
    print("-----------------------------------------")
    if city == cities_df.iloc[-1,0]:
        print("Full list of cities processed!")
    elif city == cities_df.iloc[-2,0]:
        print("Now getting results for final city. Almost there!")
        print("-----------------------------------------")
    else:
        print(f"Data retrieval for {city} complete")
        print(f"Getting results for next city.. there are {rem} cities left")
        print("-----------------------------------------")

cities_df["Total # of Restaurants"] = totals_count

LOG HISTORY OF API CALLS:
---------------------------


UnboundLocalError: local variable 'response' referenced before assignment

In [1]:
#import dependancies for working with the the csv files
import pandas as pd
import numpy as np
import csv

In [35]:
#checking the longitude & latitude for cities pulled to ensure that we are only getting cities in the bayarea counties


col_names = ["City", "Restaurant Name", "Latitude", "Longitude", "Address", "Zip Code", "Rating", 
             "Number of Reviews", "Price", "Food Category", "Restaurant ID"]
cities_rating_df_2 = pd.read_csv('Resources/restaurant_data_rating_2.csv', names=col_names) 

cities_rating_df_2.head()


Unnamed: 0,City,Restaurant Name,Latitude,Longitude,Address,Zip Code,Rating,Number of Reviews,Price,Food Category,Restaurant ID


In [34]:
max_lat = cities_rating_df_2["Latitude"].max()
min_lat = cities_rating_df_2["Latitude"].min()
max_lng = cities_rating_df_2["Longitude"].max()
min_lng = cities_rating_df_2["Longitude"].min()


print(f"Minimum Latitude is {min_lat} degrees.")
print(f"Maximum Latitude is {max_lat} degrees.")
print(f"Minimum Longitude is {min_lng} degrees.")
print(f"Maximum Longitude is {max_lng} degrees.")



NameError: name 'cities_rating_df_2' is not defined

In [17]:
cities_rating_df["Number of Reviews"].describe()

count    32952.000000
mean       281.691157
std        443.185252
min          0.000000
25%         43.000000
50%        142.000000
75%        346.000000
max      10533.000000
Name: Number of Reviews, dtype: float64

In [18]:
cities_rating_df["Rating"].describe()

count    32952.000000
mean         3.652176
std          0.787633
min          0.000000
25%          3.500000
50%          4.000000
75%          4.000000
max          5.000000
Name: Rating, dtype: float64