In [1]:
# Import Dependencies
import requests
import json
import pandas as pd
import time
from config import yelp_key

In [2]:
# API Base URL
base_url = "https://api.yelp.com/v3/businesses/search?"

In [3]:
# API Headers
headers = {
    "accept" : "application/json",
    "Authorization" : "Bearer " + yelp_key 
}

In [4]:
# Create List of Categories (manually located)
categories = ['beaches','hanggliding','horsebackriding',
             'hiking','hot_air_baloons',
              'paddleboarding','parasailing',
              'sailing','snorkeling','ziplining', 
              'spas','hotsprings','massage','bedbreakfast','hotels','resorts',
              'skiresorts','tours', 'transportation','restaurants','nightlife'
             ]

len(categories)

21

In [5]:
# Import Lat/Long Database
csv_path = "../Resources/CitiesWGeolocation.csv"
locations_df = pd.read_csv(csv_path)
locations_df.head()

Unnamed: 0.1,Unnamed: 0,City,State/Province,Country,Latitude,Longitude
0,0,Lexington,Kentucky,United States,38.046407,-84.497039
1,1,San Diego,California,United States,32.71742,-117.162773
2,2,Cook Islands​,,Cook Islands​,-19.996972,-157.785871
3,3,Park City,Utah,United States,40.646092,-111.497996
4,4,Newcastle Upon Tyne,England,United Kingdom,54.973847,-1.613157


In [6]:
# Gather Lists of Latitudes and Longitudes
latitudes_list = []
longitudes_list = []
for i in range(locations_df.shape[0]):
    latitudes_list.append(locations_df.loc[i].at["Latitude"])
    longitudes_list.append(locations_df.loc[i].at["Longitude"])
print(latitudes_list)
print(longitudes_list)

[38.0464066, 32.7174202, -19.99697155, 40.646092100000004, 54.97384739999999, -27.468968199999996, -28.0402165, 34.244058700000004, -36.718804999999996, 49.5041747, 42.64873625, 37.2395367, -32.728465, 17.223472100000002, 25.265347100000003, 50.67108245, 21.721746, 36.508976000000004, 32.0564572, -16.484598300000002, -33.928992, -19.1421421, -33.934444, -28.648333299999997, -24.7761086, -45.0321923, 33.77217945, 54.70235450000001, -6.166490799999999, -20.2759451, 45.437190799999996, 19.70318225, -4.6574977, -28.002373100000003, 44.4654236, 31.6258257, nan, 37.9374939, 49.8879177, 43.10656029999999, 30.2711224, nan, 15.2214956, 43.4832523, -33.953177600000004, 26.9154576, 46.603354, 13.1500331, 20.169626800000003, 51.53882410000001, 60.39430550000001, 24.578720999999998, nan, -34.61341495, 34.862942600000004, nan, 18.1850507, nan, 12.51756625, -35.14181285, -34.4175, 46.7985624, -33.8611665, -16.9206657, 44.958452799999996, -26.6544338, -34.427808299999995, 51.08668970000001, 44.279621,

In [7]:
# Gather List of URLs
url_list = []
lat_lon_list = []
cat_list = []
for i in range(len(latitudes_list)):
        for x in range(len(categories)):
            latitude = latitudes_list[i]
            longitude = longitudes_list[i]
            category = categories[x]
            # Limit radius to appx 15 miles from the coordinates
            complete_url = base_url + "latitude=" + str(latitude) + "&longitude=" + str(longitude) + "&radius=24140&categories=" + category + "&sort_by=best_match"
            url_list.append(complete_url)
            lat_lon_list.append([latitude,longitude])
            cat_list.append(category)
# Confirm list of API calls is less than 5,000 daily limit
print(len(url_list))
url_list

4977


['https://api.yelp.com/v3/businesses/search?latitude=38.0464066&longitude=-84.4970393&radius=24140&categories=beaches&sort_by=best_match',
 'https://api.yelp.com/v3/businesses/search?latitude=38.0464066&longitude=-84.4970393&radius=24140&categories=hanggliding&sort_by=best_match',
 'https://api.yelp.com/v3/businesses/search?latitude=38.0464066&longitude=-84.4970393&radius=24140&categories=horsebackriding&sort_by=best_match',
 'https://api.yelp.com/v3/businesses/search?latitude=38.0464066&longitude=-84.4970393&radius=24140&categories=hiking&sort_by=best_match',
 'https://api.yelp.com/v3/businesses/search?latitude=38.0464066&longitude=-84.4970393&radius=24140&categories=hot_air_baloons&sort_by=best_match',
 'https://api.yelp.com/v3/businesses/search?latitude=38.0464066&longitude=-84.4970393&radius=24140&categories=paddleboarding&sort_by=best_match',
 'https://api.yelp.com/v3/businesses/search?latitude=38.0464066&longitude=-84.4970393&radius=24140&categories=parasailing&sort_by=best_match

In [None]:
# Create API Calls
data_list=[]

In [21]:

for i in range(len(url_list)):
    response = requests.get(url_list[i],headers=headers)
    data = response.json()

    # Calculate Average Rating and Review Count
    rating_sum = 0
    review_sum = 0
    for x in range(3024,len(data["businesses"])):
        rating_sum += data["businesses"][x]["rating"]
        review_sum += data["businesses"][x]["review_count"]
    rating_avg = rating_sum / len(data)
    review_avg = review_sum / len(data)
    
    # Add the desired data to a dictionary
    data_dict = {
        "latitude" : lat_lon_list[i][0] ,
        "longitude": lat_lon_list[i][1],
        "category" : cat_list[i],
        "results_total" : len(data["businesses"]),
        "avg_rating" : rating_avg,
        "avg_review_count" : review_avg
    }
    # Append the dictionary to a list
    data_list.append(data_dict)
    time.sleep(1)

KeyError: 'businesses'

In [20]:
yelp_df = pd.DataFrame(data_list)   
yelp_df

Unnamed: 0,latitude,longitude,category,results_total,avg_rating,avg_review_count
0,38.046407,-84.497039,beaches,1,1.500000,0.666667
1,38.046407,-84.497039,hanggliding,0,0.000000,0.000000
2,38.046407,-84.497039,horsebackriding,8,3.000000,0.666667
3,38.046407,-84.497039,hiking,4,6.166667,18.666667
4,38.046407,-84.497039,hot_air_baloons,20,28.833333,1914.000000
...,...,...,...,...,...,...
3019,31.625826,-7.989161,skiresorts,0,0.000000,0.000000
3020,31.625826,-7.989161,tours,0,0.000000,0.000000
3021,31.625826,-7.989161,transportation,0,0.000000,0.000000
3022,31.625826,-7.989161,restaurants,0,0.000000,0.000000


In [None]:
yelp_df.to_csv("../Resources/yelpdata.csv")