In [1]:
# Native
import os
import json

# Third party
import requests
import pandas as pd
from sqlalchemy import create_engine
from ratelimit import limits, sleep_and_retry

# Custom
from config import yelp_api_key

In [2]:
# Every Yelp Fusion request below sends the API key as a bearer token in the
# "Authorization" HTTP header, so the header dict is built once and reused.
headers = {"Authorization": " ".join(("Bearer", yelp_api_key))}

In [10]:
@sleep_and_retry
@limits(calls=5, period=1)
def _fetch_yelp_search_page(offset, page_size):
    """Fetch one page of the Yelp business search for restaurants in Los Angeles.

    The rate-limit decorators sit on this helper (one HTTP request per call) so
    that every request is throttled to 5 per second; ``sleep_and_retry`` makes
    the caller wait instead of raising when the limit is reached.

    Args:
        offset: Index of the first search result to return.
        page_size: Number of results requested for this page.

    Returns:
        The list stored under the "businesses" key of the JSON response.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a non-2xx status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the JSON payload has no "businesses" key.
    """
    response = requests.get(
        "https://api.yelp.com/v3/businesses/search",
        headers=headers,
        # Passing params lets requests URL-encode the values (e.g. the space in
        # "Los Angeles") and avoids hand-built query strings.
        params={
            "term": "restaurant",
            "location": "Los Angeles",
            "limit": page_size,
            "offset": offset,
        },
        timeout=30,
    )
    response.raise_for_status()
    return response.json()["businesses"]


def get_yelp_la_restaurants(page_size=50, max_results=1000):
    """Collect restaurant listings for Los Angeles from the Yelp Fusion search API.

    Results are requested page by page and flattened into a dict of equal-length
    lists, ready to be passed to ``pd.DataFrame``. A page that cannot be fetched,
    or a listing that is missing an expected field, is reported and skipped so
    the remaining data is still collected.

    Args:
        page_size: Results requested per API call. The offset advances by the
            same amount so pages do not overlap.
            NOTE(review): Yelp documents a maximum of 50 per request - confirm
            against the current API reference.
        max_results: Upper bound on offset + page size across all requests.
            NOTE(review): Yelp caps how deep a search can be paged - confirm the
            current cap for your plan.

    Returns:
        dict with keys "id", "name", "overall_rating", "review_count", "address",
        "city", "state" and "zip", each mapping to a list with one value per
        stored restaurant.

    Raises:
        ValueError: If ``page_size`` is smaller than 1.
    """
    if page_size < 1:
        raise ValueError("page_size must be at least 1")

    LA_restaurants_dict = {"id": [],
                  "name": [],
                  "overall_rating": [],
                  "review_count": [],
                  "address": [],
                  "city": [],
                  "state": [],
                  "zip": []}
    restaurants_processed_count = 0

    for offset in range(0, max_results, page_size):
        # Shrink the final page so offset + limit never exceeds max_results.
        limit = min(page_size, max_results - offset)
        try:
            restaurants = _fetch_yelp_search_page(offset, limit)
        except (requests.RequestException, ValueError, KeyError) as error:
            print(f"Invalid data. Skipping entry... (offset={offset}: {error!r})")
            continue

        for restaurant in restaurants:
            # Build the full record before touching the output lists: a missing
            # field then skips this one listing instead of leaving the columns
            # with different lengths.
            try:
                location = restaurant["location"]
                record = {
                    "id": restaurant["id"],
                    "name": restaurant["name"],
                    "overall_rating": restaurant["rating"],
                    "review_count": restaurant["review_count"],
                    "address": location["address1"],
                    "city": location["city"],
                    "state": location["state"],
                    "zip": location["zip_code"],
                }
            except (KeyError, TypeError) as error:
                print(f"Invalid data. Skipping entry... ({error!r})")
                continue

            for column, value in record.items():
                LA_restaurants_dict[column].append(value)
            restaurants_processed_count += 1
            print(f'{restaurants_processed_count}.) {record["name"]} is now being stored.')

    print("\n----------------------------LA restaurant processing complete.----------------------------\n")
    return LA_restaurants_dict

In [None]:
# Fetch the LA restaurant listings from Yelp Fusion; the function prints a
# progress line for each restaurant it stores.
LA_restaurants_dict = get_yelp_la_restaurants()

In [12]:
# Turn the column-oriented dict into a DataFrame and show its last 25 rows.
LA_restaurants_df = pd.DataFrame(data=LA_restaurants_dict)
LA_restaurants_df.tail(n=25)

Unnamed: 0,id,name,overall_rating,review_count,address,city,state,zip
975,ELVjiXuScL94Sw05HTaWqg,Sharky's Woodfired Mexican Grill,4.0,241,1716 N Cahuenga Blvd,Los Angeles,CA,90028
976,AEPPyGiMb02mVEPLk2n8nQ,Mima's Mediterranean & Homemade Food,4.5,41,628 Saint Vincent Ct,Los Angeles,CA,90014
977,bmZJURPplezNsJrudYGX0w,La Parrilla,3.0,299,1300 Wilshire Blvd,Los Angeles,CA,90017
978,w0cm7DFmBDQ_x2r83cvL6g,Apollonia's Pizzeria,4.0,499,5176 Wilshire Blvd,Los Angeles,CA,90036
979,y4T1r65LDjG_zaVSkxb8bg,Diamond Buffet & Grill,3.5,248,1901 W 7th St,Los Angeles,CA,90057
980,nMj6IEft0iSGtWoiJy8vjw,SongDo Seafood,5.0,21,2851W Olympic Blvd,Los Angeles,CA,90006
981,NfjUwveA5EA7Sptm3iS6Ow,The Izaka-Ya by Katsu-Ya West Hollywood,4.0,1061,8420 W 3rd St,Los Angeles,CA,90048
982,py2dW-WLfw8RfT4lkYX6pA,Hamasaku,4.0,540,11043 Santa Monica Blvd,Los Angeles,CA,90025
983,YpM5qRwt8uwC77WNZtGIfA,Muraya,4.0,117,125 N Larchmont Blvd,Los Angeles,CA,90004
984,6iJ_E5tMJII601mrzwwdrQ,Uncle John’s Cafe,4.0,779,834 S Grand Ave,Los Angeles,CA,90017


In [13]:
@sleep_and_retry
@limits(calls=5, period=1)
def _fetch_yelp_reviews(restaurant_id):
    """Fetch the review list Yelp Fusion returns for a single business.

    The rate-limit decorators sit on this helper (one HTTP request per call) so
    that every request is throttled to 5 per second; ``sleep_and_retry`` makes
    the caller wait instead of raising when the limit is reached.

    Args:
        restaurant_id: Yelp business id (string) to look up.

    Returns:
        The list stored under the "reviews" key of the JSON response.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a non-2xx status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the JSON payload has no "reviews" key.
        TypeError: If ``restaurant_id`` is not a string.
    """
    url = "https://api.yelp.com/v3/businesses/" + restaurant_id + "/reviews"
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    return response.json()["reviews"]


def get_yelp_reviews(LA_restaurants_dict):
    """Collect Yelp reviews for every restaurant listed in ``LA_restaurants_dict``.

    One API request is made per business id. Each returned review becomes one
    row in the result. A business whose reviews cannot be fetched or parsed is
    reported and skipped without adding any partial rows.

    Args:
        LA_restaurants_dict: dict with parallel lists under the "id" and "name"
            keys, as produced by ``get_yelp_la_restaurants``.

    Returns:
        dict with keys "restaurant", "restaurant_id", "rating", "text" and
        "time_created", each mapping to a list with one value per review.
    """
    LA_restaurant_reviews_dict = {"restaurant": [],
                          "restaurant_id": [],
                         "rating": [],
                         "text": [],
                         "time_created": []}
    reviews_processed_count = 0

    restaurant_pairs = zip(LA_restaurants_dict["id"], LA_restaurants_dict["name"])
    for restaurant_id, restaurant_name in restaurant_pairs:
        try:
            reviews = _fetch_yelp_reviews(restaurant_id)
            # Extract every field up front so a malformed review cannot leave
            # the output columns with different lengths.
            rows = [(review["rating"], review["text"], review["time_created"])
                    for review in reviews]
        except (requests.RequestException, ValueError, KeyError, TypeError) as error:
            print("Business ID is invalid. Skipping invalid business data... "
                  f"({restaurant_id}: {error!r})")
            continue

        for rating, text, time_created in rows:
            LA_restaurant_reviews_dict["restaurant"].append(restaurant_name)
            LA_restaurant_reviews_dict["restaurant_id"].append(restaurant_id)
            LA_restaurant_reviews_dict["rating"].append(rating)
            LA_restaurant_reviews_dict["text"].append(text)
            LA_restaurant_reviews_dict["time_created"].append(time_created)

        # Counts businesses processed, not individual reviews.
        reviews_processed_count += 1
        print(f'{reviews_processed_count}.) Top 3 reviews for {restaurant_name} completed.')
        print("---------------------------------------------------------------------")

    print("\n----------------------------Yelp Reviews API process completed.----------------------------\n")
    return LA_restaurant_reviews_dict

In [None]:
# Fetch reviews for every restaurant collected above (one API request per
# business id); the function prints a progress line per restaurant.
LA_restaurant_reviews_dict = get_yelp_reviews(LA_restaurants_dict)

In [15]:
# Turn the review dict into a DataFrame and show its last 25 rows.
LA_restaurant_reviews_df = pd.DataFrame(data=LA_restaurant_reviews_dict)
LA_restaurant_reviews_df.tail(n=25)

Unnamed: 0,restaurant,restaurant_id,rating,text,time_created
2954,Veggie House,n5D8b0zW2P8EJ58bMrUR9A,5,I've been a vegan for 6 months so I've ordered...,2020-07-27 11:46:32
2955,Habibi Shack Food Truck,CVEq7fSO2VPB10FCMzpIOQ,5,Great food and great service. These guys are s...,2020-07-01 20:14:11
2956,Habibi Shack Food Truck,CVEq7fSO2VPB10FCMzpIOQ,5,Habibi Truck was at my work location today and...,2020-01-31 16:08:20
2957,Habibi Shack Food Truck,CVEq7fSO2VPB10FCMzpIOQ,3,"I had the beef bowl today. On the menu, it ap...",2019-11-18 13:09:00
2958,Yuko Kitchen,4AX7DMPPq2t-Hf8dEpi3yA,5,I had a hard time trying to decide my review. ...,2020-09-13 10:42:48
2959,Yuko Kitchen,4AX7DMPPq2t-Hf8dEpi3yA,5,I HIGHLY recommend their bbq catfish bowl and ...,2020-08-02 01:29:52
2960,Yuko Kitchen,4AX7DMPPq2t-Hf8dEpi3yA,5,I'm conflicted about writing a review for Yuko...,2020-04-19 14:23:20
2961,Haifa Restaurant,blSFX8K7SGK5O--4xylJDQ,5,Great service and delicious food! Very accommo...,2020-06-30 11:03:51
2962,Haifa Restaurant,blSFX8K7SGK5O--4xylJDQ,5,Great shawarma is on point on flavor. This pla...,2020-09-02 21:43:35
2963,Haifa Restaurant,blSFX8K7SGK5O--4xylJDQ,4,I was in search of a Middle Eastern restaurant...,2020-07-28 11:08:42


In [16]:
# drop_duplicates returns a new frame (the first occurrence of each fully
# identical row is kept), so the original DataFrames are left untouched.
LA_restaurants_df_copy = LA_restaurants_df.drop_duplicates()
LA_restaurant_reviews_df_copy = LA_restaurant_reviews_df.drop_duplicates()

In [17]:
# Persist both de-duplicated tables as CSV files in the working directory.
# The column header row and the DataFrame index are both written.
LA_restaurants_df_copy.to_csv("yelp_LA_restaurants.csv")
LA_restaurant_reviews_df_copy.to_csv("yelp_LA_reviews.csv")