# Efficient Yelp API Calls (Core)

**Goal:** 

Use the Yelp API to search your favorite city for a cuisine type of your choice.

Extract all of the results from your search and compile them into one dataframe using a for loop.

In [3]:
# imports

import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:
# read the yelp api credentials from a json file kept outside the project
# (json and os are already imported in the imports cell at the top)
# the path is resolved relative to the current user's home directory so the
# notebook does not depend on one specific machine's user name
CREDENTIALS_PATH = os.path.expanduser('~/.secret/yelp_api.json')

# with open: yelp api credentials (save as variable)
with open(CREDENTIALS_PATH) as f:
    login = json.load(f)

# display only the key names so the secret values never appear in the output
login.keys()

dict_keys(['client-id', 'api-key'])

In [5]:
# import yelpapi class
# (duplicate of the import in the imports cell at the top of the notebook)
from yelpapi import YelpAPI

# create the client used for every search below; the key comes from the
# credentials loaded into `login`, and timeout_s is passed as 5.0
yelp_api = YelpAPI(login['api-key'], timeout_s = 5.0)
yelp_api

<yelpapi.yelpapi.YelpAPI at 0x188ebebb520>

In [6]:
# search settings: where to search and which cuisine to search for
LOCATION, TERM = 'Seattle, WA', 'Korean'

In [7]:
# specify JSON_FILE to save results to
# the name is built from LOCATION and TERM so that changing the search
# cannot silently reuse (or append to) another search's results file
# 'Seattle, WA' + 'Korean' -> 'Data/results_in_progress_Seattle_Korean.json'
CITY_TAG = LOCATION.split(',')[0].strip().replace(' ', '_')
TERM_TAG = TERM.strip().replace(' ', '_')
JSON_FILE = f'Data/results_in_progress_{CITY_TAG}_{TERM_TAG}.json'
JSON_FILE

'Data/results_in_progress_Seattle_Korean.json'

In [8]:
# define function to create and save an empty JSON file
# use parameter delete_if_exists = True if want to start over
# (if there is a previous file)
def create_json_file(JSON_FILE, delete_if_exists = False):
    """Make sure JSON_FILE exists, creating it with an empty list if needed.

    Parameters
    ----------
    JSON_FILE : str
        Path of the JSON file. Any folders in the path are created.
    delete_if_exists : bool, default False
        If True and the file already exists, the old file is deleted and a
        new empty one is written. If False, an existing file is left
        untouched so previously saved results can be reused.

    Returns
    -------
    None
    """
    # an existing file is either kept as-is or removed before re-creating
    if os.path.isfile(JSON_FILE):

        if not delete_if_exists:
            return

        print(f"[!] {JSON_FILE} already exists. Deleting previous file.")
        os.remove(JSON_FILE)

    # from here on the file does not exist: inform user and save empty list
    print(f"[!] {JSON_FILE} not found. Saving empty list to new file.")

    # create any needed folders specified in the JSON_FILE path
    folder = os.path.dirname(JSON_FILE)
    if len(folder) > 0:
        os.makedirs(folder, exist_ok = True)

    # save empty list to start the json file
    # (done whether or not the path contained a folder, so a bare file name
    # such as 'results.json' is created too)
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)

In [9]:
# create new empty json file (delete if already exists)
# NOTE: delete_if_exists = True removes any previously saved results, so
# re-running this cell restarts the download from offset 0
create_json_file(JSON_FILE, delete_if_exists = True)

[!] Data/results_in_progress_Seattle_Korean.json not found. Saving empty list to new file.


In [11]:
# load the results saved so far from JSON_FILE
# (an empty list right after the file has been created);
# their count is used as the api offset in the following cells
with open(JSON_FILE, 'r') as f:
    previous_results = json.load(f)
    
previous_results

[]

In [12]:
# set offset based on previous results:
# the number of results already saved is passed as `offset` to the next
# search call so the search continues after them
n_results = len(previous_results)

print(f"{n_results} previous results found.")

0 previous results found.


In [14]:
# use search_query to perform a first api call for LOCATION / TERM,
# starting at the offset given by the number of results already saved
# NOTE: this page is only used to read the totals; it is not written to
# JSON_FILE, so the loop below requests the same offset again
results = yelp_api.search_query(location = LOCATION,
                               term = TERM,
                               offset = n_results)

# how many results total?
total_results = results['total']
total_results

732

In [15]:
# how many results did we get in api call?
# the size of this first page is used as the page size when computing the
# number of calls still needed
results_per_page = len(results['businesses'])
results_per_page

20

In [16]:
# calculate number of calls needed for the results not yet saved
# guard against an empty first page (results_per_page == 0), which would
# otherwise raise ZeroDivisionError; in that case there is nothing to fetch
if results_per_page > 0:
    n_pages = math.ceil((results['total'] - n_results) / results_per_page)
else:
    n_pages = 0
n_pages

37

In [18]:
# loop to get all results by calling api multiple times (one page per call)
for i in tqdm_notebook(range(1, n_pages + 1)):

    # read in results in progress; the file is the source of truth, so the
    # loop can be interrupted and re-run without losing or repeating pages
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)

    # save number of results to use as offset
    n_results = len(previous_results)

    # stop before the next page would take the total past 1000 results
    # (this is a cap on the number of RESULTS, not on the number of calls;
    # presumably it mirrors the api's limit on retrievable results -
    # confirm against the Yelp documentation)
    if (n_results + results_per_page) > 1000:
        print("Reached the 1000-result limit. Stopping loop.")
        break

    # call api, use n_results as the offset
    results = yelp_api.search_query(location = LOCATION,
                                   term = TERM,
                                   offset = n_results)

    # an empty page means there is nothing more to fetch; stopping here
    # avoids spending further api calls that cannot add results
    new_businesses = results['businesses']
    if len(new_businesses) == 0:
        print("No more results returned. Stopping loop.")
        break

    # append new results to previous results
    previous_results.extend(new_businesses)

    # save the combined results back to the file
    with open(JSON_FILE, 'w') as f:
        json.dump(previous_results, f)

    # pause briefly between calls
    time.sleep(0.2)

  0%|          | 0/37 [00:00<?, ?it/s]

In [19]:
# convert results in progress json file to df and preview first/last rows
# NOTE(review): in the displayed output the `phone` column shows digits only
# (e.g. 12062575642) - pd.read_json may be inferring a numeric dtype here;
# confirm whether phone should be kept as a string
final_df = pd.read_json(JSON_FILE)
display(final_df.head(), final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,45iVzFsXpjUuiBw0Dhc_Qg,seoul-tofu-and-jjim-seattle,Seoul Tofu & Jjim,https://s3-media4.fl.yelpcdn.com/bphoto/2GgYXM...,False,https://www.yelp.com/biz/seoul-tofu-and-jjim-s...,132,"[{'alias': 'korean', 'title': 'Korean'}]",5.0,"{'latitude': 47.62224, 'longitude': -122.32064}","[pickup, delivery]",$$,"{'address1': '406 Broadway E', 'address2': Non...",12062575642,(206) 257-5642,1130.239024
1,FW0AJpVxlvOwYE-87v3Ocg,korean-bamboo-seattle-3,Korean Bamboo,https://s3-media1.fl.yelpcdn.com/bphoto/6kfG77...,False,https://www.yelp.com/biz/korean-bamboo-seattle...,430,"[{'alias': 'korean', 'title': 'Korean'}]",4.0,"{'latitude': 47.61479, 'longitude': -122.34435}","[pickup, delivery]",$$,"{'address1': '2236 3rd Ave', 'address2': '', '...",12064439898,(206) 443-9898,1084.643351
2,h1Q0Wkx5TUUZeVjJrXTwmQ,chan-seattle-seattle-2,Chan Seattle,https://s3-media2.fl.yelpcdn.com/bphoto/C5Iry3...,False,https://www.yelp.com/biz/chan-seattle-seattle-...,982,"[{'alias': 'asianfusion', 'title': 'Asian Fusi...",4.5,"{'latitude': 47.61312322818342, 'longitude': -...",[delivery],$$,"{'address1': '724 Pine St', 'address2': '', 'a...",14256582626,(425) 658-2626,287.367244
3,_lQGmkw63PESnnffUzl4fg,kimchi-house-seattle,Kimchi House,https://s3-media3.fl.yelpcdn.com/bphoto/oD037B...,False,https://www.yelp.com/biz/kimchi-house-seattle?...,300,"[{'alias': 'korean', 'title': 'Korean'}]",4.5,"{'latitude': 47.6713381, 'longitude': -122.387...","[pickup, delivery]",$$,"{'address1': '5809 24th Ave NW', 'address2': '...",12067845322,(206) 784-5322,7677.922703
4,nUkPOJ5p4E9U7e2DbOzzMw,meet-korean-bbq-seattle,Meet Korean BBQ,https://s3-media3.fl.yelpcdn.com/bphoto/UwvDMA...,False,https://www.yelp.com/biz/meet-korean-bbq-seatt...,314,"[{'alias': 'bbq', 'title': 'Barbeque'}, {'alia...",4.5,"{'latitude': 47.614352, 'longitude': -122.325214}","[pickup, delivery]",$$$$,"{'address1': '500 E Pike St', 'address2': None...",12066952621,(206) 695-2621,361.544031


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
727,SfTKUd6dDLlxIWEYPWQFrA,yummy-teriyaki-bothell,Yummy Teriyaki,https://s3-media1.fl.yelpcdn.com/bphoto/f5VtDY...,False,https://www.yelp.com/biz/yummy-teriyaki-bothel...,71,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",3.5,"{'latitude': 47.8106880187988, 'longitude': -1...","[pickup, delivery]",$,"{'address1': '20631 Bothell Everett Hwy', 'add...",14253980505,(425) 398-0505,23736.469227
728,2CDhqi3a3ED2TyYiv-_fnQ,apple-teriyaki-redmond,Apple Teriyaki,https://s3-media1.fl.yelpcdn.com/bphoto/UFXHoQ...,False,https://www.yelp.com/biz/apple-teriyaki-redmon...,95,"[{'alias': 'japanese', 'title': 'Japanese'}]",3.5,"{'latitude': 47.70279, 'longitude': -122.09359}",[delivery],$,"{'address1': '11523 Avondale Rd NE', 'address2...",14258834389,(425) 883-4389,20245.46776
729,3sj7NDcQv_CEKNQFXXF-BQ,teriyaki-town-shoreline,Teriyaki Town,https://s3-media4.fl.yelpcdn.com/bphoto/1VCGYx...,False,https://www.yelp.com/biz/teriyaki-town-shoreli...,88,"[{'alias': 'japanese', 'title': 'Japanese'}]",3.5,"{'latitude': 47.777331, 'longitude': -122.311783}",[delivery],$,"{'address1': '20320 Ballinger Way NE', 'addres...",12063619088,(206) 361-9088,18178.257249
730,AQSRgIPv7B2szCeQF2LxLw,apple-teriyaki-federal-way,Apple Teriyaki,https://s3-media4.fl.yelpcdn.com/bphoto/NvvfbE...,False,https://www.yelp.com/biz/apple-teriyaki-federa...,36,"[{'alias': 'japanese', 'title': 'Japanese'}]",3.0,"{'latitude': 47.3563199, 'longitude': -122.30925}",[delivery],$,"{'address1': '27400 Pacific Hwy S', 'address2'...",12538390110,(253) 839-0110,28725.671264
731,KqFc899W0u0PBZp0iopEaA,big-teriyaki-lynnwood,Big Teriyaki,https://s3-media3.fl.yelpcdn.com/bphoto/2AoRr4...,False,https://www.yelp.com/biz/big-teriyaki-lynnwood...,55,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",3.5,"{'latitude': 47.8645401682447, 'longitude': -1...",[delivery],$,"{'address1': '3625 148th St SW', 'address2': '...",14256780072,(425) 678-0072,28049.45847


In [27]:
# check for duplicates of ids:
# count the rows whose `id` repeats the id of an earlier row
final_df.duplicated(subset = 'id').sum()

1

In [28]:
# drop duplicates: keep the first row for each business `id`
# (final_df is reassigned, so earlier previews of final_df may show rows
# that are no longer present)
final_df = final_df.drop_duplicates(subset = 'id')

# check: the number of remaining duplicated ids should now be 0
final_df.duplicated(subset = 'id').sum()

0

In [29]:
# save final results to csv
# the file name is built from LOCATION and TERM so the output always matches
# the search that produced it
# 'Seattle, WA' + 'Korean' -> 'Data/final_results_Seattle_Korean.csv'
FINAL_CITY_TAG = LOCATION.split(',')[0].strip().replace(' ', '_')
FINAL_TERM_TAG = TERM.strip().replace(' ', '_')
FINAL_CSV = f'Data/final_results_{FINAL_CITY_TAG}_{FINAL_TERM_TAG}.csv'

final_df.to_csv(FINAL_CSV,
               index = False)

# alternate option to zip if needed:
# final_df.to_csv(FINAL_CSV + '.gz',
#                 compression = 'gzip',
#                 index = False)