# Efficient Yelp API Calls

In [3]:
#imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
#additional imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [4]:
#Load API credentials
with open('/Users/justinfields/.secret/yelp_api.json') as f:
    login = json.load(f)
#instantiate YelpAPI variable
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)

In [5]:
#set our API call parameters
LOCATION = 'Atlanta, GA'
TERM = 'Tacos'

In [6]:
#specifying JSON_filename (can include a folder)
#include the search terms in the filename
JSON_FILE = 'Data/results_in_progress_atl_tacos.json'
JSON_FILE

'Data/results_in_progress_atl_tacos.json'

In [7]:
#Check if JSON_FILE exists
file_exists = os.path.isfile(JSON_FILE)
#If it does not exist:
if file_exists == False:

    #Create any need folders
    #Get the folder name only
    folder = os.path.dirname(JSON_FILE)
    #If JSON_FILE included a folder:
    if len(folder)>0:
        #Create the folder
        os.makedirs(folder, exist_ok=True)
        
    #INFORM USER AND SAVE EMPTY LIST
    print(f'[i] {JSON_FILE} not found. Saving empty list to file')
    
    #save an empty list
    with open(JSON_FILE, 'w') as f:
        json.dump([],f)
        
#If it exists, inform user
else:
    print(f"[i] {JSON_FILE} already exists.")

[i] Data/results_in_progress_atl_tacos.json not found. Saving empty list to file


In [8]:
#Load previous results adn use len of results for offset
with open(JSON_FILE, 'r') as f:
    previous_results = json.load(f)
    
#set offset based on previous results
n_results = len(previous_results)
print(f'- {n_results} previous results found.')

- 0 previous results found.


In [9]:
#Use our yelp_api variable's search_query method to perform our API call
results = yelp_api.search_query(location = LOCATION, term = TERM, offset=n_results)
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [10]:
#How many results total?
total_results = results['total']
total_results

1800

In [11]:
#How many did we get the details for?
results_per_page = len(results['businesses'])
results_per_page

20

In [12]:
#import additional packages for controlling our loop
import time, math
#use math.ceil to round up for the total number of pages of results
n_pages = math.ceil((results['total']-n_results)/ results_per_page)
n_pages

90

In [13]:
#Join new results with old list with extend and save to file
previous_results.extend(results['businesses'])
with open(JSON_FILE, 'w') as f:
    json.dump(previous_results, f)

In [14]:
from tqdm.notebook import tqdm_notebook
import time
for i in tqdm_notebook(range(n_pages)):
    #adds 200 ms pause
    time.sleep(.2)

  0%|          | 0/90 [00:00<?, ?it/s]

In [15]:
#delete file and confirm it no longer exists
os.remove(JSON_FILE)
os.path.isfile(JSON_FILE)

False

In [16]:
def create_json_file(JSON_FILE, delete_if_exists=False):
    #Check if JSON_FILE exists
    file_exists = os.path.isfile(JSON_FILE)
    
    #If it does exist:
    if file_exists==True:
        
        #Check if user wants to delete if exists
        if delete_if_exists==True:
            
            print(f'[!] {JSON_FILE} already exists. Deleting previous file...')
            
            #Delete file and confirm it no longer exists
            os.remove(JSON_FILE)
            #Recursive call to function after old file deleted
            create_json_file(JSON_FILE, delete_if_exists=False)
        
        else:
            print(f'[i] {JSON_FILE} already exists.')
            
    #If it does NOT exist
    else:
        
        #Inform user and save empty list
        print(f'[i] {JSON_FILE} not found. Saving empty list to new file.')
        
        #Create any needed folders
        #Get the folder name only
        folder = os.path.dirname(JSON_FILE)
        
        #If JSON_FILE included a folder:
        if len(folder)>0:
            #Create the folder
            os.makedirs(folder, exist_ok=True)
        #Save empty list to start the json file
        with open(JSON_FILE, 'w') as f:
            json.dump([], f)

In [17]:
#Create a new empty json file (exist the previous if it exists)
create_json_file(JSON_FILE, delete_if_exists=True)
#Load previous results and use len of results for offset
with open(JSON_FILE, 'r') as f:
    previous_results = json.load(f)
    
#Set offset based on previous results
n_results = len(previous_results)
print(f'- {n_results} previous results found.')

#use your yelp_api variable's search_query method to perform our API call
results = yelp_api.search_query(location=LOCATION, term=TERM, offset=n_results)

#How many results total?
total_results = results['total']
#How many did we get the details for?
results_per_page = len(results['businesses'])
#Use math.ceil to round up for the total number of pages of results
n_pages = math.ceil((results['total']-n_results/ results_per_page))
n_pages

[i] Data/results_in_progress_atl_tacos.json not found. Saving empty list to new file.
- 0 previous results found.


1800

In [18]:
for i in tqdm_notebook(range(1,n_pages+1)):
    
    #Read in results in progress file and check the length
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    #Save number of results to use as offset
    n_results = len(previous_results)
    
    if (n_results + results_per_page) > 1000:
        print('Exceeded 1000 api calls. Stopping loop.')
        break
        
    #use n_results as the OFFSET
    results = yelp_api.search_query(location=LOCATION, term=TERM, offset=n_results)
    
    #append new results and save to file
    previous_results.extend(results['businesses'])
    
    #display(previous_results)
    with open(JSON_FILE, 'w') as f:
        json.dump(previous_results, f)
        
    time.sleep(.2)

  0%|          | 0/1800 [00:00<?, ?it/s]

Exceeded 1000 api calls. Stopping loop.


In [19]:
#load final results
final_df = pd.read_json(JSON_FILE)
display(final_df.head(), final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,location,phone,display_phone,distance,price
0,_JHOGQrKDyPneSo8E9gRDw,oaxaca-no-title,Oaxaca,https://s3-media2.fl.yelpcdn.com/bphoto/0fUO9v...,False,https://www.yelp.com/biz/oaxaca-no-title?adjus...,51,"[{'alias': 'mexican', 'title': 'Mexican'}]",4.5,"{'latitude': 33.891761, 'longitude': -84.308201}",[],"{'address1': '5255 Peachtree Blvd', 'address2'...",17704504805,(770) 450-4805,17262.004672,
1,rMQPC0UGceG3gmG8iv1POA,casi-cielo-atlanta,Casi Cielo,https://s3-media2.fl.yelpcdn.com/bphoto/2xjCJW...,False,https://www.yelp.com/biz/casi-cielo-atlanta?ad...,506,"[{'alias': 'cocktailbars', 'title': 'Cocktail ...",4.5,"{'latitude': 33.92237, 'longitude': -84.378885}","[delivery, pickup]","{'address1': '6125 Roswell Rd', 'address2': ''...",14045499411,(404) 549-9411,17634.140917,
2,kGb2Rnh3Uwni5k0qQY-05Q,nuevo-laredo-cantina-atlanta,Nuevo Laredo Cantina,https://s3-media2.fl.yelpcdn.com/bphoto/gXlgEd...,False,https://www.yelp.com/biz/nuevo-laredo-cantina-...,1227,"[{'alias': 'mexican', 'title': 'Mexican'}]",4.0,"{'latitude': 33.8094340325727, 'longitude': -8...","[delivery, pickup]","{'address1': '1495 Chattahoochee Ave NW', 'add...",14043529009,(404) 352-9009,4898.788867,$$
3,V86AJK3xytBv2C0bwi9_Ow,la-pastorcita-atlanta,La Pastorcita,https://s3-media2.fl.yelpcdn.com/bphoto/QwqXsI...,False,https://www.yelp.com/biz/la-pastorcita-atlanta...,494,"[{'alias': 'mexican', 'title': 'Mexican'}]",4.0,"{'latitude': 33.84308, 'longitude': -84.32924}","[delivery, pickup]","{'address1': '3304 Buford Hwy NE', 'address2':...",16787058162,(678) 705-8162,11884.742391,$
4,E62caoz6iIpxvf3czm5O_g,taqueria-la-tecampana-austell-2,Taqueria La Tecampana,https://s3-media2.fl.yelpcdn.com/bphoto/fWCLKu...,False,https://www.yelp.com/biz/taqueria-la-tecampana...,63,"[{'alias': 'mexican', 'title': 'Mexican'}]",5.0,"{'latitude': 33.81793429987311, 'longitude': -...","[delivery, pickup]","{'address1': '1936 Veterans Memorial Hwy', 'ad...",16787428353,(678) 742-8353,18497.948149,$$


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,location,phone,display_phone,distance,price
995,CIKGC-_G0yrIkISvnsaFFg,chilis-atlanta-6,Chili's,https://s3-media4.fl.yelpcdn.com/bphoto/QKJBTC...,False,https://www.yelp.com/biz/chilis-atlanta-6?adju...,210,"[{'alias': 'tradamerican', 'title': 'American ...",2.0,"{'latitude': 33.654983920724604, 'longitude': ...","[pickup, delivery]","{'address1': '3660 Camp Creek Pkwy SW', 'addre...",14046290113,(404) 629-0113,14914.718084,$$
996,2Wk5KXzGtrOJ71uP_XSaQg,a-little-nauti-food-truck-atlanta,A Little Nauti Food Truck,https://s3-media2.fl.yelpcdn.com/bphoto/UjBWlo...,False,https://www.yelp.com/biz/a-little-nauti-food-t...,12,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a...",4.0,"{'latitude': 33.7552947998047, 'longitude': -8...",[],"{'address1': None, 'address2': None, 'address3...",16782945894,(678) 294-5894,3233.86555,
997,vGpXbr1a6d3JvPXHJSgRMw,palmers-peachtree-city,Palmer's,https://s3-media3.fl.yelpcdn.com/bphoto/5JJdX0...,False,https://www.yelp.com/biz/palmers-peachtree-cit...,313,"[{'alias': 'newamerican', 'title': 'American (...",4.0,"{'latitude': 33.4403686523438, 'longitude': -8...",[delivery],"{'address1': '991 N Peachtree Pkwy', 'address2...",17706295280,(770) 629-5280,39518.241077,$$
998,yMKQoXFFsDk1IfgXxYfafQ,jasons-deli-tucker,Jason's Deli,https://s3-media1.fl.yelpcdn.com/bphoto/WSxoIU...,False,https://www.yelp.com/biz/jasons-deli-tucker?ad...,148,"[{'alias': 'delis', 'title': 'Delis'}, {'alias...",3.0,"{'latitude': 33.8452486525497, 'longitude': -8...","[pickup, delivery]","{'address1': '4073 Lavista Rd', 'address2': ''...",17704934020,(770) 493-4020,17777.727252,$
999,blhndZdg-NT5MPba5BsS_g,drip-atlanta,Drip,https://s3-media4.fl.yelpcdn.com/bphoto/Yyq74M...,False,https://www.yelp.com/biz/drip-atlanta?adjust_c...,143,"[{'alias': 'coffee', 'title': 'Coffee & Tea'},...",4.0,"{'latitude': 33.7407029474063, 'longitude': -8...",[delivery],"{'address1': '928-B Garrett St SE', 'address2'...",14046277375,(404) 627-7375,6552.519072,$


In [21]:
#Check for duplicate ID's
final_df.duplicated(subset='id').sum()

0

In [23]:
#Save the final results to a compressed csv
final_df.to_csv('Data/final_results_atl_tacos.csv.gz', compression='gzip', index=False)