# Efficient Yelp API Calls
- Kevin Barnett
- 05/05/23

In [1]:
# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os, json, math, time
from yelpapi import YelpAPI
from tqdm import tqdm_notebook

In [2]:
# Import API keys
with open('/Users/hamma/.secret/yelp_api.json') as f: 
    login = json.load(f)
login.keys()

dict_keys(['client-id', 'api-key'])

In [3]:
# Instantiate Yelp API variable
from yelpapi import YelpAPI
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)
yelp_api

<yelpapi.yelpapi.YelpAPI at 0x1d1bec08f40>

In [4]:
# Quick Test Query
results = yelp_api.search_query(location='Baltimore, MD',
                                       term='Crab Cake')
print(type(results))
results.keys()


<class 'dict'>


dict_keys(['businesses', 'total', 'region'])

In [5]:
# Set variables for parameters
location = 'Denver, CO'
term = 'Taco'

In [6]:
# Create empty json file for results in progress
json_file = 'results_in_progress_Denver_taco'
json_file

'results_in_progress_Denver_taco'

In [7]:
# Check if file exists and create elif statement if it does not exist
file_exist = os.path.isfile(json_file)

# If file does not exist
if file_exist == False:
    
    # Create new folder
    folder = os.path.dirname(json_file)
    
    # If json_file included a folder
    if len(folder) > 0:
        os.makedirs(folder, exist_ok=False)
    
    # Inform user and save to empty list
    print(f'{json_file} does not exist. Saving to empty list')
    
    # Save to empty list
    with open(json_file, 'w') as f:
          json.dump([], f)

# If it exists inform the user
else:
    print('{json_file} already exists')

results_in_progress_Denver_taco does not exist. Saving to empty list


In [8]:
# Load previous results and use len of results for offset
with open(json_file, 'r') as f:
    previous_results = json.load(f)

# Set offset based on previous results
n_results = len(previous_results)
print(f'{n_results} previous results found')

0 previous results found


In [9]:
# Use search.query method to perform API call
results = yelp_api.search_query(location=location, term=term, offset=n_results)
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [10]:
# Save total results to variable
total_results = results['total']
total_results

2700

In [11]:
# Save results per page to variable
results_per_page = len(results['businesses'])
results_per_page

20

In [12]:
# Round up total number of results per pages
n_pages = math.ceil((results['total'] - n_results) / results_per_page)
n_pages

135

In [13]:
# Create function to create json files
def create_json_file(json_file, delete_if_exists=False):
    
    # Check if file exists
    file_exists = os.path.isfile(json_file)
    
    # If it does exist
    if file_exists == True:
        
        # Check if user wants to delete if exists
        if delete_if_exists==True:
            print(f'[!] {json_file} already exists. Deleting previous file')
        
            # Delete file and confirm it no longer exists
            os.remove(json_file)
            print(f'[i] {json_file} already exists')
            
            # Recursive call to function after old file delete
            create_json_file(json_file, delete_if_exists=False)
        else:
            print(f'[i] {json_file} already exists')
    
    # If it does not exist
    else:
        # Inform user and save empty list
        print(f'[i] {json_file} not found. Saving empty list to new file')
    
        # Create any needed folders
        folder = os.path.dirname(json_file)
    
        # If included a folder
        if len(folder) > 0:
          
            # Create folder
            os.makdirs(folder, exist_ok=True)
    
        # Save empty list to start the json file
        with open(json_file, 'w') as f:
            json.dump([], f)

In [14]:
# Create new json_file using function
create_json_file(json_file)

[i] results_in_progress_Denver_taco already exists


In [15]:
# Lod previous results and use len for offset
with open(json_file, 'r') as f:
    previous_results = json.load(f)
    
# Set offset based on previous results
n_results = len(previous_results)
print(f'{n_results} previous results found')
      
# Use search query method to perfom API call
results = yelp_api.search_query(location=location, term=term, offset=n_results)
      
# How many results total?
total_results = results['total']

# How many did we get details for?
results_per_page = len(results['businesses'])
      
# Round up total number of pages of results
n_pages = math.ceil((total_results - n_results) / results_per_page)
n_pages

0 previous results found


135

In [16]:
# For loop for API calls
for i in tqdm_notebook(range(1,n_pages+1)):
    
    # Read in results progress file and check length
    with open(json_file, 'r') as f:
        previous_results = json.load(f)
        
    # Save number of results to use as offset
    n_results = len(previous_results)
    
    if (n_results + results_per_page) > 1000:
        print('Exceeded 1000 API calls, stopping looping')
        break
    
    # Use n_results as the offset
    results = yelp_api.search_query(location=location, term=term, 
                                    offset= n_results)
    
    # Append new results and save file
    previous_results.extend(results['businesses'])
    
    with open(json_file, 'w') as f:
        json.dump(previous_results, f)
        
    # Add 200ms pause
    time.sleep(0.2)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for i in tqdm_notebook(range(1,n_pages+1)):


  0%|          | 0/135 [00:00<?, ?it/s]

Exceeded 1000 API calls, stopping looping


In [17]:
# Load final results
final_df = pd.read_json(json_file)
display(final_df.head(), final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,location,phone,display_phone,distance,price
0,5-Y6ioHcfRy-eCBZtU6rHw,venalonzos-centennial-2,Venalonzo's,https://s3-media3.fl.yelpcdn.com/bphoto/pZaizS...,False,https://www.yelp.com/biz/venalonzos-centennial...,159,"[{'alias': 'tacos', 'title': 'Tacos'}, {'alias...",5.0,"{'latitude': 39.5920055, 'longitude': -104.886...",[],"{'address1': '6830 S Yosemite St', 'address2':...",17209236926,(720) 923-6926,19196.011394,
1,MtlvL-SAU6_1gDiRY6MAgw,la-calle-taqueria-y-carnitas-denver,La Calle Taqueria Y Carnitas,https://s3-media4.fl.yelpcdn.com/bphoto/-djBaq...,False,https://www.yelp.com/biz/la-calle-taqueria-y-c...,363,"[{'alias': 'mexican', 'title': 'Mexican'}]",4.5,"{'latitude': 39.7114611, 'longitude': -105.010...","[pickup, delivery]","{'address1': '1565 W Alameda Ave', 'address2':...",17205836586,(720) 583-6586,14378.626237,$$
2,ShUt408WjgG1lDpyqQUmhQ,romo-s-street-tacos-aurora,Romo’s Street Tacos,https://s3-media1.fl.yelpcdn.com/bphoto/mHBVh8...,False,https://www.yelp.com/biz/romo-s-street-tacos-a...,85,"[{'alias': 'breakfast_brunch', 'title': 'Break...",4.0,"{'latitude': 39.617498000784, 'longitude': -10...","[pickup, delivery]","{'address1': '5350 S Parker Rd', 'address2': '...",17202428478,(720) 242-8478,16559.096105,$$
3,OIQh2YtOXjlKfWPx-t8iFA,la-loteria-taqueria-denver,La Loteria Taqueria,https://s3-media1.fl.yelpcdn.com/bphoto/CeiN-B...,False,https://www.yelp.com/biz/la-loteria-taqueria-d...,228,"[{'alias': 'mexican', 'title': 'Mexican'}]",4.5,"{'latitude': 39.7158, 'longitude': -104.9873}","[pickup, delivery]","{'address1': '42 S Broadway', 'address2': '', ...",17203899055,(720) 389-9055,12395.345944,$$
4,WYi75GrJj6letALupO1X-g,tacos-selene-aurora-2,Tacos Selene,https://s3-media3.fl.yelpcdn.com/bphoto/sK3Q2x...,False,https://www.yelp.com/biz/tacos-selene-aurora-2...,486,"[{'alias': 'mexican', 'title': 'Mexican'}]",4.5,"{'latitude': 39.72585, 'longitude': -104.80971}",[delivery],"{'address1': '15343 E 6th Ave', 'address2': 'S...",13033437879,(303) 343-7879,5775.359603,$


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,location,phone,display_phone,distance,price
995,WgT_f3tG0VvzX78WotZmFg,guadalajara-castle-rock,Guadalajara,https://s3-media1.fl.yelpcdn.com/bphoto/nY6Xd_...,False,https://www.yelp.com/biz/guadalajara-castle-ro...,255,"[{'alias': 'mexican', 'title': 'Mexican'}]",4.0,"{'latitude': 39.37961, 'longitude': -104.86395}","[delivery, pickup]","{'address1': '150 Wolfensberger Rd', 'address2...",13036606299,(303) 660-6299,42661.479267,$$
996,gcKUJJ1wo37J88OZ9hqFMQ,costa-vida-aurora-3,Costa Vida,https://s3-media2.fl.yelpcdn.com/bphoto/db-rJE...,False,https://www.yelp.com/biz/costa-vida-aurora-3?a...,107,"[{'alias': 'mexican', 'title': 'Mexican'}, {'a...",3.0,"{'latitude': 39.599023, 'longitude': -104.710306}","[delivery, pickup]","{'address1': '23870 E Smoky Hill Rd', 'address...",13032846085,(303) 284-6085,22145.865646,$
997,ZJs3MCm2QcIAsLApQYQ6Zw,el-parral-greenwood-village-3,El Parral,https://s3-media2.fl.yelpcdn.com/bphoto/2iDS8t...,False,https://www.yelp.com/biz/el-parral-greenwood-v...,96,"[{'alias': 'mexican', 'title': 'Mexican'}]",3.5,"{'latitude': 39.596306, 'longitude': -104.881202}",[delivery],"{'address1': '9261 E Arapahoe Rd', 'address2':...",13036499140,(303) 649-9140,18711.642884,$
998,kMiIUrWaSnqKpFdKPJHzMg,marianas-family-restaurant-wiggins,Marianas Family Restaurant,https://s3-media3.fl.yelpcdn.com/bphoto/2Zs-XJ...,False,https://www.yelp.com/biz/marianas-family-resta...,10,"[{'alias': 'mexican', 'title': 'Mexican'}, {'a...",4.5,"{'latitude': 40.2325747940952, 'longitude': -1...",[],"{'address1': '612 Central Ave', 'address2': ''...",19704837278,(970) 483-7278,84231.498881,
999,oTXzVNMV7RtupXjmIRegJw,picas-taqueria-boulder,Pica's Taqueria,https://s3-media3.fl.yelpcdn.com/bphoto/qr8Qtb...,False,https://www.yelp.com/biz/picas-taqueria-boulde...,198,"[{'alias': 'mexican', 'title': 'Mexican'}]",3.5,"{'latitude': 40.0138322613358, 'longitude': -1...","[delivery, pickup]","{'address1': '5360 Arapahoe Ave', 'address2': ...",13034442391,(303) 444-2391,42170.263826,$$


In [18]:
# Check for duplicates
final_df.duplicated(subset='id').sum()

0

In [19]:
# Drop duplicates
final_df = final_df.drop_duplicates(subset='id')
final_df.duplicated(subset='id').sum()

0

In [20]:
# Save final_df to csv file
final_df.to_csv('final_results_Denver_taco.csv.gz', compression ='gzip',
               index=False)