# Efficient Yelp API Calls (Core)

Rodrigo Arguello-Serrano

## Adding Safeguards to our Data Extraction Workflow

### Imports

In [19]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

### Credentials and Accessing the API

In [20]:
# Load API Credentials
# Resolve the key file relative to the current user's home directory so the
# notebook is not tied to one machine's absolute path.
CREDENTIALS_PATH = os.path.expanduser('~/.secret/yelp_api.json')   # adjust if your key file lives elsewhere
with open(CREDENTIALS_PATH) as f:
    login = json.load(f)
# Instantiate YelpAPI Variable with the key stored under 'api-key'
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)

### Defining search

In [24]:
# Search parameters shared by the Yelp API calls in this notebook
LOCATION = 'Livermore,CA'   # sent as the `location` argument
TERM = 'Sushi'              # sent as the `term` argument

### Create a results-in-progress JSON file, but only if it doesn't exist.

In [25]:
# Path of the results-in-progress file (a folder component is allowed).
# The search location and term are part of the filename.
JSON_FILE = "Data/results_in_progress_Livermore_sushi.json"
JSON_FILE



'Data/results_in_progress_Livermore_sushi.json'

> Check if our JSON_FILE already exists. This will prevent us from accidentally overwriting an existing file.

> If it doesn't exist:

> - Create any folders needed for the file path.
> - Save an empty list as JSON_FILE.

### Figure out how many pages of results we will need

In [33]:
# No previous results have been loaded at this point, so start at the first result.
# `n_results` has to exist before it is used as the offset; without this line the
# cell raises NameError on a fresh kernel.
n_results = 0
# use our yelp_api variable's search_query method to perform our API call
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                                offset=n_results)
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [34]:
## Total number of matches, read from the 'total' key of the response
total_results = results['total']
total_results



155

In [35]:
## Number of businesses contained in this single response
results_per_page = len(results['businesses'])
results_per_page

20

In [36]:
# `math` is already imported in the Imports cell at the top, so it is not re-imported here.
# Use math.ceil to round up for the total number of pages of results.
# If the response held no businesses, dividing by zero would raise
# ZeroDivisionError; there is nothing to page through in that case.
n_pages = math.ceil((results['total']-n_results)/ results_per_page) if results_per_page else 0
n_pages

8

In [37]:
def create_json_file(JSON_FILE,  delete_if_exists=False):
    """Make sure JSON_FILE exists, creating it as an empty JSON list when needed.

    Parameters
    ----------
    JSON_FILE : str
        Path of the JSON file. It may include folders; missing ones are created.
    delete_if_exists : bool, default False
        When true and the file already exists, the file is removed and
        re-created holding an empty list. When false, an existing file is
        left untouched.

    Returns
    -------
    None
        Progress is reported with print(); the file on disk is the result.
    """
    if os.path.isfile(JSON_FILE):
        if not delete_if_exists:
            print(f"[i] {JSON_FILE} already exists.")
            return

        print(f"[!] {JSON_FILE} already exists. Deleting previous file...")
        os.remove(JSON_FILE)

    ## From here on the file is absent: it never existed or was just deleted
    print(f"[i] {JSON_FILE} not found. Saving empty list to new file.")

    ## Create the parent folder(s) only when the path actually includes one
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)

    ## Save empty list to start the json file
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)


In [38]:
## Create a new empty json file (delete the previous one if it exists)
create_json_file(JSON_FILE, delete_if_exists=True)
## Load previous results and use len of results for offset
with open(JSON_FILE,'r') as f:
    previous_results = json.load(f)
    
## set offset based on previous results
n_results = len(previous_results)
print(f'- {n_results} previous results found.')
# use our yelp_api variable's search_query method to perform our API call
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                               offset=n_results)
## How many results total?
total_results = results['total']
## How many did we get the details for?
results_per_page = len(results['businesses'])
# Use math.ceil to round up for the total number of pages of results.
# If the response held no businesses, dividing by zero would raise
# ZeroDivisionError, so report zero pages instead.
n_pages = math.ceil((total_results-n_results)/ results_per_page) if results_per_page else 0
n_pages


[!] Data/results_in_progress_Livermore_sushi.json already exists. Deleting previous file...
[i] Data/results_in_progress_Livermore_sushi.json not found. Saving empty list to new file.
- 0 previous results found.


8

- Now that we've deleted the file, we need to re-run our code to create it.

- This process is begging to be turned into a function so we can easily repeat it.

- While we are making it a function, why don't we go ahead and add the option to delete the JSON file if it already exists, just like we did above.

    - So let's make a create_json_file function that accepts the JSON_FILE filename as its first argument and a second argument called delete_if_exists, set to False by default.

    - This way, it will not automatically delete previous search results. We will have to explicitly say delete_if_exists = True to do so.

In [39]:
for i in tqdm_notebook( range(1,n_pages+1)):
    
    ## Re-read the results-in-progress file on every pass so the offset always
    ## reflects what has actually been saved to disk
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    ## number of results saved so far, used as the offset
    n_results = len(previous_results)
    
    ## This condition limits how many *results* are requested
    ## (offset + page size), not how many API calls are made.
    if (n_results + results_per_page) > 1000:
        print('Exceeded 1000 results. Stopping loop.')
        break
    
    ## use n_results as the OFFSET 
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM, 
                                    offset=n_results)
    
    ## An empty page means there is nothing more to collect. Without this check
    ## every remaining iteration would repeat the same request with the same offset.
    new_businesses = results['businesses']
    if not new_businesses:
        print('No more businesses returned. Stopping loop.')
        break
    
    ## append new results and save to file
    previous_results.extend(new_businesses)
    with open(JSON_FILE,'w') as f:
        json.dump(previous_results,f)
    
    ## short pause between consecutive requests
    time.sleep(.2)

  0%|          | 0/8 [00:00<?, ?it/s]

### After the loop has finished

> **Convert .json to dataframe** <br>
Load in the "results in progress" JSON file into a DataFrame:

In [40]:
# Read the accumulated results file into a DataFrame
final_df = pd.read_json(JSON_FILE)
# Preview the first and last rows
display(
    final_df.head(),
    final_df.tail(),
)

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,HVFJjL-NlcihpJ8SbDDMoA,wasabi-bistro-livermore,Wasabi Bistro,https://s3-media2.fl.yelpcdn.com/bphoto/ZfhvoF...,False,https://www.yelp.com/biz/wasabi-bistro-livermo...,1190,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.0,"{'latitude': 37.705833, 'longitude': -121.740718}","[pickup, delivery]",$$,"{'address1': '922 Larkspur Dr', 'address2': 'S...",19255792952,(925) 579-2952,4098.925263
1,L7Fx8RNlEe942jBi0b0sCA,tommy-katsu-livermore,Tommy Katsu,https://s3-media2.fl.yelpcdn.com/bphoto/rnLNDc...,False,https://www.yelp.com/biz/tommy-katsu-livermore...,228,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.5,"{'latitude': 37.6998473121529, 'longitude': -1...","[pickup, delivery]",$$,"{'address1': '2476 Las Positas Rd', 'address2'...",19255835289,(925) 583-5289,2331.119846
2,qy3odDKGWuzh0dMnqIXIxQ,roppongi-sushi-livermore,Roppongi Sushi,https://s3-media3.fl.yelpcdn.com/bphoto/TwUiTe...,False,https://www.yelp.com/biz/roppongi-sushi-liverm...,414,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.0,"{'latitude': 37.68185, 'longitude': -121.76906}","[pickup, delivery]",$$,"{'address1': '2206 1st St', 'address2': '', 'a...",19255835101,(925) 583-5101,446.148773
3,wse-Ef6mHk0MzsxslTYjww,amakara-dublin,Amakara,https://s3-media1.fl.yelpcdn.com/bphoto/ffDQGj...,False,https://www.yelp.com/biz/amakara-dublin?adjust...,3122,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.0,"{'latitude': 37.704023, 'longitude': -121.929133}","[pickup, delivery]",$$,"{'address1': '7568 Dublin Blvd', 'address2': '...",19258038485,(925) 803-8485,14066.122597
4,hrIOPvSc4s3jXfIhn4M1pg,sushi-zone-livermore,Sushi Zone,https://s3-media1.fl.yelpcdn.com/bphoto/FCkSw6...,False,https://www.yelp.com/biz/sushi-zone-livermore?...,290,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.0,"{'latitude': 37.6800349, 'longitude': -121.746...","[pickup, delivery]",$$,"{'address1': '4094 East Ave', 'address2': '', ...",19254556868,(925) 455-6868,2236.528457


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
150,LJv5A3AKCTRKSLuv43lprQ,safeway-san-ramon-5,Safeway,https://s3-media1.fl.yelpcdn.com/bphoto/E9t32Y...,False,https://www.yelp.com/biz/safeway-san-ramon-5?a...,211,"[{'alias': 'grocery', 'title': 'Grocery'}]",2.5,"{'latitude': 37.7731741, 'longitude': -121.976...",[],$$,"{'address1': '2505 San Ramon Valley Blvd', 'ad...",19258319515,(925) 831-9515,20798.853682
151,wQm1FDUkLlqHRj6eQbPsBg,safeway-dublin-5,Safeway,https://s3-media3.fl.yelpcdn.com/bphoto/6T9Sst...,False,https://www.yelp.com/biz/safeway-dublin-5?adju...,138,"[{'alias': 'grocery', 'title': 'Grocery'}]",2.5,"{'latitude': 37.7060094, 'longitude': -121.927...",[pickup],$$,"{'address1': '7499 Dublin Blvd', 'address2': '...",19255564034,(925) 556-4034,13961.593399
152,iR9FDFTEQpKEU9Wo7w_3tw,eddie-papas-american-hangout-pleasanton-3,Eddie Papas American Hangout,https://s3-media4.fl.yelpcdn.com/bphoto/pFbfQ4...,False,https://www.yelp.com/biz/eddie-papas-american-...,1212,"[{'alias': 'newamerican', 'title': 'American (...",3.5,"{'latitude': 37.6934542, 'longitude': -121.903...","[delivery, pickup]",$$,"{'address1': '4889 Hopyard Rd', 'address2': ''...",19254696266,(925) 469-6266,11592.292598
153,h8IQD-FW3kbFVzFgREXM3A,the-habit-burger-grill-san-ramon,The Habit Burger Grill,https://s3-media1.fl.yelpcdn.com/bphoto/wzO9Vr...,False,https://www.yelp.com/biz/the-habit-burger-gril...,397,"[{'alias': 'burgers', 'title': 'Burgers'}, {'a...",3.0,"{'latitude': 37.778764079586196, 'longitude': ...",[delivery],$,"{'address1': '3121 Crow Canyon Pl.', 'address2...",19253559672,(925) 355-9672,20582.444717
154,02ueACFl6FGXIfvQsxIFPA,the-habit-burger-grill-dublin,The Habit Burger Grill,https://s3-media1.fl.yelpcdn.com/bphoto/AoMLty...,False,https://www.yelp.com/biz/the-habit-burger-gril...,348,"[{'alias': 'burgers', 'title': 'Burgers'}, {'a...",3.5,"{'latitude': 37.708726, 'longitude': -121.930376}",[delivery],$,"{'address1': '7295 Amador Plz Rd', 'address2':...",19258759648,(925) 875-9648,14288.520357


### Check for duplicates

In [41]:
# Count the rows whose 'id' repeats the 'id' of another row
final_df.duplicated(subset=['id']).sum()

0

In [42]:
## Remove rows with a repeated id, then re-count to confirm none remain
final_df = final_df.drop_duplicates(subset=['id'])
final_df.duplicated(subset=['id']).sum()

0

### Save the final DataFrame to a .csv (or a .csv.gz if it's too big for the GitHub file size limit).

In [43]:
# Write the final results as a gzip-compressed csv, without the index column
final_df.to_csv(
    'Data/final_results_Livermore_sushi.csv.gz',
    compression='gzip',
    index=False,
)