In [1]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [3]:
# Load API Credentials
# Build the credentials path from the current user's home directory so the
# notebook does not depend on one machine's absolute path.
credentials_path = os.path.join(os.path.expanduser('~'), '.secret', 'yelp_api.json')
with open(credentials_path) as f:
    login = json.load(f)
# Instantiate YelpAPI Variable (the key is read from the file, never hardcoded)
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)
yelp_api

<yelpapi.yelpapi.YelpAPI at 0x10574b5e0>

In [4]:
# Search parameters passed to every Yelp query in this notebook
TERM = 'Sushi'                  # what to search for
LOCATION = 'Grover Beach, CA'   # where to search

In [5]:
# Path of the JSON file that accumulates results while pages are being
# downloaded; it is re-read before each request to work out the next offset.
JSON_FILE = "Data/results_inprogress_GB_Sushi.json"
JSON_FILE

'Data/results_inprogress_GB_Sushi.json'

In [6]:
## Make sure the in-progress results file is in place before any querying
file_exists = os.path.isfile(JSON_FILE)

if file_exists:
    # Nothing to create - just report it
    print(f"[i] {JSON_FILE} already exists.")
else:
    # Create the parent folder first (skipped when JSON_FILE is a bare file name)
    folder = os.path.dirname(JSON_FILE)
    if folder != '':
        os.makedirs(folder, exist_ok=True)

    # Tell the user a fresh file is being started
    print(f'[i] {JSON_FILE} not found. Saving empty list to file.')

    # Start the file off as an empty JSON list
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)

[i] Data/results_inprogress_GB_Sushi.json not found. Saving empty list to file.


In [7]:
## Read back everything saved so far into `previous_results`
with open(JSON_FILE) as f:
    previous_results = json.load(f)

## The number of saved results is used as the offset for the next request
n_results = len(previous_results)
print(f'- {n_results} previous results found.')

- 0 previous results found.


In [8]:

# Request one page of matches, starting at the offset given by the saved results
results = yelp_api.search_query(
    term=TERM,
    location=LOCATION,
    offset=n_results,
)
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [9]:
## How many results total?
# 'total' is read from the response and used below to work out how many
# pages of results need to be requested.
total_results = results['total']
total_results

27

In [10]:
## How many did we get the details for?
# The number of businesses in this one response is treated as the page size
# when computing the number of pages.
results_per_page = len(results['businesses'])
results_per_page


20

In [11]:
# `math` and `time` are already imported in the imports cell at the top of the
# notebook, so they are not re-imported here.

# Use math.ceil to round up for the total number of pages of results.
# Guard against a search that returned no businesses, which would otherwise
# raise ZeroDivisionError.
if results_per_page > 0:
    n_pages = math.ceil((results['total']-n_results)/ results_per_page)
else:
    n_pages = 0
n_pages

2

In [13]:
# Append the newly fetched businesses to the saved list (in place), then
# write the combined list back to the in-progress file
previous_results += results['businesses']
with open(JSON_FILE, 'w') as f:
    json.dump(previous_results, f)

In [14]:
for i in tqdm_notebook( range(1,n_pages+1)):
    
    ## Read in results in progress file and check the length
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    ## save number of results to use as the offset
    n_results = len(previous_results)
    
    ## Do not request results beyond position 1000.
    # NOTE(review): 1000 is taken to be the API's cap on offset + page size -
    # confirm against the current Yelp documentation.
    if (n_results + results_per_page) > 1000:
        print('Exceeded 1000 results. Stopping loop.')
        break
    
    ## use n_results as the OFFSET 
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM, 
                                    offset=n_results)
    
    ## An empty page means everything has been collected: the offset would not
    ## advance, so further requests would only waste API calls.
    if not results['businesses']:
        break
    
    ## append new results and save to file
    previous_results.extend(results['businesses'])
    
    with open(JSON_FILE,'w') as f:
        json.dump(previous_results,f)
    
    # add a 200ms pause between requests
    time.sleep(.2)

  0%|          | 0/2 [00:00<?, ?it/s]

In [15]:
def create_json_file(JSON_FILE,  delete_if_exists=False):
    """Make sure JSON_FILE exists, starting it as an empty JSON list if needed.

    Args:
        JSON_FILE: Path of the JSON file to create. Any missing parent
            folders are created as well.
        delete_if_exists: When truthy, an existing file is removed and
            replaced by a fresh file containing an empty list. When falsy
            (the default), an existing file is left untouched.

    Returns:
        None. The function reports what it did via print().
    """
    if os.path.isfile(JSON_FILE):
        if not delete_if_exists:
            # Keep the existing file and its contents as they are
            print(f"[i] {JSON_FILE} already exists.")
            return

        print(f"[!] {JSON_FILE} already exists. Deleting previous file...")
        os.remove(JSON_FILE)
        # Fall through to the creation steps below (no recursive call needed)

    ## INFORM USER AND SAVE EMPTY LIST
    print(f"[i] {JSON_FILE} not found. Saving empty list to new file.")

    ## CREATE ANY NEEDED FOLDERS
    # dirname is '' when JSON_FILE is a bare file name, so nothing to create
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)

    ## Save empty list to start the json file
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)

In [16]:
## Create a new empty json file (delete the previous one if it exists)
create_json_file(JSON_FILE, delete_if_exists=True)
## Load previous results and use len of results for offset
with open(JSON_FILE,'r') as f:
    previous_results = json.load(f)
    
## set offset based on previous results
n_results = len(previous_results)
print(f'- {n_results} previous results found.')
# use our yelp_api variable's search_query method to perform our API call
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                               offset=n_results)
## How many results total?
total_results = results['total']
## How many did we get the details for?
results_per_page = len(results['businesses'])
# Use math.ceil to round up for the total number of pages of results.
# Guard against a search that returned no businesses, which would otherwise
# raise ZeroDivisionError.
if results_per_page > 0:
    n_pages = math.ceil((total_results - n_results) / results_per_page)
else:
    n_pages = 0
n_pages

[!] Data/results_inprogress_GB_Sushi.json already exists. Deleting previous file...
[i] Data/results_inprogress_GB_Sushi.json not found. Saving empty list to new file.
- 0 previous results found.


2

In [17]:
for i in tqdm_notebook( range(1,n_pages+1)):
    
    ## Read in results in progress file and check the length
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    ## save number of results to use as the offset
    n_results = len(previous_results)
    
    ## Do not request results beyond position 1000. The check counts results,
    ## not API calls, so the message says so.
    # NOTE(review): 1000 is taken to be the API's cap on offset + page size -
    # confirm against the current Yelp documentation.
    if (n_results + results_per_page) > 1000:
        print('Exceeded 1000 results. Stopping loop.')
        break
    
    ## use n_results as the OFFSET 
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM, 
                                    offset=n_results)
    
    ## An empty page means everything has been collected: the offset would not
    ## advance, so further requests would only waste API calls.
    if not results['businesses']:
        break
    
    ## append new results and save to file
    previous_results.extend(results['businesses'])
    
    with open(JSON_FILE,'w') as f:
        json.dump(previous_results,f)
    
    # add a 200ms pause between requests
    time.sleep(.2)

  0%|          | 0/2 [00:00<?, ?it/s]

In [18]:
# Read the saved results into a DataFrame and preview its first and last rows
final_df = pd.read_json(JSON_FILE)
display(final_df.head())
display(final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,MS7OyuKNSXUYP3OjXN4wQg,izakaya-raku-grover-beach,Izakaya Raku,https://s3-media4.fl.yelpcdn.com/bphoto/ucLwFz...,False,https://www.yelp.com/biz/izakaya-raku-grover-b...,457,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.0,"{'latitude': 35.1215891, 'longitude': -120.619...",[delivery],$$,"{'address1': '953 W Grand Ave', 'address2': ''...",18054749962,(805) 474-9962,277.293239
1,AuwhyKvo3YyYijodJ4I1cw,yamato-japanese-restaurant-grover-beach,Yamato Japanese Restaurant,https://s3-media2.fl.yelpcdn.com/bphoto/wS-7mh...,False,https://www.yelp.com/biz/yamato-japanese-resta...,281,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.0,"{'latitude': 35.1211137, 'longitude': -120.609...",[delivery],$$,"{'address1': '1741 W Grand Ave', 'address2': '...",18054813986,(805) 481-3986,1044.173635
2,adsv__LSrv3R3qroS1YdMg,umi-sushi-pacifica-grover-beach,Umi Sushi Pacifica,https://s3-media2.fl.yelpcdn.com/bphoto/PFjJpO...,False,https://www.yelp.com/biz/umi-sushi-pacifica-gr...,304,"[{'alias': 'sushi', 'title': 'Sushi Bars'}]",4.5,"{'latitude': 35.121263, 'longitude': -120.6149...",[],$$,"{'address1': '1319 W Grand Ave', 'address2': '...",18054897424,(805) 489-7424,623.063433
3,G_j4Fv1dwMHsTh--IE83ng,sushi-805-grover-beach-3,Sushi 805,https://s3-media1.fl.yelpcdn.com/bphoto/LW7BQS...,False,https://www.yelp.com/biz/sushi-805-grover-beac...,383,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",3.5,"{'latitude': 35.1214790344238, 'longitude': -1...",[pickup],$$,"{'address1': '460 W Grand Ave', 'address2': ''...",18054893839,(805) 489-3839,419.2434
4,EwS7GARoLyetjGEMN1VKFw,yanagi-sushi-and-grill-pismo-beach,Yanagi Sushi & Grill,https://s3-media4.fl.yelpcdn.com/bphoto/ORdAjc...,False,https://www.yelp.com/biz/yanagi-sushi-and-gril...,494,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",3.5,"{'latitude': 35.13703351637226, 'longitude': -...","[delivery, pickup]",$$,"{'address1': '555 James Way', 'address2': None...",18057733535,(805) 773-3535,1905.272296


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
22,MmIU0eYRmQarwj1VJiwrdA,the-spoon-trade-grover-beach,The Spoon Trade,https://s3-media2.fl.yelpcdn.com/bphoto/PfQ5vH...,False,https://www.yelp.com/biz/the-spoon-trade-grove...,861,"[{'alias': 'newamerican', 'title': 'American (...",4.5,"{'latitude': 35.122148717721295, 'longitude': ...",[delivery],$$,"{'address1': '295 W Grand Ave', 'address2': ''...",18059046773,(805) 904-6773,618.40719
23,Wnexm80_il0tuacoHyuzlg,new-earth-superfoods-grover-beach,New Earth Superfoods,https://s3-media2.fl.yelpcdn.com/bphoto/KCXx8l...,False,https://www.yelp.com/biz/new-earth-superfoods-...,14,"[{'alias': 'juicebars', 'title': 'Juice Bars &...",5.0,"{'latitude': 35.11998, 'longitude': -120.6088233}","[delivery, pickup]",,"{'address1': '191 Oak Park Blvd', 'address2': ...",18055922450,(805) 592-2450,1157.192127
24,6SuwBmAUS9v370ZitiE38Q,the-garden-grille-and-bar-pismo-beach-2,The Garden Grille and Bar,https://s3-media3.fl.yelpcdn.com/bphoto/R_jljB...,False,https://www.yelp.com/biz/the-garden-grille-and...,5,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",3.0,"{'latitude': 35.13684779341118, 'longitude': -...",[],,"{'address1': '601 James St', 'address2': '', '...",18057736020,(805) 773-6020,1886.753919
25,21gVk08-Twn1CeW5SFhd3Q,vons-grover-beach,Vons,https://s3-media3.fl.yelpcdn.com/bphoto/pJdUMc...,False,https://www.yelp.com/biz/vons-grover-beach?adj...,135,"[{'alias': 'grocery', 'title': 'Grocery'}]",3.0,"{'latitude': 35.1199029902995, 'longitude': -1...",[],$$,"{'address1': '1758 W Grand Ave', 'address2': '...",18054810877,(805) 481-0877,1018.411376
26,WbrSizq1WSByciaUtcV53g,smart-and-final-extra-arroyo-grande-5,Smart & Final Extra!,https://s3-media1.fl.yelpcdn.com/bphoto/rDDIUv...,False,https://www.yelp.com/biz/smart-and-final-extra...,59,"[{'alias': 'grocery', 'title': 'Grocery'}]",2.5,"{'latitude': 35.122127532959, 'longitude': -12...",[],$$,"{'address1': '1464 E Grand Ave', 'address2': '...",18055741599,(805) 574-1599,1458.523639


In [20]:
# Count rows whose business id repeats an earlier row's id
final_df['id'].duplicated().sum()

0

In [21]:
# Write the final results to a gzip-compressed CSV, leaving out the index column
final_df.to_csv(
    'Data/final_results_GB_Sushi.csv.gz',
    index=False,
    compression='gzip',
)