In [9]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook



In [10]:
# Load API credentials from a JSON file kept outside the project folder.
# The path is resolved against the current user's home directory instead of
# one machine's absolute path; adjust it if your file lives elsewhere.
with open(os.path.expanduser('~/.secret/yelp_api.json')) as f:
    login = json.load(f)
# Instantiate the YelpAPI client with the key stored under 'API Key'
yelp_api = YelpAPI(login['API Key'], timeout_s=5.0)



In [11]:
# Search parameters reused by every API call in this notebook;
# they are defined here, before the first call is made
TERM = 'pizza'
LOCATION = 'Seattle'
## Status message for the user (the text is fixed: nothing is interpolated)
print(f"[i] JSON_FILE not found. Saving empty list to file.")


[i] JSON_FILE not found. Saving empty list to file.


In [15]:
# Build the results filename (it may include a folder) from the search
# parameters, so a different LOCATION/TERM gets its own results file.
# Lower-casing and replacing spaces keeps the name filesystem-friendly.
search_slug = f"{LOCATION}_{TERM}".lower().replace(' ', '_')
JSON_FILE = f"Data/results_in_progress_{search_slug}.json"
JSON_FILE



'Data/results_in_progress_seattle_pizza.json'

In [16]:
## Make sure JSON_FILE is on disk, starting it as an empty list when missing
file_exists = os.path.isfile(JSON_FILE)
if file_exists:
    ## Nothing to create: just tell the user
    print(f"[i] {JSON_FILE} already exists.")
else:
    ## Tell the user a fresh file is being started
    print(f"[i] {JSON_FILE} not found. Saving empty list to new file.")
    ## Create the parent folder when the path contains one
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)
    ## Begin the file with an empty JSON list
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)



[i] Data/results_in_progress_seattle_pizza.json not found. Saving empty list to new file.


In [17]:
## Read back everything that has been saved so far
with open(JSON_FILE, 'r') as f:
    previous_results = json.load(f)

## The count of saved results is used as the offset for the next request
n_results = len(previous_results)
print(f'- {n_results} previous results found.')



- 0 previous results found.


In [18]:
# Run the search through yelp_api.search_query, starting at the offset given
# by the number of results already saved
results = yelp_api.search_query(
    location=LOCATION,
    term=TERM,
    offset=n_results,
)
results.keys()



dict_keys(['businesses', 'total', 'region'])

In [19]:
## How many results in total? Read from the 'total' field of the search response
total_results = results['total']
total_results



2100

In [20]:
## How many businesses did this single response include? This count is
## treated as the page size when working out how many pages to request.
results_per_page = len(results['businesses'])
results_per_page



20

In [21]:
# time and math are already imported in the notebook's import cell,
# so they are not re-imported here.
# Use math.ceil to round up the number of pages still to be fetched.
# When the first response held no businesses, report 0 pages instead of
# dividing by zero.
if results_per_page > 0:
    n_pages = math.ceil((results['total'] - n_results) / results_per_page)
else:
    n_pages = 0
n_pages



105

In [22]:
# Add the newly fetched businesses to the saved list, then write the
# combined list back to the results file
previous_results += results['businesses']
with open(JSON_FILE, 'w') as f:
    json.dump(previous_results, f)


In [None]:
# Install tqdm with the %pip line magic so it targets the kernel's environment
%pip install tqdm

In [None]:
# from tqdm.notebook import tqdm_notebook
# import time
# for i in tqdm_notebook(range(n_pages)):
#     # adds 200 ms pause
#     time.sleep(.2) 


In [23]:
# NOTE(review): this loop puts no upper bound on the offset; a later cell in
# this notebook stops once 1000 results are collected. Confirm whether the
# API enforces the same limit here.
for i in tqdm_notebook(range(1, n_pages + 1)):

    ## Read in the results-in-progress file and count what is already saved
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    ## The number of saved results is the offset of the next unseen result
    n_results = len(previous_results)

    ## Use n_results as the OFFSET. It was previously hard-coded to 5, so
    ## every iteration asked for the same page again.
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)

    ## Append the new results and save after every page, so the file always
    ## reflects the progress made so far
    previous_results.extend(results['businesses'])

    # display(previous_results)
    with open(JSON_FILE, 'w') as f:
        json.dump(previous_results, f)

    # add a 200ms pause between requests
    time.sleep(.2)



  0%|          | 0/105 [00:00<?, ?it/s]

In [24]:
## Delete the file if it is present, then confirm it no longer exists.
## The isfile guard lets this cell be re-run without raising FileNotFoundError.
if os.path.isfile(JSON_FILE):
    os.remove(JSON_FILE)
os.path.isfile(JSON_FILE)



False

In [25]:
def create_json_file(JSON_FILE,  delete_if_exists=False):
    """Ensure JSON_FILE exists, starting it as an empty JSON list when needed.

    Parameters
    ----------
    JSON_FILE : str
        Path of the JSON file; it may include a folder, which is created
        if it is missing.
    delete_if_exists : bool, default False
        If True and the file already exists, the old file is removed and a
        fresh file containing an empty list is written in its place.
        If False and the file already exists, it is left untouched.

    Returns
    -------
    None
        The function only prints a status message and touches the filesystem.
    """
    if os.path.isfile(JSON_FILE):
        if not delete_if_exists:
            ## Keep the existing file and its contents as they are
            print(f"[i] {JSON_FILE} already exists.")
            return

        print(f"[!] {JSON_FILE} already exists. Deleting previous file...")
        os.remove(JSON_FILE)
        ## Fall through: a replacement file is written below. Without it the
        ## caller's next read of JSON_FILE would fail with FileNotFoundError.
    else:
        ## INFORM USER AND SAVE EMPTY LIST
        print(f"[i] {JSON_FILE} not found. Saving empty list to new file.")

    ## CREATE ANY NEEDED FOLDERS (only when the path includes one)
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)

    ## Save an empty list to start the json file
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)



In [26]:
## Create a new empty json file (deleting the previous one if it exists)
create_json_file(JSON_FILE, delete_if_exists=True)
## Load previous results and use len of results for offset
with open(JSON_FILE, 'r') as f:
    previous_results = json.load(f)

## Set offset based on previous results
n_results = len(previous_results)
print(f'- {n_results} previous results found.')
# Use our yelp_api variable's search_query method to perform our API call
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                                offset=n_results)
## How many results total?
total_results = results['total']
## How many did we get the details for?
results_per_page = len(results['businesses'])
# Use math.ceil to round up for the total number of pages of results.
# When the response held no businesses, report 0 pages instead of dividing
# by zero.
if results_per_page > 0:
    n_pages = math.ceil((total_results - n_results) / results_per_page)
else:
    n_pages = 0
n_pages



[i] Data/results_in_progress_seattle_pizza.json not found. Saving empty list to new file.
- 0 previous results found.


105

In [27]:
for page_idx in tqdm_notebook(range(1, n_pages + 1)):

    ## Reload the results-in-progress file to see how much is saved already
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    ## The saved count doubles as the offset for the next request
    n_results = len(previous_results)

    ## Stop before requesting a page that would take the total past 1000
    if 1000 < (n_results + results_per_page):
        print('Exceeded 1000 api calls. Stopping loop.')
        break

    ## Ask for the next page, starting right after the saved results
    results = yelp_api.search_query(
        location=LOCATION,
        term=TERM,
        offset=n_results,
    )

    ## Add the new businesses and write the full list back to the file
    previous_results += results['businesses']

    # display(previous_results)
    with open(JSON_FILE, 'w') as f:
        json.dump(previous_results, f)

    ## Short pause before the next request
    time.sleep(.2)



  0%|          | 0/105 [00:00<?, ?it/s]

Exceeded 1000 api calls. Stopping loop.


In [28]:
# Load the saved results file into a DataFrame and preview its first and
# last rows
final_df = pd.read_json(JSON_FILE)
display(final_df.head(), final_df.tail())


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,1rkgyfwzXG7qSdVZdfRt7g,roccos-seattle,Rocco's,https://s3-media3.fl.yelpcdn.com/bphoto/AWBXTE...,False,https://www.yelp.com/biz/roccos-seattle?adjust...,1717,"[{'alias': 'bars', 'title': 'Bars'}, {'alias':...",4.0,"{'latitude': 47.614470434736106, 'longitude': ...","[pickup, delivery]",$$,"{'address1': '2312 2nd Ave', 'address2': '', '...",12063974210,(206) 397-4210,1451.852896
1,-FOAQv22SXtSBs7nptI3UA,serious-pie-downtown-seattle-2,Serious Pie Downtown,https://s3-media2.fl.yelpcdn.com/bphoto/dy0pJ5...,False,https://www.yelp.com/biz/serious-pie-downtown-...,4432,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.0,"{'latitude': 47.61304626911818, 'longitude': -...","[pickup, delivery]",$$,"{'address1': '2001 4th Ave', 'address2': None,...",12068387388,(206) 838-7388,1422.856174
2,IrohtoYjnAR_vc6w6CRCxA,moto-seattle,Moto,https://s3-media2.fl.yelpcdn.com/bphoto/FBmvjy...,False,https://www.yelp.com/biz/moto-seattle?adjust_c...,122,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",5.0,"{'latitude': 47.56207, 'longitude': -122.38509}",[delivery],$$,"{'address1': '4526 42nd Ave SW', 'address2': '...",12064208880,(206) 420-8880,7965.106751
3,M9xzvwgK58T0w7wvXedvuQ,hot-mamas-pizza-seattle,Hot Mama's Pizza,https://s3-media4.fl.yelpcdn.com/bphoto/LEL1qj...,False,https://www.yelp.com/biz/hot-mamas-pizza-seatt...,942,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.0,"{'latitude': 47.615379179632, 'longitude': -12...",[delivery],$,"{'address1': '700 E Pine St', 'address2': '', ...",12063226444,(206) 322-6444,1446.839232
4,EqrYxhlEyDsfwBqiGXKk9Q,dantini-pizza-seattle,Dantini Pizza,https://s3-media2.fl.yelpcdn.com/bphoto/E0xwT0...,False,https://www.yelp.com/biz/dantini-pizza-seattle...,46,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.5,"{'latitude': 47.6307274, 'longitude': -122.374...",[],,"{'address1': '1417 Elliott Ave W', 'address2':...",12064209912,(206) 420-9912,2953.060608


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
995,LnlxRnBw1UXk1X_3zZxC6g,qfc-seattle-24,QFC,https://s3-media1.fl.yelpcdn.com/bphoto/Pt8qPb...,False,https://www.yelp.com/biz/qfc-seattle-24?adjust...,73,"[{'alias': 'grocery', 'title': 'Grocery'}]",2.5,"{'latitude': 47.57876, 'longitude': -122.2998}",[],$$,"{'address1': '2707 Rainier Ave S', 'address2':...",12067252418,(206) 725-2418,5839.909306
996,NR0jIGzbYyHtEiU7pw2-iQ,philly-ya-belly-everett,Philly Ya Belly,https://s3-media3.fl.yelpcdn.com/bphoto/qWokLo...,False,https://www.yelp.com/biz/philly-ya-belly-evere...,432,"[{'alias': 'sandwiches', 'title': 'Sandwiches'...",4.5,"{'latitude': 47.88512, 'longitude': -122.26433}","[delivery, pickup]",$$,"{'address1': '12432 Hwy 99', 'address2': 'Ste ...",14257100130,(425) 710-0130,29350.289996
997,i4IRTw6FBafHvyeZN_kdeg,massimo-italian-bar-and-grill-gig-harbor-2,Massimo Italian Bar & Grill,https://s3-media4.fl.yelpcdn.com/bphoto/d6yzCd...,False,https://www.yelp.com/biz/massimo-italian-bar-a...,297,"[{'alias': 'italian', 'title': 'Italian'}, {'a...",3.5,"{'latitude': 47.3836, 'longitude': -122.62695}",[],$$,"{'address1': '13802 Purdy Dr NW', 'address2': ...",12535146237,(253) 514-6237,34640.916724
998,3MhzuhU5H64YJ0f2SO96MA,dominos-pizza-kirkland-4,Domino's Pizza,https://s3-media3.fl.yelpcdn.com/bphoto/XM01eG...,False,https://www.yelp.com/biz/dominos-pizza-kirklan...,50,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",2.5,"{'latitude': 47.7211837768555, 'longitude': -1...",[],$,"{'address1': '13501 100th Ave NE', 'address2':...",14258204660,(425) 820-4660,14278.003084
999,6ZKNFPLWRIVWshUkMNlgng,starbucks-reserve-roastery-seattle-seattle-2,Starbucks Reserve Roastery Seattle,https://s3-media3.fl.yelpcdn.com/bphoto/7HpCFc...,False,https://www.yelp.com/biz/starbucks-reserve-roa...,3650,"[{'alias': 'coffee', 'title': 'Coffee & Tea'},...",4.5,"{'latitude': 47.614067, 'longitude': -122.32808}",[delivery],$$,"{'address1': '1124 Pike St', 'address2': '', '...",12066240173,(206) 624-0173,1379.678503


In [30]:
# Reload the saved results through JSON_FILE rather than a hard-coded copy of
# its path, so this cell always reads the file the cells above wrote
d1_df = pd.read_json(JSON_FILE)
display(d1_df.head(), d1_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,1rkgyfwzXG7qSdVZdfRt7g,roccos-seattle,Rocco's,https://s3-media3.fl.yelpcdn.com/bphoto/AWBXTE...,False,https://www.yelp.com/biz/roccos-seattle?adjust...,1717,"[{'alias': 'bars', 'title': 'Bars'}, {'alias':...",4.0,"{'latitude': 47.614470434736106, 'longitude': ...","[pickup, delivery]",$$,"{'address1': '2312 2nd Ave', 'address2': '', '...",12063974210,(206) 397-4210,1451.852896
1,-FOAQv22SXtSBs7nptI3UA,serious-pie-downtown-seattle-2,Serious Pie Downtown,https://s3-media2.fl.yelpcdn.com/bphoto/dy0pJ5...,False,https://www.yelp.com/biz/serious-pie-downtown-...,4432,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.0,"{'latitude': 47.61304626911818, 'longitude': -...","[pickup, delivery]",$$,"{'address1': '2001 4th Ave', 'address2': None,...",12068387388,(206) 838-7388,1422.856174
2,IrohtoYjnAR_vc6w6CRCxA,moto-seattle,Moto,https://s3-media2.fl.yelpcdn.com/bphoto/FBmvjy...,False,https://www.yelp.com/biz/moto-seattle?adjust_c...,122,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",5.0,"{'latitude': 47.56207, 'longitude': -122.38509}",[delivery],$$,"{'address1': '4526 42nd Ave SW', 'address2': '...",12064208880,(206) 420-8880,7965.106751
3,M9xzvwgK58T0w7wvXedvuQ,hot-mamas-pizza-seattle,Hot Mama's Pizza,https://s3-media4.fl.yelpcdn.com/bphoto/LEL1qj...,False,https://www.yelp.com/biz/hot-mamas-pizza-seatt...,942,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.0,"{'latitude': 47.615379179632, 'longitude': -12...",[delivery],$,"{'address1': '700 E Pine St', 'address2': '', ...",12063226444,(206) 322-6444,1446.839232
4,EqrYxhlEyDsfwBqiGXKk9Q,dantini-pizza-seattle,Dantini Pizza,https://s3-media2.fl.yelpcdn.com/bphoto/E0xwT0...,False,https://www.yelp.com/biz/dantini-pizza-seattle...,46,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.5,"{'latitude': 47.6307274, 'longitude': -122.374...",[],,"{'address1': '1417 Elliott Ave W', 'address2':...",12064209912,(206) 420-9912,2953.060608


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
995,LnlxRnBw1UXk1X_3zZxC6g,qfc-seattle-24,QFC,https://s3-media1.fl.yelpcdn.com/bphoto/Pt8qPb...,False,https://www.yelp.com/biz/qfc-seattle-24?adjust...,73,"[{'alias': 'grocery', 'title': 'Grocery'}]",2.5,"{'latitude': 47.57876, 'longitude': -122.2998}",[],$$,"{'address1': '2707 Rainier Ave S', 'address2':...",12067252418,(206) 725-2418,5839.909306
996,NR0jIGzbYyHtEiU7pw2-iQ,philly-ya-belly-everett,Philly Ya Belly,https://s3-media3.fl.yelpcdn.com/bphoto/qWokLo...,False,https://www.yelp.com/biz/philly-ya-belly-evere...,432,"[{'alias': 'sandwiches', 'title': 'Sandwiches'...",4.5,"{'latitude': 47.88512, 'longitude': -122.26433}","[delivery, pickup]",$$,"{'address1': '12432 Hwy 99', 'address2': 'Ste ...",14257100130,(425) 710-0130,29350.289996
997,i4IRTw6FBafHvyeZN_kdeg,massimo-italian-bar-and-grill-gig-harbor-2,Massimo Italian Bar & Grill,https://s3-media4.fl.yelpcdn.com/bphoto/d6yzCd...,False,https://www.yelp.com/biz/massimo-italian-bar-a...,297,"[{'alias': 'italian', 'title': 'Italian'}, {'a...",3.5,"{'latitude': 47.3836, 'longitude': -122.62695}",[],$$,"{'address1': '13802 Purdy Dr NW', 'address2': ...",12535146237,(253) 514-6237,34640.916724
998,3MhzuhU5H64YJ0f2SO96MA,dominos-pizza-kirkland-4,Domino's Pizza,https://s3-media3.fl.yelpcdn.com/bphoto/XM01eG...,False,https://www.yelp.com/biz/dominos-pizza-kirklan...,50,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",2.5,"{'latitude': 47.7211837768555, 'longitude': -1...",[],$,"{'address1': '13501 100th Ave NE', 'address2':...",14258204660,(425) 820-4660,14278.003084
999,6ZKNFPLWRIVWshUkMNlgng,starbucks-reserve-roastery-seattle-seattle-2,Starbucks Reserve Roastery Seattle,https://s3-media3.fl.yelpcdn.com/bphoto/7HpCFc...,False,https://www.yelp.com/biz/starbucks-reserve-roa...,3650,"[{'alias': 'coffee', 'title': 'Coffee & Tea'},...",4.5,"{'latitude': 47.614067, 'longitude': -122.32808}",[delivery],$$,"{'address1': '1124 Pike St', 'address2': '', '...",12066240173,(206) 624-0173,1379.678503


In [32]:
# # check for duplicate results
# final_df.duplicated().sum()



In [33]:
# Check for duplicate IDs: count the rows flagged as duplicates when only
# the 'id' column is compared
final_df.duplicated(subset='id').sum()



1

In [34]:
## Remove rows whose id repeats, then re-count to confirm none are left
final_df = final_df.drop_duplicates(subset=['id'])
final_df.duplicated(subset=['id']).sum()


0

In [35]:
# Write the final results to a gzip-compressed CSV, leaving out the index
final_df.to_csv(
    'Data/final_results_seattle_pizza.csv.gz',
    index=False,
    compression='gzip',
)

