In [1]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI

# Credentials and Accessing the API

In [2]:
# Load API Credentials
# The credentials file location can be overridden with the
# YELP_API_CREDENTIALS environment variable so the notebook is not tied to
# one user's machine; it falls back to the original path when unset.
CREDENTIALS_FILE = os.environ.get('YELP_API_CREDENTIALS',
                                  '/Users/Nick/.secret/yelp_api.json')
with open(CREDENTIALS_FILE) as f:
    login = json.load(f)
# Instantiate YelpAPI Variable (the key is read from the 'api-key' entry;
# it is deliberately never displayed so it cannot leak into saved output)
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)

# Define Search

In [3]:
# Search parameters reused by every Yelp API call in this notebook.
# They are defined once, before the first request is made.
TERM = 'Macarons'
LOCATION = 'New York City,NY,10011'

# Create a results-in-progress JSON file, but only if it doesn't exist.

In [4]:
# Path of the results-in-progress file; the leading folder is optional.
# The name is hard-coded to reflect the search term and city used above.
JSON_FILE = "Data/results_in_progress_macarons_nyc.json"
JSON_FILE

'Data/results_in_progress_macarons_nyc.json'

In [5]:
## Check if JSON_FILE exists
file_exists = os.path.isfile(JSON_FILE)

## If it does not exist: create it (and its folder) holding an empty list
if not file_exists:

    ## CREATE ANY NEEDED FOLDERS
    # dirname is '' when JSON_FILE is a bare filename, in which case
    # there is no folder to create
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)

    ## INFORM USER AND SAVE EMPTY LIST
    print(f"[i] {JSON_FILE} not found. Saving empty list to file.")

    ## Seed the file with an empty list so later cells can always
    ## load it and extend it with pages of results
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)
## If it exists, inform user and leave the saved results untouched
else:
    print(f"[i] {JSON_FILE} already exists.")

[i] Data/results_in_progress_macarons_nyc.json not found. Saving empty list to file.


# Determine how many results are already in the file

In [6]:
## Read back whatever has been saved so far (an empty list on a first run)
with open(JSON_FILE) as f:  # default mode is read-only text
    previous_results = json.load(f)

## The count of saved results is used as the offset of the next API call
n_results = len(previous_results)
print(f'- {n_results} previous results found.')

- 0 previous results found.


# Figure out how many pages of results we will need

In [7]:
# Perform the first API call through yelp_api.search_query, starting at the
# offset implied by the results already saved to file.
search_kwargs = dict(location=LOCATION, term=TERM, offset=n_results)
results = yelp_api.search_query(**search_kwargs)
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [8]:
## How many results total?
# Read the 'total' entry of the search response; this is the overall count
# for the query, as opposed to the number of businesses on this one page.
total_results = results['total']
total_results

364

In [9]:
## Number of businesses whose details came back on this page
businesses_on_page = results['businesses']
results_per_page = len(businesses_on_page)
results_per_page

20

In [10]:
# Number of pages needed to cover the results that are not yet saved,
# rounded up with math.ceil so a partial last page still counts.
# Note that this count includes the page that was just retrieved above.
remaining_results = max(results['total'] - n_results, 0)

# An empty response makes results_per_page 0; report zero pages instead of
# raising ZeroDivisionError.
if results_per_page:
    n_pages = math.ceil(remaining_results / results_per_page)
else:
    n_pages = 0
n_pages

19

# Add this page of results to .json file

In [11]:
# Append this page's businesses to the list loaded from file (in place),
# then write the combined list back to the results-in-progress file.
previous_results += results['businesses']
with open(JSON_FILE, 'w') as f:
    json.dump(previous_results, f)

# Setting a progress bar in our for loop.

In [12]:
from tqdm.notebook import tqdm_notebook

# Demonstration only: wrap the page range in a progress bar and pause
# 200 ms per step so the bar can be seen advancing.
for i in tqdm_notebook(range(n_pages)):
    time.sleep(0.2)

  0%|          | 0/19 [00:00<?, ?it/s]

# For Loop to call each page

In [13]:
# Retrieve the remaining pages one request at a time, saving after every
# page so an interrupted run can resume from the file.
for i in tqdm_notebook(range(1, n_pages + 1)):
    ## Read in results in progress file and check the length
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    ## the number of saved results is used as the OFFSET of the next request
    n_results = len(previous_results)

    ## n_pages also counts the page that was saved before this loop, so
    ## stop as soon as everything reported by the API is on file rather
    ## than issuing a request past the end of the results
    if n_results >= total_results:
        break

    # 200 ms pause between requests
    time.sleep(.2)
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)

    ## an empty page means there is nothing more to retrieve
    new_businesses = results['businesses']
    if not new_businesses:
        break

    ## append new results and save to file
    previous_results.extend(new_businesses)
    with open(JSON_FILE, 'w') as f:
        json.dump(previous_results, f)

  0%|          | 0/19 [00:00<?, ?it/s]

# Convert .json to dataframe

In [14]:
# Read the accumulated JSON results into a DataFrame and preview both ends
final_df = pd.read_json(JSON_FILE)
first_rows, last_rows = final_df.head(), final_df.tail()
display(first_rows, last_rows)

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,_DcfYjnGffQI3iP1Be0kdA,ladurée-madison-new-york-2,Ladurée Madison,https://s3-media1.fl.yelpcdn.com/bphoto/8sYGln...,False,https://www.yelp.com/biz/ladur%C3%A9e-madison-...,1956,"[{'alias': 'macarons', 'title': 'Macarons'}, {...",4.5,"{'latitude': 40.7707702252517, 'longitude': -7...","[pickup, delivery]",$$$,"{'address1': '864 Madison Ave', 'address2': ''...",16465583157,(646) 558-3157,4237.763954
1,TmEPtynTP1qoTqwUbGKyGw,ladurée-soho-new-york-2,Ladurée SoHo,https://s3-media2.fl.yelpcdn.com/bphoto/kOU2qg...,False,https://www.yelp.com/biz/ladur%C3%A9e-soho-new...,1712,"[{'alias': 'tea', 'title': 'Tea Rooms'}, {'ali...",3.5,"{'latitude': 40.724329, 'longitude': -74.002508}","[pickup, delivery]",$$$,"{'address1': '398 W Broadway', 'address2': '',...",16463927868,(646) 392-7868,2145.385641
2,yTyqDCF5fB2nLVcSXEHMQg,made-by-pauline-new-york-2,Made by Pauline,https://s3-media4.fl.yelpcdn.com/bphoto/wNj_Kg...,False,https://www.yelp.com/biz/made-by-pauline-new-y...,106,"[{'alias': 'macarons', 'title': 'Macarons'}, {...",4.5,"{'latitude': 40.750802, 'longitude': -73.989484}",[delivery],$$,"{'address1': '151 W 34th St', 'address2': '', ...",12124941003,(212) 494-1003,1301.424555
3,x2OI4M2QJOo3Ymf8YHBhdw,la-maison-du-macaron-new-york,La Maison du Macaron,https://s3-media3.fl.yelpcdn.com/bphoto/K5v0wv...,False,https://www.yelp.com/biz/la-maison-du-macaron-...,894,"[{'alias': 'macarons', 'title': 'Macarons'}, {...",4.0,"{'latitude': 40.74318, 'longitude': -73.99419}",[delivery],$$,"{'address1': '132 W 23rd St', 'address2': '', ...",12122432757,(212) 243-2757,633.819024
4,meVzfRH_wLAJjcrCAhkh1g,patisserie-chanson-new-york-3,Patisserie Chanson,https://s3-media3.fl.yelpcdn.com/bphoto/H8O7jD...,False,https://www.yelp.com/biz/patisserie-chanson-ne...,474,"[{'alias': 'desserts', 'title': 'Desserts'}, {...",4.0,"{'latitude': 40.7417, 'longitude': -73.990571}","[delivery, pickup]",$$,"{'address1': '20 W 23rd St', 'address2': '', '...",19294238880,(929) 423-8880,960.237956


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
359,yy3PgFnyKYRwpUujvMgBZA,space-market-new-york-2,Space Market,https://s3-media1.fl.yelpcdn.com/bphoto/S6kN3L...,False,https://www.yelp.com/biz/space-market-new-york...,28,"[{'alias': 'convenience', 'title': 'Convenienc...",2.5,"{'latitude': 40.76147, 'longitude': -73.96025}","[pickup, delivery]",$$,"{'address1': '1130 1st Ave', 'address2': '', '...",12124863040,(212) 486-3040,4014.055224
360,N3dyf-UuX-f1SHJhhpsMlQ,7-eleven-hoboken,7-Eleven,https://s3-media1.fl.yelpcdn.com/bphoto/DLeheZ...,False,https://www.yelp.com/biz/7-eleven-hoboken?adju...,10,"[{'alias': 'convenience', 'title': 'Convenienc...",3.0,"{'latitude': 40.74214620734, 'longitude': -74....","[delivery, pickup]",$,"{'address1': '422 Washington St', 'address2': ...",12012226323,(201) 222-6323,2368.773272
361,mosd9Voq5Ws6k6Uduii_ww,bar-boulud-new-york-2,Bar Boulud,https://s3-media2.fl.yelpcdn.com/bphoto/L8ZAzt...,False,https://www.yelp.com/biz/bar-boulud-new-york-2...,921,"[{'alias': 'french', 'title': 'French'}, {'ali...",3.5,"{'latitude': 40.7718443592464, 'longitude': -7...","[delivery, pickup]",$$$,"{'address1': '1900 Broadway', 'address2': '', ...",12125950303,(212) 595-0303,3554.464638
362,_TbQFqDl7B-uXNIFYVkEIQ,7-eleven-union-city-5,7-Eleven,https://s3-media3.fl.yelpcdn.com/bphoto/3cl2NG...,False,https://www.yelp.com/biz/7-eleven-union-city-5...,4,"[{'alias': 'convenience', 'title': 'Convenienc...",2.5,"{'latitude': 40.77577, 'longitude': -74.0261}","[delivery, pickup]",$,"{'address1': '3900 Bergenline', 'address2': ''...",12012230711,(201) 223-0711,4132.835776
363,mwvukHjc7Trs5fRbZ4-TsA,fabbrica-restaurant-and-bar-brooklyn,Fabbrica Restaurant & Bar,https://s3-media2.fl.yelpcdn.com/bphoto/YKfdym...,False,https://www.yelp.com/biz/fabbrica-restaurant-a...,312,"[{'alias': 'italian', 'title': 'Italian'}, {'a...",3.5,"{'latitude': 40.7196429, 'longitude': -73.9626...","[restaurant_reservation, delivery, pickup]",$$,"{'address1': '44 N 6th St', 'address2': None, ...",13474220923,(347) 422-0923,4231.837651


# Check for duplicates

In [15]:
# Count rows whose business 'id' repeats an earlier row's id
final_df['id'].duplicated().sum()

0

# Save the final DataFrame to a .csv (or a .csv.gz if it's too big for the GitHub file size limit).

In [16]:
# Write the final results to a gzip-compressed CSV, leaving out the index
FINAL_CSV_FILE = 'Data/final_results_macarons_nyc.csv.gz'
final_df.to_csv(FINAL_CSV_FILE, index=False, compression='gzip')