## Imports

In [1]:
# Imports
import numpy as np
import pandas as pd

# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

## Load API Credentials

In [2]:
# Load API Credentials
# Build the path from the current user's home directory instead of hardcoding
# one machine's absolute path, so the notebook is not tied to a single account.
credentials_path = os.path.join(os.path.expanduser('~'), '.secret', 'yelp_api.json')
with open(credentials_path) as f:
    # The file is expected to be a JSON object containing an 'api-key' entry
    login = json.load(f)
# Instantiate YelpAPI Variable
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)

## Create JSON File

Next I want to set the parameters for the search terms.

In [3]:
# Search parameters reused by every Yelp API query in this notebook
LOCATION = 'Boston, MA'  # passed as the `location` argument of search_query
TERM = 'Clam Chowder'  # passed as the `term` argument of search_query

In [4]:
# Path of the JSON file where search results are accumulated between API calls
boston_chowder = "Data/results_in_progress_chowder.json"
# Echo the path as the cell output
boston_chowder

'Data/results_in_progress_chowder.json'

Next I will define a function that creates the JSON file in which the results of the Yelp API calls will be stored.

In [5]:
def create_json_file(boston_chowder, delete_if_exists=False):
    """Ensure a JSON results file exists at the given path.

    If no file exists at the path, any missing parent folder is created and a
    file containing an empty JSON list is written. An existing file is left
    untouched unless ``delete_if_exists`` is truthy, in which case it is
    removed and replaced by a fresh empty-list file. A status message is
    printed for every action taken.

    Args:
        boston_chowder: Path of the JSON file to create or reuse.
        delete_if_exists: When truthy, discard an already-present file and
            start again from an empty list. Defaults to False.

    Returns:
        None.
    """
    if os.path.isfile(boston_chowder):
        if not delete_if_exists:
            # Keep the existing file so previously saved results can be reused
            print(f"[i] {boston_chowder} already exists.")
            return
        print(f"[i] {boston_chowder} already exists. Deleting previous file...")
        os.remove(boston_chowder)

    # Reached when the file was never there or has just been deleted
    print(f"[i] {boston_chowder} not found. Saving empty list to new file.")
    folder = os.path.dirname(boston_chowder)
    # dirname is '' for a bare filename; makedirs('') would raise, so skip it
    if folder:
        os.makedirs(folder, exist_ok=True)
    with open(boston_chowder, 'w') as f:
        json.dump([], f)

Now I can create the new JSON file using the function from above.

In [6]:
## Create a new empty json file (deleting the previous one if it exists)
create_json_file(boston_chowder, delete_if_exists=True)

with open(boston_chowder, 'r') as f:
    previous_results = json.load(f)

## set offset based on previous results
n_results = len(previous_results)
print(f'- {n_results} previous results found.')

# First query: used to read the total result count and the page size
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                                offset=n_results)
## How many results total?
total_results = results['total']
## How many did we get the details for?
results_per_page = len(results['businesses'])
# Use math.ceil to round up for the total number of pages of results.
# A query that returns no businesses would otherwise divide by zero, so treat
# that case as "no pages to fetch".
if results_per_page > 0:
    n_pages = math.ceil((total_results - n_results) / results_per_page)
else:
    n_pages = 0
n_pages

[i] Data/results_in_progress_chowder.json not found. Saving empty list to new file.
- 0 previous results found.


70

In order to get all pages for the API call, I want to iterate through all 70 pages starting at the proper offset. I will stop the loop once the next page would bring the total past 1,000 results, in order to prevent getting an error from the Yelp API.

In [7]:
for i in tqdm_notebook(range(1, n_pages + 1)):

    # Re-read the file on every pass so the offset always matches what is
    # actually saved on disk.
    with open(boston_chowder, 'r') as f:
        previous_results = json.load(f)

    n_results = len(previous_results)

    # Stop before requesting a page that would push the saved total past 1000.
    # NOTE(review): this threshold counts saved results, not API calls, even
    # though the message says "api calls" - presumably it mirrors the Yelp
    # search result cap; confirm against the API documentation.
    if (n_results + results_per_page) > 1000:
        print('Exceeded 1000 api calls. Stopping loop.')
        break

    ## use n_results as the OFFSET
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)

    new_businesses = results['businesses']
    # An empty page leaves the offset unchanged, so every remaining iteration
    # would repeat the same request; stop instead of spending more calls.
    if not new_businesses:
        print('No more results returned. Stopping loop.')
        break

    ## append new results and save to file
    previous_results.extend(new_businesses)

    with open(boston_chowder, 'w') as f:
        json.dump(previous_results, f)

    # Short pause between consecutive requests
    time.sleep(.2)

  0%|          | 0/70 [00:00<?, ?it/s]

Exceeded 1000 api calls. Stopping loop.


## Converting the Call to DataFrame

In [9]:
# load final results
# Read the saved JSON results file into a DataFrame
boston_chowder_final = pd.read_json(boston_chowder)
# Show the first and last rows to check both ends of the saved results
display(boston_chowder_final.head(), boston_chowder_final.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,XNvkDjM6rMnSfzubT32brQ,boston-chowda-boston-3,Boston Chowda,https://s3-media3.fl.yelpcdn.com/bphoto/DZ5nAH...,False,https://www.yelp.com/biz/boston-chowda-boston-...,1184,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a...",4.0,"{'latitude': 42.360065, 'longitude': -71.055374}","[pickup, delivery]",$$,"{'address1': '1 Faneuil Hall Market Pl', 'addr...",16177424441,(617) 742-4441,5270.566848
1,xkLwAfgIYXYHzJF8C_fhCA,tonys-clam-shop-quincy,Tony's Clam Shop,https://s3-media2.fl.yelpcdn.com/bphoto/fuctCw...,False,https://www.yelp.com/biz/tonys-clam-shop-quinc...,685,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a...",4.0,"{'latitude': 42.27524349415289, 'longitude': -...",[],$$,"{'address1': '861 Quincy Shore Dr', 'address2'...",16177735090,(617) 773-5090,5899.108532
2,y2w6rFaO0XEiG5mFfOsiFA,neptune-oyster-boston,Neptune Oyster,https://s3-media4.fl.yelpcdn.com/bphoto/qzU7C1...,False,https://www.yelp.com/biz/neptune-oyster-boston...,5939,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a...",4.5,"{'latitude': 42.36321, 'longitude': -71.05594}",[delivery],$$$,"{'address1': '63 Salem St', 'address2': '', 'a...",16177423474,(617) 742-3474,5622.373454
3,i6uHrxuS1D_6V8WhaWLy5A,lukes-lobster-back-bay-boston,Luke's Lobster Back Bay,https://s3-media1.fl.yelpcdn.com/bphoto/EnMeIE...,False,https://www.yelp.com/biz/lukes-lobster-back-ba...,1994,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a...",4.0,"{'latitude': 42.34869123638961, 'longitude': -...","[pickup, delivery]",$$,"{'address1': '75 Exeter Street', 'address2': '...",18573504626,(857) 350-4626,4407.684543
4,HEaQQAH6qyFXamKkpil91A,paulis-boston,Pauli's,https://s3-media2.fl.yelpcdn.com/bphoto/9hiaqb...,False,https://www.yelp.com/biz/paulis-boston?adjust_...,1412,"[{'alias': 'sandwiches', 'title': 'Sandwiches'...",4.5,"{'latitude': 42.363244212396, 'longitude': -71...","[pickup, delivery]",$$,"{'address1': '65 Salem St', 'address2': None, ...",18572847064,(857) 284-7064,5623.058982


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
995,aDwA3DyR8gjY2nwGSqz8pQ,allies-beach-street-cafe-manchester-by-the-sea-2,Allie's Beach Street Cafe,https://s3-media1.fl.yelpcdn.com/bphoto/dhpuy-...,False,https://www.yelp.com/biz/allies-beach-street-c...,130,"[{'alias': 'bars', 'title': 'Bars'}, {'alias':...",3.5,"{'latitude': 42.57381, 'longitude': -70.770451}",[],$$,"{'address1': '35 Beach St', 'address2': '', 'a...",19787049571,(978) 704-9571,37353.699141
996,8qKsRvKOwqNMbMfdIQfueQ,boathouse-400-boston,Boathouse 400,https://s3-media2.fl.yelpcdn.com/bphoto/Cd7TD2...,False,https://www.yelp.com/biz/boathouse-400-boston?...,9,"[{'alias': 'seafood', 'title': 'Seafood'}]",2.5,"{'latitude': 42.36007, 'longitude': -71.1182}",[delivery],,"{'address1': '400 Soldiers Field Rd', 'address...",16177830090,(617) 783-0090,7297.873861
997,5Ekp-L6hnnZz3-qPkNHy3g,napper-tandys-roslindale,Napper Tandy's,https://s3-media3.fl.yelpcdn.com/bphoto/rhOtwq...,False,https://www.yelp.com/biz/napper-tandys-roslind...,84,"[{'alias': 'irish', 'title': 'Irish'}, {'alias...",3.5,"{'latitude': 42.2873506034849, 'longitude': -7...","[pickup, delivery]",$$,"{'address1': '4195 Washington St', 'address2':...",16173238400,(617) 323-8400,6455.446934
998,T1wflZohQRZVb9SKQapfFA,del-friscos-double-eagle-steakhouse-boston-4,Del Frisco's Double Eagle Steakhouse,https://s3-media4.fl.yelpcdn.com/bphoto/hm3IlP...,False,https://www.yelp.com/biz/del-friscos-double-ea...,1215,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a...",3.5,"{'latitude': 42.3489496, 'longitude': -71.038692}","[pickup, delivery]",$$$$,"{'address1': '250 Northern Ave', 'address2': '...",16179511368,(617) 951-1368,4299.83674
999,uLJDW4zL079EG73h0aMAnA,parkers-pub-wrentham,Parkers Pub,https://s3-media3.fl.yelpcdn.com/bphoto/TwtC-f...,False,https://www.yelp.com/biz/parkers-pub-wrentham?...,64,"[{'alias': 'pubs', 'title': 'Pubs'}]",4.0,"{'latitude': 42.08339, 'longitude': -71.32718}","[pickup, delivery]",$$,"{'address1': '303 Shears St', 'address2': '', ...",17742108962,(774) 210-8962,33857.005998


Now I will check for any duplicates in the DataFrame.

In [10]:
# check for duplicate ID's: count the rows flagged as duplicates on the 'id' column
boston_chowder_final.duplicated(subset='id').sum()

0

Since there are no duplicate business IDs, I will save the DataFrame to a compressed .csv file.

In [11]:
# save the final results to a gzip-compressed csv, leaving the DataFrame index out of the file
boston_chowder_final.to_csv('Data/final_results_chowder.csv.gz', compression='gzip',index=False)