Mike Fiddler Core 8/21/22

In [4]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [6]:
# Load API Credentials
# The credentials file is kept outside the project folder. Its location can be
# overridden with the YELP_API_CREDENTIALS environment variable; otherwise it
# defaults to ~/.secret/yelp_api.json inside the current user's home directory,
# so the notebook no longer depends on one specific machine's absolute path.
credentials_path = os.environ.get(
    'YELP_API_CREDENTIALS',
    os.path.join(os.path.expanduser('~'), '.secret', 'yelp_api.json'),
)
with open(credentials_path) as f:
    login = json.load(f)
# Instantiate YelpAPI Variable (the JSON file must provide an 'API Key' entry)
yelp_api = YelpAPI(login['API Key'], timeout_s=5.0)

In [7]:
# Search parameters reused by every API call in this notebook:
# what to look for, and where to look for it.
TERM = 'Steak and Cheese'
LOCATION = 'Boston, MA,02119'

In [8]:
# File where the search results are accumulated while the download is in progress.
# A plain string literal is used: there are no placeholders, so no f-string is needed.
JSON_FILE_boston = "Data/results_in_progress_steak_and_cheese.json"
JSON_FILE_boston

'Data/results_in_progress_steak_and_cheese.json'

In [9]:
## Make sure the results-in-progress file exists before it is read later on
file_exists = os.path.isfile(JSON_FILE_boston)

if not file_exists:

    ## CREATE ANY NEEDED FOLDERS
    # Keep only the folder part of the path (empty string when there is none)
    folder = os.path.dirname(JSON_FILE_boston)
    if len(folder) > 0:
        # exist_ok=True: no error if the folder is already there
        os.makedirs(folder, exist_ok=True)

    ## INFORM USER AND SAVE EMPTY LIST
    print(f"[i] {JSON_FILE_boston} not found. Saving empty list to file.")

    ## start the file off as an empty JSON list; result pages are added to it later
    with open(JSON_FILE_boston, 'w') as f:
        json.dump([], f)
else:
    ## Nothing to create: keep whatever was saved by an earlier run
    print(f"[i] {JSON_FILE_boston} already exists.")

[i] Data/results_in_progress_steak_and_cheese.json not found. Saving empty list to file.


In [10]:
## Read back everything that has been saved so far
with open(JSON_FILE_boston) as f:
    previous_results = json.load(f)

## The count of saved entries is what the next API call uses as its offset
n_results = len(previous_results)
print(f'- {n_results} previous results found.')

- 0 previous results found.


In [11]:
# First API call: search with our parameters, starting at the offset given by
# the number of results that are already saved
results = yelp_api.search_query(
    location=LOCATION,
    term=TERM,
    offset=n_results,
)
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [12]:
## Total number of matches reported in the response's "total" field
total_results = results["total"]
total_results

354

In [13]:
## Number of businesses actually included in this single response (one page)
results_per_page = len(results["businesses"])
results_per_page

20

In [14]:
# Use math.ceil to round up for the total number of pages of results
# (results not yet saved, divided by the page size).
# A search that returns no businesses gives results_per_page == 0; treat that as
# zero pages instead of letting the division raise ZeroDivisionError.
if results_per_page > 0:
    n_pages = math.ceil((results['total'] - n_results) / results_per_page)
else:
    n_pages = 0
n_pages

18

In [15]:
# Add the businesses from this response to the saved list (in place) and
# write the combined list back to the file
previous_results += results['businesses']
with open(JSON_FILE_boston, 'w') as f:
    json.dump(previous_results, f)

In [16]:
# Fetch the remaining pages. Progress is re-read from and re-written to the file
# on every iteration, so an interrupted run can pick up where it stopped.
for i in tqdm_notebook(range(1, n_pages + 1)):
    ## Read in results in progress file and check the length
    with open(JSON_FILE_boston, 'r') as f:
        previous_results = json.load(f)
    ## save number of results to use as offset
    n_results = len(previous_results)

    ## The first page was saved before this loop started, so the last iteration
    ## has nothing left to fetch: stop instead of making a wasted API call.
    if n_results >= results['total']:
        break

    ## brief pause between calls (presumably to go easy on the API rate limit)
    time.sleep(.2)

    ## use n_results as the OFFSET
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)

    ## An empty page means no further results are available; stop here
    new_businesses = results['businesses']
    if not new_businesses:
        break

    ## append new results and save to file
    previous_results.extend(new_businesses)
    with open(JSON_FILE_boston, 'w') as f:
        json.dump(previous_results, f)

  0%|          | 0/18 [00:00<?, ?it/s]

In [17]:
# Read the accumulated JSON results into a DataFrame and preview both ends of it
final_df = pd.read_json(JSON_FILE_boston)
display(final_df.head())
display(final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,V9JwfAKDxqzZaH8srJsu3g,joes-famous-steak-and-cheese-boston,Joe's Famous Steak & Cheese,https://s3-media2.fl.yelpcdn.com/bphoto/39o7Q2...,False,https://www.yelp.com/biz/joes-famous-steak-and...,43,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.0,"{'latitude': 42.3288345, 'longitude': -71.0838...","[pickup, delivery]",$,"{'address1': '140 Dudley St', 'address2': '', ...",16174428824,(617) 442-8824,658.456126
1,oUuZ7qwvVcVdXrxQ0pcEJg,ugis-subs-boston,Ugi's Subs,https://s3-media2.fl.yelpcdn.com/bphoto/GrrXri...,False,https://www.yelp.com/biz/ugis-subs-boston?adju...,14,"[{'alias': 'sandwiches', 'title': 'Sandwiches'}]",3.0,"{'latitude': 42.3285599122569, 'longitude': -7...","[pickup, delivery]",$,"{'address1': '68 Warren St', 'address2': '', '...",16174277032,(617) 427-7032,614.397714
2,ubb9LaZSVa1A2FQ9YIfiaA,bennetts-sandwich-shop-boston-2,Bennett's Sandwich Shop,https://s3-media3.fl.yelpcdn.com/bphoto/4_BZjk...,False,https://www.yelp.com/biz/bennetts-sandwich-sho...,95,"[{'alias': 'sandwiches', 'title': 'Sandwiches'...",4.5,"{'latitude': 42.34319200093286, 'longitude': -...","[pickup, delivery]",$$,"{'address1': '1348 Boylston St', 'address2': '...",18572399736,(857) 239-9736,2529.979404
3,KbLm8n_ikUMkdx3Lz17u7A,rondos-sub-shop-boston,Rondo's Sub Shop,https://s3-media3.fl.yelpcdn.com/bphoto/0hpBt2...,False,https://www.yelp.com/biz/rondos-sub-shop-bosto...,122,"[{'alias': 'sandwiches', 'title': 'Sandwiches'...",4.5,"{'latitude': 42.3411312, 'longitude': -71.0541...",[delivery],$,"{'address1': '134 W Broadway', 'address2': '',...",16172690274,(617) 269-0274,3219.73082
4,U-wNf5xZBHoHIH_Nlnif7g,nicoles-pizza-boston-2,Nicole's Pizza,https://s3-media1.fl.yelpcdn.com/bphoto/rigmGT...,False,https://www.yelp.com/biz/nicoles-pizza-boston-...,125,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",3.5,"{'latitude': 42.34237, 'longitude': -71.07518}","[pickup, delivery]",$,"{'address1': '639 Tremont St', 'address2': '',...",16172660223,(617) 266-0223,2286.163666


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
349,nPDICQyiWkmkHRIs7rZp_A,subway-boston-50,Subway,https://s3-media4.fl.yelpcdn.com/bphoto/9h34OO...,False,https://www.yelp.com/biz/subway-boston-50?adju...,9,"[{'alias': 'sandwiches', 'title': 'Sandwiches'}]",2.5,"{'latitude': 42.3594975, 'longitude': -71.0587...","[pickup, delivery]",$,"{'address1': '100 City Hall Plz', 'address2': ...",18572335787.0,(857) 233-5787,4579.158931
350,MHx0eqYHPu72dxjoU6cMMQ,subway-boston-49,Subway,https://s3-media3.fl.yelpcdn.com/bphoto/l6dInA...,False,https://www.yelp.com/biz/subway-boston-49?adju...,18,"[{'alias': 'sandwiches', 'title': 'Sandwiches'}]",2.5,"{'latitude': 42.3569291795438, 'longitude': -7...","[pickup, delivery]",$,"{'address1': '274 Franklin St', 'address2': ''...",16173307883.0,(617) 330-7883,4555.508828
351,s9jbBsXvoCz9fB1LJ70vXA,viga-italian-eatery-and-caterer-boston-4,Viga Italian Eatery & Caterer,https://s3-media3.fl.yelpcdn.com/bphoto/xsj8K1...,False,https://www.yelp.com/biz/viga-italian-eatery-a...,85,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",3.5,"{'latitude': 42.35784, 'longitude': -71.05834}","[delivery, pickup]",$,"{'address1': '275 Washington St', 'address2': ...",16177428442.0,(617) 742-8442,4423.377905
352,19xuTm-mcnG4k7rF3ZTmeg,clover-food-truck-boston-16,Clover Food Truck,https://s3-media1.fl.yelpcdn.com/bphoto/HjACIU...,False,https://www.yelp.com/biz/clover-food-truck-bos...,487,"[{'alias': 'newamerican', 'title': 'American (...",4.0,"{'latitude': 42.3519287109375, 'longitude': -7...",[],$,"{'address1': '', 'address2': '', 'address3': '...",,,3748.286977
353,g9tC1E_jX-8eTdGMMuLpow,subway-roslindale-2,Subway,https://s3-media1.fl.yelpcdn.com/bphoto/BPREUe...,False,https://www.yelp.com/biz/subway-roslindale-2?a...,6,"[{'alias': 'sandwiches', 'title': 'Sandwiches'}]",3.5,"{'latitude': 42.2857982191418, 'longitude': -7...","[delivery, pickup]",$,"{'address1': '4238-4244 Washington St', 'addre...",16173235002.0,(617) 323-5002,5489.669544


In [18]:
# Count the rows whose business id already appeared in an earlier row
final_df['id'].duplicated().sum()

0

In [19]:
# Keep only the first row for each business id.
# Re-assigning (instead of inplace=True) follows current pandas guidance and
# gives the same result every time the cell is re-run.
final_df = final_df.drop_duplicates(subset='id')

In [20]:
# Write the final results to a gzip-compressed CSV, leaving out the index column
final_df.to_csv(
    'Data/final_results_steak_and_cheese.csv.gz',
    compression='gzip',
    index=False,
)