In [1]:
# Import libraries
import numpy as np
import pandas as pd

import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [4]:
# Credentials
# The API key lives in a JSON file outside the repository. The path is
# resolved from the current user's home directory so the notebook is not
# tied to one machine's absolute path.
credentials_path = os.path.expanduser('~/.secret/yelp_api.json')

with open(credentials_path) as file:
    login = json.load(file)  # expects a JSON object containing an 'api-key' entry
yelp_login = YelpAPI(login['api-key'], timeout_s=5.0)

In [13]:
# Specify the query variables and the path of the in-progress results file.
TERM = 'coffee'
LOCATION = 'Ventura, CA'
query_file = 'data/results_for_ventura_coffee'
query_file

'data/results_for_ventura_coffee'

In [14]:
#function for creating files

def json_create_file(JSON_FILE, delete_if_exists= False):
    """Make sure JSON_FILE exists, creating it with an empty JSON list if needed.

    Parameters
    ----------
    JSON_FILE : str
        Path of the JSON file to check or create. Missing parent folders
        are created.
    delete_if_exists : bool, default False
        If True and the file already exists, it is deleted and replaced by
        a fresh file containing an empty list. If False, an existing file
        is left untouched.

    Returns
    -------
    None
        The function only prints a status message and touches the file system.
    """
    file_exists = os.path.isfile(JSON_FILE)

    # Nothing to do when the file is present and the caller wants to keep it.
    if file_exists and not delete_if_exists:
        print(f"[i] {JSON_FILE} already exists.")
        return

    if file_exists:
        print(f"[!] {JSON_FILE} already exists. Deleting previous file...")
        os.remove(JSON_FILE)
    else:
        print(f"[i] {JSON_FILE} not found. Saving empty list to new file.")

    # Reached both for a missing file and after a deletion: the file must
    # exist afterwards, otherwise later reads of it raise FileNotFoundError.
    folder = os.path.dirname(JSON_FILE)
    if folder:  # a bare filename has no folder to create
        os.makedirs(folder, exist_ok=True)
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)
        
        

In [18]:
# Create the in-progress results file if it is missing; an existing file is kept.

json_create_file(JSON_FILE=query_file, delete_if_exists=False)

[i] data/results_for_ventura_coffee not found. Saving empty list to new file.


In [20]:
# Read back whatever has been saved so far (an empty list for a freshly created file).
with open(query_file) as saved_file:
    previous_results = json.load(saved_file)

In [21]:
# Number of businesses already on disk; it is reused as the request offset.
n_results = len(previous_results)
print(f'- {n_results} previous results found.')

- 0 previous results found.


In [22]:
# First request: its response is used to read the total hit count and the page size.
results = yelp_login.search_query(
    term=TERM,
    location=LOCATION,
    offset=n_results,
)

In [23]:
# Check the total and calculate how many pages are left to request

## Total results reported by the API for this query
total_results = results['total']

## How many businesses came back in this single request?
results_per_page = len(results['businesses'])

## Pages still to fetch = remaining results / page size, rounded up.
## An empty first page would make the division fail, so it means 0 pages.
## The value is clamped at 0 in case more results are saved than reported.
if results_per_page > 0:
    n_pages = max(0, math.ceil((total_results - n_results) / results_per_page))
else:
    n_pages = 0
n_pages

21

In [24]:
# Request loop with progress bar

for i in tqdm_notebook( range(1,n_pages+1)):

    ## Re-read the in-progress file on every pass so the offset always
    ## reflects what is actually saved on disk
    with open(query_file, 'r') as f:
        previous_results = json.load(f)
    ## The number of saved results is the offset of the next page
    n_results = len(previous_results)

    ## Stop before requesting past 1000 results.
    ## NOTE(review): 1000 is presumably the API's paging cap; confirm
    ## against the current Yelp Fusion documentation.
    if (n_results + results_per_page) > 1000:
        print('Reached the 1000-result limit. Stopping loop.')
        break

    ## Use n_results as the OFFSET
    results = yelp_login.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)

    ## An empty page means the offset would never advance, so every
    ## remaining iteration would repeat the same request
    new_businesses = results['businesses']
    if not new_businesses:
        print('No more results returned. Stopping loop.')
        break

    ## Append the new results and save to file
    previous_results.extend(new_businesses)

    with open(query_file,'w') as f:
        json.dump(previous_results,f)

    ## Short pause between requests
    time.sleep(.2)

  0%|          | 0/21 [00:00<?, ?it/s]

In [25]:
# Final DataFrame: load every saved business record and preview both ends.

final_df = pd.read_json(query_file)
for preview in (final_df.head(), final_df.tail()):
    display(preview)

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,v0rljG019LUEBWscFd3QSA,prospect-coffee-roasters-ventura,Prospect Coffee Roasters,https://s3-media2.fl.yelpcdn.com/bphoto/-8dNBe...,False,https://www.yelp.com/biz/prospect-coffee-roast...,284,"[{'alias': 'coffeeroasteries', 'title': 'Coffe...",4.5,"{'latitude': 34.2797234, 'longitude': -119.285...",[],$$,"{'address1': '92 S Laurel St', 'address2': Non...",,,4929.700179
1,tVXzQcLwvy49evwF8q2Szg,humblemaker-coffee-co-ventura-7,Humblemaker Coffee Co.,https://s3-media2.fl.yelpcdn.com/bphoto/7caFL8...,False,https://www.yelp.com/biz/humblemaker-coffee-co...,47,"[{'alias': 'coffee', 'title': 'Coffee & Tea'}]",4.5,"{'latitude': 34.28045, 'longitude': -119.29578}","[pickup, delivery]",$$,"{'address1': '324 E Main St', 'address2': 'Ste...",18056283219.0,(805) 628-3219,5877.736872
2,hHlVADfvYyzN8XGDVaSjNw,beacon-coffee-company-ventura,Beacon Coffee Company,https://s3-media1.fl.yelpcdn.com/bphoto/qTya7A...,False,https://www.yelp.com/biz/beacon-coffee-company...,260,"[{'alias': 'coffee', 'title': 'Coffee & Tea'}]",4.5,"{'latitude': 34.2443544757629, 'longitude': -1...",[],$,"{'address1': '5777 Olivas Park Dr', 'address2'...",,,3246.92608
3,kJ4LP49r_LnoCBzOJdLp4A,tatianas-coffee-and-tea-ventura,Tatiana's Coffee and Tea,https://s3-media4.fl.yelpcdn.com/bphoto/UUua83...,False,https://www.yelp.com/biz/tatianas-coffee-and-t...,472,"[{'alias': 'coffee', 'title': 'Coffee & Tea'},...",5.0,"{'latitude': 34.2753877, 'longitude': -119.263...",[delivery],$$,"{'address1': '2470 E Main St', 'address2': Non...",18059017620.0,(805) 901-7620,2867.309808
4,EdhZ2l_wuhxvCFoMnMrwSA,singing-sun-coffee-ventura,Singing Sun Coffee,https://s3-media1.fl.yelpcdn.com/bphoto/PLVS9f...,False,https://www.yelp.com/biz/singing-sun-coffee-ve...,92,"[{'alias': 'coffee', 'title': 'Coffee & Tea'}]",4.5,"{'latitude': 34.27796, 'longitude': -119.27135}",[delivery],$$,"{'address1': '1930 E Main St', 'address2': Non...",,,3673.101744


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
411,91DKVY0ljt0b9gGoZbjFHQ,ralphs-grocery-camarillo-2,Ralphs Grocery,https://s3-media3.fl.yelpcdn.com/bphoto/ZVDWrN...,False,https://www.yelp.com/biz/ralphs-grocery-camari...,85,"[{'alias': 'grocery', 'title': 'Grocery'}]",3.5,"{'latitude': 34.2234948781155, 'longitude': -1...",[],$$,"{'address1': '674 Las Posas Rd', 'address2': '...",18053881781,(805) 388-1781,15957.200255
412,FRb1Yj0GS971p_KpyZYadQ,dennys-camarillo,Denny's,https://s3-media3.fl.yelpcdn.com/bphoto/OaVInM...,False,https://www.yelp.com/biz/dennys-camarillo?adju...,154,"[{'alias': 'breakfast_brunch', 'title': 'Break...",2.0,"{'latitude': 34.2181152436497, 'longitude': -1...","[delivery, pickup]",$,"{'address1': '1659 E Daily Dr', 'address2': ''...",18054844137,(805) 484-4137,17919.539035
413,bMgauTlniS7o8vM6HYttOw,ihop-camarillo,IHOP,https://s3-media2.fl.yelpcdn.com/bphoto/anAYOW...,False,https://www.yelp.com/biz/ihop-camarillo?adjust...,96,"[{'alias': 'breakfast_brunch', 'title': 'Break...",3.5,"{'latitude': 34.2187918, 'longitude': -119.04987}","[delivery, pickup]",$$,"{'address1': '1620 E Daily Dr', 'address2': ''...",18054841346,(805) 484-1346,17746.45226
414,9Okx6DWQSaoxPtKBr-YRIQ,vallarta-supermarkets-oxnard-3,Vallarta Supermarkets,https://s3-media1.fl.yelpcdn.com/bphoto/O-a_Zj...,False,https://www.yelp.com/biz/vallarta-supermarkets...,69,"[{'alias': 'grocery', 'title': 'Grocery'}]",3.0,"{'latitude': 34.190659, 'longitude': -119.178235}","[delivery, pickup]",$,"{'address1': '1050 S A St', 'address2': '', 'a...",18052401400,(805) 240-1400,9768.389697
415,_otwHgGA1ow-crjgnMdQ5A,vons-camarillo-2,Vons,https://s3-media1.fl.yelpcdn.com/bphoto/FmK572...,False,https://www.yelp.com/biz/vons-camarillo-2?adju...,63,"[{'alias': 'grocery', 'title': 'Grocery'}]",2.5,"{'latitude': 34.2248473, 'longitude': -119.037...",[],$$,"{'address1': '820 Arneill Rd', 'address2': '',...",18054828802,(805) 482-8802,18707.122056


In [26]:
# Keep a single row per business id, then confirm no duplicated ids remain.
final_df = final_df.drop_duplicates(subset=['id'])
final_df.duplicated(subset=['id']).sum()

0

In [27]:
# Save the de-duplicated results as a gzip-compressed CSV.
# NOTE(review): this writes to 'Data/' while the in-progress JSON is under
# 'data/'; on a case-sensitive filesystem those are different folders, so the
# target folder is created first to keep to_csv from failing.
final_csv_path = 'Data/final_results_Ventura_coffee.csv.gz'
os.makedirs(os.path.dirname(final_csv_path), exist_ok=True)
final_df.to_csv(final_csv_path, compression='gzip',index=False)