# Efficient Yelp API calls
- Michael Vincent
- 9/19

## Imports

In [1]:
# Imports
import numpy as np
import pandas as pd
import os, math, json, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

## Helper function for JSON file creation.

In [2]:
def create_json_file(json_file, delete_if_exists = False):
    """Make sure `json_file` exists, seeding it with an empty JSON list when needed.

    Parameters
    ----------
    json_file : str
        Path of the JSON file. Any missing parent folders are created.
    delete_if_exists : bool, default False
        If True and the file already exists, it is deleted and replaced by a
        fresh file holding an empty list. If False, an existing file is left
        untouched.

    Returns
    -------
    None
    """
    if os.path.isfile(json_file):
        if not delete_if_exists:
            # Keep whatever has been saved so far
            print(f'[i] {json_file} already exists.')
            return
        print(f'[!] {json_file} already exists. Deleting the previous file.')
        os.remove(json_file)
    else:
        print(f'[i] {json_file} not found. Saving empty list to new file.')

    # Create any needed folders (dirname is '' when json_file has no folder part)
    folder = os.path.dirname(json_file)
    if len(folder) > 0:
        os.makedirs(folder, exist_ok = True)

    # Start (or restart) the file with an empty list so it can always be json.load-ed
    with open(json_file, 'w') as f:
        json.dump([], f)

## Load API credentials


In [3]:
# Read the Yelp API key from a local JSON secrets file kept outside the notebook
# NOTE(review): this absolute path is specific to one user's machine.
with open('/home/michael/.secret/yelp_api.json') as secret_file:
    login = json.load(secret_file)

# Build the API client from the stored key
yelp_api = YelpAPI(login['api-key'], timeout_s = 5.0)

## Specify file name and create the file to store the API calls

In [4]:
# File that accumulates the business records across API calls
json_file = 'yelp_records.json'

# Make sure the file exists; an existing file is kept because delete_if_exists is False
create_json_file(json_file, delete_if_exists = False)

[i] yelp_records.json alread exsits.


## Initial query and setup for the loop

In [5]:
# Run the first search to see the response layout and the total hit count
results = yelp_api.search_query(
    term = 'Vegetarian',
    location = 'Seattle, WA',
)

# Show the top-level keys of the response
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [6]:
# Report how many businesses matched the query overall
print(f"There are {results['total']} results in our query.")

There are 4300 results in our query.


In [7]:
# The length of the first page is the number of businesses returned per request
results_per_page = len(results['businesses'])
print(f'There are {results_per_page} results per page.')

There are 20 results per page.


In [8]:
# Pages needed = total hits divided by hits per page, rounded up
total_results = results['total']
n_pages = math.ceil(total_results / len(results['businesses']))
n_pages

215

In [9]:
# Store the search results
# NOTE(review): the loop below reloads previous_results from json_file before
# using it, so this first page is not what gets extended and saved.
previous_results = results['businesses']

## Loop to get the remaining results


In [10]:
# Page through the results, saving to disk after every request
for _page in tqdm_notebook(range(1, n_pages + 1)):
    # Reload everything saved so far; its length doubles as the next offset
    with open(json_file, 'r') as saved:
        previous_results = json.load(saved)
    n_results = len(previous_results)

    # Stop before another page would push the saved total beyond 1000
    # NOTE(review): the threshold counts saved results, although the message says "API calls"
    if n_results + results_per_page > 1000:
        print('Exceeded 1000 API calls. Terminating loop.')
        break

    # Request the next page, starting where the saved records end
    results = yelp_api.search_query(term = 'Vegetarian',
                                    location = 'Seattle, WA',
                                    offset = n_results)

    # Add the new businesses and rewrite the file so progress is kept on disk
    previous_results.extend(results['businesses'])
    with open(json_file, 'w') as out:
        json.dump(previous_results, out)

    # Pause 200 ms between requests
    time.sleep(0.2)

  0%|          | 0/215 [00:00<?, ?it/s]

Exceeded 1000 API calls. Terminating loop.


## Put the search results in a data frame and save to a compressed CSV file

In [11]:
# Load every saved record into a data frame
df = pd.read_json(json_file)

# Preview the first and last rows
display(df.head(), df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,djjEdAZkY4eyzI3_8TBLVA,plum-bistro-seattle-2,Plum Bistro,https://s3-media3.fl.yelpcdn.com/bphoto/DlnguN...,False,https://www.yelp.com/biz/plum-bistro-seattle-2...,1802,"[{'alias': 'vegan', 'title': 'Vegan'}]",4.0,"{'latitude': 47.6137675, 'longitude': -122.317...","[delivery, pickup]",$$,"{'address1': '1429 12th Ave', 'address2': '', ...",12068385333,(206) 838-5333,1888.19153
1,9nJlVsm9GEz6DFhK5ckaYA,cafe-flora-seattle,Cafe Flora,https://s3-media1.fl.yelpcdn.com/bphoto/M33I7s...,False,https://www.yelp.com/biz/cafe-flora-seattle?ad...,1433,"[{'alias': 'vegetarian', 'title': 'Vegetarian'...",4.0,"{'latitude': 47.62397, 'longitude': -122.29521}",[delivery],$$,"{'address1': '2901 E Madison St', 'address2': ...",12063259100,(206) 325-9100,3033.471253
2,0jZIOA_9H3YOsbiyoyRWrA,seattle-lotus-vegetarian-restaurant-seattle,Seattle Lotus Vegetarian Restaurant,https://s3-media1.fl.yelpcdn.com/bphoto/GWvM2a...,False,https://www.yelp.com/biz/seattle-lotus-vegetar...,176,"[{'alias': 'vegetarian', 'title': 'Vegetarian'...",4.5,"{'latitude': 47.5088066528403, 'longitude': -1...",[pickup],$$,"{'address1': '10439 16th Ave SW', 'address2': ...",12064666345,(206) 466-6345,13054.220036
3,mOybprI4rEcRQUxBqAhupw,harvest-beat-seattle,Harvest Beat,https://s3-media3.fl.yelpcdn.com/bphoto/KVq5tn...,False,https://www.yelp.com/biz/harvest-beat-seattle?...,230,"[{'alias': 'vegetarian', 'title': 'Vegetarian'...",4.5,"{'latitude': 47.66115, 'longitude': -122.33678}",[],$$$,"{'address1': '1711 N 45th St', 'address2': '',...",12065471348,(206) 547-1348,3976.167517
4,7eoMEbus8ANI15VuYzUNkQ,moonlight-cafe-seattle,Moonlight Cafe,https://s3-media4.fl.yelpcdn.com/bphoto/J2m1Jy...,False,https://www.yelp.com/biz/moonlight-cafe-seattl...,451,"[{'alias': 'vegetarian', 'title': 'Vegetarian'...",4.0,"{'latitude': 47.59901, 'longitude': -122.30672}","[delivery, pickup]",$,"{'address1': '1919 S Jackson St', 'address2': ...",12064857313,(206) 485-7313,3654.663113


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
995,USoCUrnuGw1mwzVpo_dlmA,guanacos-tacos-pupuseria-seattle,Guanaco's Tacos Pupuseria,https://s3-media3.fl.yelpcdn.com/bphoto/qJDEQ5...,False,https://www.yelp.com/biz/guanacos-tacos-pupuse...,365,"[{'alias': 'salvadoran', 'title': 'Salvadoran'...",4.0,"{'latitude': 47.65714142901622, 'longitude': -...","[delivery, pickup]",$$,"{'address1': '4106 Brooklyn Ave NE', 'address2...",12065472369,(206) 547-2369,3877.166003
996,nQPPzUaPIMeJdohOrrjmOQ,junebaby-seattle,JuneBaby,https://s3-media3.fl.yelpcdn.com/bphoto/X-35Xy...,False,https://www.yelp.com/biz/junebaby-seattle?adju...,574,"[{'alias': 'southern', 'title': 'Southern'}]",4.0,"{'latitude': 47.67591, 'longitude': -122.3041}",[delivery],$$,"{'address1': '2122 NE 65th St', 'address2': No...",12062574470,(206) 257-4470,6089.943581
997,WpewCoy0Y3scs24z87fkSA,new-china-express-seattle-2,New China Express,https://s3-media2.fl.yelpcdn.com/bphoto/nLMKuV...,False,https://www.yelp.com/biz/new-china-express-sea...,176,"[{'alias': 'chinese', 'title': 'Chinese'}]",3.5,"{'latitude': 47.64518117682651, 'longitude': -...","[delivery, pickup]",$$,"{'address1': '2809 Thorndyke Ave W', 'address2...",12062174725,(206) 217-4725,4193.937034
998,q8jisVM1b30HxvjCMWrXVw,yangguofu-ygf-malatang-seattle,YangGuoFu YGF MalaTang,https://s3-media4.fl.yelpcdn.com/bphoto/tJSbK5...,False,https://www.yelp.com/biz/yangguofu-ygf-malatan...,48,"[{'alias': 'chinese', 'title': 'Chinese'}, {'a...",4.0,"{'latitude': 47.66371549516702, 'longitude': -...",[],$$,"{'address1': '4730 University Way NE', 'addres...",12069223496,(206) 922-3496,4581.551901
999,q1mDH2gq_NVCj0y3Pq7bDA,chicagos-pizza-with-a-twist-lynnwood-2,Chicago's Pizza With A Twist,https://s3-media2.fl.yelpcdn.com/bphoto/NEuDPc...,False,https://www.yelp.com/biz/chicagos-pizza-with-a...,89,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",3.5,"{'latitude': 47.80842312103281, 'longitude': -...","[delivery, pickup]",,"{'address1': '20925 Cypress Way', 'address2': ...",14256163393,(425) 616-3393,21122.261794


In [12]:
# Count rows whose business id has already appeared earlier in the frame
df['id'].duplicated().sum()

5

In [13]:
# Keep only the first row for each business id
df = df.drop_duplicates(subset = 'id')

# Confirm no repeated ids remain (expect 0)
df['id'].duplicated().sum()

0

In [14]:
# Make a folder to save the data in
# exist_ok = True makes this cell safe to re-run when 'data' is already there
os.makedirs('data', exist_ok = True)

In [15]:
# Write the de-duplicated records as a gzip-compressed CSV, without the index column
# NOTE(review): the file name has no .csv.gz extension, so readers will likely
# need to pass compression = 'gzip' explicitly when loading it.
df.to_csv(
    'data/final_results_seattle_vegetarian',
    index = False,
    compression = 'gzip',
)