# Shelly-Ann Duncan
# 11/17/22
# Yelp API Calls (Core) 


# Import necessary libraries

In [1]:
# imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#additional imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [2]:
# load API credentials
with open('/Users/shell/.secret/yelp_api.json', 'r') as f: 
    login = json.load(f)
login.keys()

dict_keys(['Client-ID', 'API Key'])

In [3]:
# instantiate YelpAPI variable
yelp = YelpAPI(login['API Key'], timeout_s = 5.0)
yelp

<yelpapi.yelpapi.YelpAPI at 0x20825829070>

# Define search terms and file paths

In [4]:
# set API call paramertes and filename before the first call
location = 'Brooklyn, NY 11236'
term = 'hamburgers'

In [5]:
# specify folder for saving data
FOLDER = 'Data/'
os.makedirs(FOLDER, exist_ok = True)

In [6]:
# specify JSON_FILE filename (can include a folder)
JSON_FILE = FOLDER + f"{location.split(',')[0]}-{term}.json"
JSON_FILE

'Data/Brooklyn-hamburgers.json'

# Check if Json file exists and create if not

In [7]:
# check if the JSON_FILE exists
file_exists = os.path.isfile(JSON_FILE)

# if it doens't exist:
if file_exists == False:
    
    # create any needed folders
    # get the folder name only
    folder = os.path.dirname(JSON_FILE)
    
    # if JSON_FILE included a folder:
    if len(folder) > 0:
        # create the folder
        os.makedirs(folder, exist_ok = True)
        
        # inform user and save empty list
        print(f"(i) {JSON_FILE} not found. Saving empty list to file.")
        
        # save the first page of results
        with open(JSON_FILE, 'w') as f:
            json.dump([], f)

# if it exists, inform user
else:
    print(f"(i) {JSON_FILE} already exists.")   

(i) Data/Brooklyn-hamburgers.json already exists.


# Make the first API call to get the first page of data

In [8]:
# quick test query
results = yelp.search_query(location = location, term = term)
type(results)

dict

In [9]:
# get the result keys
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [10]:
# get the results for the businesses
results['businesses'][0]

{'id': 'enbGceHhbBWe4cgMSePd1g',
 'alias': '3rd-and-7-brooklyn',
 'name': '3rd & 7',
 'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/R4FqTDFgLcVfwS-OQa-8jA/o.jpg',
 'is_closed': False,
 'url': 'https://www.yelp.com/biz/3rd-and-7-brooklyn?adjust_creative=azSZ1YV457_fpKmHVQM9Bg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=azSZ1YV457_fpKmHVQM9Bg',
 'review_count': 92,
 'categories': [{'alias': 'sportsbars', 'title': 'Sports Bars'},
  {'alias': 'tradamerican', 'title': 'American (Traditional)'}],
 'rating': 4.0,
 'coordinates': {'latitude': 40.61497, 'longitude': -73.93606},
 'transactions': ['delivery'],
 'price': '$$',
 'location': {'address1': '3622 Quentin Rd',
  'address2': '',
  'address3': '',
  'city': 'Brooklyn',
  'zip_code': '11234',
  'country': 'US',
  'state': 'NY',
  'display_address': ['3622 Quentin Rd', 'Brooklyn, NY 11234']},
 'phone': '+17183366300',
 'display_phone': '(718) 336-6300',
 'distance': 4147.371871553623}

In [11]:
# convert to data frame
pd.DataFrame(results['businesses'])

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,enbGceHhbBWe4cgMSePd1g,3rd-and-7-brooklyn,3rd & 7,https://s3-media2.fl.yelpcdn.com/bphoto/R4FqTD...,False,https://www.yelp.com/biz/3rd-and-7-brooklyn?ad...,92,"[{'alias': 'sportsbars', 'title': 'Sports Bars...",4.0,"{'latitude': 40.61497, 'longitude': -73.93606}",[delivery],$$,"{'address1': '3622 Quentin Rd', 'address2': ''...",17183366300,(718) 336-6300,4147.371872
1,VkYg-L1v7_MkTs2vd4GGMA,bear-burgers-brooklyn,Bear Burgers,https://s3-media3.fl.yelpcdn.com/bphoto/8La1rT...,False,https://www.yelp.com/biz/bear-burgers-brooklyn...,3,"[{'alias': 'newamerican', 'title': 'American (...",4.0,"{'latitude': 40.64002, 'longitude': -73.90645}","[delivery, pickup]",,"{'address1': '8923 Flatlands Ave', 'address2':...",16318762809,(631) 876-2809,613.23954
2,p6RecYP3IzMacRVVYeBwkQ,hot-bagels-brooklyn-7,Hot Bagels,https://s3-media2.fl.yelpcdn.com/bphoto/LaDs11...,False,https://www.yelp.com/biz/hot-bagels-brooklyn-7...,23,"[{'alias': 'bagels', 'title': 'Bagels'}]",3.5,"{'latitude': 40.64203, 'longitude': -73.89913}","[delivery, pickup]",$,"{'address1': '1594 Rockaway Pkwy', 'address2':...",17182573068,(718) 257-3068,260.402337
3,fi8ATHFkHnZgFGJWZUl_BA,aunts-et-uncles-brooklyn,Aunts et Uncles,https://s3-media4.fl.yelpcdn.com/bphoto/H9Le_E...,False,https://www.yelp.com/biz/aunts-et-uncles-brook...,151,"[{'alias': 'cafes', 'title': 'Cafes'}]",4.5,"{'latitude': 40.65207, 'longitude': -73.94954}","[delivery, pickup]",$$,"{'address1': '1407 Nostrand Ave', 'address2': ...",13472950001,(347) 295-0001,4467.798853
4,GTYn-iP81HKVKl6FGT0_Cw,smashburger-brooklyn-4,Smashburger,https://s3-media4.fl.yelpcdn.com/bphoto/8mUOj9...,False,https://www.yelp.com/biz/smashburger-brooklyn-...,236,"[{'alias': 'burgers', 'title': 'Burgers'}]",3.0,"{'latitude': 40.65172, 'longitude': -73.87286}","[delivery, pickup]",$$,"{'address1': '528 Gateway Dr', 'address2': Non...",17182356900,(718) 235-6900,2597.572463
5,to_hmXb3Me-SZsiMfYuoGw,lebron-restaurant-brooklyn-4,Lebron Restaurant,https://s3-media4.fl.yelpcdn.com/bphoto/JJH62I...,False,https://www.yelp.com/biz/lebron-restaurant-bro...,5,"[{'alias': 'latin', 'title': 'Latin American'}...",2.5,"{'latitude': 40.66918689036557, 'longitude': -...","[delivery, pickup]",,"{'address1': '1567 Pitkin Ave', 'address2': ''...",17183429070,(718) 342-9070,3568.559015
6,i995W2_m6GuiBPnbHP2sPw,oasis-diner-restaurant-brooklyn-3,Oasis Diner Restaurant,https://s3-media1.fl.yelpcdn.com/bphoto/IA4pmU...,False,https://www.yelp.com/biz/oasis-diner-restauran...,195,"[{'alias': 'diners', 'title': 'Diners'}]",4.0,"{'latitude': 40.61786, 'longitude': -73.93211}","[delivery, pickup]",$$,"{'address1': '2132 Flatbush Ave', 'address2': ...",17182583461,(718) 258-3461,3683.51269
7,SyWBNlwqpgb4GLdpTEoQAw,more-than-fries-brooklyn,More Than Fries,https://s3-media2.fl.yelpcdn.com/bphoto/RulwlU...,False,https://www.yelp.com/biz/more-than-fries-brook...,1,"[{'alias': 'hotdogs', 'title': 'Fast Food'}, {...",5.0,"{'latitude': 40.6642, 'longitude': -73.92428}","[delivery, pickup]",,"{'address1': '1077a Rutland Rd', 'address2': '...",13472406806,(347) 240-6806,3458.581191
8,VgXNaUKNFW2ibLU9LkuGjg,lindenwood-diner-and-restaurant-brooklyn,Lindenwood Diner & Restaurant,https://s3-media2.fl.yelpcdn.com/bphoto/M1EBu2...,False,https://www.yelp.com/biz/lindenwood-diner-and-...,376,"[{'alias': 'diners', 'title': 'Diners'}, {'ali...",4.0,"{'latitude': 40.66989, 'longitude': -73.85758}",[pickup],$$,"{'address1': '2870 Linden Blvd', 'address2': '...",17182356343,(718) 235-6343,4864.815003
9,gm8BRPLIRTzapBuBUWNPfA,island-burger-brooklyn-4,Island Burger,https://s3-media4.fl.yelpcdn.com/bphoto/MQnA2i...,False,https://www.yelp.com/biz/island-burger-brookly...,194,"[{'alias': 'burgers', 'title': 'Burgers'}, {'a...",3.5,"{'latitude': 40.634518758278936, 'longitude': ...","[delivery, pickup]",$,"{'address1': '2093 Nostrand Ave', 'address2': ...",13477899955,(347) 789-9955,4125.713778


In [12]:
# how many results total?
results['total']

218

In [13]:
# what is in the region?
results['region']

{'center': {'longitude': -73.8991928100586, 'latitude': 40.639615788863864}}

In [14]:
# how many did we get the details for?
results_per_page = len(results['businesses'])
results_per_page

20

In [15]:
# use the math.cell to round up for the total number of pages of results
n_pages = math.ceil((results['total']) / results_per_page)
n_pages

11

# Extract and save the data

In [16]:
# create a for loop
for i in tqdm_notebook(range(1, n_pages + 1)):
   # the block of code we want to TRY to run
    try:
        
        time.sleep(.2)
        
        # Read in results in progress file and check the length
        with open(JSON_FILE, 'r') as f:
            previous_results = json.load(f)
            
        # save number of results for to use as offset
        n_results = len(previous_results)
        
        
        # use n_results as the OFFSET 
        results = yelp.search_query(location = location,
                                        term = term, 
                                        offset = n_results + 1)

        # append new results and save to file
        previous_results.extend(['businesses'])

        with open(JSON_FILE,'w') as f:
            json.dump(previous_results,f)
            
    # what to do if we get an error/exception.
    except Exception as e: # saving the error message so we can print it.
        print('[!] ERROR: ',e)

  0%|          | 0/11 [00:00<?, ?it/s]

# Convert JSON to dataframe

In [17]:
# load the final results
df = pd.read_json(JSON_FILE)
df.head()

Unnamed: 0,0
0,businesses
1,businesses
2,businesses
3,businesses
4,businesses


* Not sure what error caused my final results to occur like this.

In [18]:
# convert the file to a .csv.gz
csv_file = JSON_FILE.replace('.json', '.csv.gz')
csv_file

'Data/Brooklyn-hamburgers.csv.gz'

In [20]:
# save to a compressed csv (this is to save space)
df.to_csv(csv_file, compression = 'gzip', index = False)