In [1]:
!pip install yelpapi
!pip install tqdm




In [2]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Additional Imports
# os - for saving and loading files
# json - to work with json files
# math - to round up results
# time - to add a short pause to not overwhelm the server
import os, json, math, time

# to make yelpapi calls
from yelpapi import YelpAPI

# progress bar from tqdm_notebook
from tqdm.notebook import tqdm_notebook



Load Credentials and Create Yelp API Object

In [3]:
# Load API Credentials
with open('/Users/nourtafa/.secret/yelp_api.json', 'r') as f:
    login = json.load(f)

In [4]:
login.keys()

dict_keys(['client-id', 'api-key'])

In [5]:
# Instantiate YelpAPI Variable
yelp = YelpAPI(login['api-key'], timeout_s = 5.0)

Define Search Terms and File Paths

In [6]:
# set our API call parameters 
LOCATION = 'Philadelphia, PA'
TERM = 'Philly cheesesteak'

In [7]:
LOCATION.split(',')[0]

'Philadelphia'

In [8]:
## Specify fodler for saving data
FOLDER = 'Data/'

os.makedirs(FOLDER, exist_ok = True)
# Specifying JSON_FILE filename (can include a folder)
JSON_FILE = FOLDER+f"{LOCATION.split(',')[0]}-{TERM}.json"

In [9]:
JSON_FILE

'Data/Philadelphia-Philly cheesesteak.json'

Check if Json File exists and Create it if it doesn't

In [10]:
## Check if JSON_FILE exists
file_exists = os.path.isfile(JSON_FILE)
## If it does not exist: 
if file_exists == False:    
    ## CREATE ANY NEEDED FOLDERS
    # Get the Folder Name only
    folder = os.path.dirname(JSON_FILE)
    
    ## If JSON_FILE included a folder:
    if len(folder)>0:
        # create the folder
        os.makedirs(folder, exist_ok = True)
        
        
    ## INFORM USER AND SAVE EMPTY LIST
    print(f"[i] {JSON_FILE} not found. Saving empty list to file.")
    
    
    ## save the first page of results
    with open(JSON_FILE, 'w') as f:
          json.dump([], f)
        
## If it exists, inform user
else:
    print(f"[i] {JSON_FILE} already exists.")

[i] Data/Philadelphia-Philly cheesesteak.json already exists.


In [11]:
## Load previous results and use len of results for offset
with open(JSON_FILE,'r') as f:
    previous_results = json.load(f)
    
## set offset based on previous results
n_results = len(previous_results)
print(f'- {n_results} previous results found.')


- 0 previous results found.


In [12]:
# use our yelp_api variable's search_query method to perform our API call
results = yelp.search_query(location=LOCATION,
                                term=TERM,
                               offset=n_results)
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [13]:
results['total']

1100

In [14]:
results['region']

{'center': {'longitude': -75.11764526367188, 'latitude': 40.00218959599397}}

In [15]:
results['businesses']

[{'id': 'RQAF6a0akMiot5lZZnMNNw',
  'alias': 'dalessandro-s-steaks-and-hoagies-philadelphia',
  'name': 'Dalessandro’s Steaks & Hoagies',
  'image_url': 'https://s3-media3.fl.yelpcdn.com/bphoto/yGVA9fRK4ijYuAdhjJK_fA/o.jpg',
  'is_closed': False,
  'url': 'https://www.yelp.com/biz/dalessandro-s-steaks-and-hoagies-philadelphia?adjust_creative=PPh82t5T8b6EVvQrQMiv2w&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=PPh82t5T8b6EVvQrQMiv2w',
  'review_count': 3159,
  'categories': [{'alias': 'sandwiches', 'title': 'Sandwiches'},
   {'alias': 'cheesesteaks', 'title': 'Cheesesteaks'}],
  'rating': 4.0,
  'coordinates': {'latitude': 40.029494, 'longitude': -75.2059714},
  'transactions': ['delivery'],
  'price': '$$',
  'location': {'address1': '600 Wendover St',
   'address2': '',
   'address3': '',
   'city': 'Philadelphia',
   'zip_code': '19128',
   'country': 'US',
   'state': 'PA',
   'display_address': ['600 Wendover St', 'Philadelphia, PA 19128']},
  'phone': '+121

In [16]:
## How many results total?
pd.DataFrame(results['businesses'])

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,RQAF6a0akMiot5lZZnMNNw,dalessandro-s-steaks-and-hoagies-philadelphia,Dalessandro’s Steaks & Hoagies,https://s3-media3.fl.yelpcdn.com/bphoto/yGVA9f...,False,https://www.yelp.com/biz/dalessandro-s-steaks-...,3159,"[{'alias': 'sandwiches', 'title': 'Sandwiches'...",4.0,"{'latitude': 40.029494, 'longitude': -75.2059714}",[delivery],$$,"{'address1': '600 Wendover St', 'address2': ''...",12154825407,(215) 482-5407,8111.519762
1,-mIlmp5l4hKlp1tvHRdvTg,sonnys-famous-steaks-philadelphia,Sonny's Famous Steaks,https://s3-media2.fl.yelpcdn.com/bphoto/c333HA...,False,https://www.yelp.com/biz/sonnys-famous-steaks-...,1887,"[{'alias': 'cheesesteaks', 'title': 'Cheeseste...",4.0,"{'latitude': 39.94982923010674, 'longitude': -...","[pickup, delivery]",$$,"{'address1': '228 Market St', 'address2': '', ...",12156295760,(215) 629-5760,6264.888631
2,LM54ufrINJWoTN5imV8Etw,johns-roast-pork-philadelphia,John's Roast Pork,https://s3-media3.fl.yelpcdn.com/bphoto/qPyjRa...,False,https://www.yelp.com/biz/johns-roast-pork-phil...,1746,"[{'alias': 'sandwiches', 'title': 'Sandwiches'...",4.5,"{'latitude': 39.9210273851256, 'longitude': -7...","[pickup, delivery]",$$,"{'address1': '14 E Snyder Ave', 'address2': ''...",12154631951,(215) 463-1951,9321.694746
3,PP3BBaVxZLcJU54uP_wL6Q,pats-king-of-steaks-philadelphia-5,pat's king of steaks,https://s3-media4.fl.yelpcdn.com/bphoto/f-Kogk...,False,https://www.yelp.com/biz/pats-king-of-steaks-p...,4533,"[{'alias': 'sandwiches', 'title': 'Sandwiches'...",3.0,"{'latitude': 39.9332010582612, 'longitude': -7...","[pickup, delivery]",$$,"{'address1': '1237 E Passyunk Ave', 'address2'...",12154681546,(215) 468-1546,8451.47628
4,IkY2ticzHEn4QFn8hQLSWg,genos-steaks-philadelphia,Geno's Steaks,https://s3-media2.fl.yelpcdn.com/bphoto/meMxwQ...,False,https://www.yelp.com/biz/genos-steaks-philadel...,3621,"[{'alias': 'sandwiches', 'title': 'Sandwiches'...",2.5,"{'latitude': 39.9338367849925, 'longitude': -7...","[pickup, delivery]",$$,"{'address1': '1219 S 9th St', 'address2': '', ...",12153890659,(215) 389-0659,8371.140102
5,cFSyJluKa2SHtgMMvlx6SQ,angelos-pizzeria-philadelphia,Angelo's Pizzeria,https://s3-media3.fl.yelpcdn.com/bphoto/iz4rk2...,False,https://www.yelp.com/biz/angelos-pizzeria-phil...,588,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.5,"{'latitude': 39.940651781214825, 'longitude': ...",[delivery],$$,"{'address1': '736 S 9th St', 'address2': '', '...",12159220000,(215) 922-0000,7643.476659
6,jxEMFqwDJXjCxmcm5t5jVQ,cleavers-philadelphia,Cleavers,https://s3-media1.fl.yelpcdn.com/bphoto/9dfyW_...,False,https://www.yelp.com/biz/cleavers-philadelphia...,1538,"[{'alias': 'cheesesteaks', 'title': 'Cheeseste...",4.0,"{'latitude': 39.9513931274414, 'longitude': -7...","[pickup, delivery]",$$,"{'address1': '108 S 18th St', 'address2': '', ...",12155153828,(215) 515-3828,7227.91675
7,q-zV08jt6U-q05SMEuQJAQ,tony-and-nicks-steaks-philadelphia,Tony & Nick's Steaks,https://s3-media1.fl.yelpcdn.com/bphoto/NW552n...,False,https://www.yelp.com/biz/tony-and-nicks-steaks...,2014,"[{'alias': 'sandwiches', 'title': 'Sandwiches'...",4.0,"{'latitude': 39.9141065085686, 'longitude': -7...","[pickup, delivery]",$$,"{'address1': '39 E Oregon Ave', 'address2': ''...",12155515725,(215) 551-5725,10146.334733
8,7pAgxBMUjrVPH7xh3fn-gw,oh-brother-philly-philadelphia,Oh Brother Philly,https://s3-media3.fl.yelpcdn.com/bphoto/YbjY1z...,False,https://www.yelp.com/biz/oh-brother-philly-phi...,744,"[{'alias': 'cheesesteaks', 'title': 'Cheeseste...",4.5,"{'latitude': 39.94971, 'longitude': -75.14411}","[pickup, delivery]",$$,"{'address1': '206 Market St', 'address2': '', ...",12155153255,(215) 515-3255,6258.740848
9,LFyuwzv0CHgR44y0XSJbPA,leos-steak-shop-folcroft,Leo's Steak Shop,https://s3-media3.fl.yelpcdn.com/bphoto/9R5m0_...,False,https://www.yelp.com/biz/leos-steak-shop-folcr...,617,"[{'alias': 'sandwiches', 'title': 'Sandwiches'...",4.5,"{'latitude': 39.90521, 'longitude': -75.27942}",[delivery],$$,"{'address1': '1403 Chester Pike', 'address2': ...",16104619901,(610) 461-9901,17507.961498


In [17]:
## How many did we get the details for?
results_per_page = len(results['businesses'])
results_per_page

20

In [18]:
# Use math.ceil to round up for the total number of pages of results.
n_pages = math.ceil((results['total'])/ results_per_page)
n_pages

55

In [19]:
for i in tqdm_notebook(range(1,n_pages+1)):
    ## The block of code we want to TRY to run
    try:
        
        time.sleep(.2)
        
        ## Read in results in progress file and check the length
        with open(JSON_FILE, 'r') as f:
            previous_results = json.load(f)
        
        ## save number of results for to use as offset
        n_results = len(previous_results)
        
        
        ## use n_results as the OFFSET 
        results = yelp.search_query(location = location, term = term,
                                   offset = n_results+1)

        ## append new results and save to file
        previous_results.extend(results['businesses'])
        
        with open(JSON_FILE, 'w') as f:
            json.dump(previous_results, f)

            
    ## What to do if we get an error/exception.
    except Exception as e:
        print(' [!] ERROR', e)
        

  0%|          | 0/55 [00:00<?, ?it/s]

 [!] ERROR name 'location' is not defined
 [!] ERROR name 'location' is not defined
 [!] ERROR name 'location' is not defined
 [!] ERROR name 'location' is not defined
 [!] ERROR name 'location' is not defined
 [!] ERROR name 'location' is not defined
 [!] ERROR name 'location' is not defined
 [!] ERROR name 'location' is not defined
 [!] ERROR name 'location' is not defined
 [!] ERROR name 'location' is not defined
 [!] ERROR name 'location' is not defined
 [!] ERROR name 'location' is not defined
 [!] ERROR name 'location' is not defined
 [!] ERROR name 'location' is not defined
 [!] ERROR name 'location' is not defined
 [!] ERROR name 'location' is not defined
 [!] ERROR name 'location' is not defined
 [!] ERROR name 'location' is not defined
 [!] ERROR name 'location' is not defined
 [!] ERROR name 'location' is not defined
 [!] ERROR name 'location' is not defined
 [!] ERROR name 'location' is not defined
 [!] ERROR name 'location' is not defined
 [!] ERROR name 'location' is not 

In [20]:
df = pd.read_json(JSON_FILE)

In [21]:
df.head()

In [22]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Float64Index: 0 entries
Empty DataFrame


In [23]:
## convert the filename to a .csv.gz
csv_file = JSON_FILE.replace('.json','.csv.gz')
csv_file

'Data/Philadelphia-Philly cheesesteak.csv.gz'

In [24]:
## Save it as a compressed csv (to save space)
df.to_csv(csv_file, compression = 'gzip', index = False)

Bonus: compare filesize with os module's os.path.getsize

In [25]:
size_json = os.path.getsize(JSON_FILE)
size_csv_gz = os.path.getsize(JSON_FILE.replace('.json','.csv.gz'))

print(f'JSON FILE: {size_json:,} Bytes')
print(f'CSV.GZ FILE: {size_csv_gz:,} Bytes')

print(f'the csv.gz is {size_json/size_csv_gz} times smaller!')

JSON FILE: 2 Bytes
CSV.GZ FILE: 63 Bytes
the csv.gz is 0.031746031746031744 times smaller!
