# Efficient Yelp API Calls 

In [1]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Additional Imports
# os - for saving and loading files
# json - to work with json files
# math - to round up results
# time - to add a short pause to not overwhelm the server
import os, json, math, time

# to make yelpapi calls
from yelpapi import YelpAPI

# progress bar from tqdm_notebook
from tqdm.notebook import tqdm_notebook

In [44]:
!pip install yelpapi
!pip install pandas



## Loading Yelp API

In [3]:

import json
with open('/Users/patelmedzy/.secret/yelp_api.json') as f:
    login = json.load(f)
login.keys()

dict_keys(['client-id', 'api-key'])

In [4]:
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)
yelp_api

<yelpapi.yelpapi.YelpAPI at 0x1417400a0>

## Yelp API to search pizza near in my city (Richmond, Tx)

In [5]:
location='Richmond, TX 77407'
term='Pizza'


In [6]:
location.split(',')[0]

'Richmond'

In [7]:
## Specify folder for saving data
FOLDER = 'Data/'

os.makedirs(FOLDER, exist_ok = True)
# Specifying JSON_FILE filename (can include a folder)
JSON_FILE = FOLDER+f"{location.split(',')[0]}-{term}.json"

In [8]:
JSON_FILE

'Data/Richmond-Pizza.json'

In [9]:
## Check if JSON_FILE exists
file_exists = os.path.isfile(JSON_FILE)
## If it does not exist: 
if file_exists == False:    
    ## CREATE ANY NEEDED FOLDERS
    # Get the Folder Name only
    folder = os.path.dirname(JSON_FILE)
    
    ## If JSON folder name is not empty:
    if len(folder)>0:
        # create the folder
        os.makedirs(folder, exist_ok = True)
        
        
    ## INFORM USER AND SAVE EMPTY LIST
    print(f"[i] {JSON_FILE} not found. Saving empty list to file.")
    
    
    ## save the first page of results
    with open(JSON_FILE, 'w') as f:
          json.dump([], f)
        
## If it exists, inform user
else:
    print(f"[i] {JSON_FILE} already exists.")

[i] Data/Richmond-Pizza.json already exists.


In [10]:
os.path.isfile(JSON_FILE)

True

In [24]:
# use our yelp_api variable's search_query method to perform our API call
pizza_results = yelp_api.search_query(term = term, location = location)

In [25]:
type(pizza_results)

dict

In [26]:
len(pizza_results)

3

In [27]:
pizza_results.keys()

dict_keys(['businesses', 'total', 'region'])

In [28]:
pizza_results['total']

158

In [29]:
pizza_results['businesses']

[{'id': 'cHToQZPO6yEfxJSMFU12sw',
  'alias': 'fat-boys-pizza-richmond-richmond',
  'name': "Fat Boy's Pizza - Richmond",
  'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/GTS4yw5K-OqTVeOUFZCllQ/o.jpg',
  'is_closed': False,
  'url': 'https://www.yelp.com/biz/fat-boys-pizza-richmond-richmond?adjust_creative=Q1Qdfk781tT6qWPSSx9wKg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=Q1Qdfk781tT6qWPSSx9wKg',
  'review_count': 62,
  'categories': [{'alias': 'pizza', 'title': 'Pizza'},
   {'alias': 'desserts', 'title': 'Desserts'},
   {'alias': 'beerbar', 'title': 'Beer Bar'}],
  'rating': 3.5,
  'coordinates': {'latitude': 29.653703149508893,
   'longitude': -95.70841800256004},
  'transactions': ['pickup', 'delivery'],
  'price': '$$',
  'location': {'address1': '10445 W Grand Pkwy S',
   'address2': 'Ste 150',
   'address3': '',
   'city': 'Richmond',
   'zip_code': '77407',
   'country': 'US',
   'state': 'TX',
   'display_address': ['10445 W Grand Pkwy S',
    'S

In [30]:
pizza_results['region']

{'center': {'longitude': -95.72628021240234, 'latitude': 29.676708410974392}}

In [31]:
pizza = pd.DataFrame(pizza_results['businesses'])

In [36]:
pizza.head(2)

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,cHToQZPO6yEfxJSMFU12sw,fat-boys-pizza-richmond-richmond,Fat Boy's Pizza - Richmond,https://s3-media2.fl.yelpcdn.com/bphoto/GTS4yw...,False,https://www.yelp.com/biz/fat-boys-pizza-richmo...,62,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",3.5,"{'latitude': 29.653703149508893, 'longitude': ...","[pickup, delivery]",$$,"{'address1': '10445 W Grand Pkwy S', 'address2...",17138322182,(713) 832-2182,3085.821153
1,nZ9FWG7QlbE4bYCTFJs7DQ,cup-n-char-richmond,Cup N Char,https://s3-media3.fl.yelpcdn.com/bphoto/gx1e3d...,False,https://www.yelp.com/biz/cup-n-char-richmond?a...,18,"[{'alias': 'pizza', 'title': 'Pizza'}]",5.0,"{'latitude': 29.661013325930316, 'longitude': ...",[],,"{'address1': '10450 Fm 1464', 'address2': 'Ste...",12819407440,(281) 940-7440,4228.448569


In [37]:
results_per_page = len(pizza_results['businesses'])
results_per_page

20

In [38]:
(pizza_results['total'])/ results_per_page

7.9

In [39]:
# Use math.ceil to round up for the total number of pages of results.
n_pages = math.ceil((pizza_results['total'])/ results_per_page)
n_pages

8

## Compiling extracted search results into a Dataframe

In [42]:
!pip install tqdm



In [45]:
for i in tqdm_notebook( range(1,n_pages+1)):
    ## The block of code we want to TRY to run
    try:
        
        time.sleep(.2)
        
        ## Read in results in progress file and check the length
        with open(JSON_FILE, 'r') as f:
            previous_results = json.load(f)
        
        ## save number of results for to use as offset
        n_results = len(previous_results)
        
        
        ## use n_results as the OFFSET 
        results = yelp_api.search_query(location = location, term=term,
                                   offset = n_results+1)

        ## append new results and save to file
        previous_results.extend(results['businesses'])
        
        with open(JSON_FILE, 'w') as f:
            json.dump(previous_results, f)

            
    ## What to do if we get an error/exception.
    except Exception as e:
        print('[!] ERROR', e)

  0%|          | 0/8 [00:00<?, ?it/s]

In [46]:
df = pd.read_json(JSON_FILE)

In [47]:
df.head()

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,location,phone,display_phone,distance,price
0,nZ9FWG7QlbE4bYCTFJs7DQ,cup-n-char-richmond,Cup N Char,https://s3-media3.fl.yelpcdn.com/bphoto/gx1e3d...,False,https://www.yelp.com/biz/cup-n-char-richmond?a...,18,"[{'alias': 'pizza', 'title': 'Pizza'}]",5.0,"{'latitude': 29.661013325930316, 'longitude': ...",[],"{'address1': '10450 Fm 1464', 'address2': 'Ste...",12819407440,(281) 940-7440,4228.448569,
1,knuJk3YRhUgX-7NtGv3hKg,brooklyn-pizzeria-richmond,Brooklyn Pizzeria,https://s3-media2.fl.yelpcdn.com/bphoto/deghxC...,False,https://www.yelp.com/biz/brooklyn-pizzeria-ric...,193,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.0,"{'latitude': 29.6609608699916, 'longitude': -9...",[delivery],"{'address1': '7930 W Grand Pkwy S', 'address2'...",12812323333,(281) 232-3333,2290.908598,$
2,RF0xAbGNKkTrIvlJwsLsLA,twisted-pizza-and-curries-richmond,Twisted Pizza & Curries,https://s3-media2.fl.yelpcdn.com/bphoto/XFDE4k...,False,https://www.yelp.com/biz/twisted-pizza-and-cur...,50,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.5,"{'latitude': 29.6542968, 'longitude': -95.7143...","[delivery, pickup]","{'address1': '11131 Harlem Rd', 'address2': 'S...",18325000023,(832) 500-0023,2746.648223,
3,Zzn0kEXv88U24HGjvJSxGQ,daddyo-s-pizza-katy-katy,DaddyO’s Pizza - Katy,https://s3-media1.fl.yelpcdn.com/bphoto/Gy9djJ...,False,https://www.yelp.com/biz/daddyo-s-pizza-katy-k...,131,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.5,"{'latitude': 29.7288749792794, 'longitude': -9...","[delivery, pickup]","{'address1': '6356 S Peek Rd', 'address2': 'St...",13463770777,(346) 377-0777,4396.158549,$
4,SEMYGKDD8O0ErJ-E8d_sxw,center-court-pizza-and-brew-richmond-3,Center Court Pizza & Brew,https://s3-media2.fl.yelpcdn.com/bphoto/K6WuMA...,False,https://www.yelp.com/biz/center-court-pizza-an...,69,"[{'alias': 'chicken_wings', 'title': 'Chicken ...",4.0,"{'latitude': 29.651672764769724, 'longitude': ...","[delivery, pickup]","{'address1': '18320 W Airport Blvd', 'address2...",17132347120,(713) 234-7120,3521.05081,$$


In [48]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 157 entries, 0 to 156
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             157 non-null    object 
 1   alias          157 non-null    object 
 2   name           157 non-null    object 
 3   image_url      157 non-null    object 
 4   is_closed      157 non-null    bool   
 5   url            157 non-null    object 
 6   review_count   157 non-null    int64  
 7   categories     157 non-null    object 
 8   rating         157 non-null    float64
 9   coordinates    157 non-null    object 
 10  transactions   157 non-null    object 
 11  location       157 non-null    object 
 12  phone          157 non-null    object 
 13  display_phone  157 non-null    object 
 14  distance       157 non-null    float64
 15  price          120 non-null    object 
dtypes: bool(1), float64(2), int64(1), object(12)
memory usage: 18.7+ KB


## Saving File
- csv
- compressed csv file 

In [49]:
## convert the filename to a .csv.gz
csv_file = JSON_FILE.replace('.json','.csv.gz')
csv_file

'Data/Richmond-Pizza.csv.gz'

In [50]:
## Save it as a compressed csv (to save space)
df.to_csv(csv_file, compression = 'gzip', index= False)

## Comparing File Sizes

In [51]:
# Compare filesize with os module's os.path.getsize
size_json = os.path.getsize(JSON_FILE)
size_csv_gz = os.path.getsize(JSON_FILE.replace('.json','.csv.gz'))

print(f'JSON FILE: {size_json:,} Bytes')
print(f'CSV.GZ FILE: {size_csv_gz:,} Bytes')

print(f'the csv.gz is {size_json/size_csv_gz} times smaller!')

JSON FILE: 154,333 Bytes
CSV.GZ FILE: 22,843 Bytes
the csv.gz is 6.756249179179617 times smaller!
