# Yelp ADI

- Robert Yonce
- 5/3/23
- Updated 5/7/23

# Imports

In [1]:
# Standard Imports

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Additional Imports

import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

# Keys & Login

In [2]:
# Keys
# Load the Yelp credentials from the current user's home directory instead of
# a hard-coded absolute path, so the notebook runs under any account/machine.

credentials_path = os.path.expanduser('~/.secret/yelp_api.json')
with open(credentials_path) as f:
    login = json.load(f)

# Display only the key names -- never the secret values themselves.
login.keys()

dict_keys(['client-id', 'api-key'])

In [3]:
# YelpAPI
# Build the client from the loaded API key (timeout_s is set to 5.0).

api_key = login['api-key']
yelp_api = YelpAPI(api_key, timeout_s=5.0)
yelp_api


<yelpapi.yelpapi.YelpAPI at 0x1630f5550>

# Set Parameters and File

In [4]:
# Search parameters for the API calls: LOCATION and TERM are passed to
# yelp_api.search_query as its `location` and `term` arguments.
LOCATION = 'Knoxville,TN'
TERM = 'Asian'

In [5]:
# Path of the JSON file in which results are accumulated across API calls.
# NOTE: the search terms ("Knox_Asian") are typed into the filename by hand,
# so it must be edited whenever LOCATION or TERM changes.

JSON_FILE_KNOX = "Data/results_in_progress_Knox_Asian.json"
JSON_FILE_KNOX

'Data/results_in_progress_Knox_Asian.json'

In [6]:
# Make sure the results file exists before any later cell tries to read it.

file_exists = os.path.isfile(JSON_FILE_KNOX)

if file_exists:
    # A previous run already created the file -- leave its contents alone.
    print(f"[i] {JSON_FILE_KNOX} already exists.")
else:
    # The path may name a parent directory that has not been created yet.
    parent_dir = os.path.dirname(JSON_FILE_KNOX)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Tell the user, then seed the file with an empty JSON list so that it
    # can always be loaded afterwards.
    print(f'[i] {JSON_FILE_KNOX} not found. Saving empty list to file.')
    with open(JSON_FILE_KNOX, 'w') as f:
        json.dump([], f)

[i] Data/results_in_progress_Knox_Asian.json not found. Saving empty list to file.


In [7]:
# Resume from whatever has already been saved: the number of stored
# businesses is used as the offset of the next API request.

with open(JSON_FILE_KNOX) as f:
    previous_results = json.load(f)

n_results = len(previous_results)
print(f'- {n_results} previous results found.')


- 0 previous results found.


# API CALL

In [8]:
# Request a page of search results, starting after the businesses that are
# already saved (n_results is the offset).

search_kwargs = {'location': LOCATION, 'term': TERM, 'offset': n_results}
results = yelp_api.search_query(**search_kwargs)
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [9]:
# How many results total?
# Taken from the 'total' entry of the search response received above.

total_results = results['total']
total_results


258

In [10]:
# Results per page
# Measured as the number of businesses in the response just received, so it
# reflects what this particular call returned rather than an assumed size.

results_per_page = len(results['businesses'])
results_per_page

20

In [11]:
# Number of pages needed to cover the results that are not saved yet,
# rounded up with math.ceil so a final partial page is still counted.
#
# The response can legitimately contain no businesses (for example when the
# notebook is resumed after everything has been downloaded); in that case
# results_per_page is 0 and the division would raise ZeroDivisionError, so
# there is simply nothing left to fetch.

remaining_results = results['total'] - n_results

if results_per_page > 0:
    # Clamp at 0 in case more results are saved than the API now reports.
    n_pages = max(0, math.ceil(remaining_results / results_per_page))
else:
    n_pages = 0

n_pages

13

In [13]:
# Add the page fetched by the search above to the saved results and write
# the combined list back to the results file.
fetched_page = results['businesses']
previous_results.extend(fetched_page)

with open(JSON_FILE_KNOX, 'w') as f:
    json.dump(previous_results, f)

# Extend and Loop

In [14]:
# Fetch the remaining pages. The results file is re-read on every pass and
# its length used as the offset, so an interrupted run can be resumed from
# the last page that was saved.
for i in tqdm_notebook( range(1,n_pages+1)):

    ## Read in results in progress file and check the length
    with open(JSON_FILE_KNOX, 'r') as f:
        previous_results = json.load(f)

    ## save the number of results to use as the OFFSET of the next request
    n_results = len(previous_results)

    ## Everything the API reported is already saved -- skip the extra call
    if n_results >= results['total']:
        break

    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)

    ## An empty page means the API has no more businesses to serve (its
    ## reported total can exceed what it actually returns). Without this
    ## check the offset never advances and the same empty request is
    ## repeated until n_pages is used up.
    new_businesses = results['businesses']
    if not new_businesses:
        break

    ## append new results and save to file
    previous_results.extend(new_businesses)

    with open(JSON_FILE_KNOX,'w') as f:
        json.dump(previous_results,f)

    # add a 200ms pause between requests
    time.sleep(.2)


  0%|          | 0/13 [00:00<?, ?it/s]

# Final DF

In [15]:
# Read the accumulated results back into a DataFrame and preview both ends.

final_df = pd.read_json(JSON_FILE_KNOX)
display(final_df.head())
display(final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,-sjr3fdVEisP9Y5Z3MLnxQ,kaizen-knoxville-2,Kaizen,https://s3-media4.fl.yelpcdn.com/bphoto/hKHnwn...,False,https://www.yelp.com/biz/kaizen-knoxville-2?ad...,289,"[{'alias': 'asianfusion', 'title': 'Asian Fusi...",4.5,"{'latitude': 35.9694209, 'longitude': -83.918324}",[delivery],$$,"{'address1': '127 S Central Ave', 'address2': ...",18654094444,(865) 409-4444,1460.420272
1,Lrz9QwH4HK8EDTkcZ8kBUg,szechuan-garden-chinese-restaurant-knoxville,Szechuan Garden Chinese Restaurant,https://s3-media1.fl.yelpcdn.com/bphoto/w1jq9k...,False,https://www.yelp.com/biz/szechuan-garden-chine...,106,"[{'alias': 'chinese', 'title': 'Chinese'}]",4.5,"{'latitude': 35.93579709084952, 'longitude': -...",[delivery],$,"{'address1': '4211 Chapman Hwy', 'address2': '...",18655790889,(865) 579-0889,2862.519676
2,YR29u_bjgrKELlfVpZO8yQ,seoul-brothers-knoxville,Seoul Brothers,https://s3-media3.fl.yelpcdn.com/bphoto/HuIANl...,False,https://www.yelp.com/biz/seoul-brothers-knoxvi...,26,"[{'alias': 'korean', 'title': 'Korean'}]",4.5,"{'latitude': 35.97026, 'longitude': -83.92195}",[],,"{'address1': '333 W Depot Ave', 'address2': ''...",18659738779,(865) 973-8779,1467.360708
3,AefbTZ6lsJFsPRbyVOgCZQ,asia-kitchen-knoxville,Asia Kitchen,https://s3-media2.fl.yelpcdn.com/bphoto/x5l8K3...,False,https://www.yelp.com/biz/asia-kitchen-knoxvill...,244,"[{'alias': 'chinese', 'title': 'Chinese'}, {'a...",4.0,"{'latitude': 35.9226273945922, 'longitude': -8...","[delivery, pickup]",$$,"{'address1': '8511 Kingston Pike', 'address2':...",18656709858,(865) 670-9858,13118.960309
4,qr9mEaxLSm-2pmiiwVoAUA,bida-saigon-knoxville-2,Bida Saigon,https://s3-media3.fl.yelpcdn.com/bphoto/tDLKyl...,False,https://www.yelp.com/biz/bida-saigon-knoxville...,345,"[{'alias': 'vietnamese', 'title': 'Vietnamese'}]",4.0,"{'latitude': 35.9229926146895, 'longitude': -8...",[delivery],$,"{'address1': '8078 Kingston Pike', 'address2':...",18656945999,(865) 694-5999,11906.289194


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
229,6NonvuqS9o758vmGd8HnMA,western-sizzlin-wood-grill-buffet-pigeon-forge,Western Sizzlin Wood Grill Buffet,https://s3-media2.fl.yelpcdn.com/bphoto/szqRMK...,False,https://www.yelp.com/biz/western-sizzlin-wood-...,356,"[{'alias': 'tradamerican', 'title': 'American ...",2.5,"{'latitude': 35.813666, 'longitude': -83.577396}",[delivery],$$,"{'address1': '2301 Pkwy', 'address2': '', 'add...",18654293120,(865) 429-3120,35117.450976
230,2N2MOrGKkBO0a5PgwyL7nQ,chick-fil-a-sevierville-2,Chick-fil-A,https://s3-media1.fl.yelpcdn.com/bphoto/0bSjYR...,False,https://www.yelp.com/biz/chick-fil-a-seviervil...,73,"[{'alias': 'hotdogs', 'title': 'Fast Food'}, {...",3.5,"{'latitude': 35.8378568, 'longitude': -83.5717...",[delivery],$,"{'address1': '1432 Pkwy', 'address2': '', 'add...",18659082506,(865) 908-2506,34442.81289
231,Pt47vI3g6LchvNLGtPMMOQ,chick-fil-a-knoxville-16,Chick-fil-A,https://s3-media2.fl.yelpcdn.com/bphoto/LWSjA4...,False,https://www.yelp.com/biz/chick-fil-a-knoxville...,49,"[{'alias': 'hotdogs', 'title': 'Fast Food'}, {...",2.5,"{'latitude': 36.0074039, 'longitude': -84.0173...",[delivery],$,"{'address1': '6564 Clinton Hwy', 'address2': '...",18659472416,(865) 947-2416,10034.373291
232,qg2IbQM-ILM9IDM5xs_xIQ,dicks-last-resort-pigeon-forge-pigeon-forge,Dick's Last Resort - Pigeon Forge,https://s3-media4.fl.yelpcdn.com/bphoto/MKTWHX...,False,https://www.yelp.com/biz/dicks-last-resort-pig...,346,"[{'alias': 'tradamerican', 'title': 'American ...",3.0,"{'latitude': 35.80361471589055, 'longitude': -...","[pickup, delivery]",$$,"{'address1': '131 Island Dr', 'address2': 'Ste...",18652801744,(865) 280-1744,36139.332488
233,615r0Oxf_2UVKgT4N5qpXg,golden-corral-buffet-and-grill-sevierville,Golden Corral Buffet & Grill,https://s3-media4.fl.yelpcdn.com/bphoto/pdyxIH...,False,https://www.yelp.com/biz/golden-corral-buffet-...,34,"[{'alias': 'tradamerican', 'title': 'American ...",3.5,"{'latitude': 35.875951, 'longitude': -83.5704231}","[pickup, delivery]",$$,"{'address1': '513 Winfield Dunn Pkwy', 'addres...",18654538859,(865) 453-8859,33149.187774


In [16]:
# Summarize the columns of final_df (dtypes and non-null counts).
final_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 234 entries, 0 to 233
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             234 non-null    object 
 1   alias          234 non-null    object 
 2   name           234 non-null    object 
 3   image_url      234 non-null    object 
 4   is_closed      234 non-null    bool   
 5   url            234 non-null    object 
 6   review_count   234 non-null    int64  
 7   categories     234 non-null    object 
 8   rating         234 non-null    float64
 9   coordinates    234 non-null    object 
 10  transactions   234 non-null    object 
 11  price          187 non-null    object 
 12  location       234 non-null    object 
 13  phone          234 non-null    object 
 14  display_phone  234 non-null    object 
 15  distance       234 non-null    float64
dtypes: bool(1), float64(2), int64(1), object(12)
memory usage: 27.8+ KB


In [None]:
## Keep only the first row for each business id, then count any ids that
## are still duplicated (the displayed value should be 0)

is_repeat_id = final_df.duplicated(subset='id')
final_df = final_df[~is_repeat_id]
final_df.duplicated(subset='id').sum()

In [None]:
# Write final_df to a gzip-compressed CSV, leaving out the index column.

final_df.to_csv(
    'Data/results_in_progress_Knox_Asian.csv.gz',
    index=False,
    compression='gzip',
)
