In [1]:
# Import the requests library.
import requests as r
import pandas as pd
import numpy as np
from itertools import product

# Import the API key.
from config import Yelp_API_Key

In [2]:
# Define the API key, Endpoint, and Header
key = Yelp_API_Key
url = 'https://api.yelp.com/v3/businesses/search'
headers = {'Authorization':'bearer %s' % key}

In [10]:
# Read in the zip code csv file that we extracted from the HHI dataset
zips_data_df = pd.read_csv("data/HH/TX2_Zips_List.csv", index_col=None,dtype={"Zip_Code":"string"})

In [11]:
zips_data_df.head()

Unnamed: 0,State_ab,Zip_Code
0,TX,77095
1,TX,77096
2,TX,77098
3,TX,77099
4,TX,77235


In [4]:
# Scrape

# Include list of addresses and 100 results
addresses = zips_data_df
offset = np.arange(0,100,50)

# Create list of tuples of combinations between addresses and offset
tuples = list(product(addresses, offset))

In [5]:
# Use a for loop to store results into list of lists, and turn that into a DF
# Empty list
list = []

# Future DF columns
cols = ['Name', 'Type', 'Address', 'Zip Code']

# For Nested loops

# Outer loop retrieves data from Yelp
for index, row in addresses.iterrows():
    current_zip = row['Zip_Code']
    search_parameters = {
        'location': current_zip,
        'term': 'fast food',
        'radius': 5000,
        'limit': 50,
        'offset': index}
    resp = r.get(url, headers=headers, params=search_parameters)
    raw_data = resp.json()
# Displays each zip code as it is run through the API so that we can track progress and identify when/where a problem occurs
    print(current_zip)
    try:
        
#Inner loop appends the retrieved data to our list so that it's usable
         for business in raw_data['businesses']:
                name = business['name'],
                food_type = business['categories'][0]['alias']
                address = business['location']['address1'],
                zip_code = business['location']['zip_code'],
                list.append([name, food_type, address, zip_code])
    # Displays any errors that appear
    except KeyError as e:
        print (f"Caught an error at line {e}")
        pass
    
df = pd.DataFrame.from_records(list, index='Name', columns=cols)

# Counts the total number of records returned    
print(f'Total Records: {len(df)}')

df.head(2)

77095
77096
77098
77099
77235
77301
77302
77304
77306
77320
77327
77331
77332
77335
77338
77339
77345
77346
77350
77353
77356
77357
77358
77360
77372
77373
77375
77377
77378
77379
77380
77381
77382
77385
77386
77388
77389
77396
77401
77406
77412
77414
77417
77418
77419
77429
77435
77437
77441
77445
77447
77449
77450
77459
77461
77465
77466
77469
77471
77474
77477
77478
77479
77480
77482
77484
77486
77488
77489
77493
77494
77498
77501
77502
77503
77504
77506
77510
77511
77515
77518
77522
77530
77532
77533
77534
77535
77536
77539
77541
77545
77546
77547
77550
77551
77553
77563
77568
77571
77573
77575
77580
77583
77584
77586
77588
77590
77598
77611
77612
77613
77614
77619
77625
77627
77631
77632
77640
77642
77651
77655
77656
77657
77660
77662
77664
77665
77701
77703
77705
77706
77707
77708
77801
77802
77803
77807
77830
77833
77835
77836
77840
77842
77845
77856
77859
77864
77868
77879
77902
77954
77957
77963
77964
77975
77979
77983
77990
77994
77995
78003
78006
78011
78013
78014
78015
7801

Unnamed: 0_level_0,Type,Address,Zip Code
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"(Freddy's Frozen Custard & Steakburgers,)",hotdogs,"(12407 FM1960 W,)","(77065,)"
"(Simply Greek,)",greek,"(8475 North,)","(77095,)"


In [7]:
df.to_csv('data/Yelp_Results/State_TX2.csv', columns = ["Type", "Address", "Zip Code"], index_label= "Name", header = True)