In [26]:
import requests
import pandas as pd
from yelp_secret_key import api_key, api_key_secret

In [37]:
url = 'https://api.yelp.com/v3/businesses/search'
headers = {
    'Authorization': 'Bearer ' + api_key_secret,
}

# Define the search parameters
params = {
    'term': 'mexican',
    'location': 'San Diego',
    'limit': 20, # retrieve up to 20 results per request
}


# Initialize an empty DataFrame to store restaurant data
df = pd.DataFrame(columns=['Name', 'Rating', 'Address', 'Price', 'ID'])

# Loop through the requests until we reach 500 restaurants
offset = 0
while offset < 500:
    params['offset'] = offset # set the offset parameter for the current request
    response = requests.get(url, headers=headers, params=params)
    data = response.json()
    for business in data['businesses']:
        df = df.append({
            'Name': business['name'],
            'Rating': business['rating'],
            'Address': business['location']['address1'],
            'Price': business.get('price', 'N/A'),
            'ID': business['id']
        }, ignore_index=True)
    offset += 20 # increment the offset by 20 for the next request

# Save the DataFrame as a CSV file
df.to_csv('mexican_restaurants.csv', index=False)

In [38]:
df

Unnamed: 0,Name,Rating,Address,Price,ID
0,Mike's Red Tacos,4.5,4310 Genesee Ave,$$,WoQJC9s_Hgj2IUXF7qQxqQ
1,Cocina 35,4.5,1435 6th Ave,$$,fMWoLTioV_ttwKX90bSL1Q
2,La Puerta,4.0,560 4th Ave,$$,mzLGnmpwtrIgUeplJtmOOw
3,Lolita's Mexican Food,4.0,7305 Clairemont Mesa Blvd,$,8gtS-ugNXvM90hZgqjIWiw
4,California Burritos,4.5,7754 Starling Dr,$,xsm2uqj58l8QTqHxRgbMHQ
...,...,...,...,...,...
495,Acapulco Bay Seafood,4.0,3233 Mission Blvd,$$,ZSxrKN5jdbgCvw5jfroHUA
496,Tom Ham's Lighthouse,4.0,2150 Harbor Island Dr,$$$,3YhbkLhemwZLIwkoygSD1A
497,Tacos El G,3.5,1940 Highland Ave,$,GUt47DeMiHd_jBNm1sLjOA
498,Grand Ole Bbq Y Asado,4.5,3302 32nd St,$$,nWq7Sdd-fQId7S9UH6BBWA


In [50]:
# Get the Yelp ID for the first restaurant in the DataFrame
restaurant_id = df.loc[0, 'ID']

# Make an API call to get the reviews for the restaurant
reviews_url = f'https://api.yelp.com/v3/businesses/{restaurant_id}/reviews'
reviews_response = requests.get(reviews_url, headers=headers)

# Parse the JSON response to get the review text and rating for each review
reviews_data = reviews_response.json()
for review in reviews_data['reviews']:
    review_text = review['text']
    review_rating = review['rating']
    print(review_rating, review_text)


KeyError: 'reviews'

# pull reviews

In [49]:
# Load the restaurant data from a CSV file
df = pd.read_csv('mexican_restaurants.csv')

# Create an empty list to store the reviews for each restaurant
reviews_list = []

# Loop over each restaurant in the dataframe
for index, row in df.iterrows():
    # Get the Yelp ID for the current restaurant
    restaurant_id = row['ID']
    
    # Set up variables for pagination
    offset = 0
    limit = 20  # limit set to 20 to avoid hitting Yelp API rate limit
    total_reviews = []
    
    # Loop until we have retrieved 100 reviews for this restaurant
    while True:
        # Make an API call to get the next page of reviews
        reviews_url = f'https://api.yelp.com/v3/businesses/{restaurant_id}/reviews'
        params = {
            'offset': offset,
            'limit': limit,
        }
        response = requests.get(reviews_url, headers=headers, params=params)
        data = response.json()
        
        # Check if there are any more reviews available for this restaurant
        if not data['reviews']:
            break
        
        # Extract the text and rating fields from each review and add them to our list
        for review in data['reviews']:
            reviews_list.append({
                'Text': review['text'],
                'Rating': review['rating']
            })
            
            # Stop if we have retrieved 100 reviews for this restaurant
            if len(reviews_list) >= 100:
                break
        
        # Update the pagination variables
        offset += limit
        
        # Stop if we have retrieved 100 reviews for this restaurant
        if len(reviews_list) >= 100:
            break
    
    # Add the reviews as a new column to the dataframe
    df.loc[index, 'Reviews'] = str(reviews_list)
    
    # Clear the reviews list for the next restaurant
    reviews_list = []

# Save the updated dataframe to a new CSV file
df.to_csv('mexican_restaurants_w_reviews.csv', index=False)





KeyError: 'reviews'

If Yelp limits the number of reviews per call to 3, and you want to retrieve 100 reviews for each of the 500 restaurants, you would need to make 34 API calls per restaurant. Here's the math:

100 reviews per restaurant / 3 reviews per call = 33.33 calls per restaurant
Round up to 34 calls per restaurant
34 calls per restaurant * 500 restaurants = 17,000 API calls
So you would need to make a total of 17,000 API calls to retrieve 100 reviews for each of the 500 restaurants. Keep in mind that Yelp has a daily API call limit, so you may need to spread out the API calls over multiple days or increase your daily API call limit with Yelp.