In [54]:
import os
import time
from typing import List

import pandas as pd
import requests

In [94]:
def get_all_businesses(api_key: str, location: str, categories: str = 'restaurants', total_limit: int = 1000, pause_seconds: float = 1):
    """Page through the Yelp business search endpoint and collect the results.

    Pages of up to 50 businesses are requested, advancing ``offset`` each
    time, until ``total_limit`` businesses have been gathered, a page comes
    back empty, or a request fails.

    Args:
        api_key: Yelp API key, sent as a Bearer token.
        location: Value for the ``location`` query parameter.
        categories: Value for the ``categories`` query parameter.
        total_limit: Maximum number of businesses to return. A value <= 0
            returns an empty list without contacting the API.
        pause_seconds: Seconds to sleep between consecutive page requests.

    Returns:
        A list of at most ``total_limit`` business dicts, taken from the
        ``businesses`` key of each response. If a request fails, the error is
        printed and the businesses gathered so far are returned.
    """
    # Loop-invariant request pieces are built once.
    url = 'https://api.yelp.com/v3/businesses/search'
    headers = {'Authorization': f'Bearer {api_key}'}
    page_size = 50  # Max value allowed by Yelp

    businesses = []
    offset = 0

    while len(businesses) < total_limit:
        # Never ask for more than is still needed to reach total_limit.
        limit = min(page_size, total_limit - len(businesses))
        params = {'location': location, 'categories': categories, 'limit': limit, 'offset': offset}

        try:
            # The timeout keeps a stalled connection from hanging the notebook forever.
            response = requests.get(url, headers=headers, params=params, timeout=10)
            # Surface HTTP errors instead of mistaking an error payload for "no more results".
            response.raise_for_status()
            batch = response.json().get('businesses', [])
        except (requests.exceptions.RequestException, ValueError) as err:
            print(f"Request Error: {err}")
            break  # keep whatever was collected before the failure

        if not batch:
            break  # no more businesses are returned
        businesses.extend(batch)

        offset += limit  # Prepare offset for next batch of results
        if len(businesses) < total_limit:
            time.sleep(pause_seconds)  # only pause when another request follows

    return businesses[:total_limit]  # Return up to the total_limit of businesses

# Disabled code: this earlier variant of the review fetcher is wrapped in a bare
# string literal, so it is never defined or executed. It differs from
# get_all_reviews only in keeping reviews whose rating equals 1.
"""
def get_bad_reviews(api_key: str, business_ids: List[str], pause_seconds: int=1) -> List[dict]:
    '''Fetch bad reviews for each business ID, with a pause between requests.'''
    bad_reviews = []
    for business_id in business_ids:
        url = f'https://api.yelp.com/v3/businesses/{business_id}/reviews'
        headers = {'Authorization': f'Bearer {api_key}'}
        
        try:
            response = requests.get(url, headers=headers)
            response.raise_for_status()  # Raises an HTTPError for bad responses
            reviews = response.json().get('reviews', [])
            
            # Filter for 1-star reviews
            for review in reviews:
                if review['rating'] == 1:
                    bad_reviews.append({'business_id': business_id, 'review': review['text'], 'rating': review['rating']})
                    
            time.sleep(pause_seconds)  # Pause to respect rate limits
        except requests.exceptions.HTTPError as err:
            print(f"HTTP Error: {err}")
            continue  # Skip to the next business ID on error
        
    return bad_reviews
""";


def get_all_reviews(api_key: str, business_ids: List[str], pause_seconds: float = 1, max_retries: int = 3) -> List[dict]:
    """Fetch the reviews returned for each business ID, pausing between requests.

    One request is made per business to the Yelp reviews endpoint. Every
    review in the response is kept (no rating filter is applied).

    Args:
        api_key: Yelp API key, sent as a Bearer token.
        business_ids: Business IDs to look up, processed in order.
        pause_seconds: Seconds to sleep after each business, whether the
            request succeeded or failed. Also the base of the 429 backoff.
        max_retries: How many times a request answered with HTTP 429 is
            retried before that business is skipped. Waits double on each
            retry (``pause_seconds * 2``, ``* 4``, ...).

    Returns:
        A list of dicts with keys ``business_id``, ``review`` (the review
        text) and ``rating``. Businesses whose request ultimately fails are
        reported via ``print`` and contribute no entries.
    """
    headers = {'Authorization': f'Bearer {api_key}'}  # identical for every request
    all_reviews = []

    for business_id in business_ids:
        url = f'https://api.yelp.com/v3/businesses/{business_id}/reviews'

        for attempt in range(max(0, max_retries) + 1):
            try:
                # The timeout keeps a stalled connection from hanging the whole run.
                response = requests.get(url, headers=headers, timeout=10)
                response.raise_for_status()  # Raises an HTTPError for bad responses
                reviews = response.json().get('reviews', [])
            except requests.exceptions.HTTPError as err:
                status = getattr(getattr(err, 'response', None), 'status_code', None)
                if status == 429 and attempt < max_retries:
                    # Rate limited: back off for longer each time, then retry the same business.
                    time.sleep(pause_seconds * 2 ** (attempt + 1))
                    continue
                print(f"HTTP Error: {err}")
                break  # Skip to the next business ID on error
            except (requests.exceptions.RequestException, ValueError) as err:
                # Connection problems, timeouts or an unparsable body must not
                # discard the reviews collected so far.
                print(f"Request Error: {err}")
                break

            for review in reviews:
                all_reviews.append({'business_id': business_id, 'review': review['text'], 'rating': review['rating']})
            break

        # Pause after every business, including failed ones, so that errors
        # do not turn the loop into a burst of back-to-back requests.
        time.sleep(pause_seconds)

    return all_reviews


In [49]:
# Supply your Yelp API key through the YELP_API_KEY environment variable so the
# real key never has to be saved inside the notebook. The masked literal is only
# a placeholder fallback and will not authenticate.
api_key = os.environ.get('YELP_API_KEY', '************************************')
location = 'Manhattan'
categories = 'restaurants'


# Step 1: Fetch up to 1000 business records for the chosen location and category
business = get_all_businesses(api_key, location, categories, total_limit=1000, pause_seconds=1)

In [65]:
# Disabled cell: the code is wrapped in a string literal, so nothing is written
# to disk. If re-enabled, it would pickle `business` to yelp_business.pkl.
# The trailing semicolon keeps the notebook from echoing the string as output.
"""
import pickle

# Saving the object to disk
with open('yelp_business.pkl', 'wb') as file:
    pickle.dump(business, file)
""";

In [92]:
# Disabled cell (string literal): if re-enabled, it would reload `business` from
# yelp_business.pkl. NOTE: `pickle` is only imported inside the other disabled
# cell, so this one needs its own import when run on a fresh kernel.
# Only unpickle files you created yourself; pickle.load can execute arbitrary code.
"""
with open('yelp_business.pkl', 'rb') as file:
    business = pickle.load(file)
""";


In [66]:
# Pull the 'id' field out of every business record, preserving order
business_ids = [record['id'] for record in business]

In [68]:
# Sanity check: number of business IDs collected
len(business_ids)

1000

In [91]:
# Step 2: fetch reviews for every collected business ID, pausing 1.2 s between requests
reviews = get_all_reviews(
    api_key,
    business_ids,
    pause_seconds=1.2,
)

HTTP Error: 429 Client Error: Too Many Requests for url: https://api.yelp.com/v3/businesses/-OixbLnFLCzQclxCSbUQ8w/reviews
HTTP Error: 429 Client Error: Too Many Requests for url: https://api.yelp.com/v3/businesses/l_RAv09V67j-Hm9dyZxWhw/reviews
HTTP Error: 429 Client Error: Too Many Requests for url: https://api.yelp.com/v3/businesses/SaYTVG7wHCMDBofHCgXPaA/reviews
HTTP Error: 429 Client Error: Too Many Requests for url: https://api.yelp.com/v3/businesses/VvsZAnEwU4c8Xkyrzx05Nw/reviews
HTTP Error: 429 Client Error: Too Many Requests for url: https://api.yelp.com/v3/businesses/A_YpTLbAlEqeLVSs9bxbEA/reviews
HTTP Error: 429 Client Error: Too Many Requests for url: https://api.yelp.com/v3/businesses/OFfZUS-nu6NDnsWe1B2bUQ/reviews
HTTP Error: 429 Client Error: Too Many Requests for url: https://api.yelp.com/v3/businesses/z5hRX3iJ5Ty_S38iG_WY3Q/reviews


KeyboardInterrupt: 

In [76]:
# Number of review records currently held in `reviews`
len(reviews)

35

In [80]:
# Tabulate the review records and write them to CSV without the index column
reviews_frame = pd.DataFrame(reviews)
reviews_frame.to_csv('test_review.csv', index=False)