In [3]:
#Dependencies 
import requests
import json
import pandas as pd
from api_key import yelp_api_key, mapbox_api_key

from pprint import pprint

In [3]:
# Yelp API 

# Yelp API base URL for searching businesses
yelp_url = "https://api.yelp.com/v3/businesses/search"

# Location and other parameters
location = "san francisco"
term = "restaurants"  
limit = 5 # Maximum results per request (max 5)

# Your Yelp API key
yelp_api_key = ""

# Define the headers to include the API key
headers = {
    "Authorization": f"Bearer {yelp_api_key}"
}

# Define the parameters for the API call
yelp_params = {
    "location": location,
    "term": term,
    "limit": limit
}

# Make the GET request to the Yelp API
yelp_response = requests.get(yelp_url, headers=headers, params=yelp_params)

# Check if the request was successful (status code 200)
if yelp_response.status_code == 200:
    yelp_businesses = yelp_response.json()  # Convert the response to JSON format
    pprint(yelp_businesses)  # Pretty print the businesses data
else:
    print(f"Error: {yelp_response.status_code} - {yelp_response.text}")


{'businesses': [{'alias': 'bottega-san-francisco-2',
                 'attributes': {'business_temp_closed': None,
                                'menu_url': 'https://www.bottegavalencia.com/_files/ugd/936480_cccea938a0a94069a27dbb142fca3f00.pdf',
                                'open24_hours': None,
                                'waitlist_reservation': True},
                 'business_hours': [{'hours_type': 'REGULAR',
                                     'is_open_now': True,
                                     'open': [{'day': 0,
                                               'end': '2300',
                                               'is_overnight': False,
                                               'start': '1100'},
                                              {'day': 1,
                                               'end': '2300',
                                               'is_overnight': False,
                                               'start': '1100'},
      

Extracting and Transforming Yelp Data

In [10]:
# WalkScore API 
# Check if the Yelp request was successful
if yelp_response.status_code == 200:
    yelp_data_json = yelp_response.json()  # Convert the response to JSON format
    businesses = yelp_data_json.get("businesses", [])  # Extract the businesses from the response
    
    yelp_data = [] 

    # Loop over the Yelp businesses and call WalkScore API for each business
    for business in businesses:
        business_name = business["name"]
        business_id = business["id"]
        latitude = business["coordinates"]["latitude"]
        longitude = business["coordinates"]["longitude"]
        address = ", ".join(business["location"]["display_address"])
        rating = business["rating"]
        review_count = business["review_count"]
        price = business.get("price", "N/A")  # Some businesses may not have a price listed
        bus_categories = ", ".join([category["title"] for category in business["categories"]]),
        transactions = business["transactions"]
        
        yelp_data.append({
            "Name" : business_name,
            "Business_ID": business_id,
            "Latitude": latitude, 
            "Longitude": longitude, 
            "Address": address,
            "Rating": rating,
            "Review_count": review_count,
            "Price": price, 
            "Categories":bus_categories,
            "Transactions": transactions
        })
        
        # # WalkScore API base URL
        # walkscore_url = "http://api.walkscore.com/score"

        # # Define the parameters for the WalkScore API call
        # walkscore_params = {
        #     "lat": latitude,
        #     "lon": longitude,
        #     "address": address,
        #     "wsapikey": walkscore_api_key
        # }

        # # Make the GET request to the WalkScore API
        # walkscore_response = requests.get(walkscore_url, params=walkscore_params)

        # # Check if the WalkScore request was successful
        # if walkscore_response.status_code == 200:
        #     walkscore_data = walkscore_response.json()

        #     # Extract the Walk Score and Transit Score
        #     walk_score = walkscore_data.get("walkscore")
        #     transit_score = walkscore_data.get("transit", {}).get("score", "N/A")

#             # Print the results for the business
#             print(f"Business: {name}")
#             print(f"Address: {address}")
#             print(f"Rating: {rating}")
#             print(f"Review Count: {review_count}")
#             print(f"Price: {price}")
#             print(f"Categories: {categories}")
#             print(f"Walk Score: {walk_score}")
#             print(f"Transit Score: {transit_score}")
#             print("-" * 40)
#         else:
#             print(f"Error retrieving WalkScore for {name}: {walkscore_response.status_code}")
# else:
#     print(f"Error retrieving Yelp data: {yelp_response.status_code}")


yelp_pd = pd.DataFrame(yelp_data)
yelp_pd

Unnamed: 0,Name,Business_ID,Latitude,Longitude,Address,Rating,Review_count,Price,Categories,Transactions
0,Bottega,QueFVMcMlT-6aZFv2M47mg,37.75472,-122.4212,"1132 Valencia St, San Francisco, CA 94158",4.3,1202,$$,"(Italian, Pasta Shops, Pizza,)","[delivery, pickup]"


In [11]:
yelp_pd.dtypes

Name             object
Business_ID      object
Latitude        float64
Longitude       float64
Address          object
Rating          float64
Review_count      int64
Price            object
Categories       object
Transactions     object
dtype: object

In [None]:
# Take ZIP codes out of address 
# Make Lat and Long to coordinates 
# Bin by price ($-$$$)
# Bin by Review Count ()
# Bin by rating (1 - 5)
# convert transactions to two boolean columns 
# convert to CSV 

Pulling Categories from Yelp 

In [8]:
import requests
from bs4 import BeautifulSoup
import pprint

# URL of the Yelp categories page
url = "https://blog.yelp.com/businesses/yelp_category_list/#h-the-yelp-category-list"

# Make the GET request to fetch the page content
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    # Parse the page content using BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')
    
    # Find the section of the page that contains the categories
    # This part may require adjustment depending on the page structure
    categories_section = soup.find_all('ul')  # Find all <ul> (unordered lists) which contain categories

    # Initialize a list to store restaurant-related categories
    restaurant_categories = []

    # Loop through all <ul> lists and extract <li> items
    for ul in categories_section:
        for li in ul.find_all('li'):
            category = li.text.strip()
            
            # Filter categories that contain "restaurant" in the name (you can customize this filter)
            if "restaurant" in category.lower():
                restaurant_categories.append(category)

    # Pretty print the filtered restaurant categories
    pprint.pprint(restaurant_categories)
else:
    print(f"Error: {response.status_code} - {response.text}")

Error: 403 - <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<HTML><HEAD><META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
<TITLE>ERROR: The request could not be satisfied</TITLE>
</HEAD><BODY>
<H1>403 ERROR</H1>
<H2>The request could not be satisfied.</H2>
<HR noshade size="1px">
Request blocked.
We can't connect to the server for this app or website at this time. There might be too much traffic or a configuration error. Try again later, or contact the app or website owner.
<BR clear="all">
If you provide content to customers through CloudFront, you can find steps to troubleshoot and help prevent this error by reviewing the CloudFront documentation.
<BR clear="all">
<HR noshade size="1px">
<PRE>
Generated by cloudfront (CloudFront)
Request ID: 9_b2CekGIpXfgOlZmcfJLWcf00RBBlUKyWLCrcmtpDNSasFwWbCaJQ==
</PRE>
<ADDRESS>
</ADDRESS>
</BODY></HTML>


MapBox API Call and Directions Estimate Loop 

In [5]:
#MapBox API 


#Loop for driving
#base url for mapbox 
mapbox_url = "https://api.mapbox.com/directions/v5/"
st_coordinates = (37.787937, -122.407677) #Starting coordinates in Union Square, SF 
method = "driving"
driving_data = []

for index, row in yelp_data.iterrows():
    bs_coordinates = (row['Longitude'], row['Latitude'])
    
    coordinates = f"{st_coordinates[1]},{st_coordinates[0]};{bs_coordinates[0]},{bs_coordinates[1]}"
    
    query_url = f"{mapbox_url}{method}/{coordinates}.json"

    #Mapbox paramaters 
    driving_params = {
        "access_token" : mapbox_api_key,
        "overview" : "full",
        "annotations": "distance, duration"
    }
    
    #Get request for MapBox API 
    mapbox_response = requests.get(query_url, params = driving_params)
         
    #if successful, extract duration and distance 
    if mapbox_response.status_code == 200: 
        mapbox_data = mapbox_response.json()

        if 'routes' in mapbox_data and len(mapbox_data['routes']) > 0:
            distance = mapbox_data['routes'][0]['distance']  # Distance in meters
            duration = mapbox_data['routes'][0]['duration']  # Duration in seconds
            route_id = mapbox_data['routes'][0]['uuid']  # route id
            
            driving_data.append({
                "route_id": route_id,
                "distance_meters": distance, 
                "duration_seconds" : duration,
                "bs_lat": bs_coordinates[0],
                "bs_lon": bs_coordinates[1]
            })
            
        else:
            print(f"Error with MapBox API for {row['name']}: {mapbox_response.status_code}")


NameError: name 'yelp_data' is not defined

In [None]:
# Does the accessibility of transportation affect restaurants differently depending on the type of cuisine they serve?




In [None]:
# How does transportation accessibility impact restaurants across different price brackets (high-end vs. budget-friendly)?


In [None]:
# Quantitatively, how does increasing the number of stops on a train or bus line by 5 affect the surrounding restaurant scene?
