In [1]:
# imports
import requests
import pandas as pd
import os

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [13]:
import os
import requests
import pandas as pd

# Load bike stations data
bike_stations = pd.read_csv('../data/bike_stations_madrid.csv')

url = "https://api.foursquare.com/v3/places/search"
headers = {
    'Accept': 'application/json',
    'Authorization': os.getenv('FOURSQUARE_KEY')  # Your Foursquare API key from the environment variable
}

# Function to request Foursquare places for a given bike station
def fetch_pois_foursquare(item, index):
    params = {
        'll': f"{item['latitude']},{item['longitude']}",  # Latitude and longitude from the row
        'radius': 1000, 
        'query': 'restaurant' 
    }
    
    # Print the status for each row before making the request
    print(f"Processing row {index + 1}/{len(bike_stations)}: Station {item['name']} at ({item['latitude']}, {item['longitude']})")
    
    # Make the API request
    response = requests.get(url, headers=headers, params=params)
    
    if response.status_code == 200:
        results = response.json().get('results', [])
        return pd.Series([item['name'], results])
    else:
        return pd.Series([item['name'], f"Error: {response.status_code}"])

# Apply the function to all rows of the DataFrame, passing the index using a modified approach
foursquare_places = bike_stations.apply(lambda row: fetch_pois_foursquare(row, row.name), axis=1, result_type="expand")

# Rename columns for clarity
foursquare_places.columns = ['Station Name', 'Foursquare Results']

# Display the DataFrame (optional)
print(foursquare_places.head())


Processing row 1/610: Station 377 - Metro Abrantes at (40.38091762, -3.72783615)
Processing row 2/610: Station 507 - Seis - Sexta at (40.44845033, -3.5965029500000005)
Processing row 3/610: Station 192 - Avda. de los Toreros - Fco. Silvela at (40.43182646446866, -3.671467718385311)
Processing row 4/610: Station 333 - Illescas - Camarena at (40.392175, -3.7562323)
Processing row 5/610: Station 3 - Plaza Conde Suchil at (40.4303223, -3.7072537)
Processing row 6/610: Station 151 - Orense 12 at (40.44881224951987, -3.695359265012366)
Processing row 7/610: Station 574 - Chantada at (40.472695, -3.710678)
Processing row 8/610: Station 267 - Paseo de la Chopera - Fernando Poo at (40.39500000000003, -3.700399664723874)
Processing row 9/610: Station 391 - Junta Municipal Distrito de Usera at (40.3817026328731, -3.711296220901499)
Processing row 10/610: Station 434 - Charleroi - Cocherón de la Villa at (40.38191654, -3.632193720000001)
Processing row 11/610: Station 560 - Playa de Zarauz - Playa

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [18]:
# Initialize a list to store all restaurant details in one go
restaurant_details = []

# Function to extract details from Foursquare results
def extract_details(row):
    # Check if 'Foursquare Results' is a list and not a string
    if isinstance(row['Foursquare Results'], list):
        # Iterate over the restaurants in the Foursquare results
        for restaurant in row['Foursquare Results']:
            restaurant_info = {
                'station_name': row['Station Name'],
                'restaurant_name': restaurant.get('name', 'No name'),
                'distance': restaurant.get('distance', 'No distance'),
                'address': restaurant.get('location', {}).get('formatted_address', 'No address'),
                'categories': ', '.join([category.get('name', 'No category') for category in restaurant.get('categories', [])])
            }
            restaurant_details.append(restaurant_info)
    else:
        # If 'Foursquare Results' is not a list, you can print the row to debug
        print(f"Skipping row {row['Station Name']} due to invalid data: {row['Foursquare Results']}")

# Extract details for all rows in one go
foursquare_places.apply(extract_details, axis=1)



Skipping row 526 - Plaza del Liceo due to invalid data: Error: 502
Skipping row 137 - Plaza de la República Argentina due to invalid data: Error: 502


0      None
1      None
2      None
3      None
4      None
       ... 
605    None
606    None
607    None
608    None
609    None
Length: 610, dtype: object

Put your parsed results into a DataFrame

In [33]:
# Convert the list of restaurant details into a DataFrame
foursquare_restaurants = pd.DataFrame(restaurant_details)

# Display the resulting DataFrame
print(foursquare_restaurants.head())

foursquare_restaurants.to_csv('../data/foursquare_restaurants.csv', index=False)

           station_name restaurant_name     distance     address  \
0  377 - Metro Abrantes       IGo Pasta  1465.968046  No address   
1  377 - Metro Abrantes     Botafumeiro   769.947456  No address   
2  377 - Metro Abrantes           Lurca    53.496441  No address   
3  377 - Metro Abrantes     Burger King    51.557548  No address   
4  377 - Metro Abrantes     Doña Patata   799.625475  No address   

                       categories  rating  review_count         phone  
0    Chinese, Asian Fusion, Ramen     3.8            23  +34917047808  
1  Tapas Bars, Dive Bars, Spanish     4.3             9  +34914727054  
2                     Restaurants     0.0             0  +34915650503  
3                        American     0.0             0  +34915650503  
4             Tapas Bars, Spanish     4.6             5  +34914727532  


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [6]:
import os
import requests
import pandas as pd

# NOTE ONLY DID 5 CALLS DUE TO YELP'S DAILY CALL LIMIT
print(os.getenv('YELP_KEY'))


# Load bike stations data
bike_stations = pd.read_csv('../data/bike_stations_madrid.csv')

# Yelp API URL and headers
url = "https://api.yelp.com/v3/businesses/search"
headers = {
    'Accept': 'application/json',
    'Authorization': f"Bearer {os.getenv('YELP_KEY')}"  # Use your Yelp API key from the environment variable
}

# Function to request Yelp places for a given bike station
def fetch_pois_yelp(item, index):
    params = {
        'latitude': item['latitude'],  # Latitude from the row
        'longitude': item['longitude'],  # Longitude from the row
        'radius': 1000, 
        'categories': 'restaurants'  # Only restaurants
    }
    
    # Print the status for each row before making the request
    print(f"Processing row {index + 1}/{80}: Station {item['name']} at ({item['latitude']}, {item['longitude']})")
    
    # Make the API request
    response = requests.get(url, headers=headers, params=params)
    
    if response.status_code == 200:
        results = response.json().get('businesses', [])
        return pd.Series([item['name'], results])
    else:
        return pd.Series([item['name'], f"Error: {response.status_code}"])

# Limit the number of bike stations to 5
bike_stations_subset = bike_stations.head(80)  # Get the first 5 stations

# Apply the function to the first 5 bike stations, passing the index
yelp_places = bike_stations_subset.apply(lambda row: fetch_pois_yelp(row, row.name), axis=1, result_type="expand")

# Rename columns for clarity
yelp_places.columns = ['Station Name', 'Yelp Results']

# Display the DataFrame (optional)
print(yelp_places.head())


Processing row 1/80: Station 377 - Metro Abrantes at (40.38091762, -3.72783615)
Processing row 2/80: Station 507 - Seis - Sexta at (40.44845033, -3.5965029500000005)
Processing row 3/80: Station 192 - Avda. de los Toreros - Fco. Silvela at (40.43182646446866, -3.671467718385311)
Processing row 4/80: Station 333 - Illescas - Camarena at (40.392175, -3.7562323)
Processing row 5/80: Station 3 - Plaza Conde Suchil at (40.4303223, -3.7072537)
Processing row 6/80: Station 151 - Orense 12 at (40.44881224951987, -3.695359265012366)
Processing row 7/80: Station 574 - Chantada at (40.472695, -3.710678)
Processing row 8/80: Station 267 - Paseo de la Chopera - Fernando Poo at (40.39500000000003, -3.700399664723874)
Processing row 9/80: Station 391 - Junta Municipal Distrito de Usera at (40.3817026328731, -3.711296220901499)
Processing row 10/80: Station 434 - Charleroi - Cocherón de la Villa at (40.38191654, -3.632193720000001)
Processing row 11/80: Station 560 - Playa de Zarauz - Playa de la Cale

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [7]:
print(yelp_places.columns)


Index(['Station Name', 'Yelp Results'], dtype='object')


In [9]:
# Check the columns of foursquare_places to make sure 'Yelp Results' exists

# Initialize a list to store all restaurant details in one go
restaurant_details = []

# Function to extract details from Yelp results
def extract_details_yelp(row):
    # Check if 'Yelp Results' is a list and not a string
    if isinstance(row.get('Yelp Results'), list):  # Use .get() to avoid KeyError if the column is missing
        # Iterate over the restaurants in the Yelp results
        for restaurant in row['Yelp Results']:
            restaurant_info = {
                'station_name': row['Station Name'],
                'restaurant_name': restaurant.get('name', 'No name'),
                'distance': restaurant.get('distance', 'No distance'),
                'address': ', '.join(restaurant.get('location', {}).get('address', ['No address'])),
                'categories': ', '.join([category.get('title', 'No category') for category in restaurant.get('categories', [])]),
                'rating': restaurant.get('rating', 'No rating'),
                'review_count': restaurant.get('review_count', 'No reviews'),
                'phone': restaurant.get('phone', 'No phone')            }
            restaurant_details.append(restaurant_info)
    else:
        # If 'Yelp Results' is not a list, print the row to debug
        print(f"Skipping row {row['Station Name']} due to invalid data: {row['Yelp Results']}")

# Extract details for all rows in one go
yelp_places.apply(extract_details_yelp, axis=1)

# Convert the list of restaurant details into a DataFrame
yelp_restaurants = pd.DataFrame(restaurant_details)

# Display the resulting DataFrame
print(yelp_restaurants.head())


Skipping row 377 - Metro Abrantes due to invalid data: Error: 400
Skipping row 507 - Seis - Sexta due to invalid data: Error: 400
Skipping row 192 - Avda. de los Toreros - Fco. Silvela due to invalid data: Error: 400
Skipping row 333 - Illescas - Camarena due to invalid data: Error: 400
Skipping row 3 - Plaza Conde Suchil due to invalid data: Error: 400
Skipping row 151 - Orense 12 due to invalid data: Error: 400
Skipping row 574 - Chantada due to invalid data: Error: 400
Skipping row 267 - Paseo de la Chopera - Fernando Poo due to invalid data: Error: 400
Skipping row 391 - Junta Municipal Distrito de Usera due to invalid data: Error: 400
Skipping row 434 - Charleroi - Cocherón de la Villa due to invalid data: Error: 400
Skipping row 560 - Playa de Zarauz - Playa de la Caleta due to invalid data: Error: 400
Skipping row 4 - Malasaña due to invalid data: Error: 400
Skipping row 164 - Paseo de las Delicias - Tomás Bretón due to invalid data: Error: 400
Skipping row 12 - San Hermenegildo

Put your parsed results into a DataFrame

In [5]:
yelp_restaurants = pd.DataFrame(yelp_restaurants)

yelp_restaurants.head()

yelp_restaurants.to_csv('../data/yelp_restaurants.csv', index=False)

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

The Yelp API provided us with more complete data. This is because they had a more robust API, they were able to suggest us more restaurants. Beyond that, the Yelp API also provided us with rating for the restaurant, which Foursquare did not. The Yelp API gives us more details from just one single call using the latitude and longitude, therefore giving us more complete data.

Get the top 10 restaurants according to their rating

In [31]:
# Convert the 'rating' column to numeric (ignoring errors for non-numeric entries)
yelp_restaurants['rating'] = pd.to_numeric(yelp_restaurants['rating'], errors='coerce')

# Sort the restaurants by rating in descending order and select top 10
top_10_restaurants = yelp_restaurants.sort_values(by='rating', ascending=False).head(10)

# Add a 'Rank' column with numbers from 1 to 10
top_10_restaurants['Rank'] = range(1, 11)

# Reorganize the columns to have 'Rank' first, then 'station_name', 'restaurant_name', and 'rating'
top_10_restaurants = top_10_restaurants[['Rank', 'station_name', 'restaurant_name', 'rating']]

# Display the top 10 restaurants with rank
print(top_10_restaurants)


    Rank                               station_name       restaurant_name  \
71     1                  333 - Illescas - Camarena        American Fries   
9      2                       377 - Metro Abrantes             El Cuadro   
18     3                       377 - Metro Abrantes         El Pollo Loko   
5      4                       377 - Metro Abrantes  Cafetería Bar Dayton   
88     5                     3 - Plaza Conde Suchil                Lakasa   
94     6                     3 - Plaza Conde Suchil              Angelita   
99     7                     3 - Plaza Conde Suchil                Bacira   
44     8  192 - Avda. de los Toreros - Fco. Silvela          La Tasqueria   
41     9  192 - Avda. de los Toreros - Fco. Silvela               Cañadío   
8     10                       377 - Metro Abrantes           La Piazzola   

    rating  
71     5.0  
9      5.0  
18     5.0  
5      5.0  
88     4.8  
94     4.8  
99     4.8  
44     4.8  
41     4.7  
8      4.7  
