Import required libraries


In [24]:
import os
import pandas as pd
import json
import requests
import folium
from tqdm import tqdm


#### Load stations dataframe


In [25]:
stations_df = pd.read_csv('../data/stations.csv')


# Foursquare


Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice.


### Foursquare iteration over bike tations


In [26]:
# Constants
FSQ_API_URL = "https://api.foursquare.com/v3/places/search"


def get_nearby_poi(latitude, longitude, radius=1000, open_now=True):
    """
    Retrieves nearby points of interest using Foursquare API.

    Args:
    latitude (float): Latitude of the location.
    longitude (float): Longitude of the location.
    radius (int, optional): Search radius in meters. Default is 1000.
    open_now (bool, optional): Whether to search only for places that are open now. Default is True.
    sort (str, optional): Sorting method of the results. Default is 'DISTANCE'.

    Returns:
    list: A list of dictionaries with POI information.

    Raises:
    Exception: If the API request fails.
    """
    params = {
        "radius": str(radius),
        "ll": f"{latitude},{longitude}",
        "open_now": "true" if open_now else "false"
    }

    # Securely load the API key
    api_key = os.getenv('FSQ_key')
    if not api_key:
        raise Exception("API key not found")

    headers = {
        "Accept": "application/json",
        "Authorization": api_key
    }

    try:
        response = requests.get(FSQ_API_URL, params=params, headers=headers)
        response.raise_for_status()
        return json.loads(response.text)
    except requests.RequestException as e:
        raise Exception(f"Error fetching data: {e}")

# Example usage
# nearby_pois = get_nearby_poi(40.7128, -74.0060)


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)


### Create a dataframe named 'df_fspoi' that consolidates details of both stations and points of interest. This dataframe includes columns for station name, latitude, longitude, and usage, as well as columns for the name, distance, rating, and address of each point of interest


In [27]:
def collect_nearby_poi_data(stations_df, max_responses=None):
    df_fspoi = pd.DataFrame(columns=[
                            'station', 'usage', 'total_bikes', 'poi_name', 'poi_distance', 'poi_address'])
    rows_to_append = []
    response_count = 0  # Keep track of the number of responses collected

    for index, station in tqdm(stations_df.iterrows(), total=len(stations_df), desc="Processing Stations"):
        myjson = get_nearby_poi(station['latitude'], station['longitude'])
        for business in myjson['results']:
            fs_info_data = {
                "name": business['name'],
                "distance": business['distance'],
                "address": business['location']['formatted_address']
            }
            row = [station['name'], station['usage_percentage'], station['total_bikes'], fs_info_data['name'],
                   fs_info_data['distance'], fs_info_data['address']]
            rows_to_append.append(row)
            response_count += 1

            if max_responses is not None and response_count >= max_responses:
                return pd.DataFrame(rows_to_append, columns=df_fspoi.columns)

    df_fspoi = pd.concat([df_fspoi, pd.DataFrame(
        rows_to_append, columns=df_fspoi.columns)], ignore_index=True)

    return df_fspoi


# Limit to 100 responses
df_fspoi = collect_nearby_poi_data(stations_df, max_responses=100)


Processing Stations:   1%|          | 9/1461 [00:07<21:11,  1.14it/s]


Put your parsed results into a DataFrame


In [28]:
df_fspoi


Unnamed: 0,station,usage,total_bikes,poi_name,poi_distance,poi_address
0,Benjamin Godard - Victor Hugo,0.942857,35,Square Lamartine,94,"3 Square Lamartine, 75016 Paris"
1,Benjamin Godard - Victor Hugo,0.942857,35,Axxia,185,"116 rue de la Faisanderie, 75116 Paris"
2,Benjamin Godard - Victor Hugo,0.942857,35,Place du Trocadéro,906,"19 place du Trocadéro et du Onze Novembre, 750..."
3,Benjamin Godard - Victor Hugo,0.942857,35,Bs Design,548,"16 rue Spontini, 75116 Paris"
4,Benjamin Godard - Victor Hugo,0.942857,35,Jardin du Ranelagh,968,"avenue du Ranelagh, 75016 Paris"
...,...,...,...,...,...,...
95,Jouffroy d'Abbans - Wagram,0.714286,35,Clinique Internationale du Parc Monceau,322,"21 rue de Chazelles, 21-23, 75017 Paris"
96,Jouffroy d'Abbans - Wagram,0.714286,35,Chirurgie de la Main et de l'épaule,336,"92 boulevard de Courcelles, 75017 Paris"
97,Jouffroy d'Abbans - Wagram,0.714286,35,Gazon Synthétique 24,527,"rue de Prony, 75017 Paris"
98,Jouffroy d'Abbans - Wagram,0.714286,35,Café Marion,829,"8 avenue de Friedland, 75008 Paris"


In [29]:
# I'm saving the dataframe to CSV for the next section
df_fspoi.to_csv('../data/fsq_poi.csv', index=False)


In [30]:
df_fspoi.shape


(100, 6)

In [31]:
df_fspoi.info


<bound method DataFrame.info of                           station     usage  total_bikes  \
0   Benjamin Godard - Victor Hugo  0.942857           35   
1   Benjamin Godard - Victor Hugo  0.942857           35   
2   Benjamin Godard - Victor Hugo  0.942857           35   
3   Benjamin Godard - Victor Hugo  0.942857           35   
4   Benjamin Godard - Victor Hugo  0.942857           35   
..                            ...       ...          ...   
95     Jouffroy d'Abbans - Wagram  0.714286           35   
96     Jouffroy d'Abbans - Wagram  0.714286           35   
97     Jouffroy d'Abbans - Wagram  0.714286           35   
98     Jouffroy d'Abbans - Wagram  0.714286           35   
99     Jouffroy d'Abbans - Wagram  0.714286           35   

                                   poi_name  poi_distance  \
0                          Square Lamartine            94   
1                                     Axxia           185   
2                        Place du Trocadéro           906   
3  

In [32]:
df_fspoi.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   station       100 non-null    object 
 1   usage         100 non-null    float64
 2   total_bikes   100 non-null    int64  
 3   poi_name      100 non-null    object 
 4   poi_distance  100 non-null    int64  
 5   poi_address   100 non-null    object 
dtypes: float64(1), int64(2), object(3)
memory usage: 4.8+ KB


Checking for Null Values


In [33]:
print(df_fspoi.isnull().sum())


station         0
usage           0
total_bikes     0
poi_name        0
poi_distance    0
poi_address     0
dtype: int64


# Yelp


Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice.


In [34]:
def get_nearby_yelp(latitude, longitude):

    # Define my API Key, My Endpoint, and My Header
    API_KEY = os.getenv('YELP_key')
    ENDPOINT = 'https://api.yelp.com/v3/businesses/search'
    HEADERS = {'Authorization': 'bearer %s' % API_KEY}

    # BUSINESS SEARCH PARAMETERS
    PARAMETERS = {'latitude': f"{latitude}",
                  'longitude': f"{longitude}",
                  'radius': 1000}

    # Make a request to the Yelp API
    response = requests.get(url=ENDPOINT,
                            params=PARAMETERS,
                            headers=HEADERS)

    yelp_data = response.json()

    return yelp_data


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)


In [35]:
from tqdm import tqdm

def collect_nearby_yelp_poi_data(stations_df, max_responses=None):
    columns = ['station', 'usage', 'total_bikes', 'poi_name', 'poi_distance', 'poi_address']
    collected_data = []

    for _, station in tqdm(stations_df.iterrows(), total=stations_df.shape[0], desc="Processing Stations"):
        try:
            yelp_response = get_nearby_yelp(station['latitude'], station['longitude'])

            if 'businesses' in yelp_response:
                for business in yelp_response['businesses']:
                    collected_data.append([
                        station['name'], 
                        station['usage_percentage'], 
                        station['total_bikes'], 
                        business['name'], 
                        business['distance'], 
                        business['location']['address1']
                    ])

                    if max_responses and len(collected_data) >= max_responses:
                        return pd.DataFrame(collected_data, columns=columns)

            # else: # Uncomment for warnings
            #     print(f"Warning: No 'businesses' key in response for station {station['name']}")

        except Exception as e:
            pass # Placeholder for handling exceptions, replace with logging if needed

    return pd.DataFrame(collected_data, columns=columns)

# Limit to 100 responses
df_yelp_poi = collect_nearby_yelp_poi_data(stations_df, max_responses=100)


Processing Stations:  15%|█▌        | 223/1461 [00:53<04:58,  4.15it/s]


KeyboardInterrupt: 

In [None]:
df_yelp_poi.shape


In [None]:
df_yelp_poi.info


### Data cleaning - checking for null values


In [None]:
print(df_yelp_poi.isnull().sum())


Put your parsed results into a DataFrame


In [None]:
df_yelp_poi


In [None]:
# Save dataframe to CSV
df_yelp_poi.to_csv('../data/yelp_poi.csv', index=False)


# Comparing Results


Yelp tend to be more generous. 


Get the top 10 restaurants according to their rating


In [None]:
# Define constants
YELP_API_KEY = os.getenv('YELP_key')
YELP_API_URL = "https://api.yelp.com/v3/businesses/search"
RESULTS_LIMIT = 20
RADIUS_METERS = 1000  # Set the radius to 1000 meters


def get_top_restaurants_and_poi(location, api_key):
    headers = {
        'Authorization': f'Bearer {api_key}'
    }

    # Define common parameters
    common_params = {
        'location': location,
        'limit': RESULTS_LIMIT,
        'open_now': True,
        'radius': RADIUS_METERS  # Add the 'radius' parameter with the specified value
    }

    # Define separate parameters for restaurants and POI
    restaurant_params = {
        'term': 'restaurants',
        **common_params
    }

    poi_params = {
        'term': 'points of interest',
        **common_params
    }

    try:
        # Retrieve top-rated restaurants
        response_restaurants = requests.get(
            YELP_API_URL, headers=headers, params=restaurant_params)
        response_restaurants.raise_for_status()

        data = response_restaurants.json()
        restaurants = data.get('businesses', [])

        # Sort restaurants by rating, descending
        top_restaurants = sorted(
            restaurants, key=lambda x: x.get('rating', 0), reverse=True)

        # Create a DataFrame for restaurants
        restaurant_df = pd.DataFrame(top_restaurants[:RESULTS_LIMIT])
        restaurant_df = restaurant_df[['name', 'rating', 'coordinates']]

        # Extract 'latitude' and 'longitude' from the 'coordinates' column
        restaurant_df['latitude'] = restaurant_df['coordinates'].apply(
            lambda x: x['latitude'])
        restaurant_df['longitude'] = restaurant_df['coordinates'].apply(
            lambda x: x['longitude'])

        # Save restaurant_df as CSV if needed
        # restaurant_csv_file_path = '../data/restaurant_data.csv'
        # restaurant_df.to_csv(restaurant_csv_file_path, index=False)

        # Retrieve top-rated POI
        response_poi = requests.get(
            YELP_API_URL, headers=headers, params=poi_params)
        response_poi.raise_for_status()

        data = response_poi.json()
        raw_poi = data.get('businesses', [])

        # Create a DataFrame for POI
        raw_poi_df = pd.DataFrame(raw_poi[:RESULTS_LIMIT])
        raw_poi_df = raw_poi_df[['name', 'rating', 'coordinates']]

        # Extract 'latitude' and 'longitude' from the 'coordinates' column
        raw_poi_df['latitude'] = raw_poi_df['coordinates'].apply(
            lambda x: x['latitude'])
        raw_poi_df['longitude'] = raw_poi_df['coordinates'].apply(
            lambda x: x['longitude'])

        # Drop the 'coordinates' column from raw_poi_df
        raw_poi_df.drop('coordinates', axis=1, inplace=True)

        # Concatenate raw_poi_df and restaurant_df into poi_df
        poi_df = pd.concat([raw_poi_df, restaurant_df], ignore_index=True)

        # Save poi_df as CSV
        csv_file_path = '../data/poi_data.csv'
        poi_df.to_csv(csv_file_path, index=False)

        return restaurant_df  # Return restaurant_df

    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")


# Example usage
location = 'Paris'
# Assign the returned DataFrame to restaurant_df
restaurant_df = get_top_restaurants_and_poi(location, YELP_API_KEY)


In [None]:
# View the first 10 restaurants with only 'name' and 'rating' columns
print("Top 10 Restaurants:")
for idx, row in restaurant_df.head(10).iterrows():
    name = row['name']
    rating = row['rating']
    # Use str.ljust to left-align the numbers with a width of 2
    idx_str = str(idx + 1).ljust(2)
    print(f"{idx_str}. Name: {name}, Rating: {rating:.1f}")


Top 10 Restaurants:
1 . Name: Grand Hôtel du Palais Royal, Rating: 4.5
2 . Name: De Voltaire à Rousseau, Rating: 4.5
3 . Name: Grand Bay Café, Rating: 4.5
4 . Name: La Tour de Montlhéry ou chez Denise, Rating: 4.0
5 . Name: Le Terminus du Châtelet, Rating: 4.0
6 . Name: Au Pied de Cochon, Rating: 3.5
7 . Name: Le Tambour, Rating: 3.5
8 . Name: Chacha, Rating: 3.0
9 . Name: Le Départ Saint Michel, Rating: 3.0 10. Name: Le Buci, Rating: 3.0
