Import required libraries


In [1]:
import os
import pandas as pd
import json
import requests
import folium
from tqdm import tqdm


#### Load stations dataframe


In [2]:
stations_df = pd.read_csv('../data/stations.csv')


# Foursquare


Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice.


### Foursquare iteration over bike tations


In [3]:
# Constants
FSQ_API_URL = "https://api.foursquare.com/v3/places/search"


def get_nearby_poi(latitude, longitude, radius=1000, open_now=True):
    """
    Retrieves nearby points of interest using Foursquare API.

    Args:
    latitude (float): Latitude of the location.
    longitude (float): Longitude of the location.
    radius (int, optional): Search radius in meters. Default is 1000.
    open_now (bool, optional): Whether to search only for places that are open now. Default is True.
    sort (str, optional): Sorting method of the results. Default is 'DISTANCE'.

    Returns:
    list: A list of dictionaries with POI information.

    Raises:
    Exception: If the API request fails.
    """
    params = {
        "radius": str(radius),
        "ll": f"{latitude},{longitude}",
        "open_now": "true" if open_now else "false"
    }

    # Securely load the API key
    api_key = os.getenv('FSQ_key')
    if not api_key:
        raise Exception("API key not found")

    headers = {
        "Accept": "application/json",
        "Authorization": api_key
    }

    try:
        response = requests.get(FSQ_API_URL, params=params, headers=headers)
        response.raise_for_status()
        return json.loads(response.text)
    except requests.RequestException as e:
        raise Exception(f"Error fetching data: {e}")

# Example usage
# nearby_pois = get_nearby_poi(40.7128, -74.0060)


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)


### Create a dataframe named 'df_fspoi' that consolidates details of both stations and points of interest. This dataframe includes columns for station name, latitude, longitude, and usage, as well as columns for the name, distance, rating, and address of each point of interest


In [4]:
def collect_nearby_poi_data(stations_df, max_responses=None):
    df_fspoi = pd.DataFrame(columns=[
                            'station', 'usage', 'total_bikes', 'poi_name', 'poi_distance', 'poi_address'])
    rows_to_append = []
    response_count = 0  # Keep track of the number of responses collected

    for index, station in tqdm(stations_df.iterrows(), total=len(stations_df), desc="Processing Stations"):
        myjson = get_nearby_poi(station['latitude'], station['longitude'])
        for business in myjson['results']:
            fs_info_data = {
                "name": business['name'],
                "distance": business['distance'],
                "address": business['location']['formatted_address']
            }
            row = [station['name'], station['usage_percentage'], station['total_bikes'], fs_info_data['name'],
                   fs_info_data['distance'], fs_info_data['address']]
            rows_to_append.append(row)
            response_count += 1

            if max_responses is not None and response_count >= max_responses:
                return pd.DataFrame(rows_to_append, columns=df_fspoi.columns)

    df_fspoi = pd.concat([df_fspoi, pd.DataFrame(
        rows_to_append, columns=df_fspoi.columns)], ignore_index=True)

    return df_fspoi


# Limit to 100 responses
df_fspoi = collect_nearby_poi_data(stations_df, max_responses=100)


Processing Stations:   1%|          | 9/1462 [00:11<30:32,  1.26s/it]


Put your parsed results into a DataFrame


In [5]:
df_fspoi


Unnamed: 0,station,usage,total_bikes,poi_name,poi_distance,poi_address
0,Benjamin Godard - Victor Hugo,0.914286,35,Square Lamartine,94,"3 Square Lamartine, 75016 Paris"
1,Benjamin Godard - Victor Hugo,0.914286,35,Boulangerie Schou,122,"96 rue de la Faisanderie (Rue Dufrénoy), 75016..."
2,Benjamin Godard - Victor Hugo,0.914286,35,Le Zinc du 16,246,"58 rue de la Faisanderie, 75116 Paris"
3,Benjamin Godard - Victor Hugo,0.914286,35,Laboratoire Cohen Mme Vincent M.C,52,"187 avenue Victor Hugo, 75116 Paris"
4,Benjamin Godard - Victor Hugo,0.914286,35,Tabac Lamartine,52,"178 avenue Victor Hugo, 75116 Paris"
...,...,...,...,...,...,...
95,Jouffroy d'Abbans - Wagram,0.815789,38,Le Central,144,"65 rue de Prony (Rue Jouffroy-d'Abbans), 75017..."
96,Jouffroy d'Abbans - Wagram,0.815789,38,Eyal LEVY,101,"4 rue Théodore de Banville, 75017 Paris"
97,Jouffroy d'Abbans - Wagram,0.815789,38,Clinique vétérinaire Wagranville,101,"4 rue Th\u00e9odore de Banville, 75017 Paris"
98,Jouffroy d'Abbans - Wagram,0.815789,38,Brasserie le Diplomate,367,"110 boulevard de Courcelles, 75017 Paris"


In [6]:
# I'm saving the dataframe to CSV for the next section
df_fspoi.to_csv('../data/fsq_poi.csv', index=False)


In [7]:
df_fspoi.shape


(100, 6)

In [8]:
df_fspoi.info


<bound method DataFrame.info of                           station     usage  total_bikes  \
0   Benjamin Godard - Victor Hugo  0.914286           35   
1   Benjamin Godard - Victor Hugo  0.914286           35   
2   Benjamin Godard - Victor Hugo  0.914286           35   
3   Benjamin Godard - Victor Hugo  0.914286           35   
4   Benjamin Godard - Victor Hugo  0.914286           35   
..                            ...       ...          ...   
95     Jouffroy d'Abbans - Wagram  0.815789           38   
96     Jouffroy d'Abbans - Wagram  0.815789           38   
97     Jouffroy d'Abbans - Wagram  0.815789           38   
98     Jouffroy d'Abbans - Wagram  0.815789           38   
99     Jouffroy d'Abbans - Wagram  0.815789           38   

                             poi_name  poi_distance  \
0                    Square Lamartine            94   
1                   Boulangerie Schou           122   
2                       Le Zinc du 16           246   
3   Laboratoire Cohen Mme V

In [9]:
df_fspoi.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   station       100 non-null    object 
 1   usage         100 non-null    float64
 2   total_bikes   100 non-null    int64  
 3   poi_name      100 non-null    object 
 4   poi_distance  100 non-null    int64  
 5   poi_address   100 non-null    object 
dtypes: float64(1), int64(2), object(3)
memory usage: 4.8+ KB


Checking for Null Values


In [10]:
print(df_fspoi.isnull().sum())


station         0
usage           0
total_bikes     0
poi_name        0
poi_distance    0
poi_address     0
dtype: int64


# Yelp


Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice.


In [11]:
def get_nearby_yelp(latitude, longitude):

    # Define my API Key, My Endpoint, and My Header
    API_KEY = os.getenv('YELP_key')
    ENDPOINT = 'https://api.yelp.com/v3/businesses/search'
    HEADERS = {'Authorization': 'bearer %s' % API_KEY}

    # BUSINESS SEARCH PARAMETERS
    PARAMETERS = {'latitude': f"{latitude}",
                  'longitude': f"{longitude}",
                  'radius': 1000}

    # Make a request to the Yelp API
    response = requests.get(url=ENDPOINT,
                            params=PARAMETERS,
                            headers=HEADERS)

    yelp_data = response.json()

    return yelp_data


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)


In [12]:
def collect_nearby_yelp_poi_data(stations_df, max_responses=None):
    columns = ['station', 'usage', 'total_bikes', 'poi_name', 'poi_distance', 'poi_address']
    collected_data = []

    for _, station in tqdm(stations_df.iterrows(), total=stations_df.shape[0], desc="Processing Stations"):
        try:
            yelp_response = get_nearby_yelp(station['latitude'], station['longitude'])

            if 'businesses' in yelp_response:
                for business in yelp_response['businesses']:
                    collected_data.append([
                        station['name'], 
                        station['usage_percentage'], 
                        station['total_bikes'], 
                        business['name'], 
                        business['distance'], 
                        business['location']['address1']
                    ])

                    if max_responses and len(collected_data) >= max_responses:
                        return pd.DataFrame(collected_data, columns=columns)
            else:
                print(f"Warning: No 'businesses' key in response for station {station['name']}")

    return pd.DataFrame(collected_data, columns=columns)

# Limit to 100 responses
df_yelp_poi = collect_nearby_yelp_poi_data(stations_df, max_responses=100)



Processing Stations:   0%|          | 1/1462 [00:00<10:48,  2.25it/s]

{'businesses': [{'id': 'WHHt_Jb8Tgidn9mW7oDnIg', 'alias': 'la-coïncidence-paris-4', 'name': 'La Coïncidence', 'image_url': 'https://s3-media3.fl.yelpcdn.com/bphoto/JIS7KFrJpPLjxAT-Zpe4Lg/o.jpg', 'is_closed': False, 'url': 'https://www.yelp.com/biz/la-co%C3%AFncidence-paris-4?adjust_creative=v58aGL3xcKISGlGiUDxxtg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=v58aGL3xcKISGlGiUDxxtg', 'review_count': 509, 'categories': [{'alias': 'french', 'title': 'French'}], 'rating': 4.5, 'coordinates': {'latitude': 48.868105, 'longitude': 2.284365}, 'transactions': [], 'price': '€€', 'location': {'address1': '15 rue Mesnil', 'address2': '', 'address3': '', 'city': 'Paris', 'zip_code': '75116', 'country': 'FR', 'state': '75', 'display_address': ['15 rue Mesnil', '75116 Paris', 'France']}, 'phone': '+33147559644', 'display_phone': '+33 1 47 55 96 44', 'distance': 674.5838847008364}, {'id': '9Dg5IhmhqeQhriJfMeNZ3g', 'alias': 'le-poincaré-paris-2', 'name': 'Le Poincaré', 'image_ur

Processing Stations:   0%|          | 2/1462 [00:00<11:43,  2.08it/s]

{'businesses': [{'id': 'cEjF41ZQB8-SST8cd3EsEw', 'alias': 'l-avant-comptoir-paris-3', 'name': "L'Avant Comptoir", 'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/V38oU4jfwMlY8Ue3vb-dBQ/o.jpg', 'is_closed': False, 'url': 'https://www.yelp.com/biz/l-avant-comptoir-paris-3?adjust_creative=v58aGL3xcKISGlGiUDxxtg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=v58aGL3xcKISGlGiUDxxtg', 'review_count': 657, 'categories': [{'alias': 'tapas', 'title': 'Tapas Bars'}, {'alias': 'wine_bars', 'title': 'Wine Bars'}], 'rating': 4.5, 'coordinates': {'latitude': 48.85202, 'longitude': 2.3388}, 'transactions': [], 'price': '€€', 'location': {'address1': "3 carrefour de l'Odéon", 'address2': '', 'address3': '', 'city': 'Paris', 'zip_code': '75006', 'country': 'FR', 'state': '75', 'display_address': ["3 carrefour de l'Odéon", '75006 Paris', 'France']}, 'phone': '+33142384755', 'display_phone': '+33 1 42 38 47 55', 'distance': 194.1463062267755}, {'id': 'n-XIbgR-Gwb1CqBMyjwj0w',

Processing Stations:   0%|          | 3/1462 [00:01<12:01,  2.02it/s]

{'businesses': [{'id': 'Aq_Bik-n7vJv4IyMPbSo9g', 'alias': 'l-aller-retour-paris', 'name': "L'Aller Retour", 'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/VMZ_47Bagj1AgYnVpfvwCQ/o.jpg', 'is_closed': False, 'url': 'https://www.yelp.com/biz/l-aller-retour-paris?adjust_creative=v58aGL3xcKISGlGiUDxxtg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=v58aGL3xcKISGlGiUDxxtg', 'review_count': 81, 'categories': [{'alias': 'wine_bars', 'title': 'Wine Bars'}, {'alias': 'steak', 'title': 'Steakhouses'}], 'rating': 4.5, 'coordinates': {'latitude': 48.865353, 'longitude': 2.363151}, 'transactions': [], 'price': '€€€', 'location': {'address1': '5 rue Charles François Dupuis', 'address2': '', 'address3': '', 'city': 'Paris', 'zip_code': '75003', 'country': 'FR', 'state': '75', 'display_address': ['5 rue Charles François Dupuis', '75003 Paris', 'France']}, 'phone': '+33142780121', 'display_phone': '+33 1 42 78 01 21', 'distance': 307.92717701385504}, {'id': '2b1VL9X67L0T68g

Processing Stations:   0%|          | 4/1462 [00:01<11:24,  2.13it/s]

{'businesses': [{'id': '-0iLH7iQNYtoURciDpJf6w', 'alias': 'le-comptoir-de-la-gastronomie-paris', 'name': 'Le Comptoir de la Gastronomie', 'image_url': 'https://s3-media3.fl.yelpcdn.com/bphoto/xT4YkCm_cZWbKbz9AVEnaA/o.jpg', 'is_closed': False, 'url': 'https://www.yelp.com/biz/le-comptoir-de-la-gastronomie-paris?adjust_creative=v58aGL3xcKISGlGiUDxxtg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=v58aGL3xcKISGlGiUDxxtg', 'review_count': 1299, 'categories': [{'alias': 'french', 'title': 'French'}], 'rating': 4.5, 'coordinates': {'latitude': 48.8645157999652, 'longitude': 2.34540185646608}, 'transactions': [], 'price': '€€', 'location': {'address1': '34 rue Montmartre', 'address2': '', 'address3': '', 'city': 'Paris', 'zip_code': '75001', 'country': 'FR', 'state': '75', 'display_address': ['34 rue Montmartre', '75001 Paris', 'France']}, 'phone': '+33142333132', 'display_phone': '+33 1 42 33 31 32', 'distance': 1183.0595587559897}, {'id': 'ctP4c3mwVO5oOzLI48LtuQ', 'al

Processing Stations:   0%|          | 4/1462 [00:02<14:20,  1.69it/s]

{'businesses': [{'id': 'ZpVf9wbJMKAogWgJhcMqZg', 'alias': 'pink-mamma-paris', 'name': 'Pink Mamma', 'image_url': 'https://s3-media1.fl.yelpcdn.com/bphoto/okfnivvKXKSDrtY4qUqoWg/o.jpg', 'is_closed': False, 'url': 'https://www.yelp.com/biz/pink-mamma-paris?adjust_creative=v58aGL3xcKISGlGiUDxxtg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=v58aGL3xcKISGlGiUDxxtg', 'review_count': 408, 'categories': [{'alias': 'steak', 'title': 'Steakhouses'}, {'alias': 'bbq', 'title': 'Barbeque'}, {'alias': 'italian', 'title': 'Italian'}], 'rating': 4.0, 'coordinates': {'latitude': 48.88187, 'longitude': 2.3346}, 'transactions': [], 'price': '€€€', 'location': {'address1': '20 bis rue de Douai', 'address2': '', 'address3': None, 'city': 'Paris', 'zip_code': '75009', 'country': 'FR', 'state': '75', 'display_address': ['20 bis rue de Douai', '75009 Paris', 'France']}, 'phone': '+33983559452', 'display_phone': '+33 9 83 55 94 52', 'distance': 358.534355262584}, {'id': 'EbdyO1nigcLi2E




In [13]:
df_yelp_poi.shape


(100, 6)

In [14]:
df_yelp_poi.info


<bound method DataFrame.info of                           station     usage  total_bikes  \
0   Benjamin Godard - Victor Hugo  0.914286           35   
1   Benjamin Godard - Victor Hugo  0.914286           35   
2   Benjamin Godard - Victor Hugo  0.914286           35   
3   Benjamin Godard - Victor Hugo  0.914286           35   
4   Benjamin Godard - Victor Hugo  0.914286           35   
..                            ...       ...          ...   
95             Toudouze - Clauzel  0.800000           20   
96             Toudouze - Clauzel  0.800000           20   
97             Toudouze - Clauzel  0.800000           20   
98             Toudouze - Clauzel  0.800000           20   
99             Toudouze - Clauzel  0.800000           20   

                              poi_name  poi_distance  \
0                       La Coïncidence    674.583885   
1                          Le Poincaré    796.383746   
2                           L'Astrance   1193.905748   
3   Crêperie Framboise 

### Data cleaning - checking for null values


In [15]:
print(df_yelp_poi.isnull().sum())


station         0
usage           0
total_bikes     0
poi_name        0
poi_distance    0
poi_address     0
dtype: int64


Put your parsed results into a DataFrame


In [16]:
df_yelp_poi


Unnamed: 0,station,usage,total_bikes,poi_name,poi_distance,poi_address
0,Benjamin Godard - Victor Hugo,0.914286,35,La Coïncidence,674.583885,15 rue Mesnil
1,Benjamin Godard - Victor Hugo,0.914286,35,Le Poincaré,796.383746,22 avenue Raymond Poincaré
2,Benjamin Godard - Victor Hugo,0.914286,35,L'Astrance,1193.905748,4 rue Beethoven
3,Benjamin Godard - Victor Hugo,0.914286,35,Crêperie Framboise Passy-Trocadéro,1046.423395,10 bd Delessert
4,Benjamin Godard - Victor Hugo,0.914286,35,Sgabetti,581.102661,15 rue Sablons
...,...,...,...,...,...,...
95,Toudouze - Clauzel,0.800000,20,Clasico Argentino,122.682395,22 rue Henry Monnier
96,Toudouze - Clauzel,0.800000,20,The Hardware Société,997.498508,10 rue Lamarck
97,Toudouze - Clauzel,0.800000,20,Sizin,176.359336,47 rue Saint-Georges
98,Toudouze - Clauzel,0.800000,20,Pi Hour,189.153410,32 rue de la Rochefoucauld


In [17]:
# Save dataframe to CSV
df_yelp_poi.to_csv('../data/yelp_poi.csv', index=False)


# Comparing Results


Yelp tend to be more generous. 


Get the top 10 restaurants according to their rating


In [18]:
# Define constants
YELP_API_KEY = os.getenv('YELP_key')
YELP_API_URL = "https://api.yelp.com/v3/businesses/search"
RESULTS_LIMIT = 20
RADIUS_METERS = 1000  # Set the radius to 1000 meters


def get_top_restaurants_and_poi(location, api_key):
    headers = {
        'Authorization': f'Bearer {api_key}'
    }

    # Define common parameters
    common_params = {
        'location': location,
        'limit': RESULTS_LIMIT,
        'open_now': True,
        'radius': RADIUS_METERS  # Add the 'radius' parameter with the specified value
    }

    # Define separate parameters for restaurants and POI
    restaurant_params = {
        'term': 'restaurants',
        **common_params
    }

    poi_params = {
        'term': 'points of interest',
        **common_params
    }

    try:
        # Retrieve top-rated restaurants
        response_restaurants = requests.get(
            YELP_API_URL, headers=headers, params=restaurant_params)
        response_restaurants.raise_for_status()

        data = response_restaurants.json()
        restaurants = data.get('businesses', [])

        # Sort restaurants by rating, descending
        top_restaurants = sorted(
            restaurants, key=lambda x: x.get('rating', 0), reverse=True)

        # Create a DataFrame for restaurants
        restaurant_df = pd.DataFrame(top_restaurants[:RESULTS_LIMIT])
        restaurant_df = restaurant_df[['name', 'rating', 'coordinates']]

        # Extract 'latitude' and 'longitude' from the 'coordinates' column
        restaurant_df['latitude'] = restaurant_df['coordinates'].apply(
            lambda x: x['latitude'])
        restaurant_df['longitude'] = restaurant_df['coordinates'].apply(
            lambda x: x['longitude'])

        # Save restaurant_df as CSV if needed
        # restaurant_csv_file_path = '../data/restaurant_data.csv'
        # restaurant_df.to_csv(restaurant_csv_file_path, index=False)

        # Retrieve top-rated POI
        response_poi = requests.get(
            YELP_API_URL, headers=headers, params=poi_params)
        response_poi.raise_for_status()

        data = response_poi.json()
        raw_poi = data.get('businesses', [])

        # Create a DataFrame for POI
        raw_poi_df = pd.DataFrame(raw_poi[:RESULTS_LIMIT])
        raw_poi_df = raw_poi_df[['name', 'rating', 'coordinates']]

        # Extract 'latitude' and 'longitude' from the 'coordinates' column
        raw_poi_df['latitude'] = raw_poi_df['coordinates'].apply(
            lambda x: x['latitude'])
        raw_poi_df['longitude'] = raw_poi_df['coordinates'].apply(
            lambda x: x['longitude'])

        # Drop the 'coordinates' column from raw_poi_df
        raw_poi_df.drop('coordinates', axis=1, inplace=True)

        # Concatenate raw_poi_df and restaurant_df into poi_df
        poi_df = pd.concat([raw_poi_df, restaurant_df], ignore_index=True)

        # Save poi_df as CSV
        csv_file_path = '../data/poi_data.csv'
        poi_df.to_csv(csv_file_path, index=False)

        return restaurant_df  # Return restaurant_df

    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")


# Example usage
location = 'Paris'
# Assign the returned DataFrame to restaurant_df
restaurant_df = get_top_restaurants_and_poi(location, YELP_API_KEY)


In [19]:
# View the first 10 restaurants with only 'name' and 'rating' columns
print("Top 10 Restaurants:")
for idx, row in restaurant_df.head(10).iterrows():
    name = row['name']
    rating = row['rating']
    # Use str.ljust to left-align the numbers with a width of 2
    idx_str = str(idx + 1).ljust(2)
    print(f"{idx_str}. Name: {name}, Rating: {rating:.1f}")


Top 10 Restaurants:
1 . Name: Le Café Laurent, Rating: 5.0
2 . Name: Au Plaisir, Rating: 5.0
3 . Name: Café Blanc, Rating: 4.5
4 . Name: Le P'tit Bistro, Rating: 4.5
5 . Name: Pret A Manger, Rating: 4.5
6 . Name: Grand Hôtel du Palais Royal, Rating: 4.5
7 . Name: Colin Régis, Rating: 4.5
8 . Name: L'Imprimerie, Rating: 4.0
9 . Name: Le Bistrot des Halles, Rating: 4.0
10. Name: Restaurant Bachaumont, Rating: 4.0


Top 10 Restaurants:
1 . Name: Grand Hôtel du Palais Royal, Rating: 4.5
2 . Name: De Voltaire à Rousseau, Rating: 4.5
3 . Name: Grand Bay Café, Rating: 4.5
4 . Name: La Tour de Montlhéry ou chez Denise, Rating: 4.0
5 . Name: Le Terminus du Châtelet, Rating: 4.0
6 . Name: Au Pied de Cochon, Rating: 3.5
7 . Name: Le Tambour, Rating: 3.5
8 . Name: Chacha, Rating: 3.0
9 . Name: Le Départ Saint Michel, Rating: 3.0 10. Name: Le Buci, Rating: 3.0
