In [None]:
# imports
import os
import pandas as pd
import json
import requests
import folium
from tqdm import tqdm


#### Load stations dataframe


In [None]:
# Load the bike-station table; later cells read its 'name', 'latitude',
# 'longitude', 'usage_percentage' and 'total_bikes' columns.
stations_df = pd.read_csv('stations.csv')


# Foursquare


Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice.


### Foursquare iteration over bike stations


In [None]:
import requests
import os
import json

# Constants
FSQ_API_URL = "https://api.foursquare.com/v3/places/search"

def getNearbyPOI(latitude, longitude, radius=1000, open_now=True, timeout=10):
    """
    Retrieves nearby points of interest using Foursquare API.

    Args:
    latitude (float): Latitude of the location.
    longitude (float): Longitude of the location.
    radius (int, optional): Search radius in meters. Default is 1000.
    open_now (bool, optional): Sent to the API as the 'open_now' query flag
        ("true"/"false"). Default is True.
    timeout (float, optional): Seconds to wait for the HTTP response before
        giving up. Default is 10.

    Returns:
    dict: The decoded JSON response body.

    Raises:
    RuntimeError: If the 'FSQ_key' environment variable is unset, or if the
        request fails (connection error, timeout, HTTP error status or an
        undecodable body). RuntimeError is a subclass of Exception, so
        existing ``except Exception`` handlers keep working.
    """
    params = {
        "radius": str(radius),
        "ll": f"{latitude},{longitude}",
        "open_now": "true" if open_now else "false"
    }

    # The key is read from the environment so it never appears in the notebook
    api_key = os.getenv('FSQ_key')
    if not api_key:
        raise RuntimeError("API key not found")

    headers = {
        "Accept": "application/json",
        "Authorization": api_key
    }

    try:
        # Without a timeout a single stalled connection would block the whole
        # per-station loop indefinitely.
        response = requests.get(FSQ_API_URL, params=params, headers=headers, timeout=timeout)
        response.raise_for_status()
        # Decoding inside the try block routes a malformed body through the
        # same error path as every other request failure.
        return response.json()
    except requests.RequestException as e:
        # Chain the original error so the traceback keeps the root cause
        raise RuntimeError(f"Error fetching data: {e}") from e

# Example usage
# nearby_pois = getNearbyPOI(40.7128, -74.0060)




Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)


### Create a dataframe named 'df_fspoi' that consolidates details of both stations and points of interest. This dataframe includes columns for the station name, usage, and total bikes, as well as columns for the name, distance, and address of each point of interest


In [None]:
# One output row per (station, nearby POI) pair. Rows are collected as plain
# lists and turned into a DataFrame once, after the loop: concatenating onto an
# empty, untyped frame is deprecated in recent pandas and adds nothing here.
fspoi_columns = ['station', 'usage', 'total_bikes', 'poi_name', 'poi_distance', 'poi_address']
rows_to_append = []

for index, station in tqdm(stations_df.iterrows(), total=len(stations_df), desc="Processing Stations"):
    myjson = getNearbyPOI(station['latitude'], station['longitude'])
    for business in myjson.get('results', []):
        # Distance and address are read defensively: a POI that lacks them gets
        # a null cell (visible in the null-count check below) instead of
        # aborting the whole crawl with a KeyError.
        location = business.get('location') or {}
        rows_to_append.append([
            station['name'],
            station['usage_percentage'],
            station['total_bikes'],
            business['name'],
            business.get('distance'),
            location.get('formatted_address'),
        ])

df_fspoi = pd.DataFrame(rows_to_append, columns=fspoi_columns)


Put your parsed results into a DataFrame


In [None]:
# Show the assembled Foursquare station/POI frame as the cell output
df_fspoi


In [None]:
# Persist the Foursquare station/POI frame to 'FourSquarePOI.csv' (index column
# omitted) so the next section can load it
df_fspoi.to_csv('FourSquarePOI.csv', index=False)


In [None]:
# (rows, columns) of the Foursquare station/POI frame
df_fspoi.shape


In [None]:
# info must be called; the bare attribute only displays the bound-method repr
df_fspoi.info()


In [None]:
# Column names, non-null counts and dtypes of the Foursquare frame
df_fspoi.info()


### Data cleaning - checking for duplicates in points of interest will be important, since stations are within 300 metres. Stations have no duplicates, as checked earlier


Checking for Null Values


In [None]:
# Per-column count of missing values in the Foursquare results
print(df_fspoi.isna().sum())


# Yelp


Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice.


In [None]:
# imports
import pandas as pd
import json
import requests
print ("libraries imported")


In [None]:
def getNearbyYelp(latitude, longitude, radius=1000, timeout=10):
    """
    Retrieves businesses near a coordinate using the Yelp business search API.

    Args:
    latitude (float): Latitude of the location.
    longitude (float): Longitude of the location.
    radius (int, optional): Search radius in meters. Default is 1000.
    timeout (float, optional): Seconds to wait for the HTTP response before
        giving up. Default is 10.

    Returns:
    dict: The decoded JSON response body. The HTTP status is intentionally not
        checked here, so an error payload is returned as-is; callers should
        test for the 'businesses' key before using the result.

    Raises:
    RuntimeError: If the 'YELP_key' environment variable is not set.
    """
    # Fail fast on a missing key instead of sending "Bearer None" once per station
    api_key = os.getenv('YELP_key')
    if not api_key:
        raise RuntimeError("API key not found")

    endpoint = 'https://api.yelp.com/v3/businesses/search'
    headers = {'Authorization': f'Bearer {api_key}'}

    # Business search parameters
    parameters = {
        'latitude': f"{latitude}",
        'longitude': f"{longitude}",
        'radius': radius,
    }

    # The timeout keeps one stalled connection from blocking the station loop
    response = requests.get(url=endpoint,
                            params=parameters,
                            headers=headers,
                            timeout=timeout)

    return response.json()



Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)


In [None]:
from tqdm import tqdm

# Assuming stations_df is already defined and populated.
# Rows are collected as plain lists and turned into a DataFrame once, after the
# loop: concatenating onto an empty, untyped frame is deprecated in recent
# pandas and adds nothing here.
yelp_columns = ['station', 'usage', 'total_bikes', 'poi_name', 'poi_distance', 'poi_address']
rows_to_append = []

for index, station in tqdm(stations_df.iterrows(), total=len(stations_df), desc="Processing Stations"):
    yelpjson = getNearbyYelp(station['latitude'], station['longitude'])

    # Proceed only if 'businesses' key exists
    if 'businesses' not in yelpjson:
        print(f"Warning: No 'businesses' key in response for station {station['name']}")
        # Show the payload only for failed lookups; printing every successful
        # response buried the notebook in raw JSON.
        print(yelpjson)
        continue

    for business in yelpjson['businesses']:
        # Distance and address are read defensively so one incomplete record
        # yields a null cell rather than a KeyError that discards the crawl.
        location = business.get('location') or {}
        rows_to_append.append([
            station['name'],
            station['usage_percentage'],
            station['total_bikes'],
            business['name'],
            business.get('distance'),
            location.get('address1'),
        ])

df_yelp_poi = pd.DataFrame(rows_to_append, columns=yelp_columns)



In [None]:
# (rows, columns) of the Yelp station/POI frame
df_yelp_poi.shape


In [None]:
# info must be called; the bare attribute only displays the bound-method repr
df_yelp_poi.info()


### Data cleaning - duplicates in points of interest are to be expected, since some stations are within 2000 metres of others. Stations have no duplicates, as checked earlier


### Data cleaning - checking for null values


In [None]:
# Per-column count of missing values in the Yelp results
print(df_yelp_poi.isna().sum())


Put your parsed results into a DataFrame


In [None]:
# Show the assembled Yelp station/POI frame as the cell output
df_yelp_poi


In [None]:
# Persist the Yelp station/POI frame to 'YelpPOI.csv' (index column omitted)
# so the next section can load it
df_yelp_poi.to_csv('YelpPOI.csv', index=False)


# Comparing Results


Which API provided you with more complete data? Provide an explanation.


### Generally, Yelp seems to provide higher-quality information, since it gives a rating for each business; it also simply tracks many more businesses as points of interest in Fort Lauderdale than Foursquare does.


Get the top 10 restaurants according to their rating


In [None]:
import os
import requests
import pandas as pd
import json

# Define constants
YELP_API_KEY = os.getenv('YELP_key')  # read from the environment; None when the variable is unset
YELP_API_URL = "https://api.yelp.com/v3/businesses/search"
RESULTS_LIMIT = 20  # sent as the 'limit' query parameter and used to cap the rows kept per search
RADIUS_METERS = 1000  # Set the radius to 1000 meters

def _search_yelp(term, location, headers, timeout=10):
    """Run one Yelp business search for `term` at `location` and return its 'businesses' list."""
    params = {
        'term': term,
        'location': location,
        'limit': RESULTS_LIMIT,
        'open_now': True,
        'radius': RADIUS_METERS,
    }
    # The timeout keeps a stalled connection from hanging the cell
    response = requests.get(YELP_API_URL, headers=headers, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json().get('businesses', [])


def _businesses_to_frame(businesses):
    """Flatten business records into name / rating / coordinates / latitude / longitude columns."""
    columns = ['name', 'rating', 'coordinates', 'latitude', 'longitude']
    records = []
    for business in businesses:
        coordinates = business.get('coordinates') or {}
        records.append({
            'name': business.get('name'),
            'rating': business.get('rating'),
            'coordinates': coordinates,
            'latitude': coordinates.get('latitude'),
            'longitude': coordinates.get('longitude'),
        })
    # Passing the columns explicitly keeps the schema stable for an empty result
    return pd.DataFrame(records, columns=columns)


def get_top_restaurants_and_poi(location, api_key):
    """
    Fetches restaurants and points of interest for a location from Yelp,
    saves the combined table to '../data/poi_data.csv', and returns the
    restaurants ordered by rating.

    Args:
    location (str): Free-text location passed to Yelp's 'location' parameter.
    api_key (str): Yelp API key, sent as a Bearer token.

    Returns:
    pandas.DataFrame: Restaurants sorted by rating (highest first) with the
        columns 'name', 'rating', 'coordinates', 'latitude' and 'longitude'.
        If either request fails, the error is printed and an empty frame with
        the same columns is returned, so downstream cells do not break on None.
    """
    headers = {
        'Authorization': f'Bearer {api_key}'
    }

    try:
        restaurants = _search_yelp('restaurants', location, headers)
        raw_poi = _search_yelp('points of interest', location, headers)
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return _businesses_to_frame([])

    # Sort restaurants by rating, descending; a missing rating counts as 0
    top_restaurants = sorted(restaurants, key=lambda x: x.get('rating') or 0, reverse=True)
    restaurant_df = _businesses_to_frame(top_restaurants[:RESULTS_LIMIT])
    raw_poi_df = _businesses_to_frame(raw_poi[:RESULTS_LIMIT])

    # The saved table carries flat latitude/longitude only. The nested
    # 'coordinates' column is dropped from BOTH sources so the CSV does not end
    # up with a half-empty dict column; the returned restaurant_df keeps it.
    # Empty frames are skipped so they cannot influence the concatenated dtypes.
    flat_frames = [
        frame.drop(columns='coordinates')
        for frame in (raw_poi_df, restaurant_df)
        if not frame.empty
    ]
    if flat_frames:
        poi_df = pd.concat(flat_frames, ignore_index=True)
    else:
        poi_df = restaurant_df.drop(columns='coordinates')

    # Save poi_df as CSV, creating the target directory on first run
    csv_file_path = '../data/poi_data.csv'
    os.makedirs(os.path.dirname(csv_file_path), exist_ok=True)
    poi_df.to_csv(csv_file_path, index=False)

    return restaurant_df  # Return restaurant_df

# Example usage
# NOTE(review): the surrounding markdown discusses Fort Lauderdale, but this
# query is for 'Paris' — confirm which city is intended.
location = 'Paris'
restaurant_df = get_top_restaurants_and_poi(location, YELP_API_KEY)  # Assign the returned DataFrame to restaurant_df

               
 

In [None]:
# List the ten leading rows of restaurant_df, showing only name and rating
print("Top 10 Restaurants:")
top_ten = restaurant_df.head(10)
for position, name, rating in zip(top_ten.index, top_ten['name'], top_ten['rating']):
    # Rank is the index label plus one, padded on the right to two characters
    label = f"{position + 1}".ljust(2)
    print(f"{label}. Name: {name}, Rating: {rating:.1f}")


### Top 10 restaurants according to Yelp ratings for Fort Lauderdale are listed above. Yelp documentation indicates that the "rating sort is NOT strictly sorted by the rating value, but by an adjusted rating value that takes into account the number of ratings, similar to a Bayesian average. This is to prevent skewing results to businesses with a single review". Due to this (favourable) detail of the Yelp rating sort, I felt it was suitable to just show the top 10 results.


Top 10 Restaurants:
1 . Name: Grand Hôtel du Palais Royal, Rating: 4.5
2 . Name: De Voltaire à Rousseau, Rating: 4.5
3 . Name: Grand Bay Café, Rating: 4.5
4 . Name: La Tour de Montlhéry ou chez Denise, Rating: 4.0
5 . Name: Le Terminus du Châtelet, Rating: 4.0
6 . Name: Au Pied de Cochon, Rating: 3.5
7 . Name: Le Tambour, Rating: 3.5
8 . Name: Chacha, Rating: 3.0
9 . Name: Le Départ Saint Michel, Rating: 3.0
10. Name: Le Buci, Rating: 3.0
