In [2]:
#Importing Packages 
import pandas as pd
import numpy as np
import json
import requests
import os

In [3]:
# API credentials are read from the environment; each is None when its
# variable is not set.
FS_KEY = os.environ.get('FOURSQUARE_API_KEY')
YELP_KEY = os.environ.get('API_key_YELP')

In [4]:
# Bike-station table for Bhopal; the cells below read its `latitude` and
# `longitude` columns.
stations_df_bhopal = pd.read_csv(filepath_or_buffer='bhopal_bike_stations.csv')

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [13]:
def get_nearby_venues_fs(latitude, longitude, radius, categories):
    """
    Search the Foursquare Places API (v3) for venues around a coordinate.

    The request is authorised with the module-level FS_KEY and asks for the
    fields `name`, `rating` and `popularity` of at most 50 venues.

    Args:
        latitude (float): latitude for query (must be combined with longitude)
        longitude (float): longitude for query (must be combined with latitude)
        radius (int): search radius, sent as the `radius` query parameter
        categories (int | str | iterable of int/str): a single Foursquare
            category ID, an already comma-separated string of IDs, or a
            collection of IDs (joined with commas before sending).
    Requested fields:
        Rating: A numerical rating (from 0.0 to 10.0) of the FSQ Place, based on user votes, likes/dislikes, tips sentiment, and visit data
        Popularity: A measure of the FSQ Place's popularity, by foot traffic. This score is on a 0 to 1 scale and uses a 6-month span of POI visits for a given geographic area.
    Returns:
        dict: the decoded JSON body of the response.
    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status.
    """
    url = "https://api.foursquare.com/v3/places/search"

    # A bare ID (int such as 13065, or a string) is sent as-is; only real
    # collections are joined. Joining unconditionally fails on ints and splits
    # a plain string into single characters.
    if isinstance(categories, str) or not hasattr(categories, '__iter__'):
        category_string = str(categories)
    else:
        category_string = ','.join(str(category_id) for category_id in categories)

    params = {"categories": category_string,
              "radius": radius,
              "ll": f"{latitude},{longitude}",
              "limit": 50,  # upper limit
              "fields": "name,rating,popularity"
              }
    headers = {"Accept": "application/json",
               "Authorization": FS_KEY}

    # requests waits indefinitely unless a timeout is given.
    response = requests.get(url, params=params, headers=headers, timeout=30)
    # Fail loudly on auth / rate-limit errors instead of returning an error
    # payload that has no 'results' key.
    response.raise_for_status()
    return response.json()


In [14]:
# Smoke-test the Foursquare helper on the first station, asking for the
# restaurant (13065) and museum (10027) category IDs within 1000 m.
test_results = get_nearby_venues_fs(
    stations_df_bhopal.loc[0, 'latitude'],
    stations_df_bhopal.loc[0, 'longitude'],
    1000,
    ["13065", "10027"],
)


In [15]:
# Flatten the venue records of the test response into a table for a quick look.
viewit = pd.json_normalize(data=test_results['results'])
viewit

Unnamed: 0,name,popularity,rating
0,Sagar Gaire,0.791228,7.5
1,KFC,0.81864,6.4
2,The Garlic Bread,0.279386,
3,Shri Krishna Dairy,0.08443,
4,Veera Di Hatti,0.642763,
5,Regional Science Museum,0.21886,
6,Swaad The Taste Of India,0.45614,
7,Naidu Restaurant,,
8,Gayatri Restaurant,,
9,Cook Dooh Kooh,,


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

## Functions for aggregating the responses for the different categories

After looking at the documentation on the Foursquare website, I decided to query the following categories for this project. I obtained the category IDs from the Foursquare (FSQ) website.
The categories and their category IDs are:
1. Restaurants : 13065
2. Bars : 13003
3. Hotels : 19014
4. Museums : 10027

Source: https://docs.foursquare.com/data-products/docs/categories


In [8]:
# Function to count the results for each category returned by the API call
def category_count(category):
    """
    Count the Foursquare venues of a category near every bike station.

    Calls get_nearby_venues_fs once per row of stations_df_bhopal with a
    1000 m radius and counts the entries under the response's 'results' key.

    Args:
        category (int | str | iterable of int/str): Foursquare category ID, or
            a collection of IDs, of the POI type to search for.

    Returns:
        list[int]: one venue count per station, in the row order of
        stations_df_bhopal.
    """
    # Hand the IDs over as a list of strings: that is the form
    # get_nearby_venues_fs can join into a comma-separated query value,
    # whereas a bare int ID such as 13065 is not joinable.
    if isinstance(category, str) or not hasattr(category, '__iter__'):
        category_ids = [str(category)]
    else:
        category_ids = [str(category_id) for category_id in category]

    c_counts = []
    for row in stations_df_bhopal.itertuples(index=False):
        fs_result = get_nearby_venues_fs(row.latitude, row.longitude, 1000, category_ids)
        c_counts.append(len(pd.json_normalize(fs_result['results'])))
    return c_counts

In [9]:
#Function to get the highest rated venue and its rating for each category 
def highest_rated(category):
    """
    Find the best-rated Foursquare venue of a category near every bike station.

    Calls get_nearby_venues_fs once per row of stations_df_bhopal with a
    1000 m radius and picks the venue with the largest 'rating'. On ties the
    venue that comes first in the response wins.

    Args:
        category (int | str | iterable of int/str): Foursquare category ID, or
            a collection of IDs, of the POI type to search for.

    Returns:
        list: one entry per station, in the row order of stations_df_bhopal.
        Each entry is a single-item dict {venue name: rating}, or 0 when no
        venue near the station has a rating.
    """
    # Hand the IDs over as a list of strings: that is the form
    # get_nearby_venues_fs can join into a comma-separated query value,
    # whereas a bare int ID such as 13065 is not joinable.
    if isinstance(category, str) or not hasattr(category, '__iter__'):
        category_ids = [str(category)]
    else:
        category_ids = [str(category_id) for category_id in category]

    best_per_station = []
    for row in stations_df_bhopal.itertuples(index=False):
        fs_result = get_nearby_venues_fs(row.latitude, row.longitude, 1000, category_ids)
        norm_result = pd.json_normalize(fs_result['results'])

        if 'rating' not in norm_result.columns:
            best_per_station.append(0)
            continue

        # Only venues that actually carry a numeric rating can win. A column
        # holding nothing but nulls would otherwise leave no row to select.
        ratings = pd.to_numeric(norm_result['rating'], errors='coerce').dropna()
        if ratings.empty:
            best_per_station.append(0)
            continue

        # idxmax returns the first row holding the maximum.
        highest_rated_venue = norm_result.loc[ratings.idxmax()]
        best_per_station.append({highest_rated_venue['name']: highest_rated_venue['rating']})

    return best_per_station

In [10]:
#Function to get the most popular venue and its popularity for each category
def most_popular(category):
    """
    Find the most popular Foursquare venue of a category near every bike station.

    Calls get_nearby_venues_fs once per row of stations_df_bhopal with a
    1000 m radius and picks the venue with the largest 'popularity'. On ties
    the venue that comes first in the response wins.

    Args:
        category (int | str | iterable of int/str): Foursquare category ID, or
            a collection of IDs, of the POI type to search for.

    Returns:
        list: one entry per station, in the row order of stations_df_bhopal.
        Each entry is a single-item dict {venue name: popularity}, or 0 when
        no venue near the station has a popularity score.
    """
    # Hand the IDs over as a list of strings: that is the form
    # get_nearby_venues_fs can join into a comma-separated query value,
    # whereas a bare int ID such as 13065 is not joinable.
    if isinstance(category, str) or not hasattr(category, '__iter__'):
        category_ids = [str(category)]
    else:
        category_ids = [str(category_id) for category_id in category]

    popular_per_station = []
    for row in stations_df_bhopal.itertuples(index=False):
        fs_result = get_nearby_venues_fs(row.latitude, row.longitude, 1000, category_ids)
        norm_result = pd.json_normalize(fs_result['results'])

        if 'popularity' not in norm_result.columns:
            popular_per_station.append(0)
            continue

        # Only venues that actually carry a numeric popularity can win. A
        # column holding nothing but nulls would otherwise leave no row to
        # select.
        scores = pd.to_numeric(norm_result['popularity'], errors='coerce').dropna()
        if scores.empty:
            popular_per_station.append(0)
            continue

        # idxmax returns the first row holding the maximum.
        most_popular_venue = norm_result.loc[scores.idxmax()]
        popular_per_station.append({most_popular_venue['name']: most_popular_venue['popularity']})

    return popular_per_station

Put your parsed results into a DataFrame

### RESTAURANTS

In [11]:
# Restaurants (Foursquare category 13065): per-station venue count,
# best-rated venue and most popular venue.
stations_df_bhopal['num_restaurants'] = category_count(category=13065)
stations_df_bhopal['highest_rated_restaurant'] = highest_rated(category=13065)
stations_df_bhopal['most_popular_restaurant'] = most_popular(category=13065)

### BARS

In [12]:
# Bars (Foursquare category 13003): per-station venue count, best-rated venue
# and most popular venue.
stations_df_bhopal['num_bars'] = category_count(category=13003)
stations_df_bhopal['highest_rated_bar'] = highest_rated(category=13003)
stations_df_bhopal['most_popular_bar'] = most_popular(category=13003)

### MUSEUMS

In [13]:
# Museums (Foursquare category 10027): per-station venue count, best-rated
# venue and most popular venue.
stations_df_bhopal['num_museums'] = category_count(category=10027)
stations_df_bhopal['highest_rated_museum'] = highest_rated(category=10027)
stations_df_bhopal['most_popular_museum'] = most_popular(category=10027)

### HOTELS

In [14]:
# Hotels (Foursquare category 19014): per-station venue count, best-rated
# venue and most popular venue.
stations_df_bhopal['num_hotels'] = category_count(category=19014)
stations_df_bhopal['highest_rated_hotel'] = highest_rated(category=19014)
stations_df_bhopal['most_popular_hotel'] = most_popular(category=19014)

In [16]:
# Save the stations table, including the Foursquare columns, to a CSV file.
# DataFrame.to_csv returns None when it is given a path, so its result is not
# assigned to a variable; the data stays available as stations_df_bhopal.
stations_df_bhopal.to_csv('stations_df_fsq_bhopal.csv', index=False)

In [50]:
# Inspect the stations table together with the Foursquare-derived columns.
stations_df_bhopal

Unnamed: 0,station_id,station_name,latitude,longitude,empty_slots,free_bikes,e_bikes,timestamp,station_uid,station_number,...,most_popular_restaurant,num_bars,highest_rated_bar,most_popular_bar,num_museums,highest_rated_museum,most_popular_museum,num_hotels,highest_rated_hotel,most_popular_hotel
0,b371ef2499d92f6f39aecd14aa834f73,NANDAN KANAN PARK,23.222624,77.424051,2,4,0,2024-04-17 00:16:33.608000+00:00,122,9871,...,{'The Garlic Bread': 0.875438596491228},1,{'Coffee And U': 6.5},{'Coffee And U': 0.27587719298245617},1,0,{'Regional Science Museum': 0.1736842105263158},10,0,{'OYO 4229 Rishi Homes Stay': 0.3796052631578947}
1,9ca6adc741b5a7e4e2914913ba967604,FOUNTAIN PARK MINAAL,23.275564,77.454774,3,3,0,2024-04-17 00:16:33.607000+00:00,119,9868,...,{'Mughal Restaurant': 0.9438596491228071},0,0,0,0,0,0,2,0,{'Hotel Awadh Palace': 0.3875}
2,154a47ad488f8c67ea69615613277002,AVADHPURI TIRAHA,23.230224,77.485648,1,5,0,2024-04-17 00:16:33.616000+00:00,131,9880,...,0,0,0,0,0,0,0,1,0,0
3,36761a60c92613a6e5f7abc3e89e3b8c,AKRITI BLUE SKY,23.167347,77.438082,5,3,0,2024-04-17 00:16:33.609000+00:00,117,9866,...,0,0,0,0,0,0,0,1,0,0
4,e2a4ce935da75f9eb2d0730033ddc5bb,VARDHMAAN PARK,23.249605,77.395605,0,0,1,2024-04-17 00:16:33.612000+00:00,75,9823,...,{'Hotel Afghan': 0.8682017543859649},0,0,0,1,0,0,22,0,{'The Ten': 0.9855263157894737}
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69,2402e44d882d0c75e247265430587341,AIIMS 2 (NURSING HOTEL),23.205616,77.460985,1,5,0,2024-04-17 00:16:33.728000+00:00,207,9894,...,0,0,0,0,0,0,0,1,0,0
70,1963bb2716298c99b134c17283d62834,SAIR SAPATA,23.216699,77.376212,1,9,0,2024-04-17 00:16:33.729000+00:00,79,9827,...,{'Machan': 0.9923245614035088},1,{'Infinity': 7.1},{'Infinity': 0.9809210526315789},0,0,0,6,{'Jehan Numa Retreat': 8.4},{'Taj Lakefront': 0.9958333333333333}
71,f724389651392c750199500952f05138,VINDYACHAL BHAWAN,23.237441,77.421214,2,4,0,2024-04-17 00:16:33.729000+00:00,91,9839,...,{'Goodricke Tea Pot': 0.9486842105263158},0,0,0,2,0,0,2,{'Courtyard by Marriott': 7.8},{'Courtyard by Marriott': 0.9756578947368421}
72,c389846553dcc6b9b602013f95f8e358,CHARTERED BIKE WORKSHOP BHOPAL,23.228904,77.444074,505,25,0,2024-04-17 00:16:33.727000+00:00,196,10002,...,{'Rice Bowl': 0.9671052631578947},4,0,{'Sheesha Lounge': 0.4888157894736842},0,0,0,50,0,{'Hotel Rajhans': 0.9912280701754386}


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [21]:
def get_nearby_venues_yelp(latitude, longitude, radius, categories):
    """
    Search the Yelp Fusion businesses endpoint for venues around a coordinate.

    The request is authorised with the module-level YELP_KEY (sent as a Bearer
    token) and asks for at most 50 businesses.

    Args:
        latitude (float): latitude for query (must be combined with longitude)
        longitude (float): longitude for query (must be combined with latitude)
        radius (int): search radius, sent as the `radius` query parameter
        categories (str): Yelp-recognized category alias, sent as the
            `categories` query parameter.
    Returns:
        dict: the decoded JSON body of the response.
    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status.
    """
    url = "https://api.yelp.com/v3/businesses/search"

    params = {"latitude": latitude,
              "longitude": longitude,
              "radius": radius,
              "limit": 50,  # upper limit
              "categories": categories
              }
    headers = {
        "Authorization": f"Bearer {YELP_KEY}",
        "Accept": "application/json"
    }

    # requests waits indefinitely unless a timeout is given.
    response = requests.get(url, params=params, headers=headers, timeout=30)
    # Fail loudly on auth / rate-limit errors instead of returning an error
    # payload that has no 'businesses' key.
    response.raise_for_status()
    return response.json()

In [7]:
# Smoke-test the Yelp helper on the first station, asking for the 'hotels'
# category within 1000 m.
test_results_yelp = get_nearby_venues_yelp(
    stations_df_bhopal.loc[0, 'latitude'],
    stations_df_bhopal.loc[0, 'longitude'],
    1000,
    'hotels',
)
test_results_yelp

{'businesses': [],
 'total': 0,
 'region': {'center': {'longitude': 77.424051, 'latitude': 23.222624}}}

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

The categories (in line with the FourSquare POIs) are: 
1. 'restaurants'
2. 'bars'
3. 'museums'
4. 'hotels'

Source: https://docs.developer.yelp.com/docs/resources-categories


In [22]:
#Repeating functions created to loop through the YELP results and get counts, highest rated and most popular venues for each category
def category_count_yelp(category):
    """
    Count the Yelp businesses of a category near every bike station.

    Queries get_nearby_venues_yelp once per row of stations_df_bhopal with a
    1000 m radius and counts the entries under the response's 'businesses' key.

    Args:
        category (str): the Yelp category string of the POI type to search for

    Returns:
        list[int]: one business count per station, in the row order of
        stations_df_bhopal.
    """
    return [
        len(
            pd.json_normalize(
                get_nearby_venues_yelp(station.latitude, station.longitude, 1000, category)['businesses']
            )
        )
        for station in stations_df_bhopal.itertuples(index=False)
    ]

In [23]:
def highest_rated_yelp(category):
    """
    Find the best-rated Yelp business of a category near every bike station.

    Queries get_nearby_venues_yelp once per row of stations_df_bhopal with a
    1000 m radius. On ties the business that comes first in the response wins.

    Args:
        category (str): the Yelp category string of the POI type to search for

    Returns:
        list: one entry per station, in the row order of stations_df_bhopal.
        Each entry is a single-item dict {business name: rating}, or 0 when
        the response holds no businesses or no 'rating' field.
    """
    best_by_station = []
    for station in stations_df_bhopal.itertuples(index=False):
        payload = get_nearby_venues_yelp(station.latitude, station.longitude, 1000, category)
        venues = pd.json_normalize(payload['businesses'])

        # Nothing found, or nothing rated: record the 0 placeholder.
        if len(venues) == 0 or 'rating' not in venues.columns:
            best_by_station.append(0)
            continue

        # First business whose rating equals the maximum.
        top_score = venues['rating'].max()
        winner = venues.loc[venues['rating'] == top_score].iloc[0]
        best_by_station.append({winner['name']: winner['rating']})

    return best_by_station

In [24]:
def most_reviews_yelp(category):
    """
    Find the most-reviewed Yelp business of a category near every bike station.

    Queries get_nearby_venues_yelp once per row of stations_df_bhopal with a
    1000 m radius. On ties the business that comes first in the response wins.

    Args:
        category (str): the Yelp category string of the POI type to search for

    Returns:
        list: one entry per station, in the row order of stations_df_bhopal.
        Each entry is a single-item dict {business name: review_count}, or 0
        when the response holds no businesses or no 'review_count' field.
    """
    reviewed_by_station = []
    for station in stations_df_bhopal.itertuples(index=False):
        payload = get_nearby_venues_yelp(station.latitude, station.longitude, 1000, category)
        venues = pd.json_normalize(payload['businesses'])

        # Nothing found, or no review counts: record the 0 placeholder.
        if len(venues) == 0 or 'review_count' not in venues.columns:
            reviewed_by_station.append(0)
            continue

        # First business whose review count equals the maximum.
        top_count = venues['review_count'].max()
        winner = venues.loc[venues['review_count'] == top_count].iloc[0]
        reviewed_by_station.append({winner['name']: winner['review_count']})

    return reviewed_by_station

In [51]:
# Run the most-reviewed lookup for the 'hotels' category across all stations
# and print the per-station results.
test_yelp_func = most_reviews_yelp(category='hotels')
print(test_yelp_func)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


Put your parsed results into a DataFrame

Bhopal has no Yelp coverage: every query above came back empty (shown as 0 for every station), and my research online confirmed this. I therefore made a new notebook with a different city and repeated the project there.

In [None]:
# stations_df_bhopal['num_restaurants_yelp'] = category_count_yelp('restaurants')
# stations_df_bhopal['highest_rated_restaurant_yelp'] = highest_rated_yelp('restaurants')
# stations_df_bhopal['most_reviews_restaurant_yelp'] = most_reviews_yelp('restaurants')


In [None]:
# stations_df_bhopal['num_bars_yelp'] = category_count_yelp('bars')
# stations_df_bhopal['highest_rated_bar_yelp'] = highest_rated_yelp('bars')
# stations_df_bhopal['most_reviews_bar_yelp'] = most_reviews_yelp('bars')

In [None]:
# stations_df_bhopal['num_museums_yelp'] = category_count_yelp('museums')
# stations_df_bhopal['highest_rated_museum_yelp'] = highest_rated_yelp('museums')
# stations_df_bhopal['most_reviews_museum_yelp'] = most_reviews_yelp('museums')


In [None]:
# stations_df_bhopal['num_hotels_yelp'] = category_count_yelp('hotels')
# stations_df_bhopal['highest_rated_hotel_yelp'] = highest_rated_yelp('hotels')
# stations_df_bhopal['most_reviews_hotel_yelp'] = most_reviews_yelp('hotels')

In [None]:
# stations_df_fs_and_yelp = stations_df_bhopal.to_csv('stations_df_fs_and_yelp_bhop.csv', index=False)

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

Get the top 10 restaurants according to their rating

In [44]:
#To reiterate the function from before 
def get_nearby_venues_fs_2(latitude, longitude, radius, categories):
    """
    Search the Foursquare Places API (v3) for venues around a coordinate,
    returning each venue's Foursquare ID alongside its name and rating.

    The request is authorised with the module-level FS_KEY and asks for the
    fields `name`, `rating` and `fsq_id` of at most 50 venues.

    Args:
        latitude (float): latitude for query (must be combined with longitude)
        longitude (float): longitude for query (must be combined with latitude)
        radius (int): search radius, sent as the `radius` query parameter
        categories (int | str | iterable of int/str): a single Foursquare
            category ID, an already comma-separated string of IDs, or a
            collection of IDs (joined with commas before sending).
    Requested fields:
        Rating: A numerical rating (from 0.0 to 10.0) of the FSQ Place, based on user votes, likes/dislikes, tips sentiment, and visit data
    Returns:
        dict: the decoded JSON body of the response.
    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status.
    """
    url = "https://api.foursquare.com/v3/places/search"

    # A bare ID is sent as-is; a collection is collapsed into one
    # comma-separated value rather than being encoded as repeated parameters.
    if isinstance(categories, str) or not hasattr(categories, '__iter__'):
        category_string = str(categories)
    else:
        category_string = ','.join(str(category_id) for category_id in categories)

    params = {"categories": category_string,
              "radius": radius,
              "ll": f"{latitude},{longitude}",
              "limit": 50,  # upper limit
              "fields": "name,rating,fsq_id"
              }
    headers = {"Accept": "application/json",
               "Authorization": FS_KEY}

    # requests waits indefinitely unless a timeout is given.
    response = requests.get(url, params=params, headers=headers, timeout=30)
    # Fail loudly on auth / rate-limit errors instead of returning an error
    # payload that has no 'results' key.
    response.raise_for_status()
    return response.json()


In [45]:
# Fetch the restaurants (Foursquare category 13065) within 1000 m of every
# station, one table per station, then stack the tables into a single frame.
restaurants_fsq = [
    pd.json_normalize(
        get_nearby_venues_fs_2(station.latitude, station.longitude, 1000, 13065)['results']
    )
    for station in stations_df_bhopal.itertuples(index=False)
]

restaurants_fs = pd.concat(restaurants_fsq, ignore_index=True)

In [46]:
# Replace missing values with 0, keep the first row per Foursquare ID (a
# restaurant can be returned for several stations) and keep only the name and
# rating columns.
restaurants_fs = (
    restaurants_fs
    .fillna(0)
    .drop_duplicates(subset=['fsq_id'], keep='first')
    .loc[:, ['name', 'rating']]
)
restaurants_fs

Unnamed: 0,name,rating
0,Sagar Gaire,7.5
1,KFC,6.4
2,The Garlic Bread,0.0
3,Shri Krishna Dairy,0.0
4,Veera Di Hatti,0.0
...,...,...
2814,Mama Fruit Juice Bar,0.0
2824,Chappan Bhog,0.0
2838,Sweets Shop,0.0
2847,Aggarwal Chat Bhandar,0.0


In [47]:
# The ten best-rated restaurants found within 1000 m of any city bike station.
top_10_restaurants = restaurants_fs.sort_values('rating', ascending=False).iloc[:10]
top_10_restaurants

Unnamed: 0,name,rating
572,La Kuchina,8.1
1210,Hotel Jehanuma Palace,8.1
298,Wind and Waves,7.9
0,Sagar Gaire,7.5
2724,Machan,7.1
202,Indian Coffee House,6.8
399,Rainbow Treat,6.8
2435,Filfora,6.7
2584,Domino's Pizza,6.6
152,Domino's Pizza,6.5
