In [286]:
# imports
import requests
import json
import pandas as pd
import os
import time

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [308]:
# Load the station table from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer='df_city_bike.csv')

In [309]:
# Drop stations that have no coordinates. The frame is rebound (instead of
# using inplace=True) and copied so the column assignment below operates on
# an independent frame.
data = data.dropna(subset=['latitude', 'longitude']).copy()

# creation of variable with lat and lon together, formatted as "lat,lon"
data['ll'] = data['latitude'].astype(str) + ',' + data['longitude'].astype(str)
# discard rows whose coordinates are exactly 0.0,0.0
data = data[data['ll'] != '0.0,0.0']

# list of unique "lat,lon" strings from the bike station dataframe.
# drop_duplicates() keeps first-seen row order; list(set(...)) would give an
# arbitrary order, making positional indices into this list (used later as
# 'row_from_station') impossible to trace back to a station.
bike_ll = data['ll'].drop_duplicates().tolist()

In [310]:
# Percent-encode the comma of every "lat,lon" string so the value can be
# appended directly to a request URL.
bike_stop_ll = list(map(lambda lat_lon: lat_lon.replace(',', '%2C'), bike_ll))

In [307]:
# API references consulted for the request format:
#   https://location.foursquare.com/developer/reference/place-search
#   https://location.foursquare.com/developer/reference/response-fields
# Category list: https://location.foursquare.com/places/docs/categories

# The key is read from the environment so it never appears in the notebook.
api_key = os.environ["FOURSQUARE_API_KEY"]

# Request headers: ask for JSON and pass the API key as the Authorization value.
headers = {
    "Accept": "application/json",
    "Authorization": api_key,
}

In [311]:
# Raw Foursquare responses (parsed JSON); one element is appended for every
# station whose request succeeds.
foursquare_list_rich_20 = []

# Getting data for places around bike stops
for station in bike_stop_ll:
    # `station` is already percent-encoded ("lat%2Clon"), so it is appended to
    # the URL as-is; passing it through `params=` would encode the '%' again.
    url = (
        "https://api.foursquare.com/v3/places/search"
        "?radius=1000"
        "&fields=categories%2Crating%2Cgeocodes%2Clocation%2Cname%2Cfsq_id%2Cstats%2Cprice"
        "&limit=20"
        "&ll=" + station
    )

    # Two attempts at most: the second one only happens after a quota wait.
    for attempt in range(2):
        try:
            # A timeout keeps one stalled connection from hanging the whole loop.
            response = requests.get(url, headers=headers, timeout=30)
            if response.status_code == 200:
                foursquare_list_rich_20.append(response.json())
                break
        except (requests.exceptions.RequestException, ValueError) as e:
            # Network failure or undecodable body: report it and move on to
            # the next station (best effort, as before).
            print(e)
            break

        # Quota / rate limits come back as an HTTP 429 response, not as an
        # exception, so the status code is checked here (comparing an
        # exception object with a string is always False).
        if response.status_code == 429 and attempt == 0:
            print("Exceeded quota: waiting for an hour")
            time.sleep(3600)
            continue

        print('Error occurred during the API request', response.status_code)
        break

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [None]:
# Column layout of the flattened Foursquare table.
foursquare_columns = ['row_from_station', 'fsq_id', 'category_id', 'name', 'latitude', 'longitude', 'rating', 'total_ratings', 'total_photos', 'total_tips']

# Gather one dict per place and build the DataFrame once at the end.
# DataFrame.append was removed in pandas 2.0, and appending row by row copies
# the whole frame on every iteration.
foursquare_records = []

for station, payload in enumerate(foursquare_list_rich_20):
    # `station` is the position of the response in foursquare_list_rich_20.
    for place in payload.get('results', []):
        # Every nested field is optional; a missing one becomes None.
        categories = place.get('categories') or []
        main_geocode = (place.get('geocodes') or {}).get('main') or {}
        stats = place.get('stats') or {}

        foursquare_records.append({
            'row_from_station': station,
            'fsq_id': place.get('fsq_id'),
            # only the first listed category is kept
            'category_id': categories[0].get('id', None) if categories else None,
            'name': place.get('name'),
            'latitude': main_geocode.get('latitude', None),
            'longitude': main_geocode.get('longitude', None),
            'rating': place.get('rating'),
            'total_ratings': stats.get('total_ratings', None),
            'total_photos': stats.get('total_photos', None),
            'total_tips': stats.get('total_tips', None)
        })

# Passing `columns` fixes the column order and still yields an empty,
# correctly shaped frame when no places were collected.
df_foursquare_20 = pd.DataFrame(foursquare_records, columns=foursquare_columns)

In [327]:
#foursquare_list (foursquare_basic.js) holds only the basic information about each business

#foursquare_list_rich_50 (foursquare_50.js) was produced by the same code block as foursquare_list_rich_20, except that the limit on the number of businesses was 50
# df_foursquare_50.csv is the DataFrame built from that file

Put your parsed results into a DataFrame

In [320]:
# The DataFrame was already assembled while parsing the Foursquare JSON,
# so this cell only writes it to disk, leaving out the index column.
df_foursquare_20.to_csv(path_or_buf='df_foursquare_20.csv', index=False)

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [111]:
#visited https://docs.developer.yelp.com/reference/v3_business_search to check API structure

# Read the Yelp key from the environment
yelp_api_key = os.environ["YELP_API_KEY"]

# The key is sent as a Bearer token in the Authorization header.
# NOTE: this rebinds `headers`, the same name the Foursquare request cell
# reads; re-run the Foursquare headers cell before repeating Foursquare requests.
headers = {
    "accept": "application/json",
    "Authorization": "Bearer " + yelp_api_key
}

In [112]:
# Raw Yelp responses (parsed JSON); one element is appended for every station
# whose request succeeds.
yelp_list = []

# One request per distinct station coordinate, in the order the stations
# appear in `data`. Indexing `data` by position while counting with the
# de-duplicated coordinate list would repeat duplicated stations and never
# reach the last rows.
station_coords = data[["latitude", "longitude"]].drop_duplicates()

# Getting data for places around bike stops
for latitude, longitude in station_coords.itertuples(index=False, name=None):
    yelp_url = "https://api.yelp.com/v3/businesses/search?latitude=" + str(latitude) + "&longitude=" + str(longitude) + "&term=park&radius=1000&categories=&sort_by=best_match&limit=20"

    # Two attempts at most: the second one only happens after a quota wait.
    for attempt in range(2):
        try:
            # A timeout keeps one stalled connection from hanging the whole loop.
            yelp_response = requests.get(yelp_url, headers=headers, timeout=30)
            if yelp_response.status_code == 200:
                yelp_list.append(yelp_response.json())
                break
        except (requests.exceptions.RequestException, ValueError) as e:
            # Network failure or undecodable body: report it and move on to
            # the next station (best effort, as before).
            print(e)
            break

        # Quota / rate limits come back as an HTTP 429 response, not as an
        # exception, so the status code is checked here (comparing an
        # exception object with a string is always False).
        if yelp_response.status_code == 429 and attempt == 0:
            print("Exceeded quota: waiting for an hour")
            time.sleep(3600)
            continue

        print('Error occurred during the API request', yelp_response.status_code)
        break

#yelp_list saved as js file

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [207]:
# Empty frame that fixes the column layout for the parsed Yelp results.
df_yelp = pd.DataFrame(
    columns=[
        'row_from_station',
        'id',
        'name',
        'rating',
        'review_count',
        'latitude',
        'longitude',
    ]
)

In [None]:
#parsing through 'yelp_list' from previous step to get columns for dataframe (id, name, rating, review_count, latitude and longitude)

# Gather one dict per business and build the DataFrame once at the end.
# DataFrame.append was removed in pandas 2.0, and appending onto the existing
# frame duplicated every row whenever this cell was re-run.
yelp_records = []

for station, payload in enumerate(yelp_list):
    # `station` is the position of the response in yelp_list.
    for business in payload.get('businesses', []):
        # A missing field becomes None instead of aborting the whole parse.
        coordinates = business.get('coordinates') or {}
        yelp_records.append({
            'row_from_station': station,
            'id': business.get('id'),
            'name': business.get('name'),
            'rating': business.get('rating'),
            'review_count': business.get('review_count'),
            'latitude': coordinates.get('latitude'),
            'longitude': coordinates.get('longitude')
        })

# Passing `columns` fixes the column order and still yields an empty,
# correctly shaped frame when no businesses were collected.
df_yelp = pd.DataFrame(
    yelp_records,
    columns=['row_from_station', 'id', 'name', 'rating', 'review_count', 'latitude', 'longitude'],
)

In [213]:
# Sanity check: dimensions on the first line, then the first five rows.
print(df_yelp.shape, df_yelp.head(), sep="\n")

(2978, 7)
  row_from_station                      id                           name  \
0                1  ctNVGh_5lE6oiu3GQhmd3Q             Square Saint-Louis   
1                1  mqxddLiYeUp7IM9wHhUF-w           Place Émilie-Gamelin   
2                1  sK3Rb2TyDZQBhwu5l3sezw          Place Jacques-Cartier   
3                1  lmQg96Lly7OGsbrrum03rA           Esplanade Tranquille   
4                1  pZLKYd4glk9ow2_sUfVLtg  Centre de plein air de l'UQAM   

   rating review_count   latitude  longitude  
0     4.5           25  45.516898 -73.570043  
1     4.0           18  45.515356 -73.560004  
2     4.0           13  45.510918 -73.552780  
3     4.5            3  45.509597 -73.564954  
4     4.5            3  45.511899 -73.559646  


Put your parsed results into a DataFrame

# Write the parsed Yelp table to disk, leaving out the index column.
df_yelp.to_csv(path_or_buf='df_yelp.csv', index=False)

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

The Foursquare API returns more ratings per place, which makes its ratings more reliable, and it includes category IDs, which makes filtering simpler and more efficient. Among places with more than 10 ratings, Foursquare averages about 107 ratings per place, compared with about 63 reviews per place on Yelp.

Get the top 10 restaurants according to their rating

In [339]:
# Keep only the first row seen for each place id in either table.
foursquare_is_repeat = df_foursquare_20.duplicated(subset=["fsq_id"], keep='first')
df_foursquare_20_clean = df_foursquare_20[~foursquare_is_repeat]

yelp_is_repeat = df_yelp.duplicated(subset=["id"], keep='first')
df_yelp_clean = df_yelp[~yelp_is_repeat]

In [348]:
# Places whose name contains 'Parc' (case-insensitive). Matching on the `name`
# column alone is vectorised and cannot pick up an accidental hit in an id or
# numeric column, unlike stringifying every cell of every row.
# astype(str) turns missing names into text so they simply do not match.
is_park = df_foursquare_20_clean['name'].astype(str).str.contains('Parc', case=False)

# Require more than 10 ratings so that a place's rating rests on several votes.
has_enough_ratings = df_foursquare_20_clean['total_ratings'] > 10

df_foursquare_filter = df_foursquare_20_clean[is_park & has_enough_ratings]
print('Mean for total ratings is: ' + str(df_foursquare_filter['total_ratings'].mean()))
# Top 10 by rating (cell output)
df_foursquare_filter.sort_values(by="rating", ascending=False).head(10)

Mean for total ratings is: 106.64285714285714


Unnamed: 0,row_from_station,fsq_id,category_id,name,latitude,longitude,rating,total_ratings,total_photos,total_tips
9997,499,4ad8f749f964a520871621e3,16039,Mount Royal Park (Parc du Mont-Royal),45.503951,-73.587551,9.6,1195,1934,178
112,5,4ad4c06cf964a520cef920e3,16032,Parc la Fontaine,45.526246,-73.569666,9.5,774,1114,101
145,7,4bd49c8b6798ef3b6e02628d,16032,Parc Sir-Wilfrid-Laurier,45.531911,-73.587195,9.5,258,337,29
694,34,4ad4c06bf964a520b3f920e3,16000,Parc Jean-Drapeau,45.503715,-73.528995,9.4,439,892,46
6015,300,4ad8f5cbf964a520741621e3,16032,Parc Jarry,45.535206,-73.628544,9.4,244,453,26
1445,72,4b1b4905f964a5208bfa23e3,16032,Parc Westmount Park,45.48292,-73.599263,9.4,101,197,17
5616,280,4ad4c06cf964a520cff920e3,16039,Parc Maisonneuve,45.561433,-73.55508,9.3,159,267,22
13720,686,4bec8d6175b2c9b6b4af438d,16032,Parc-nature de l'Île de la Visitation,45.578625,-73.655992,9.2,69,124,10
2401,120,4de2cde718385df2b02d61ab,16032,Parc du Canal Lachine,45.496242,-73.553215,9.1,29,61,4
3091,154,4ad4c06cf964a520eaf920e3,16032,Parc Jeanne-Mance,45.516572,-73.584326,9.1,214,405,16


In [346]:
# Yelp places with more than 10 reviews.
df_yelp_clean_filter = df_yelp_clean.query('review_count > 10')

mean_review_count = df_yelp_clean_filter['review_count'].mean()
print("Mean for review_count is: " + str(mean_review_count))

# Top 10 by rating (cell output), taken from the frame filtered above.
df_yelp_clean_filter.sort_values(by="rating", ascending=False).head(10)


Mean for review_count is: 63.1


Unnamed: 0,row_from_station,id,name,rating,review_count,latitude,longitude
0,1,ctNVGh_5lE6oiu3GQhmd3Q,Square Saint-Louis,4.5,25,45.516898,-73.570043
49,24,E14VLV7RMLpoLhEVpPVQcQ,Vieux-Port de Montréal,4.5,113,45.499981,-73.553377
357,150,mWLqPts4kWK7-yRtQqOTJQ,Parc Sir-Wilfrid-Laurier,4.5,18,45.532297,-73.587525
265,111,EqPnsibvW20il8ArvvhUXg,Canal Lachine,4.5,23,45.467354,-73.590889
240,100,P5kh5APoGjBKNjnTw5gNlA,L'Oratoire Saint-Joseph du Mont-Royal,4.5,112,45.492446,-73.61755
234,99,7EnzO970Zdrd_gjIJJLfrQ,Parc Jarry,4.5,22,45.533015,-73.627192
124,63,pW5ZoeXfCfnrVWBXetKGaA,Parc René-Lévesque,4.5,13,45.428115,-73.67589
71,30,8KSAQCNTdGu7LIL0aRaP2A,Parc Angrignon,4.5,11,45.444536,-73.601743
61,24,iADEKQUoAJoK9llrj1cEtg,Habitat 67,4.5,14,45.498989,-73.544182
44,21,GQpPwAKp_z7Xdh67rrCd3g,Parc la Fontaine,4.5,55,45.527054,-73.569465
