In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
# imports libraries to access foursquare and yelp data
import numpy as np
import pandas as pd
import json
import requests
import sys

In [3]:
# define the data path:
data_path = '../data/'

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [4]:
# define the task:

# define a dictionary to store the POIs around each station
# since each POI may have multiple categories from the API, we will use the self-defined category_name as well
# documentation here: https://location.foursquare.com/places/docs/categories

# 'Restaurant': 13065
# 'Bar': 13003
# 'Park': 16032
# 'Transport Hub': 19030
# 'Fuel Station': 19007
# 'EV Charging Station' :19006
# 'Parking': 19020

# Reference:
# dict_POI_yelp = {'Restaurant': 'restaurants, All', 'Bar': 'bars, All', 'Park': 'parks, All', 
#                  'Transport Hub': 'transport, All', 'Fuel Station': 'servicestations, All', 'EV Charging Station': 'evchargingstations, All'}

dict_POI_FS = {'Restaurant': 13065, 'Bar': 13003, 'Park': 16032, 'Transport Hub': 19030, 'Fuel Station': 19007, 'EV Charging Station': 19006, 'Parking': 19020}

# define the radius and number of returns for each query
radius_value = 1000
return_limit = 50


In [5]:
# declare API keys as global variables, so they can be used in any function, they are not visible in the notebook.
global api_key_FS
# read the api key from the only value in the csv file
api_key_FS = pd.read_csv('~/foursquare api key.csv', header=None).values[0][0]

In [6]:
# read the target city's stations from the prvious step:
# DF_target_city_stations from the csv file
DF_target_city_stations = pd.read_csv(data_path+'target_city_stations_DF.csv')

In [7]:
# define the function to get the POIs around each station, by ll and POI category_ID:

def get_venues_ll_category_FS(ll,category_ID = '13003', radius_value=1000,result_FS_limit=50):
    '''
    This function gets the venues around a given location, by ll and category_ID.
    
    Parameters:
    ll: the location of the station, in the format of 'lat,lng'
    category_ID: the category_ID of the POI, default is '13003' for bars
    radius_value: the radius around the station to search for POIs, default is 1000m
    result_FS_limit: the maximum number of POIs to return, default is 50, which is also the maximum value allowed by the API free version.
    
    Reference: 
    https://developer.foursquare.com/docs/api-reference/venues/search/
    https://location.foursquare.com/places/docs/categories

    13065	Dining and Drinking > Restaurant
    13003	Dining and Drinking > Bar
    16032	Landmarks and Outdoors > Park
    19030	Travel and Transportation > Transport Hub
    19007	Travel and Transportation > Fuel Station
    19006	Travel and Transportation > Electric Vehicle Charging Station
    19020	Travel and Transportation > Parking

    Example:
    get_venues_ll_category_FS('43.6532,-79.3832','13003',1000,50)
    '''
    
    # requests URL:
    URL = 'https://api.foursquare.com/v3/places/search?'
    
    # course example:
    # https://data.compass.lighthouselabs.ca/days/w02d4/activities/160

    # define the headers and parameters:
    headers = {
        "accept": "application/json",
        "Authorization": api_key_FS
    }
    params = {}
    params['ll'] = ll
    params['radius'] = radius_value
    params['categories'] = category_ID
    params['limit'] = result_FS_limit
   
    response = requests.get(URL, headers=headers, params=params)
    data = response.json()
    return data



Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

Put your parsed results into a DataFrame

In [8]:
# before the looping, delete any existing csv file with the name like:
for POI_name, POI_ID in dict_POI_FS.items():
    # delete the existing csv file
    # replace the space in the POI_name with '_', so the file name is valid
    d = data_path+'POI_FS_'+POI_name.replace(' ', '_')+'_'+str(POI_ID)+'.csv'
    !rm $d

# call the function to get the POIs around each station, for each stations, for each POI category:

# for each POI category, save the result into a csv file, named as POI_FS_{category_ID}_{category_Name}.csv

# for loop for dict_POI_FS, key and value:
for POI_name, POI_ID in dict_POI_FS.items():
    # print to separate the output of each POI category, in yellow color:
    print('\033[93m' + '\n===============================' + '\033[0m')
    print("name: ", POI_name, "id: ", POI_ID)


    df_FS_POIs_around_stations = pd.DataFrame()

    for i in range(len(DF_target_city_stations)): # debug # range(10): #
        
        print('\n===============================')      
        print('station: ', DF_target_city_stations['station_name'][i]) 
        print("name: ", POI_name, "id: ", POI_ID)# debug
        
        # One option to tranfer the ll as an augument to the function:
        # cat the longitude and latitude of the station into a string, in the format of 'lat,lng' from the DF_target_city_stations
        # cat_string = str(DF_target_city_stations['latitude'][i])+','+str(DF_target_city_stations['longitude'][i])
        # call the function to get the POIs around each station, by ll and POI category_ID:
        # try: 
        result_FS = get_venues_ll_category_FS(str(DF_target_city_stations['latitude'][i])+','+str(DF_target_city_stations['longitude'][i]), str(POI_ID), radius_value, return_limit)

        # However, it requires more calculation, and the function can be more general, so another option is to pass the ll as a string.
        
        df_FS_normalize_1run_method = pd.json_normalize(result_FS['results'], record_path=['categories'], meta=['fsq_id',  'distance', 'link', 'name',
        'timezone'], record_prefix='categories_', errors='ignore') # 'chains', column is not always returned, so ignore it.

        # add the station_ID to  df_FS_normalize_1run_method, and fill it with the station ID from DF_target_city_stations['station_id'][i]
        df_FS_normalize_1run_method['station_id'] = DF_target_city_stations['station_id'][i]

        # add a station_name collumn to  df_FS_normalize_1run_method, and fill it with the station name from DF_target_city_stations['station_name'][i]
        df_FS_normalize_1run_method['station_name'] = DF_target_city_stations['station_name'][i]
        
        # add a POI_ID collumn to  df_FS_normalize_1run_method, and fill it with the POI_ID from dict_POI_FS[POI_ID]
        df_FS_normalize_1run_method['POI_category_ID'] = dict_POI_FS[POI_name]
        
        # add a POI_name collumn to  df_FS_normalize_1run_method, and fill it with the POI_name from dict_POI_FS[POI_ID]
        df_FS_normalize_1run_method['POI_category_name'] = POI_name
        
        # concat the  df_FS_normalize_1run_method to df_FS_POIs_around_stations
        df_FS_POIs_around_stations = pd.concat([df_FS_POIs_around_stations, df_FS_normalize_1run_method], axis=0)
        print(df_FS_POIs_around_stations.shape)
            
        # except:
            # print the error type:
            # print('Error code: ', sys.exc_info()[0])
        


    # re-order the columns in df_FS_POIs_around_stations, make it permanent. inplace=True
    # if df_FS_POIs_around_stations is empty, then skip this step
    # try except: if exception, show the error message, but continue the loop
    try:
        df_FS_POIs_around_stations = df_FS_POIs_around_stations[['POI_category_name', 'POI_category_ID', 'station_id', 'station_name', 'fsq_id', 'name', 'distance', 'timezone', 'link', 'categories_id', 'categories_name', 'categories_icon.prefix', 'categories_icon.suffix']]    
    except:
        print('Error code: ', sys.exc_info()[0])
    # rename the "name" column to "POI_name"
    df_FS_POIs_around_stations.rename(columns={'name': 'POI_name'}, inplace=True)
        
    # save the df_FS_POIs_around_stations to a csv file with file name surfix as POI_FS_{category_ID}_{category_Name}.csv
    # if exists, overwrite it.
    df_FS_POIs_around_stations.to_csv(data_path+'POI_FS_'+POI_name.replace(' ', '_')+'_'+str(POI_ID)+'.csv', index=False)

print('\nFS loop Done!')
    
    
    
    
    
    


rm: ../data/POI_FS_Restaurant_13065.csv: No such file or directory
rm: ../data/POI_FS_Bar_13003.csv: No such file or directory
rm: ../data/POI_FS_Park_16032.csv: No such file or directory
rm: ../data/POI_FS_Transport_Hub_19030.csv: No such file or directory
rm: ../data/POI_FS_Fuel_Station_19007.csv: No such file or directory
rm: ../data/POI_FS_EV_Charging_Station_19006.csv: No such file or directory
rm: ../data/POI_FS_Parking_19020.csv: No such file or directory
[93m
name:  Restaurant id:  13065

station:  10th & Cambie
name:  Restaurant id:  13065
(89, 13)

station:  Yaletown-Roundhouse Station
name:  Restaurant id:  13065
(170, 13)

station:  Dunsmuir & Beatty
name:  Restaurant id:  13065
(264, 13)

station:  12th & Yukon (City Hall)
name:  Restaurant id:  13065
(351, 13)

station:  8th & Ash
name:  Restaurant id:  13065
(429, 13)

station:  Spyglass & Seawall
name:  Restaurant id:  13065
(517, 13)

station:  Stamps Landing
name:  Restaurant id:  13065
(598, 13)

station:  Ontario &

In [41]:
# read back the csv files, into dataframe with the same name as the csv file name

# Find all the names under the data_path and like 'POI_FS_*.csv'
file_name_list_FS = !ls $data_path | grep 'POI_FS_.*.csv'

# create a list of dataframes, named as POI_FS_* and read the corresponding csv files
for file in file_name_list_FS:
    shape_of_it = None
    # print the file path and name
    print(file[:-4]) # .replace(' ', '_')[:-4])
    # create a dataframe named as file.replace(' ', '_')
    # read the csv file with the file name, withou the '.csv' surfix
    try:
        exec(file[:-4] + ' = pd.read_csv(data_path+file)')
        # check the shape of the dataframe, save to a variable named as shape_of_it
        exec('shape_of_it = '+file[:-4]+'.shape')
        print('shape_of_it: ', shape_of_it)    
        # if shape_of_it is NoneType, then print the error message
        if shape_of_it[0] == 0:
            print('\033[1;31m' + file[:-4] + ' is Empty!' + '\033[0m')
    except:
        print('Error code: ', sys.exc_info()[0])
        print('\033[1;31m' + file[:-4] + ' is Empty!' + '\033[0m')
        



POI_FS_Bar_13003
shape_of_it:  (14053, 13)
POI_FS_EV_Charging_Station_19006
shape_of_it:  (156, 13)
POI_FS_Fuel_Station_19007
shape_of_it:  (824, 13)
POI_FS_Park_16032
shape_of_it:  (3739, 13)
POI_FS_Parking_19020
shape_of_it:  (2650, 13)
POI_FS_Restaurant_13065
shape_of_it:  (21012, 13)
POI_FS_Transport_Hub_19030
shape_of_it:  (4996, 13)


In [10]:
# There do exist some stations that do not have any POIs in certain categories around them, so the dataframes are not empty. 
# This need to be noticed when doing the analysis.


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [11]:
# define the task:

# define a dictionary to store the POIs around each station
# since each POI may have multiple categories from the API, we will use the self-defined category_name as well
# documentation here: https://docs.developer.yelp.com/docs/resources-categories

# Restaurants (restaurants, All)
# Bars (bars, All)
# Parks (parks, All)
# Transportation (transport, All)
# Gas Stations (servicestations, All)
# EV Charging Stations (evchargingstations, All)
# Parking (parking, All)

dict_POI_yelp = {'Restaurant': 'restaurants, All', 'Bar': 'bars, All', 'Park': 'parks, All', 
                 'Transport Hub': 'transport, All', 'Fuel Station': 'servicestations, All', 'EV Charging Station': 'evchargingstations, All', 'Parking': 'parking, All'}


# define the radius and number of returns for each query
radius_value = 1000
return_limit = 50

In [12]:
# declare global variables
global api_key_yelp

api_key_yelp = pd.read_csv('~/yelp api key.csv', header=None).values[0][0]

api_key_yelp = "Bearer " + api_key_yelp

In [13]:
# read the target city's stations from the prvious step:
# DF_target_city_stations from the csv file
DF_target_city_stations = pd.read_csv(data_path+'target_city_stations_DF.csv')

In [14]:
def get_venues_ll_category_yelp(latitude, longitude ,category, radius_value=1000,result_limit=50):
    '''
    This function is to query the venues around a given latitude and longitude, with a given category
    
    Parameters:
    latitude: float, the latitude of the location
    longitude: float, the longitude of the location
    radius_value: int, the radius of the query, in meters
    result_limit: int, the number of results to return, max is 50
    
    Reference:
    https://docs.developer.yelp.com/reference/v3_business_search
    https://docs.developer.yelp.com/docs/resources-categories

    # Restaurants (restaurants, All)
    # Bars (bars, All)
    # Parks (parks, All)
    # Transportation (transport, All)
    # Gas Stations (servicestations, All)
    # EV Charging Stations (evchargingstations, All)    
    # Parking (parking, All)
    
    Example:
    get_venues_ll_category_yelp(40.730610, -73.935242, 'bars, All', 1000, 50)
    '''    
    
    
    URL = "https://api.yelp.com/v3/businesses/search?"

    headers = {
        "accept": "application/json",
        "Authorization": api_key_yelp
    }
    params = {}
    params['latitude'] = latitude
    params['longitude'] = longitude
    params['radius'] = radius_value
    params['categories'] = category 
    params['limit'] = result_limit
    params['sort_by'] =  'distance' # 'best_match' # 'rating', 'review_count', 'distance'
   

    response = requests.get(URL, headers=headers, params = params) # requests using the params_string

    data = response.json()

    return data


In [15]:
# before the looping, delete any existing csv file with the name like:
for POI_name, POI_ID in dict_POI_yelp.items():
    # delete the existing csv file
    # replace the space in the POI_name with '_', so the file name is valid
    # replace the ',_' in the POI_ID with '_', so the file name is valid
    d = data_path+'POI_yelp_'+POI_name.replace(' ', '_')+'_'+POI_ID.replace(', ', '_')+'.csv'
    !rm $d

# call the function to get the POIs around each station, for each stations, for each POI category:

rm: ../data/POI_yelp_Restaurant_restaurants_All.csv: No such file or directory
rm: ../data/POI_yelp_Bar_bars_All.csv: No such file or directory
rm: ../data/POI_yelp_Park_parks_All.csv: No such file or directory
rm: ../data/POI_yelp_Transport_Hub_transport_All.csv: No such file or directory
rm: ../data/POI_yelp_Fuel_Station_servicestations_All.csv: No such file or directory
rm: ../data/POI_yelp_EV_Charging_Station_evchargingstations_All.csv: No such file or directory
rm: ../data/POI_yelp_Parking_parking_All.csv: No such file or directory


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

Put your parsed results into a DataFrame

In [16]:
for POI_name, POI_ID in dict_POI_yelp.items():
    print('\033[93m' + '\n===============================' + '\033[0m')    
    print("name: ", POI_name, "id: ", POI_ID)
    
    # for each entry in DF_target_city_stations, find the POIs around it:
    df_yelp_POIs_around_stations = pd.DataFrame()

    for i in range(len(DF_target_city_stations)): # debug, only run for the first station # range(3) : # 

        print('\n===============================')       
        print('station: ', DF_target_city_stations['station_name'][i]) 
        print("name: ", POI_name, "id: ", POI_ID)# debug
        # try:
        result_yelp = get_venues_ll_category_yelp(DF_target_city_stations['latitude'][i], DF_target_city_stations['longitude'][i], POI_ID)

        df_yelp_normalize_1run_method = pd.json_normalize(result_yelp['businesses'], record_path=['categories'], meta=['id', 'alias', 'name', 'image_url', 'is_closed', 'url', 'review_count', 'rating', 'price', 'phone',
                                        'display_phone', 'distance'], record_prefix='categories_', errors='ignore') # 'transactions', column is not always returned and raising error, so ignore it.

        # add the station_ID to  df_yelp_normalize_1run_method, and fill it with the station ID from DF_target_city_stations['station_id'][i]
        df_yelp_normalize_1run_method['station_id'] = DF_target_city_stations['station_id'][i]

        # add a station_name collumn to  df_yelp_normalize_1run_method, and fill it with the station name from DF_target_city_stations['station_name'][i]
        df_yelp_normalize_1run_method['station_name'] = DF_target_city_stations['station_name'][i]
        
        # add a POI_ID collumn to  df_FS_normalize_1run_method, and fill it with the POI_ID from dict_POI_FS[POI_ID]
        df_yelp_normalize_1run_method['POI_category_ID'] = dict_POI_FS[POI_name]
        
        # add a POI_name collumn to  df_FS_normalize_1run_method, and fill it with the POI_name from dict_POI_FS[POI_ID]
        df_yelp_normalize_1run_method['POI_category_name'] = POI_name
        
        
        # concat the  df_yelp_normalize_1run_method to df_yelp_POIs_around_stations
        df_yelp_POIs_around_stations = pd.concat([df_yelp_POIs_around_stations, df_yelp_normalize_1run_method], axis=0)
        print(df_yelp_POIs_around_stations.shape)
            
        # except:
            # print('Error code: ', sys.exc_info()[0])

    
    try:
        df_yelp_POIs_around_stations = df_yelp_POIs_around_stations[['POI_category_name', 'POI_category_ID', 'station_id', 'station_name', 'id', 'alias', 'name',
                'image_url', 'is_closed', 'url', 'review_count', 'rating', 'price',
                'phone', 'display_phone', 'distance', 'categories_alias', 'categories_title']]
    except:
        print('Error code: ', sys.exc_info()[0])
    
    # rename columns: id -> POI_id, alias -> POI_alias, name -> POI_name
    df_yelp_POIs_around_stations = df_yelp_POIs_around_stations.rename(columns={'id': 'POI_id', 'alias': 'POI_alias', 'name': 'POI_name'})
    
    # save to a csv file corresponding to the POI category
    df_yelp_POIs_around_stations.to_csv(data_path+'POI_yelp_'+POI_name.replace(' ', '_')+'_'+POI_ID.replace(', ', '_').replace(' ', '_')+'.csv', index=False)



print('\nyelp loop Done!')

[93m
name:  Restaurant id:  restaurants, All

station:  10th & Cambie
name:  Restaurant id:  restaurants, All
(93, 18)

station:  Yaletown-Roundhouse Station
name:  Restaurant id:  restaurants, All
(193, 18)

station:  Dunsmuir & Beatty
name:  Restaurant id:  restaurants, All
(280, 18)

station:  12th & Yukon (City Hall)
name:  Restaurant id:  restaurants, All
(372, 18)

station:  8th & Ash
name:  Restaurant id:  restaurants, All
(465, 18)

station:  Spyglass & Seawall
name:  Restaurant id:  restaurants, All
(569, 18)

station:  Stamps Landing
name:  Restaurant id:  restaurants, All
(668, 18)

station:  Ontario & Seawall
name:  Restaurant id:  restaurants, All
(771, 18)

station:  Canada Place
name:  Restaurant id:  restaurants, All
(864, 18)

station:  Granville & Georgia
name:  Restaurant id:  restaurants, All
(957, 18)

station:  Pender & Burrard
name:  Restaurant id:  restaurants, All
(1040, 18)

station:  Marinaside & Davie
name:  Restaurant id:  restaurants, All
(1140, 18)

stat

In [17]:
# read back the data and check the shape:

# Find all the names under the data_path and like 'POI_yelp_*.csv'
file_name_list_yelp = !ls $data_path | grep 'POI_yelp_.*.csv'
file_name_list_yelp

# create a list of dataframes, named as POI_yelp_* and read the corresponding csv files
for file in file_name_list_yelp:
    shape_of_it = None
    # print the file path and name
    print(file[:-4]) # .replace(' ', '_')[:-4])
    # create a dataframe named as file.replace(' ', '_')
    # read the csv file with the file name, withou the '.csv' surfix
    try:
        exec(file[:-4] + ' = pd.read_csv(data_path+file)')
        # check the shape of the dataframe, save to a variable named as shape_of_it
        exec('shape_of_it = '+file[:-4]+'.shape')
        print('shape_of_it: ', shape_of_it)    
        # if shape_of_it is NoneType, then print the error message
        if shape_of_it[0] == 0:
            print('\033[1;31m' + file[:-4] + ' is Empty!' + '\033[0m')
    except:
        print('Error code: ', sys.exc_info()[0])
        print('\033[1;31m' + file[:-4] + ' is Empty!' + '\033[0m')
        


['POI_yelp_Bar_bars_All.csv',
 'POI_yelp_EV_Charging_Station_evchargingstations_All.csv',
 'POI_yelp_Fuel_Station_servicestations_All.csv',
 'POI_yelp_Park_parks_All.csv',
 'POI_yelp_Parking_parking_All.csv',
 'POI_yelp_Restaurant_restaurants_All.csv',
 'POI_yelp_Transport_Hub_transport_All.csv']

POI_yelp_Bar_bars_All
shape_of_it:  (15243, 18)
POI_yelp_EV_Charging_Station_evchargingstations_All
shape_of_it:  (99, 18)
POI_yelp_Fuel_Station_servicestations_All
shape_of_it:  (904, 18)
POI_yelp_Park_parks_All
shape_of_it:  (2902, 18)
POI_yelp_Parking_parking_All
shape_of_it:  (1023, 18)
POI_yelp_Restaurant_restaurants_All
shape_of_it:  (22723, 18)
POI_yelp_Transport_Hub_transport_All
shape_of_it:  (5120, 18)


In [74]:
# compare the shape of those dataframs by the variable names: _yelp, _FS_ generate a dataframe to store the shape of each dataframe

# extract the string between the 2nd '_' and the 3rd '_' from the keys of dict_POI_yelp, into a list keep the order
table_columns = [file_name_list_FS[i].split('_')[2] for i in range(len(file_name_list_FS))]

# table value to eval the shape of the dataframe with the name in the list: file_name_list_FS
table_value_FS = [eval(file_name_list_FS[i][:-4] + '.shape') for i in range(len(file_name_list_FS))]

table_value_yelp = [eval(file_name_list_yelp[i][:-4] + '.shape') for i in range(len(file_name_list_yelp))]

# create a dataframe using: table_columns as columns; table_value_FS and table_value_yelp as values
# table_columns 's 1st row the table_value_FS, 2nd row the table_value_yelp

table_compare = pd.DataFrame([table_value_FS, table_value_yelp],columns=table_columns, index=['FS', 'yelp'])

# add one column 'Total' to: sum the first elements of each tuple as the first elements of the result tuple, and average of the second element of each tuple as the second element of the result tuple
table_compare['Total'] = table_compare.apply(lambda x: (sum([x[i][0] for i in range(len(x))]), round(sum([x[i][1] for i in range(len(x))])/len(x))), axis=1)

# in a new table called table_compare_readable, compare the two values in the two rows in each column, if the first element of tuptle is larger,write a string "win" in the cell, else write a string "lose" in the cell
# table_compare_readable = table_compare.copy()
table_compare_readable = table_compare.copy()
for i in range(len(table_compare.columns)):
    if table_compare.iloc[0,i][0] > table_compare.iloc[1,i][0]:
        table_compare_readable.iloc[0,i] = 'win'
        table_compare_readable.iloc[1,i] = 'lose'
    else:
        table_compare_readable.iloc[0,i] = 'lose'
        table_compare_readable.iloc[1,i] = 'win'

# count the win or lose each row get, and add a new column 'Total' to the table_compare_readable
table_compare_readable['Total WinLose'] = table_compare_readable.apply(lambda x: (sum([x[i] == 'win' for i in range(len(x))]), sum([x[i] == 'lose' for i in range(len(x))])), axis=1)

table_compare_readable

Unnamed: 0,Bar,EV,Fuel,Park,Parking,Restaurant,Transport
FS,"(14053, 13)","(156, 13)","(824, 13)","(3739, 13)","(2650, 13)","(21012, 13)","(4996, 13)"
yelp,"(15243, 18)","(99, 18)","(904, 18)","(2902, 18)","(1023, 18)","(22723, 18)","(5120, 18)"


Unnamed: 0,Bar,EV,Fuel,Park,Parking,Restaurant,Transport,Total
FS,"(14053, 13)","(156, 13)","(824, 13)","(3739, 13)","(2650, 13)","(21012, 13)","(4996, 13)","(47430, 13)"
yelp,"(15243, 18)","(99, 18)","(904, 18)","(2902, 18)","(1023, 18)","(22723, 18)","(5120, 18)","(48014, 18)"


Unnamed: 0,Bar,EV,Fuel,Park,Parking,Restaurant,Transport,Total,Total WinLose
FS,lose,win,lose,win,win,lose,lose,lose,"(3, 5)"
yelp,win,lose,win,lose,lose,win,win,win,"(5, 3)"


# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

* By similar query parameters, we can see that the yelp data has 18 columns, while the FS data has 13 columns. It means yelp return more data columns than FS, for example, the yelp data has the column 'review_count' and 'rating'.

* By comparing the number of returns in each category and subtotal, we can get the win/lose summary for the two sites. Yelp wins in 4 categories and the subtotal, while FS wins in 3 categories and loses the subtotal.

* Therefore, we can conclude that the yelp data is more comprehensive than the FS data.

Get the top 10 restaurants according to their rating

In [87]:
# There are too many 5 star rating restaurants, so the review_count is needed to be considered.
# sort the review and POI_yelp_Restaurant_restaurants_All, by rating, review_count. list the three columns: POI_name, rating, review_count
# due to the duplicated categories, subset it by the columns: 'POI_name', 'rating', 'review_count'. Then drop the duplicated rows. inplace = False
POI_yelp_Restaurant_restaurants_All[['POI_name', 'rating', 'review_count']].drop_duplicates().sort_values(by=['rating', 'review_count'], ascending=False).head(10)




Unnamed: 0,POI_name,rating,review_count
2251,Manoush'eh,5.0,224
3176,Incognito Coffee,5.0,146
15002,Number e food,5.0,135
1537,Smithe Salad,5.0,62
3600,Mazahr Lebanese Kitchen,5.0,38
10470,The Garden Strathcona,5.0,36
10505,Caffe La Tana,5.0,30
1891,Arike Restaurant,5.0,25
750,Blaze Gourmet Burgers,5.0,19
4933,Good Dogs Plant Foods,5.0,19
