In [2]:
# import necessary libraries
import numpy as np
import pandas as pd
import json
import requests as req
import os

# set the path where the data is stored
assets_path = '../data/'

# read the city bikes data and select the columns 'Latitude' and 'Longitude'
cb = pd.read_csv(assets_path + 'citybikes.csv',
                 usecols=['Latitude', 'Longitude', 'Id'])

# set the Point of Interest (POI) types and their corresponding codes
POI = {'restaurants': 13065, 'bars': 13003,
       'parking': 19020, 'museums': 10027,
       'servicestations': 19007}


# set the search parameters: limit of results and search radius
limit = 50
radius = 200

# set the name of the city to search for POIs
city = 'Vancouver'


# Foursquare


- Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice.
- Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)
- Put your parsed results into a DataFrame

In [14]:
# Retrieve the Foursquare API key from environment variables
FOUR_SQUARE_API = os.getenv('FOURSQUARE')

# Define the base URL and endpoint for the Foursquare API
base_url = "https://api.foursquare.com"
endpoint = '/v3/places/search'

# Define a function to retrieve data from the Foursquare API
def get_fs_data(df, category_id):

    # Initialize an empty list to store the raw data from Foursquare
    foursquare_raw = []

    # Iterate over the rows in the input dataframe
    for index, row in df.iterrows():

        # Define the parameters for the API request
        params = f"?ll={row['Latitude']}%2C{row['Longitude']}&radius={radius}&categories={str(category_id)}&limit={limit}"
        full_url = f"{base_url}{endpoint}{params}"

        # Define the headers for the API request
        headers = {
            "accept": "application/json",
            "Authorization": f"{FOUR_SQUARE_API}"
        }

        # Send the API request and retrieve the results
        res = req.get(full_url, headers=headers).json()['results']

        # Iterate over the results and add the relevant data to the foursquare_raw list
        for i in res:
            try:
                distance = int(i.get('distance', None))
            except KeyError:
                print("Distance not found, skipping iteration...")
                continue
            if distance > radius:
                continue    
            foursquare_raw.append({
                "fsq_id": i['fsq_id'],
                "category_id": category_id,
                "chains": i['chains'],
                "distance": distance,
                "latitude": float(i['geocodes']['main']['latitude']),
                "longitude": float(i['geocodes']['main']['longitude']),
                "location_country": i.get('location', {}).get('country', None),
                "location_cross_street": i.get('location', {}).get('cross_street', None),
                "location_formatted_address": i['location']['formatted_address'],
                "location_locality": i.get('location', {}).get('locality', None),
                "location_postcode": i.get('location', {}).get('postcode', None),
                "location_region": i.get('location', {}).get('region', None),
                "location_timezone": i.get('timezone', None),
                "name": i['name'],
                "bike_station_id": row['Id'],
            })

    # Return the raw data from Foursquare
    return foursquare_raw


In [15]:
# Calling a function 'get_fs_data' to retrieve financial statement data and creating a dataframe 'fs_data' to store it
fs_data = pd.DataFrame(get_fs_data(cb,POI['restaurants']))

# Saving the dataframe 'fs_data' as a CSV file to the specified path
fs_data.to_csv(assets_path + 'fs_data.csv', index=False)

# Reading the saved CSV file from the specified path and storing the data in the dataframe 'fs_data'
fs_data = pd.read_csv(assets_path + 'fs_data.csv')

# Displaying the first few rows of the dataframe 'fs_data'
fs_data.head()

Unnamed: 0,fsq_id,category_id,chains,distance,latitude,longitude,location_country,location_cross_street,location_formatted_address,location_locality,location_postcode,location_region,location_timezone,name,bike_station_id
0,4c05d5ec761ac9b6fc2a2074,13065,[],12,49.281113,-123.104179,CA,at Hastings St.,"428 Carrall St (at Hastings St.), Vancouver BC...",Vancouver,V6B 2J7,BC,America/Vancouver,Calabash Bistro,97b25faf822b93fdbf72da1d594c0288
1,4fae01d3e4b0861eac7a985a,13065,[],80,49.281911,-123.10449,CA,btwn Cordova & Hastings,"350 Carrall St (btwn Cordova & Hastings), Vanc...",Vancouver,V6B 2J3,BC,America/Vancouver,Pidgin,97b25faf822b93fdbf72da1d594c0288
2,4dc9a94cd22d2af63302269c,13065,[],114,49.282251,-123.104519,CA,btwn Cordova & Hastings,"315 Carrall St (btwn Cordova & Hastings), Vanc...",Vancouver,V6B 2J4,BC,America/Vancouver,Nelson the Seagull,97b25faf822b93fdbf72da1d594c0288
3,5165db75e4b09027efef5d80,13065,[],112,49.282194,-123.104514,CA,btwn Cordova & Hastings,"319 Carrall St (btwn Cordova & Hastings), Vanc...",Vancouver,V6B 2J4,BC,America/Vancouver,East Van Roasters,97b25faf822b93fdbf72da1d594c0288
4,52f53570498ee24640cf0d17,13065,[],190,49.282729,-123.104884,CA,btwn Abbott & Carrall,"15 Cordova St W (btwn Abbott & Carrall), Vanco...",Vancouver,V6B 1C8,BC,America/Vancouver,Tacofino,97b25faf822b93fdbf72da1d594c0288


# Yelp


- Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice.
- Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)
- Put your parsed results into a DataFrame


In [22]:
# Retrieving Yelp API key from environment variables
YELP_API = os.getenv('YELP')

# Setting base URL and endpoint for Yelp API
base_url = "https://api.yelp.com"
endpoint = '/v3/businesses/search'

# Defining function to retrieve Yelp data
def get_yelp_data(df, category_name):
    # Initializing empty list to store Yelp data
    yelp_raw = []

    for index, row in df.iterrows():
            # Building API query parameters
            params = f"?latitude={row['Latitude']}&longitude={row['Longitude']}&radius={radius}&categories={category_name}&sort_by=best_match&limit={limit}"
            # Combining base URL, endpoint, and query parameters to form full API URL
            full_url = f"{base_url}{endpoint}{params}"
            # Adding authorization header with Yelp API key
            headers = {
                "accept": "application/json",
                "Authorization": f"Bearer {YELP_API}"
            }
            # Making API request and retrieving JSON response
            res = req.get(full_url, headers=headers).json()['businesses']
            
            # Looping through each business in the response and adding relevant data to the list
            for i in res:
                
                try:
                    distance = int(i.get('distance', None))
                except KeyError:
                    print("Distance not found, skipping iteration...")
                    continue
                if distance > radius:
                    continue             
                # try:
                #     price = i['price']
                # except KeyError:
                #     print(json.dumps(i, indent=6))
                #     price = None
                yelp_raw.append({
                    'name': i['name'],
                    "yelp_id": i.get('id', None),
                    "category_id": POI[category_name],
                    'rating': float(i.get('rating', None)),
                    'review_count': int(i.get('review_count', None)),
                    'price': i.get('price', None),
                    'distance': distance,
                    "latitude": float(i.get('coordinates', {}).get('latitude', None)),
                    "longitude": float(i.get('coordinates', {}).get('longitude', None)),
                    "bike_station_id": row['Id'],
                })

    # Returning the list of Yelp data
    return yelp_raw


In [23]:
# Create a pandas DataFrame object by calling the `get_yelp_data()` function, passing in two arguments: 
# `cb` (a Yelp API client object) and `POI` (a point of interest location).
yelp_data = pd.DataFrame(get_yelp_data(cb,'restaurants'))

# Save the DataFrame as a CSV file, excluding the index column, to the specified file path in the assets directory.
yelp_data.to_csv(assets_path + 'yelp_data.csv', index=False)

# Read the CSV file containing Yelp data into a new DataFrame object.
yelp_data = pd.read_csv(assets_path + 'yelp_data.csv')

# Display the first 5 rows of the DataFrame to check that data was loaded successfully.
yelp_data.head()


Unnamed: 0,name,yelp_id,category_id,rating,review_count,price,distance,latitude,longitude,bike_station_id
0,Calabash Bistro,nNhGmSP8xcBAWokkNv0m1w,13065,4.0,285,$$,23,49.281004,-123.104228,97b25faf822b93fdbf72da1d594c0288
1,PiDGiN,H4h_nwZW7f2Yj9ONdzGxHQ,13065,4.0,315,$$$,83,49.281907,-123.104189,97b25faf822b93fdbf72da1d594c0288
2,Aiyaohno Cafe,JoUEZWB0y7tREmRPVJj24A,13065,4.5,9,,28,49.28108,-123.1049,97b25faf822b93fdbf72da1d594c0288
3,Caveman Cafe,ljnjap6GNVMvS1DNzgDOYA,13065,4.5,270,$$,194,49.280802,-123.107035,97b25faf822b93fdbf72da1d594c0288
4,Di Beppe,G42wPpAtYNSoX5QxEt6pVg,13065,4.0,133,$$,134,49.282377,-123.1045,97b25faf822b93fdbf72da1d594c0288


# Comparing Results


Which API provided you with more complete data? Provide an explanation.


- FS provides only companies names, categories and coordinates whereas YELP gives basic coordinates to find place and on top of that rating, review count, price category and others(not included)
- FS returns more data, but Yelp's data is more comprehensive.

In [63]:
fs_data.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 632 entries, 0 to 631
Data columns (total 15 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   fsq_id                      632 non-null    object 
 1   category_id                 632 non-null    int64  
 2   chains                      632 non-null    object 
 3   distance                    632 non-null    int64  
 4   latitude                    632 non-null    float64
 5   longitude                   632 non-null    float64
 6   location_country            632 non-null    object 
 7   location_cross_street       301 non-null    object 
 8   location_formatted_address  632 non-null    object 
 9   location_locality           632 non-null    object 
 10  location_postcode           610 non-null    object 
 11  location_region             632 non-null    object 
 12  location_timezone           470 non-null    object 
 13  name                        632 non

In [64]:
yelp_data.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 526 entries, 0 to 525
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   name             526 non-null    object 
 1   yelp_id          526 non-null    object 
 2   category_id      526 non-null    int64  
 3   rating           526 non-null    float64
 4   review_count     526 non-null    int64  
 5   price            308 non-null    object 
 6   distance         526 non-null    float64
 7   latitude         526 non-null    float64
 8   longitude        526 non-null    float64
 9   bike_station_id  526 non-null    object 
dtypes: float64(4), int64(2), object(4)
memory usage: 41.2+ KB


Get the top 10 restaurants according to their rating


In [65]:
yelp_data.sort_values('rating', ascending=False).head(10)


Unnamed: 0,name,yelp_id,category_id,rating,review_count,price,distance,latitude,longitude,bike_station_id
313,The Golden Horn Turkish Bakery & Cafe,rZNgrcmNQWqxS54FiNvxJw,13065,5.0,9,,45.371624,49.259811,-123.115283,07b13214cb69f2ec007d888450f3d6bc
14,Adriatic Divine Sandwiches,i2p09JzKNnHsy_QwxJvKnQ,13065,5.0,1,,154.080927,49.281616,-123.102443,97b25faf822b93fdbf72da1d594c0288
28,Juicy Joe’s,wyp6wDEkl7zMlrcP9pIalA,13065,5.0,1,,192.950886,49.26599,-123.138767,57e928d8c30fab0ebffb3ede850f89d6
117,Tamam To Go,HwdvolydGGGqQSpheBrNsA,13065,5.0,1,,175.872023,49.27461,-123.06918,b562f08327af465e4531e20226789489
25,Carnitas El Rolys,4yivO9_j79NhBXMta0suvg,13065,5.0,1,,60.758199,49.267359,-123.141279,57e928d8c30fab0ebffb3ede850f89d6
24,The Frying Irishman,0vjKqJuAM69B8psdq2NUoA,13065,5.0,1,,60.758199,49.26736,-123.14128,57e928d8c30fab0ebffb3ede850f89d6
146,Manoush'eh,K1nbiOrySlw_-XG-3NmErQ,13065,5.0,225,$,145.373687,49.276671,-123.125701,e9b37f2d9b7b2e2e3ade73f13acb69b2
132,VV Tapas Lounge,6qrRd4bXgVcJjXQEnpfi-Q,13065,5.0,11,,118.246633,49.281163,-123.083047,b428c0e64d9b9fdb3bf8e85c95ffdf6b
260,Livni Café & Bakery,tUh4NuINdnkbVF16iPc2wQ,13065,5.0,4,,64.821245,49.264018,-123.155748,2681455ba2d97283998a806b14d3e95c
253,Gong cha,zSI_0yNVgHyrcgV3rciOvg,13065,5.0,1,,168.117529,49.27905,-123.11695,0e0aef6fabaa808b3a3cef24657257a4
