In [1]:
# imports

import requests
import pandas as pd


# Foursquare

## Julie's Notes:
From Foursquare categories list (https://location.foursquare.com/places/docs/categories), I compiled the following categories on which I will do a count of number of each

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

Put your parsed results into a DataFrame

# Yelp

## Julie's Notes:

From Yelp categories list (https://docs.developer.yelp.com/docs/resources-categories), I compiled the following categories (sometimes combined) which I will send with the "categories" query parameter for each bike station location:

<img src='../images/yelp_categories.png'>




Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [5]:
import requests
import os

YELP_KEY = os.getenv('YELP_API_KEY')

In [60]:
# Substitute this until we can get it working
import json
json_file = open('C:/Users/raref/Lighthouse/W05D02_Data_Wrangling_Challenge_Walkthrough/Other_data_types_exercise/payload_postman_cambie_yelp_coffee.json', 'r')
payload_dict = json.load(json_file)
payload_dict

{'businesses': [{'id': '6iOAgzJ0DRZNSKA3FSrrOg',
   'alias': 'la-taqueria-pinche-taco-shop-vancouver',
   'name': 'La Taqueria Pinche Taco Shop',
   'image_url': 'https://s3-media1.fl.yelpcdn.com/bphoto/fjhIj3XKuQ4mquD4Mg8OoQ/o.jpg',
   'is_closed': False,
   'url': 'https://www.yelp.com/biz/la-taqueria-pinche-taco-shop-vancouver?adjust_creative=5xMXIdPreqyQMiwJq_zdCg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=5xMXIdPreqyQMiwJq_zdCg',
   'review_count': 683,
   'categories': [{'alias': 'mexican', 'title': 'Mexican'}],
   'rating': 4.0,
   'coordinates': {'latitude': 49.263559, 'longitude': -123.112736},
   'transactions': [],
   'price': '$$',
   'location': {'address1': '2450 Yukon Street',
    'address2': '',
    'address3': '',
    'city': 'Vancouver',
    'zip_code': 'V5Z 3V6',
    'country': 'CA',
    'state': 'BC',
    'display_address': ['2450 Yukon Street',
     'Vancouver, BC V5Z 3V6',
     'Canada']},
   'phone': '+16045582549',
   'display_phone': 

In [61]:
# Define function that will make the GET request to Yelp
def yelp_get_request_business_search(station_latitude, station_longitude, radius, categories, API_KEY):

    base_yelp_endpoint = 'https://api.yelp.com/v3'
    business_search = '/businesses/search'

    # Query Parameters
    limit = 50  # Always get as many as allowed
    sort_by = 'best_match'

    # Craft the request_url:
    request_url = base_yelp_endpoint + business_search + \
    '?' + \
    'latitude=' + str(station_latitude) + \
    '&longitude=' + str(station_longitude) + \
    '&radius=' + str(radius) + \
    '&categories=' + categories + \
    '&limit=' + str(limit) + \
    '&sort_by=' + sort_by

    header_dict = {
        'accept': 'application/json',
        'Authorization' : 'Bearer ' + API_KEY
        }
    
    print(f"Inside yelp_get_request: request_url = {request_url}, header_dict = {header_dict}")

    # Make the call, get response out
    response = requests.get(request_url, headers=header_dict)

    # Return the payload_dict to caller
    return response.json()
    

# Define function that will create the default yelp_dict for each GET request:
def create_default_yelpdict(yelpdict):
    yelpdict.clear()
    yelpdict['station_id']= []
    yelpdict['place_id'] = []
    yelpdict['name'] = []
    yelpdict['distance'] = []
    yelpdict['address'] = []
    yelpdict['city'] = []
    yelpdict['postal'] = []
    yelpdict['review_count'] = []
    yelpdict['rating'] = []
    yelpdict['category_id'] = []
    yelpdict['category_name'] = []
    yelpdict['query_categories'] = []


# Define function that will parse the JSON-formatted response
#def yelpdict_from_response(stationid, category_id, category_name, yelpdict, jsonpayload):
def yelpdict_from_response(stationid, yelpdict, jsonpayload, query_categories):

    #print(f"I'm in yelpdict_from_response, and yelpdict is:\n{yelpdict}")
    businesses_array = jsonpayload['businesses']
    for biz in businesses_array:
        yelpdict['station_id'].append(stationid)
        yelpdict['place_id'].append(biz['id'])
        yelpdict['name'].append(biz['name'])
        yelpdict['distance'].append(biz['distance'])
        yelpdict['address'].append(biz['location']['address1'])
        yelpdict['city'].append(biz['location']['city'])
        yelpdict['postal'].append(biz['location']['zip_code'])
        yelpdict['review_count'].append(biz['review_count'])
        yelpdict['rating'].append(biz['rating'])

        categories_array = biz['categories']
        alias_string = ''
        title_string = ''
        for entry in categories_array:
            alias_string += entry['alias'] + '|'
            title_string += entry['title'] + '|'
        
        alias_string = alias_string[:-1]
        title_string = title_string[:-1]
        yelpdict['category_id'].append(alias_string)
        yelpdict['category_name'].append(title_string)
        yelpdict['query_categories'].append(query_categories)

        # print(f"{stationid}")
        # print(f"{biz['id']}")
        # print(f"{biz['name']}")
        # print(f"{biz['distance']}")
        # print(f"{biz['location']['address1']}")
        # print(f"{biz['location']['city']}")
        # print(f"{biz['location']['zip_code']}")
        # print(f"{biz['review_count']}")
        # print(f"{biz['rating']}")
        # print(f"{category_id}")
        # print(f"{category_name}")

    return yelpdict

In [39]:
mystation_latitude = 49.274566
mystation_longitude = -123.121817
mystation_id = '32603a87cfca71d0f7dfa3513bad69d5'
myradius=1000
mycategories='beaches,parks,bicyclepaths,mountainbiking,museums,hostels,hotels'
myresponse = yelp_get_request_business_search(mystation_latitude, mystation_longitude, myradius, mycategories, YELP_KEY)
print(myresponse)
#yelp_get_request_business_search(station_latitude, station_longitude, radius, categories, API_KEY)

Inside yelp_get_request: request_url = https://api.yelp.com/v3/businesses/search?latitude=49.274566&longitude=-123.121817&radius=1000&categories=beaches,parks,bicyclepaths,mountainbiking,museums,hostels,hotels&limit=50&sort_by=best_match, header_dict = {'accept': 'application/json', 'Authorization': 'Bearer 6xI0usznZmRlsdcnN1DIEq7JF-iQbgkdkbfFbMrM99-4E2DWENHZnL2WmBZ0iUTcLxYksUg1AOMK9Q3FZzlJkDBXoMPTO6ngG_8FvSguqNZ3tBvYVDLT5Z7AHZsxZXYx'}
{'businesses': [{'id': 'DIf1ux1zR8cHp9neCEoyYg', 'alias': 'opus-hotel-vancouver-vancouver-2', 'name': 'OPUS Hotel Vancouver', 'image_url': 'https://s3-media4.fl.yelpcdn.com/bphoto/NY1Y1pQyXN9B49KodFqZHA/o.jpg', 'is_closed': False, 'url': 'https://www.yelp.com/biz/opus-hotel-vancouver-vancouver-2?adjust_creative=5xMXIdPreqyQMiwJq_zdCg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=5xMXIdPreqyQMiwJq_zdCg', 'review_count': 179, 'categories': [{'alias': 'hotels', 'title': 'Hotels'}], 'rating': 4.5, 'coordinates': {'latitude': 49.274667

In [42]:
type(myresponse)
bus_array = myresponse['businesses']
len(bus_array)

50

In [81]:
# Define Yelp query string categories / category names that will be sent in GET requests
# Adjusted - this was too many API calls
# category_mapping = {
#     "beaches,parks": "Beaches, Parks",
#     "bicyclepaths,mountainbiking": "Bicycle Paths, Mountain Biking",
#     "museums": "Museums",
#     "coffee": "Coffee & Tea",
#     "juicebars": "Juice Bars & Smoothies",
#     "hostels,hotels": "Hotels, Hotels"
# }

# Temporary 2nd YELP KEY with jtwleung:  TODO:  Don't forget to delete below
#YELP_KEY = 'wHKvbUKt45iPv0vitouFdnQ5M75dq8bw8WD1RfBpIvE11xMwXATMRbsQ80iL9nKpVJU4JZKbINPdznr9DaMDbeWQ_prpxl3amhNLSVoaWBaOPTiSQen3SEZ0oCEyZXYx'

# Categories
#categories = ['beaches,parks,bicyclepaths,mountainbiking,museums,hostels,hotels', 'coffee']
categories = ['beaches,parks,bicyclepaths,mountainbiking', 'museums', 'hostels,hotels']

# Radius
radius = 1000

# Load the citybikes dataframe
stations_df = pd.read_csv('../data/citybikes_vancouver.csv')

#print(f"stations_df.shape[0] = {stations_df.shape[0]}")

# Generate the list of station_ids
station_ids_list = stations_df['id'].tolist()
station_ids_list = sorted(station_ids_list)

# Break into partition sizes to maximize usage of Yelp's 500 daily call limit (resets at 6pm Mountain == midnight UTC) and 3 calls per station_id in 245 stations
partition_1_size = 122
partition_2_size = 36

partition_1 = station_ids_list[:partition_1_size]
partition_2 = station_ids_list[partition_1_size: partition_1_size + partition_2_size]
partition_3 = station_ids_list[partition_1_size + partition_2_size:]  # is 87 ids long
#print (f"len(station_ids_list) = {len(station_ids_list)}")

# Set up an empty dataframe 'rolling_df' which will collect each individual API call into the larger dataframe
rolling_df = pd.DataFrame()

# temp list
#station_id_list = ['7a19c49f486d7c0c02b3685d7b240448', '32603a87cfca71d0f7dfa3513bad69d5']

# for station_id in station_id_list:
#     for category_id, category_name in category_mapping.items():
#         print(f"category_id = {category_id}, category_name={category_name}")
#         # payload_dict = call GET REQUEST function
#         yelpdict = dict()
#         create_default_yelpdict(yelpdict)
#         #print(f"BEFORE! yelpdict = {yelpdict}")
#         yelpdict_from_response(station_id, category_id, category_name, yelpdict, payload_dict)
#         #|print(f"yelpdict is now:\n{yelpdict}")
#         temp_df = pd.DataFrame(yelpdict)
#         rolling_df = pd.concat([rolling_df, temp_df], ignore_index=True)

#for station_id in station_id_list:  # Can't use this because Yelp's daily limit
#for station_id in partition_1:  # Used this on 2023-10-19
#for station_id in partition_2:  # Used this on 2023-10-19 to hit remainder of calls for the day
for station_id in partition_3:  # Used this on 2023-10-20 to hit remainder of calls for the day
    print(f"************** New Station! **************")
    for entry_category in categories:
        filt_station = (stations_df['id'] == station_id)
        station_lat = stations_df.loc[filt_station]['lat'].values[0]
        station_long = stations_df.loc[filt_station]['long'].values[0]
        print (f"Working on station_id: {station_id}, categories being sent is: {entry_category}")
        print (f"     station_lat, station_long = ({station_lat}, {station_long})")

        # Call the function to do YELP GET request from the API
        #payload_dict = yelp_get_request_business_search(station_lat, station_long, radius, entry_category, YELP_KEY)
        
        # Define/reset the yelpdict dictionary, which holds the parsed JSON from the REST GET API call
        yelpdict = dict()
        create_default_yelpdict(yelpdict)

        # Parse the JSON from the payload from the API call
        yelpdict_from_response(station_id, yelpdict, payload_dict, entry_category)
        
        # Create a temp_df dataframe
        temp_df = pd.DataFrame(yelpdict)

        # Add the just-generated dataframe from this singular previous API call, to the cumulative "rolling_df" dataframe for the entirety of the station_ids
        rolling_df = pd.concat([rolling_df, temp_df], ignore_index=True)


print(rolling_df.shape)
rolling_df['category_id'].value_counts()
rolling_df.head(100)



************** New Station! **************
Working on station_id: a74744ce4bb7ea2aa9f406ac8bff95d8, categories being sent is: beaches,parks,bicyclepaths,mountainbiking
     station_lat, station_long = (49.280977, -123.035969)
Inside yelp_get_request: request_url = https://api.yelp.com/v3/businesses/search?latitude=49.280977&longitude=-123.035969&radius=1000&categories=beaches,parks,bicyclepaths,mountainbiking&limit=50&sort_by=best_match, header_dict = {'accept': 'application/json', 'Authorization': 'Bearer wHKvbUKt45iPv0vitouFdnQ5M75dq8bw8WD1RfBpIvE11xMwXATMRbsQ80iL9nKpVJU4JZKbINPdznr9DaMDbeWQ_prpxl3amhNLSVoaWBaOPTiSQen3SEZ0oCEyZXYx'}
Working on station_id: a74744ce4bb7ea2aa9f406ac8bff95d8, categories being sent is: museums
     station_lat, station_long = (49.280977, -123.035969)
Inside yelp_get_request: request_url = https://api.yelp.com/v3/businesses/search?latitude=49.280977&longitude=-123.035969&radius=1000&categories=museums&limit=50&sort_by=best_match, header_dict = {'accept': '

Unnamed: 0,station_id,place_id,name,distance,address,city,postal,review_count,rating,category_id,category_name,query_categories
0,a74744ce4bb7ea2aa9f406ac8bff95d8,EByyWFFnSnrmQZct_j6MBg,New Brighton Park,948.481468,3201 New Brighton Road,Vancouver,V5K 5J7,16.0,4.5,beaches|parks,Beaches|Parks,"beaches,parks,bicyclepaths,mountainbiking"
1,a74744ce4bb7ea2aa9f406ac8bff95d8,_BkYJw9plA2tYLoToa9mGQ,Il Giardino Italiano,78.531770,2901 E Hastings St,Vancouver,V5K 5J1,2.0,4.0,parks,Parks,"beaches,parks,bicyclepaths,mountainbiking"
2,a74744ce4bb7ea2aa9f406ac8bff95d8,_UWGRNA9Jkbt72_3NXpNcw,Hastings Park,291.862881,2901 E Hastings Street,Vancouver,V5K 5J1,9.0,4.0,parks,Parks,"beaches,parks,bicyclepaths,mountainbiking"
3,a74744ce4bb7ea2aa9f406ac8bff95d8,d4PXZQGnXuCE7akD7PQ_TA,Rupert Park Pitch & Putt,959.110609,3402 Charles Street,Vancouver,V5K 5H9,9.0,4.0,parks|golf,Parks|Golf,"beaches,parks,bicyclepaths,mountainbiking"
4,a74744ce4bb7ea2aa9f406ac8bff95d8,LBVCFvtVtDwzOc2R0EqwfA,Adanac Park,875.246088,1025 Boundary Road,Vancouver,V5K 4T2,1.0,3.0,parks,Parks,"beaches,parks,bicyclepaths,mountainbiking"
...,...,...,...,...,...,...,...,...,...,...,...,...
95,aa1cbf97abc3cd50515bc54633a9bb2a,8snL4a-AN2JHWxgK62riUQ,Hyatt Regency Vancouver,585.993327,655 Burrard Street,Vancouver,V6C 2R7,302.0,4.0,hotels|venues,Hotels|Venues & Event Spaces,"hostels,hotels"
96,aa1cbf97abc3cd50515bc54633a9bb2a,E1FGWviFkHNH9NfnxAEaVw,Executive Hotel LeSoleil,416.380860,567 Hornby Street,Vancouver,V6C 2E8,88.0,4.5,hotels,Hotels,"hostels,hotels"
97,aa1cbf97abc3cd50515bc54633a9bb2a,KwMakQ-ozY7U7OySlRb-Kw,L'Hermitage Hotel,668.474308,788 Richards Street,Vancouver,V6B 3A4,149.0,4.5,hotels,Hotels,"hostels,hotels"
98,aa1cbf97abc3cd50515bc54633a9bb2a,1ZWwVOwu2BI7v7Hh-PlDGQ,Vancouver Marriott Pinnacle Downtown Hotel,651.654435,1128 West Hastings Street,Vancouver,V6E 4R5,185.0,4.0,hotels,Hotels,"hostels,hotels"


In [50]:
len(station_ids_list)
print(station_ids_list)

['7a19c49f486d7c0c02b3685d7b240448', '32603a87cfca71d0f7dfa3513bad69d5', '6d42fa40360f9a6b2bf641c7b8bb2862', '66f873d641d448bd1572ab086665a458', '485d4d24c803cfde829ab89699fed833', 'b07d513f87897cff7319a0e59d7e567c', 'd1c9556d92f329703229fb3e33899a3b', '95e624191c655f50e401d280cd39a9ad', '3e112f9fe877e0003780386359060ae6', '6993b3dbb0758927967592ea612a2b1e', 'bf8408067b0e0c963f3ff526977bcef3', 'd2244f1a81d317c76cabd65e81250835', '0b543fc4e694fe07a54dac48bb1b3390', 'fbb1d30d7f30b049873f5be5688563d4', '7181df50ee1d1a05559399211382b7c8', '2ef000bc9d4650d8cc6198c4d570b19f', 'b7683a86874e6300fd89f4cc0b075f57', '3367c1e1dbf73caf7c10e4c96a92f58d', '1d7e73b3321ebb6713774d376247d9f9', '664502c28e8147615f17b8ea355f40d0', '6c24a62b5ee0a2a8014eb0dfb6d40025', 'fad9ebcf614dd1a72593a34072ff76f8', '3a4131fa94946d93d3529136efd62413', '25318ec3e06fae1397a3a2f645104149', '444991edc9550be0908f032da5e3c95f', '434b6f4e7fc30198f901b60fc905f609', 'f97d5bf41f35aa181288429fb52e3488', 'fef69fb400210d861107a61db9

In [51]:
print(sorted(station_ids_list))

['00fa94ad698dc4a9e4d708d6fd32f294', '012d3e06901cc222b1c2cf0a2ace3a29', '029a505bd4422a1afd127987757f71a6', '0438114d2e6b96118de69bc9775bb21e', '0459b7e93703980b853cd65a9dc60596', '04f64fde5ba1ebde8ad78c74914adb34', '06487275cd19694152824c7c3088e8ff', '064a7b39b1dbc4bfc4cf8f454e360c57', '066ecc3584913fa035ec1417323beb80', '06bdc4d55f34b2a491305534fd5defda', '07b13214cb69f2ec007d888450f3d6bc', '085a0e120a08a80a58c0ae0ff9da565a', '08c7a90bf297fde8c1f1156db13987d4', '0a43242bee10cd33f539af2c1bf5ecbf', '0b543fc4e694fe07a54dac48bb1b3390', '0c42f45e4a14957ad4a6d521d0ba8bc3', '0c9ad3e3703e5869b82619ecdf54f43e', '0cee06d7d10f7d3162cea03d22b7beed', '0cf6cd1809fa7f510812f5b052f578c4', '0d1b5c62f8d22e3650811df55596e25b', '0e0aef6fabaa808b3a3cef24657257a4', '13021bba7bbeca1ccd8726ca17da59e6', '1648ead5ea617d2cea115869d9516f5e', '1675a77b9af656faf527b4e6de53e63b', '175a074bf65455d1633639511dc129c8', '17a3320a52bcc62161908967f8a06613', '198810b9b0af5d229d1b6fa12c620e06', '1bca0e944a4faf170db8279341

In [52]:
station_ids_list = sorted(station_ids_list)
print(station_ids_list)

['00fa94ad698dc4a9e4d708d6fd32f294', '012d3e06901cc222b1c2cf0a2ace3a29', '029a505bd4422a1afd127987757f71a6', '0438114d2e6b96118de69bc9775bb21e', '0459b7e93703980b853cd65a9dc60596', '04f64fde5ba1ebde8ad78c74914adb34', '06487275cd19694152824c7c3088e8ff', '064a7b39b1dbc4bfc4cf8f454e360c57', '066ecc3584913fa035ec1417323beb80', '06bdc4d55f34b2a491305534fd5defda', '07b13214cb69f2ec007d888450f3d6bc', '085a0e120a08a80a58c0ae0ff9da565a', '08c7a90bf297fde8c1f1156db13987d4', '0a43242bee10cd33f539af2c1bf5ecbf', '0b543fc4e694fe07a54dac48bb1b3390', '0c42f45e4a14957ad4a6d521d0ba8bc3', '0c9ad3e3703e5869b82619ecdf54f43e', '0cee06d7d10f7d3162cea03d22b7beed', '0cf6cd1809fa7f510812f5b052f578c4', '0d1b5c62f8d22e3650811df55596e25b', '0e0aef6fabaa808b3a3cef24657257a4', '13021bba7bbeca1ccd8726ca17da59e6', '1648ead5ea617d2cea115869d9516f5e', '1675a77b9af656faf527b4e6de53e63b', '175a074bf65455d1633639511dc129c8', '17a3320a52bcc62161908967f8a06613', '198810b9b0af5d229d1b6fa12c620e06', '1bca0e944a4faf170db8279341

In [53]:
partition_1 = station_ids_list[:122]
partition_2 = station_ids_list[122:]

In [54]:
len(partition_1)

122

In [55]:
len(partition_2)

123

In [56]:
print(partition_1)

['00fa94ad698dc4a9e4d708d6fd32f294', '012d3e06901cc222b1c2cf0a2ace3a29', '029a505bd4422a1afd127987757f71a6', '0438114d2e6b96118de69bc9775bb21e', '0459b7e93703980b853cd65a9dc60596', '04f64fde5ba1ebde8ad78c74914adb34', '06487275cd19694152824c7c3088e8ff', '064a7b39b1dbc4bfc4cf8f454e360c57', '066ecc3584913fa035ec1417323beb80', '06bdc4d55f34b2a491305534fd5defda', '07b13214cb69f2ec007d888450f3d6bc', '085a0e120a08a80a58c0ae0ff9da565a', '08c7a90bf297fde8c1f1156db13987d4', '0a43242bee10cd33f539af2c1bf5ecbf', '0b543fc4e694fe07a54dac48bb1b3390', '0c42f45e4a14957ad4a6d521d0ba8bc3', '0c9ad3e3703e5869b82619ecdf54f43e', '0cee06d7d10f7d3162cea03d22b7beed', '0cf6cd1809fa7f510812f5b052f578c4', '0d1b5c62f8d22e3650811df55596e25b', '0e0aef6fabaa808b3a3cef24657257a4', '13021bba7bbeca1ccd8726ca17da59e6', '1648ead5ea617d2cea115869d9516f5e', '1675a77b9af656faf527b4e6de53e63b', '175a074bf65455d1633639511dc129c8', '17a3320a52bcc62161908967f8a06613', '198810b9b0af5d229d1b6fa12c620e06', '1bca0e944a4faf170db8279341

In [82]:
rolling_df['category_id'].value_counts()

category_id
hotels                               1144
parks                                 599
museums                               116
hotels|venues                         111
hostels                               107
hotels|bedbreakfast                    73
dog_parks                              62
parks|playgrounds                      48
beaches                                22
resorts|hotels|vacation_rentals        22
gardens|parks                          20
artmuseums                             19
museums|galleries                      19
catering|hotels                        18
landmarks|parks                        17
casinos|hotels|venues                  15
playgrounds|dog_parks                  15
beaches|parks                          15
hotels|divebars                        12
hiking|mountainbiking|bikerentals      12
waterparks|parks|playgrounds           11
parks|theater                          10
museums|galleries|venues                9
skate_parks           

In [83]:
rolling_df.shape
rolling_df.head(50)

Unnamed: 0,station_id,place_id,name,distance,address,city,postal,review_count,rating,category_id,category_name,query_categories
0,a74744ce4bb7ea2aa9f406ac8bff95d8,EByyWFFnSnrmQZct_j6MBg,New Brighton Park,948.481468,3201 New Brighton Road,Vancouver,V5K 5J7,16.0,4.5,beaches|parks,Beaches|Parks,"beaches,parks,bicyclepaths,mountainbiking"
1,a74744ce4bb7ea2aa9f406ac8bff95d8,_BkYJw9plA2tYLoToa9mGQ,Il Giardino Italiano,78.53177,2901 E Hastings St,Vancouver,V5K 5J1,2.0,4.0,parks,Parks,"beaches,parks,bicyclepaths,mountainbiking"
2,a74744ce4bb7ea2aa9f406ac8bff95d8,_UWGRNA9Jkbt72_3NXpNcw,Hastings Park,291.862881,2901 E Hastings Street,Vancouver,V5K 5J1,9.0,4.0,parks,Parks,"beaches,parks,bicyclepaths,mountainbiking"
3,a74744ce4bb7ea2aa9f406ac8bff95d8,d4PXZQGnXuCE7akD7PQ_TA,Rupert Park Pitch & Putt,959.110609,3402 Charles Street,Vancouver,V5K 5H9,9.0,4.0,parks|golf,Parks|Golf,"beaches,parks,bicyclepaths,mountainbiking"
4,a74744ce4bb7ea2aa9f406ac8bff95d8,LBVCFvtVtDwzOc2R0EqwfA,Adanac Park,875.246088,1025 Boundary Road,Vancouver,V5K 4T2,1.0,3.0,parks,Parks,"beaches,parks,bicyclepaths,mountainbiking"
5,a74744ce4bb7ea2aa9f406ac8bff95d8,62f1QlwgVZr3pFHdjS542A,Callister Park,894.985004,2875 Oxford Street,Vancouver,V5K 1N6,1.0,3.0,parks,Parks,"beaches,parks,bicyclepaths,mountainbiking"
6,a74744ce4bb7ea2aa9f406ac8bff95d8,w_JY6JHlUtDw09puGdEPxw,Atrium Inn,679.261964,2889 East Hastings Street,Vancouver,V5K 2A1,58.0,3.0,hotels|venues,Hotels|Venues & Event Spaces,"hostels,hotels"
7,a9f0b06d07f89b75e92c0cf686223aea,GlYICUHh-vsbdBsdjnRwMg,Harbour Green Park,415.206448,1199 W Cordova Street,Vancouver,V6C 0A1,15.0,4.5,parks,Parks,"beaches,parks,bicyclepaths,mountainbiking"
8,a9f0b06d07f89b75e92c0cf686223aea,-fHqwIU4S-uD9O-ouzhE_A,Rainbow Park,847.369121,872 Richards Street,Vancouver,V6B 3B4,4.0,5.0,parks,Parks,"beaches,parks,bicyclepaths,mountainbiking"
9,a9f0b06d07f89b75e92c0cf686223aea,KiRXFRtSzRbgU_55Nj845Q,Portal Park,428.915352,1099 W Hastings Street,Vancouver,V6E 4E2,1.0,5.0,parks,Parks,"beaches,parks,bicyclepaths,mountainbiking"


In [84]:
#rolling_df.to_csv('../data/yelp_vancouver_partition1.csv', index=False)  # Saved on 2023-10-20 evening for partition_1
#rolling_df.to_csv('../data/yelp_vancouver_partition2.csv', index=False)  # Saved on 2023-10-20 evening for partition_2
rolling_df.to_csv('../data/yelp_vancouver_partition3.csv', index=False)  # Saved on 2023-10-21 for partition_3

In [71]:
rolling_df[rolling_df['name']=='Hotel BLU Vancouver']

Unnamed: 0,station_id,place_id,name,distance,address,city,postal,review_count,rating,category_id,category_name,query_categories
237,0b543fc4e694fe07a54dac48bb1b3390,-xPVpKPBUbdSMaYCSw4ZjA,Hotel BLU Vancouver,743.605664,177 Robson Street,Vancouver,V6B,80.0,4.0,hotels,Hotels,"hostels,hotels"
391,0e0aef6fabaa808b3a3cef24657257a4,-xPVpKPBUbdSMaYCSw4ZjA,Hotel BLU Vancouver,357.640901,177 Robson Street,Vancouver,V6B,80.0,4.0,hotels,Hotels,"hostels,hotels"
468,1648ead5ea617d2cea115869d9516f5e,-xPVpKPBUbdSMaYCSw4ZjA,Hotel BLU Vancouver,615.423757,177 Robson Street,Vancouver,V6B,80.0,4.0,hotels,Hotels,"hostels,hotels"
524,175a074bf65455d1633639511dc129c8,-xPVpKPBUbdSMaYCSw4ZjA,Hotel BLU Vancouver,1066.880096,177 Robson Street,Vancouver,V6B,80.0,4.0,hotels,Hotels,"hostels,hotels"
600,1d4153659a0a0a2089078c739df6737d,-xPVpKPBUbdSMaYCSw4ZjA,Hotel BLU Vancouver,475.06524,177 Robson Street,Vancouver,V6B,80.0,4.0,hotels,Hotels,"hostels,hotels"
790,257715b845828e027ff35dd0f11c971e,-xPVpKPBUbdSMaYCSw4ZjA,Hotel BLU Vancouver,736.510802,177 Robson Street,Vancouver,V6B,80.0,4.0,hotels,Hotels,"hostels,hotels"
883,2ef000bc9d4650d8cc6198c4d570b19f,-xPVpKPBUbdSMaYCSw4ZjA,Hotel BLU Vancouver,92.985699,177 Robson Street,Vancouver,V6B,80.0,4.0,hotels,Hotels,"hostels,hotels"
1000,32603a87cfca71d0f7dfa3513bad69d5,-xPVpKPBUbdSMaYCSw4ZjA,Hotel BLU Vancouver,678.65342,177 Robson Street,Vancouver,V6B,80.0,4.0,hotels,Hotels,"hostels,hotels"
1084,32f70f7ae14ba87523c6286a6c4d5c37,-xPVpKPBUbdSMaYCSw4ZjA,Hotel BLU Vancouver,878.297394,177 Robson Street,Vancouver,V6B,80.0,4.0,hotels,Hotels,"hostels,hotels"
1143,3367c1e1dbf73caf7c10e4c96a92f58d,-xPVpKPBUbdSMaYCSw4ZjA,Hotel BLU Vancouver,733.901255,177 Robson Street,Vancouver,V6B,80.0,4.0,hotels,Hotels,"hostels,hotels"


In [76]:
# Break into partition sizes to maximize usage of Yelp's 500 daily call limit (resets at 6pm Mountain == midnight UTC) and 3 calls per station_id in 245 stations
partition_1_size = 122
partition_2_size = 36

partition_1 = station_ids_list[:partition_1_size]
partition_2 = station_ids_list[partition_1_size: partition_1_size + partition_2_size]
partition_3 = station_ids_list[partition_1_size + partition_2_size:]

print(len(partition_1))
print(len(partition_2))
print(len(partition_3))

total_ids_in_partitions = len(partition_1) + len(partition_2) + len(partition_3)
print(f"total ids in all partitions ({total_ids_in_partitions}) == stations_df.shape[0]: {total_ids_in_partitions == stations_df.shape[0]}")

122
36
87
total ids in all partitions (245) == stations_df.shape[0]: True


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

Put your parsed results into a DataFrame

In [86]:
# Need to pull each of the 3 partitions from 3 .csv files, into a singular dataframe for Yelp
partition1_df = pd.read_csv('../data/yelp_vancouver_partition1.csv')
partition2_df = pd.read_csv('../data/yelp_vancouver_partition1.csv')
partition3_df = pd.read_csv('../data/yelp_vancouver_partition1.csv')

yelp_df = pd.concat([partition1_df, partition2_df, partition3_df], ignore_index=True)

In [87]:
yelp_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9789 entries, 0 to 9788
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   station_id        9789 non-null   object 
 1   place_id          9789 non-null   object 
 2   name              9789 non-null   object 
 3   distance          9789 non-null   float64
 4   address           9720 non-null   object 
 5   city              9789 non-null   object 
 6   postal            9744 non-null   object 
 7   review_count      9789 non-null   float64
 8   rating            9789 non-null   float64
 9   category_id       9789 non-null   object 
 10  category_name     9789 non-null   object 
 11  query_categories  9789 non-null   object 
dtypes: float64(3), object(9)
memory usage: 917.8+ KB


In [88]:
yelp_df.head(10)

Unnamed: 0,station_id,place_id,name,distance,address,city,postal,review_count,rating,category_id,category_name,query_categories
0,00fa94ad698dc4a9e4d708d6fd32f294,kajMc2fkWKdzKJ1M4pm47Q,Stanley Park,978.386841,1166 Stanley Park Drive,Vancouver,V6G,1091.0,5.0,parks,Parks,"beaches,parks,bicyclepaths,mountainbiking"
1,00fa94ad698dc4a9e4d708d6fd32f294,VoziJj_Fw67OtZtdDzrpQg,English Bay Beach Park,783.428693,1700 Beach Avenue,Vancouver,V6E 1V3,68.0,4.5,parks,Parks,"beaches,parks,bicyclepaths,mountainbiking"
2,00fa94ad698dc4a9e4d708d6fd32f294,XHJTdq8QJp6_9oCj5hU85w,Vancouver Seawall,663.404115,,Vancouver,,101.0,5.0,hiking|parks,Hiking|Parks,"beaches,parks,bicyclepaths,mountainbiking"
3,00fa94ad698dc4a9e4d708d6fd32f294,EGZABxCmlA3PNwbSYXhLbA,Morton Park,426.335401,1800 Morton Avenue,Vancouver,V6G 1Z1,14.0,4.5,parks,Parks,"beaches,parks,bicyclepaths,mountainbiking"
4,00fa94ad698dc4a9e4d708d6fd32f294,AVulOVkLG2LIRaOdOAmdlA,Lost Lagoon,328.531508,Lagoon Dr,Vancouver,V6G,18.0,4.5,parks|lakes,Parks|Lakes,"beaches,parks,bicyclepaths,mountainbiking"
5,00fa94ad698dc4a9e4d708d6fd32f294,2CxBAbnFIOfjRASbWcHC4w,Stanley Park 2nd Beach Picnic Area,715.128216,Ceperly 2nd Beach,Vancouver,V6G 3E2,8.0,3.5,beaches,Beaches,"beaches,parks,bicyclepaths,mountainbiking"
6,00fa94ad698dc4a9e4d708d6fd32f294,4563XS_PrPJivPv_R5sW3Q,Alexandra Park,721.708957,1755 Beach avenue,Vancouver,V6E 1V3,1.0,5.0,parks,Parks,"beaches,parks,bicyclepaths,mountainbiking"
7,00fa94ad698dc4a9e4d708d6fd32f294,kRl_c-eObP6vf3KbJ0fulw,Movies in the Park,701.08881,Stanley Park Dr,Vancouver,V6G,6.0,4.5,parks|arts,Parks|Arts & Entertainment,"beaches,parks,bicyclepaths,mountainbiking"
8,00fa94ad698dc4a9e4d708d6fd32f294,42Tg2jf217mRb_rqYpKAbw,Stanley Park Shuffleboard Court Area - Gated O...,395.498272,2000 W Georgia Street,Vancouver,V6G,1.0,3.0,dog_parks,Dog Parks,"beaches,parks,bicyclepaths,mountainbiking"
9,00fa94ad698dc4a9e4d708d6fd32f294,TBcn1EwTCv3EsF4SEI3s4w,Lovers Walk Trail,1287.4175,Lovers Walk,Vancouver,V6G,2.0,5.0,hiking|parks,Hiking|Parks,"beaches,parks,bicyclepaths,mountainbiking"


# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

1. Look at the difference in number and granularity of categories (eye-ball):'
Yelp:  https://docs.developer.yelp.com/docs/resources-categories
Foursquare:  https://location.foursquare.com/places/docs/categories

2.  Look at the different types of APIs:
Yelp:
APIs (Yelp Fusion) Overview:  https://docs.developer.yelp.com/docs/fusion-intro
    - Can get Businesses, Reviews, Events, Available Categories, Brands, and Autocomplete (typeahead search service)
    - Out of the above, the "Businesses Search" (https://docs.developer.yelp.com/reference/v3_business_search) seems most useful
        - Limitation:  Won't return any businesses without reviews
        - Accepts lat & long
        - Has Max 50 limit
    - "Businesses Reviews" (https://docs.developer.yelp.com/reference/v3_business_reviews) could be useful, but only returns up to 3 review exercepts and does not return businesses without reviews, which could be limiting.
        -The fact it returns up to 3 reviews will skew our data because businesses that are very popular with more than 3 reviews won't be differentiable in the dataset.
        - It does not seem to return review ratings/numbers so it's not that easy or accurate to easily generate a sentiment, either, for use in a model
    - Events Search (https://docs.developer.yelp.com/reference/v3_events_search) could be useful to see if more events in the immediate station radius would impact how many 
        - TODO: Will need to determine if it gives all events across the year, or only within a certain timeframe around the request time (which is less useful for statistical model)
        - 50 limit may be limiting


Foursquare: 
APIs Overview:  https://location.foursquare.com/developer/reference/api-overview
    - Places API:  https://location.foursquare.com/developer/reference/places-api-overview#endpoints
        - Place Search (https://location.foursquare.com/developer/reference/place-search):  "Search for places in the FSQ Places database using a location and querying by name, category name, telephone number, taste label, or chain name. For example, search for "coffee" to get back a list of recommended coffee shops ... You may pass a location with your request by using one of the following options."
        - Place Details (https://location.foursquare.com/developer/reference/place-details):  "Retrieve comprehensive information and metadata for a FSQ Place using the fsq_id."
        - Place Photos (https://location.foursquare.com/developer/reference/place-photos):  "Retrieve photos for a FSQ Place using the fsq_id."
        - Place Tips (https://location.foursquare.com/developer/reference/place-tips):  "Retrieve tips for a FSQ Place using the fsq_id."
        - Place Match (https://location.foursquare.com/developer/reference/place-match): "Return the Foursquare record of a POI (via FSQ_ID) given a Name and Location. Provide a Location by using all the Address parameters, or by LL."
    - Studio Data API (geospatial assets - not useful to us for this exercise)
    - Geofence API (user-configured geofences - not useful to us for this exercise)

    - The Places API and MAYBE the Place Tips would be the most useful to us.  How many categories available would be next.  Max 50 limit in return string could be a limiting factor.

Get the top 10 restaurants according to their rating