In [4]:
# imports
import requests
import os
import pandas as pd
import json  


# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [6]:
# Assign Environment Variables (a missing variable raises KeyError right away)
client_id = os.environ["FOURSQUARE_CLIENT_ID"]
client_secret = os.environ["FOURSQUARE_SECRET_CLIENT_ID"]
api_key = os.environ["FOURSQUARE_API_KEY"]

# Define the endpoint URL
url = "https://api.foursquare.com/v3/places/search"

# One list of raw JSON responses per search term
results_restaurant = []
results_bar = []
results_art = []

# Requests that produced no usable payload, recorded as
# (latitude, longitude, query, status_code) so skipped stations stay visible
foursquare_failed_requests = []

# import city_bikes list of latitude and longitude
bike_stations_toronto = pd.read_csv(r'C:\Users\akans\Documents\Kierans_Documents\LHL\Statistical-Modelling-Project\data\bike_stations_toronto.csv')

# Define Headers for the API requests
headers = {
    "Accept": "application/json",
    "Authorization": api_key
}

# Iterate through the DataFrame rows: one request per station and search term
for index, row in bike_stations_toronto.iterrows():
    latitude = row['latitude']
    longitude = row['longitude']
    location = f'{latitude},{longitude}'

    # Parameters for the three searches around this station
    params_restaurant = {'ll': location, 'radius': 1000, 'query': 'restaurant'}
    params_bar = {'ll': location, 'radius': 1000, 'query': 'bar'}
    params_art = {'ll': location, 'radius': 1000, 'query': 'Performing Arts Venue'}

    searches = (
        (params_restaurant, results_restaurant),
        (params_bar, results_bar),
        (params_art, results_art),
    )
    for params, bucket in searches:
        response = requests.get(url, params=params, headers=headers)

        # Keep the best-effort behaviour (skip and carry on), but record the miss
        if response.status_code != 200:
            foursquare_failed_requests.append(
                (latitude, longitude, params['query'], response.status_code)
            )
            continue

        try:
            bucket.append(response.json())
        except ValueError:
            # ValueError is the common base of the JSON decode errors, so this
            # also covers decoders other than the stdlib json module
            foursquare_failed_requests.append(
                (latitude, longitude, params['query'], response.status_code)
            )



Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)
Put your parsed results into a DataFrame

In [101]:
# Columns kept for every Foursquare place, in output order
FOURSQUARE_COLUMNS = ['name', 'distance', 'latitude', 'longitude', 'api', 'query']

# Stage an empty dataframe that normalize_results_foursquare accumulates into
normalize_results_foursquare_df = pd.DataFrame(columns=FOURSQUARE_COLUMNS)


def normalize_results_foursquare(results_list, query):
    """
    Flatten raw Foursquare search responses into rows and add them to the
    global normalize_results_foursquare_df.

    Parameters
    ----------
    results_list : list of dict
        Decoded JSON responses. Each must provide
        ['context']['geo_bounds']['circle']['center'] with 'latitude' and
        'longitude', and a 'results' list whose items have 'name' and
        'distance'.
    query : str
        Label stored in the 'query' column for every row added by this call.

    Returns
    -------
    pandas.DataFrame
        The updated normalize_results_foursquare_df (also rebound globally).

    Raises
    ------
    KeyError
        If a response or place is missing one of the keys listed above.
    """
    global normalize_results_foursquare_df

    # Collect plain records first and build the frame once; DataFrame.append
    # was removed in pandas 2.0 and growing a frame row by row is quadratic.
    records = []
    for response in results_list:
        # The search centre is what gets stored as latitude/longitude
        center = response['context']['geo_bounds']['circle']['center']
        for place in response['results']:
            records.append({
                'name': place['name'],
                'distance': place['distance'],
                'latitude': center['latitude'],
                'longitude': center['longitude'],
                'api': 'foursquare',
                'query': query,
            })

    if records:
        new_rows = pd.DataFrame(records, columns=FOURSQUARE_COLUMNS)
        if normalize_results_foursquare_df.empty:
            # Avoid concatenating with the empty staging frame
            normalize_results_foursquare_df = new_rows
        else:
            normalize_results_foursquare_df = pd.concat(
                [normalize_results_foursquare_df, new_rows], ignore_index=True
            )

    return normalize_results_foursquare_df


In [80]:
# Flatten every Foursquare result set into the shared dataframe (bar, art,
# then restaurant), then write the combined table to disk
for raw_results, label in (
    (results_bar, 'bar'),
    (results_art, 'art'),
    (results_restaurant, 'restaurant'),
):
    normalize_results_foursquare(raw_results, label)

normalize_results_foursquare_df.to_csv(r'C:\Users\akans\Documents\Kierans_Documents\LHL\Statistical-Modelling-Project\data\results_four.csv')

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [81]:
# Assign Environment Variables (a missing variable raises KeyError right away)
client_id = os.environ["YELP_CLIENT_ID"]
api_key = os.environ["YELP_API_KEY"]

# Define the endpoint URL
url = "https://api.yelp.com/v3/businesses/search"

# One list of raw JSON responses per search term
results_yelp_restaurant = []
results_yelp_bar = []
results_yelp_art = []

# Requests that produced no usable payload, recorded as
# (latitude, longitude, term, status_code) so quota or error responses stay visible
yelp_failed_requests = []

# import city_bikes list of latitude and longitude
bike_stations_toronto = pd.read_csv(r'C:\Users\akans\Documents\Kierans_Documents\LHL\Statistical-Modelling-Project\data\bike_stations_toronto.csv')

# Define Headers for the API requests
headers = {
    'Authorization': f'Bearer {api_key}',
}


# Iterate through the DataFrame rows: one request per station and search term
for index, row in bike_stations_toronto.iterrows():
    latitude = row['latitude']
    longitude = row['longitude']

    # Parameters for the three searches around this station. These use the
    # same latitude/longitude/term keys for all three terms.
    params_restaurant = {
        'latitude': latitude,
        'longitude': longitude,
        'radius': 1000,
        'term': 'restaurant'
    }
    params_bar = {
        'latitude': latitude,
        'longitude': longitude,
        'radius': 1000,
        'term': 'bar'
    }
    # Kept in its own params dict so results_yelp_art stays the list that
    # collects responses
    params_art = {
        'latitude': latitude,
        'longitude': longitude,
        'radius': 1000,
        'term': 'Performing Arts Venue'
    }

    searches = (
        (params_restaurant, results_yelp_restaurant),
        (params_bar, results_yelp_bar),
        (params_art, results_yelp_art),
    )
    for params, bucket in searches:
        response = requests.get(url, params=params, headers=headers)

        # Keep the best-effort behaviour (skip and carry on), but record the miss
        if response.status_code != 200:
            yelp_failed_requests.append(
                (latitude, longitude, params['term'], response.status_code)
            )
            continue

        try:
            bucket.append(response.json())
        except ValueError:
            # ValueError is the common base of the JSON decode errors, so this
            # also covers decoders other than the stdlib json module
            yelp_failed_requests.append(
                (latitude, longitude, params['term'], response.status_code)
            )

Ran out of Yelp API requests. Only 500 responses could be retrieved, all from the "restaurant" request.

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [112]:
# Columns kept for every Yelp business, in output order
YELP_COLUMNS = ['name', 'rating', 'review_count',
                'distance', 'latitude', 'longitude', 'total', 'api', 'query']

# Stage an empty dataframe that normalize_results_yelp accumulates into
normalize_results_yelp_df = pd.DataFrame(columns=YELP_COLUMNS)


def normalize_results_yelp(results_list, query):
    """
    Flatten raw Yelp search responses into rows and add them to the global
    normalize_results_yelp_df.

    Parameters
    ----------
    results_list : list of dict
        Decoded JSON responses. Each must provide ['region']['center'] with
        'latitude' and 'longitude', a 'total' value, and a 'businesses' list
        whose items have 'name', 'rating', 'review_count' and 'distance'.
    query : str
        Label stored in the 'query' column for every row added by this call.

    Returns
    -------
    pandas.DataFrame
        The updated normalize_results_yelp_df (also rebound globally).

    Raises
    ------
    KeyError
        If a response or business is missing one of the keys listed above.
    """
    global normalize_results_yelp_df

    # Collect plain records first and build the frame once; DataFrame.append
    # was removed in pandas 2.0 and growing a frame row by row is quadratic.
    records = []
    for response in results_list:
        # The region centre and the response-level total are repeated on
        # every business row of that response
        center = response['region']['center']
        total_businesses = response['total']
        for business in response['businesses']:
            records.append({
                'name': business['name'],
                'rating': business['rating'],
                'review_count': business['review_count'],
                'distance': business['distance'],
                'latitude': center['latitude'],
                'longitude': center['longitude'],
                'total': total_businesses,
                'api': 'yelp',
                'query': query,
            })

    if records:
        new_rows = pd.DataFrame(records, columns=YELP_COLUMNS)
        if normalize_results_yelp_df.empty:
            # Avoid concatenating with the empty staging frame
            normalize_results_yelp_df = new_rows
        else:
            normalize_results_yelp_df = pd.concat(
                [normalize_results_yelp_df, new_rows], ignore_index=True
            )

    return normalize_results_yelp_df

Put your parsed results into a DataFrame

In [None]:
# Only the restaurant responses are normalized here. The bar and art result
# sets (results_yelp_bar, results_yelp_art) would have been passed through
# normalize_results_yelp as well, but the API requests ran out first.
yelp_output_csv = r'C:\Users\akans\Documents\Kierans_Documents\LHL\Statistical-Modelling-Project\data\results_yelp.csv'

normalize_results_yelp(results_yelp_restaurant, 'restaurant')
normalize_results_yelp_df.to_csv(yelp_output_csv)

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

The Yelp data is more complete. Not only does it return more results per bike station, it also includes key quantitative variables that are useful for modelling, such as review_count and rating, which the parsed Foursquare results do not contain.

Get the top 10 restaurants according to their rating

In [120]:
# Ranking purely by rating is ambiguous: more than 10 rows share the top
# rating of 5.0, so the first 10 after sorting are just one possible selection.
rows_by_rating = normalize_results_yelp_df.sort_values(by='rating', ascending=False)
print(rows_by_rating.head(10))

# Alternative definition of "top": every row with a perfect 5.0 rating
has_perfect_rating = normalize_results_yelp_df['rating'] == 5.0
print(normalize_results_yelp_df[has_perfect_rating])

                     name  rating review_count    distance   latitude  \
3085    Galata Restaurant     5.0           12  669.434635  43.678401   
1506     Ladybug Wine Bar     5.0            3  638.023590  43.651318   
190     Grandma Loves You     5.0           76  588.720010  43.674991   
2926          Rikki Tikki     5.0           27  471.404303  43.657100   
304         East End Vine     5.0            3   55.437143  43.667214   
3034             Judaline     5.0            5   71.492205  43.677076   
704           Rikki Tikki     5.0           27  843.493091  43.661667   
2316           SeoulChild     5.0            1  671.997547  43.659575   
3069              Papyrus     5.0           52  579.919690  43.678401   
3196  Mapo Pocha Soju Bar     5.0            3  767.219603  43.661705   

      longitude total   api       query  dense_rank  
3085 -79.346289   169  yelp  restaurant         1.0  
1506 -79.360230   194  yelp  restaurant         1.0  
190  -79.396273   300  yelp  resta