# Part 2: Connecting to Foursquare and Yelp APIs
---

In [1]:
# imports
import requests
import pandas as pd
import os

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice.
___

In [2]:
FOURSQUARE_KEY = os.getenv('FOURSQUARE_API_KEY')

In [282]:
# Function to make an http request to foursqure for each bike station
def get_poi_fs(latitude, longitude, radius, api_key, categories):    
    url = f'https://api.foursquare.com/v3/places/search?ll={latitude},{longitude}&radius={radius}'

    # Create dictionary for headers
    headers = {"Accept": "application/json"}
    headers['Authorization'] = api_key
    reponse = requests.get(url, headers=headers)
    reponse_data = reponse.json()
    
    return reponse_data['results']

In [234]:
# testing
res = get_poi_fs(latitude=43.259126, longitude=-79.877212, radius=1000, api_key=FOURSQUARE_KEY, categories=dining_cat_ids)

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)
___

In [275]:
# to explore the result of a single POI from the testing call
res[0]

{'fsq_id': '579b8d6b498ea8f02d7fc402',
 'categories': [{'id': 13055,
   'name': 'Fried Chicken Joint',
   'short_name': 'Fried Chicken',
   'plural_name': 'Fried Chicken Joints',
   'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/friedchicken_',
    'suffix': '.png'}}],
 'chains': [],
 'closed_bucket': 'VeryLikelyOpen',
 'distance': 22,
 'geocodes': {'main': {'latitude': 43.259153, 'longitude': -79.877446},
  'roof': {'latitude': 43.259153, 'longitude': -79.877446}},
 'link': '/v3/places/579b8d6b498ea8f02d7fc402',
 'location': {'address': '274 King St W',
  'address_extended': '# 272',
  'country': 'CA',
  'cross_street': '',
  'formatted_address': '274 King St W, Hamilton ON L8P 1J6',
  'locality': 'Hamilton',
  'postcode': 'L8P 1J6',
  'region': 'ON'},
 'name': 'Coop Hamilton',
 'related_places': {},
 'timezone': 'America/Toronto'}

Put your parsed results into a DataFrame
___

In [2]:
# load the df of the bike stations containing the latitude and longitude
bike_stations_load1 = pd.read_csv('../data/bike_stations_hamilton.csv', index_col=0)

In [297]:
# Create an empty dataframe
foursquare_df = pd.DataFrame()

In [298]:
# Loop through the bike_stations df to extract the lat/long/id of each row and make a new http request
for index, row in bike_stations_load1.iterrows():
     # Make the http request using row attributes
    poi_fs = get_poi_fs(latitude=row['latitude'], longitude=row['longitude'], radius=1000, api_key=FOURSQUARE_KEY, categories=None)
    # Turn the response into a dataframe
    new_df = pd.json_normalize(poi_fs, record_path=['categories'], meta=['fsq_id', 'name', 'distance', ['location', 'address']], meta_prefix='poi_', errors='ignore')
    # Add a column with the corresponding bike station id
    new_df['station_id'] = row['id'] 
    # Add the new dataframe to yelp_df
    foursquare_df = pd.concat([foursquare_df, new_df])

In [300]:
# Drop unnecessary columns from dataframe
columns_to_drop_foursquare = ['short_name', 'plural_name', 'icon.prefix', 'icon.suffix']

foursquare_df = foursquare_df.drop(columns=columns_to_drop_foursquare)

# Rename columns
foursquare_df = foursquare_df.rename(columns={"id": "category_id", "name": "category_name", 'poi_location.address': 'poi_address'})

In [304]:
# Reorder columns
foursquare_df = foursquare_df[['poi_fsq_id', 'poi_name', 'category_id', 'category_name', 'poi_distance', 'poi_address', 'station_id']]

In [306]:
print(foursquare_df.shape, '\n')
print(foursquare_df.info())

(2343, 7) 

<class 'pandas.core.frame.DataFrame'>
Index: 2343 entries, 0 to 15
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   poi_fsq_id     2343 non-null   object
 1   poi_name       2343 non-null   object
 2   category_id    2343 non-null   int64 
 3   category_name  2343 non-null   object
 4   poi_distance   2343 non-null   object
 5   poi_address    2298 non-null   object
 6   station_id     2343 non-null   object
dtypes: int64(1), object(6)
memory usage: 146.4+ KB
None


In [307]:
foursquare_df.head()

Unnamed: 0,poi_fsq_id,poi_name,category_id,category_name,poi_distance,poi_address,station_id
0,579b8d6b498ea8f02d7fc402,Coop Hamilton,13055,Fried Chicken Joint,22,274 King St W,45dbb0009135e465f49f054517cbe74d
1,5796abdc498eb9b6b1d59458,Coop Wicked Chicken Hamilton,13055,Fried Chicken Joint,19,274 King St W,45dbb0009135e465f49f054517cbe74d
2,4c57494d30d82d7fd583d962,La Luna Restaurant Downtown,13298,Lebanese Restaurant,111,306 King St W,45dbb0009135e465f49f054517cbe74d
3,4c57494d30d82d7fd583d962,La Luna Restaurant Downtown,13302,Mediterranean Restaurant,111,306 King St W,45dbb0009135e465f49f054517cbe74d
4,4c57494d30d82d7fd583d962,La Luna Restaurant Downtown,13303,Mexican Restaurant,111,306 King St W,45dbb0009135e465f49f054517cbe74d


In [308]:
#save data to use in another notebook 
foursquare_df.to_csv('../data/bike_stations_hamilton_poi_foursquare.csv')

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice.
___

In [30]:
YELP_KEY = os.getenv('YELP_API_KEY')

In [31]:
# Create list with desired categories from Yelp
dining_cat_yelp = ['restaurants', 'bars', 'pubs']

In [163]:
# Function to make an http request to yelp for each bike station
def get_poi_yelp(latitude, longitude, radius, api_key, categories):
    url = f'https://api.yelp.com/v3/businesses/search?latitude={latitude}&longitude={longitude}&radius={radius}&categories={categories}&sort_by=best_match'
    
    # Create dictionary for headers
    headers = {
        'Authorization': 'Bearer %s' % api_key,
        'accept': 'application/json'
    }
    
    reponse = requests.get(url, headers=headers)
    reponse_data = reponse.json()  

    return reponse_data['businesses']

In [51]:
# testing
res_2 = get_poi_yelp(latitude=43.259126, longitude=-79.877212, radius=1000, api_key=YELP_KEY, categories=','.join(dining_cat_yelp))

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)
___

In [56]:
# to explore the result of a single POI from the testing call
print(res_2[0].keys(), '\n')
res_2[0]

dict_keys(['id', 'alias', 'name', 'image_url', 'is_closed', 'url', 'review_count', 'categories', 'rating', 'coordinates', 'transactions', 'price', 'location', 'phone', 'display_phone', 'distance']) 



{'id': 'vqyK2q3zJ74TIT1-7Bf3Tg',
 'alias': 'la-luna-hamilton',
 'name': 'La Luna',
 'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/OBRpyj_wUqmDwKLtKU4pCA/o.jpg',
 'is_closed': False,
 'url': 'https://www.yelp.com/biz/la-luna-hamilton?adjust_creative=gFi5fjmEU0QolB5fod8Nww&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=gFi5fjmEU0QolB5fod8Nww',
 'review_count': 65,
 'categories': [{'alias': 'mideastern', 'title': 'Middle Eastern'}],
 'rating': 4.0,
 'coordinates': {'latitude': 43.25942165323912,
  'longitude': -79.87848757635106},
 'transactions': [],
 'price': '$$',
 'location': {'address1': '306 King Street W',
  'address2': '',
  'address3': '',
  'city': 'Hamilton',
  'zip_code': 'L8P 1B1',
  'country': 'CA',
  'state': 'ON',
  'display_address': ['306 King Street W', 'Hamilton, ON L8P 1B1', 'Canada']},
 'phone': '+19055770233',
 'display_phone': '+1 905-577-0233',
 'distance': 108.39989911183157}

Put your parsed results into a DataFrame
___

In [4]:
# Load the df of the bike stations containing the latitude and longitude
bike_stations_load2 = pd.read_csv('../data/bike_stations_hamilton.csv', index_col=0)

In [225]:
# Create an empty dataframe
yelp_df = pd.DataFrame()

In [226]:
# Loop through the bike_stations df to extract the lat/long/id of each row and make a new http request
for index, row in bike_stations_load2.iterrows():
     # Make the http request using row attributes
    poi_yelp = get_poi_yelp(latitude=row['latitude'], longitude=row['longitude'], radius=1000, api_key=YELP_KEY, categories=','.join(dining_cat_yelp))
    # Turn the response into a dataframe
    new_df = pd.json_normalize(poi_yelp)
    # Add a column with the corresponding bike station id
    new_df['station_id'] = row['id'] 
    # Add the new dataframe to yelp_df
    yelp_df = pd.concat([yelp_df, new_df])  

In [227]:
# Drop unnecessary columns from dataframe
columns_to_drop_yelp = [
    'alias', 'image_url', 'is_closed', 'url', 'transactions', 'phone',
    'display_phone', 'coordinates.latitude',
    'coordinates.longitude', 'location.address1', 'location.address2',
    'location.address3', 'location.city', 'location.zip_code',
    'location.country', 'location.state','categories']

yelp_df = yelp_df.drop(columns=columns_to_drop_yelp)

# Rename columns
yelp_df = yelp_df.rename(columns={"id": "poi_id", "location.display_address": "full_address"})

In [228]:
print(yelp_df.shape, '\n')
print(yelp_df.info())

(2768, 8) 

<class 'pandas.core.frame.DataFrame'>
Index: 2768 entries, 0 to 19
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   poi_id        2768 non-null   object 
 1   name          2768 non-null   object 
 2   review_count  2768 non-null   float64
 3   rating        2768 non-null   float64
 4   price         1700 non-null   object 
 5   distance      2768 non-null   float64
 6   full_address  2768 non-null   object 
 7   station_id    2768 non-null   object 
dtypes: float64(3), object(5)
memory usage: 194.6+ KB
None


In [229]:
yelp_df.head()

Unnamed: 0,poi_id,name,review_count,rating,price,distance,full_address,station_id
0,vqyK2q3zJ74TIT1-7Bf3Tg,La Luna,65.0,4.0,$$,108.42455,"[306 King Street W, Hamilton, ON L8P 1B1, Canada]",45dbb0009135e465f49f054517cbe74d
1,bHecMQ85o3ayw1t9hRA90g,The French,98.0,4.0,$$$,814.105154,"[37 King William Street, Hamilton, ON L8R 1A1,...",45dbb0009135e465f49f054517cbe74d
2,Q4oLgsU62VPR28pBm0vCXw,Earth To Table : Bread Bar,293.0,4.0,$$,1052.141521,"[258 Locke Street S, Hamilton, ON L8P 4B9, Can...",45dbb0009135e465f49f054517cbe74d
3,9oIPWjU3DvtSdORv6I2toQ,The Ship,209.0,4.0,$$,970.855528,"[23 Augusta Street, Hamilton, ON L8N 1P6, Canada]",45dbb0009135e465f49f054517cbe74d
4,752Fv2jKafftvoS3Twkqyg,Hambrgr,202.0,4.0,$$,858.672096,"[49 King William Street, Hamilton, ON L8R 1A2,...",45dbb0009135e465f49f054517cbe74d


In [230]:
#save data to use in another notebook 
yelp_df.to_csv('../data/bike_stations_hamilton_poi_yelp.csv')

# Comparing Results

Which API provided you with more complete data? Provide an explanation.
___

As seen in the samples below from both data frames, the Yelp API provides better details such as review count, rating, and price from a single http request based on latitude and longitude.
If we wanted to retrieve similar information from Foursquare, we would need to first make an http request based on latitude and longitude to get all the POI and then make another http request for every single POI for further details.
For this reason, the Yelp API is better. We get more detailed results with fewer steps.

In [351]:
print('Foursquare results')
foursquare_df.head()

Foursquare results


Unnamed: 0,poi_fsq_id,poi_name,category_id,category_name,poi_distance,poi_address,station_id
0,579b8d6b498ea8f02d7fc402,Coop Hamilton,13055,Fried Chicken Joint,22,274 King St W,45dbb0009135e465f49f054517cbe74d
1,5796abdc498eb9b6b1d59458,Coop Wicked Chicken Hamilton,13055,Fried Chicken Joint,19,274 King St W,45dbb0009135e465f49f054517cbe74d
2,4c57494d30d82d7fd583d962,La Luna Restaurant Downtown,13298,Lebanese Restaurant,111,306 King St W,45dbb0009135e465f49f054517cbe74d
3,4c57494d30d82d7fd583d962,La Luna Restaurant Downtown,13302,Mediterranean Restaurant,111,306 King St W,45dbb0009135e465f49f054517cbe74d
4,4c57494d30d82d7fd583d962,La Luna Restaurant Downtown,13303,Mexican Restaurant,111,306 King St W,45dbb0009135e465f49f054517cbe74d


In [350]:
print('Yelp results')
yelp_df.head()

Yelp results


Unnamed: 0,poi_id,name,review_count,rating,price,distance,full_address,station_id
0,vqyK2q3zJ74TIT1-7Bf3Tg,La Luna,65.0,4.0,$$,108.42455,"[306 King Street W, Hamilton, ON L8P 1B1, Canada]",45dbb0009135e465f49f054517cbe74d
1,bHecMQ85o3ayw1t9hRA90g,The French,98.0,4.0,$$$,814.105154,"[37 King William Street, Hamilton, ON L8R 1A1,...",45dbb0009135e465f49f054517cbe74d
2,Q4oLgsU62VPR28pBm0vCXw,Earth To Table : Bread Bar,293.0,4.0,$$,1052.141521,"[258 Locke Street S, Hamilton, ON L8P 4B9, Can...",45dbb0009135e465f49f054517cbe74d
3,9oIPWjU3DvtSdORv6I2toQ,The Ship,209.0,4.0,$$,970.855528,"[23 Augusta Street, Hamilton, ON L8N 1P6, Canada]",45dbb0009135e465f49f054517cbe74d
4,752Fv2jKafftvoS3Twkqyg,Hambrgr,202.0,4.0,$$,858.672096,"[49 King William Street, Hamilton, ON L8R 1A2,...",45dbb0009135e465f49f054517cbe74d


Get the top 10 restaurants according to their rating
___

In [343]:
# Make a copy of the yelp dataframe with poi only
yelp_unique_poi = yelp_df[['poi_id', 'name', 'review_count', 'rating', 'price']].copy()

# Drop duplicates
yelp_unique_poi.drop_duplicates(subset=None, keep='first', inplace=True, ignore_index=False)

In [344]:
# Filter sort by rating and review_count
yelp_unique_poi.sort_values(by=['rating', 'review_count'], ascending=False).head(10)

Unnamed: 0,poi_id,name,review_count,rating,price
17,XqZOIEbNX7mdIg1Cwb1RWw,Famous Recipe,9.0,5.0,
8,ccvY3bOziuqDnBZSAI6CoQ,Bon Temps,7.0,5.0,
4,Wb3Rs6B5y6Z-5Pdi4-j6tQ,Chung Chun Rice Hot Dog,7.0,5.0,
7,VD1YD4oALbOUWUspR4_3fQ,Mancala Monk Board Game Cafe,7.0,5.0,
4,wNHa-Hze5WuGQjFG9aqdFw,Tomah,7.0,5.0,
6,AhbQMW5iC9jhXNc7gYyH5w,Pita Pit,6.0,5.0,$
13,GL_qt7r-Cj078pGDbBpToA,Rustic Reuben,6.0,5.0,
10,XjV0R0eLX8FiUOIMB5FXFQ,Speedy Subs,6.0,5.0,$
17,c-H6M3ZYh86IsKQarfchUA,193 Bench Kitchen,6.0,5.0,
13,cJG7xTMdpy-Y4Lr58hzuyw,Hanma Japanese Foods,4.0,5.0,
