In [1]:
# Import necessary libraries
import os # use this to access your environment variables
import requests # use this to call the APIs
import pandas as pd # use this to create dataframe
import json # use this to save API output to JSON file

In [3]:
# Read the API keys from environment variables so that no credentials are stored in the notebook.
# os.environ[...] raises KeyError if the variable is not set.
foursquare_api_key = os.environ['FOURSQUARE_API_KEY']
yelp_api_key = os.environ['YELP_API_KEY']

In [3]:
# Load the csv file with the city bikes station data.
# The path is relative to the project folder (the same 'data/' folder the files
# further down are saved to) so the notebook is not tied to one user's machine.
city_bikes_data = pd.read_csv('data/city_bikes_data.csv')
city_bikes_data.head(5)

Unnamed: 0,Station Name,Latitude,Longitude,Number of Bikes
0,Queen St E / Woodward Ave,43.665269,-79.319796,7
1,Primrose Ave / Davenport Rd,43.67142,-79.445947,13
2,Queen St. E / Rhodes Ave.,43.666224,-79.317693,14
3,Bond St / Queen St E,43.653236,-79.376716,0
4,Church St / Alexander St,43.663722,-79.380288,12


In [5]:
# Build a list of [latitude, longitude] pairs, one pair per bike station.
# Both the Foursquare and the Yelp API calls iterate over this list.
coordinates = (
    city_bikes_data
    .loc[:, ['Latitude', 'Longitude']]
    .values
    .tolist()
)

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [7]:
# Request headers for the Foursquare API: ask for JSON and authenticate with the API key
headers = {
    'Accept': 'application/json',
    'Authorization': foursquare_api_key
}

# Every URL that was requested, in request order (kept for reference)
urls_list_foursquare = []

# One raw JSON response (a dict) per bike station
data_foursquare = []

# Call the Foursquare place search once per bike station
# (1000 m radius, category id 13065, at most 50 results per call).
# Each response is tagged with the station's lat/lon, which makes joining the datasets easier later.
for lat, lon in coordinates:
    url = f'https://api.foursquare.com/v3/places/search?ll={lat}%2C{lon}&radius=1000&categories=13065&limit=50'
    # A timeout keeps a single stalled connection from hanging the whole run
    response_foursquare = requests.get(url=url, headers=headers, timeout=30)
    # Stop on an HTTP error (e.g. invalid key or rate limit) instead of storing an error
    # payload that the parsing step, which reads the 'results' key, cannot handle
    response_foursquare.raise_for_status()
    data_dict_foursquare = response_foursquare.json()
    data_dict_foursquare['bike_station_lat'] = lat
    data_dict_foursquare['bike_station_lon'] = lon
    data_foursquare.append(data_dict_foursquare)
    urls_list_foursquare.append(url)
# Note: the output is not displayed in order to keep notebook clean

In [45]:
# Save API call raw data into a JSON file and store it in the project's data folder.
# The folder is created first if it does not exist, so the results of the API calls
# are not lost to a FileNotFoundError when the file is opened.
os.makedirs('data', exist_ok=True)
with open('data/data_foursquare.json', 'w') as data_foursquare_json:
    json.dump(data_foursquare, data_foursquare_json)

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [8]:
# Column accumulators: one list per field, filled in lockstep so that
# position k in every list describes the same restaurant/station pair.
# Note: bike_station_lat / bike_station_lon are also the names used by the
# Yelp parsing cell, so the dataframe should be built before that cell runs.
bike_station_lat = []
bike_station_lon = []
restaurant_name_foursquare = []
category_name_foursquare = []
latitude_foursquare = []
longitude_foursquare = []
distance_foursquare = []

# Walk through every station response and every place listed in it,
# copying the wanted fields into the accumulators above
for station_response in data_foursquare:
    for place in station_response['results']:
        restaurant_name_foursquare.append(place['name'])
        # Only the first listed category is kept
        category_name_foursquare.append(place['categories'][0]['name'])
        main_geocode = place['geocodes']['main']
        latitude_foursquare.append(main_geocode['latitude'])
        longitude_foursquare.append(main_geocode['longitude'])
        distance_foursquare.append(place['distance'])
        # Carry the originating bike station's coordinates along with the place
        bike_station_lat.append(station_response['bike_station_lat'])
        bike_station_lon.append(station_response['bike_station_lon'])

Put your parsed results into a DataFrame

In [9]:
# Map each dataframe column name to the list of parsed values that fills it
foursquare_data_dict = dict(zip(
    (
        'Restaurant Name',
        'Category Name',
        'Latitude',
        'Longitude',
        'Distance',
        'Bike Station Lat',
        'Bike Station Lon',
    ),
    (
        restaurant_name_foursquare,
        category_name_foursquare,
        latitude_foursquare,
        longitude_foursquare,
        distance_foursquare,
        bike_station_lat,
        bike_station_lon,
    ),
))
# Build the Foursquare dataframe and preview its first five rows
foursquare_df = pd.DataFrame(data=foursquare_data_dict)
foursquare_df.head(n=5)

Unnamed: 0,Restaurant Name,Category Name,Latitude,Longitude,Distance,Bike Station Lat,Bike Station Lon
0,Rorschach Brewing,Brewery,43.663372,-79.319902,194,43.665269,-79.319796
1,The Sidekick,Coffee Shop,43.664574,-79.325048,440,43.665269,-79.319796
2,Chino Locos,Chinese Restaurant,43.664482,-79.325548,470,43.665269,-79.319796
3,Hasting Snack Bar,Lounge,43.663791,-79.328899,759,43.665269,-79.319796
4,Udupi Palace,Indian Restaurant,43.672665,-79.321041,810,43.665269,-79.319796


In [None]:
# Save dataframe as a CSV file (without the index column) and store it in the project's data folder.
# The folder is created first if it does not exist.
os.makedirs('data', exist_ok=True)
foursquare_df.to_csv('data/foursquare_data.csv', index=False)

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [10]:
# Request headers for the Yelp API: ask for JSON and authenticate with a Bearer token
headers = {
    "accept": "application/json",
    "Authorization": f"Bearer {yelp_api_key}"
}

# Every URL that was requested, in request order (kept for reference)
urls_list_yelp = []

# One raw JSON response (a dict) per bike station
data_yelp = []

# Call the Yelp business search once per bike station
# (term 'restaurants', 1000 m radius, at most 50 results per call).
# Each response is tagged with the station's lat/lon, which makes joining the datasets easier later.
for lat, lon in coordinates:
    url = f'https://api.yelp.com/v3/businesses/search?latitude={lat}&longitude={lon}&term=restaurants&radius=1000&limit=50'
    # A timeout keeps a single stalled connection from hanging the whole run
    response_yelp = requests.get(url=url, headers=headers, timeout=30)
    # Stop on an HTTP error (e.g. invalid key or rate limit) instead of storing an error
    # payload that the parsing step, which reads the 'businesses' key, cannot handle
    response_yelp.raise_for_status()
    data_dict_yelp = response_yelp.json()
    data_dict_yelp['bike_station_lat'] = lat
    data_dict_yelp['bike_station_lon'] = lon
    data_yelp.append(data_dict_yelp)
    urls_list_yelp.append(url)
# Note: the output is not displayed in order to keep notebook clean

In [45]:
# Save API call raw data into a JSON file and store it in the project's data folder.
# The folder is created first if it does not exist, so the results of the API calls
# are not lost to a FileNotFoundError when the file is opened.
os.makedirs('data', exist_ok=True)
with open('data/data_yelp.json', 'w') as data_yelp_json:
    json.dump(data_yelp, data_yelp_json)

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [11]:
# Column accumulators: one list per field, filled in lockstep so that
# position k in every list describes the same restaurant/station pair.
# Note: bike_station_lat / bike_station_lon reuse the names from the Foursquare
# parsing cell and are reset here, replacing the Foursquare values.
bike_station_lat = []
bike_station_lon = []
restaurant_name_yelp = []
category_name_yelp = []
latitude_yelp = []
longitude_yelp = []
distance_yelp = []
ratings_yelp = []
number_of_reviews_yelp = []
price_yelp = []

# Walk through every station response and every business listed in it,
# copying the wanted fields into the accumulators above
for station_response in data_yelp:
    for business in station_response['businesses']:
        restaurant_name_yelp.append(business['name'])
        # Only the first listed category is kept
        category_name_yelp.append(business['categories'][0]['alias'])
        business_coordinates = business['coordinates']
        latitude_yelp.append(business_coordinates['latitude'])
        longitude_yelp.append(business_coordinates['longitude'])
        distance_yelp.append(business["distance"])
        ratings_yelp.append(business['rating'])
        number_of_reviews_yelp.append(business['review_count'])
        # Price is not present for every business; record "no data" when the key is missing
        price_yelp.append(business.get('price', 'no data'))
        # Carry the originating bike station's coordinates along with the business
        bike_station_lat.append(station_response['bike_station_lat'])
        bike_station_lon.append(station_response['bike_station_lon'])

In [12]:
# Map each dataframe column name to the list of parsed values that fills it
yelp_data_dict = dict(zip(
    (
        'Restaurant Name',
        'Category Name',
        'Latitude',
        'Longitude',
        'Distance',
        'Rating',
        'Number of Reviews',
        'Price Point',
        'Bike Station Lat',
        'Bike Station Lon',
    ),
    (
        restaurant_name_yelp,
        category_name_yelp,
        latitude_yelp,
        longitude_yelp,
        distance_yelp,
        ratings_yelp,
        number_of_reviews_yelp,
        price_yelp,
        bike_station_lat,
        bike_station_lon,
    ),
))
# Build the Yelp dataframe and preview its first five rows
yelp_df = pd.DataFrame(data=yelp_data_dict)
yelp_df.head(n=5)

Unnamed: 0,Restaurant Name,Category Name,Latitude,Longitude,Distance,Rating,Number of Reviews,Price Point,Bike Station Lat,Bike Station Lon
0,Jaclyn's,caribbean,43.66627,-79.31802,172.375134,4.5,16,no data,43.665269,-79.319796
1,Casa Di Giorgios,italian,43.66685,-79.31518,408.715724,4.0,75,$$,43.665269,-79.319796
2,Tulia Osteria,pizza,43.66487,-79.32425,365.115394,5.0,1,no data,43.665269,-79.319796
3,Lake Inez,bars,43.67234,-79.32064,794.661955,4.5,94,$$$,43.665269,-79.319796
4,The Sidekick,cafes,43.664589,-79.325111,436.003295,4.5,31,$,43.665269,-79.319796


In [47]:
# Save dataframe as a CSV file (without the index column) and store it in the project's data folder.
# The folder is created first if it does not exist.
os.makedirs('data', exist_ok=True)
yelp_df.to_csv('data/yelp_data.csv', index=False)

# Comparing Results

**Which API provided you with more complete data? Provide an explanation.** <br>
When comparing both APIs, Yelp returned more detailed characteristics than Foursquare. For example, Yelp provides the restaurant's rating, number of reviews, and price point, which allows one to make a more informed decision when selecting a restaurant. <br>

However, it is worth mentioning that when an API result contains multiple "optional" data points, there is a risk that some of them are missing. For example, not all restaurants from the Yelp API had a price listed.

**Get the top 10 restaurants according to their rating** <br>
*Note: Foursquare does not return rating data, so the task will be completed using Yelp API data.*

In [6]:
# Load the Yelp dataframe csv into the notebook.
# The path is relative to the project folder and matches the 'data/yelp_data.csv'
# location the dataframe was saved to, so the notebook is not tied to one user's machine.
yelp_dataframe = pd.read_csv('data/yelp_data.csv')
yelp_dataframe.head(2)

Unnamed: 0,Restaurant Name,Category Name,Latitude,Longitude,Distance,Rating,Number of Reviews,Price Point,Bike Station Lat,Bike Station Lon
0,Jaclyn's,caribbean,43.66627,-79.31802,172.375134,4.5,16,no data,43.665269,-79.319796
1,Lake Inez,bars,43.67234,-79.32064,794.661955,4.5,94,$$$,43.665269,-79.319796


In [7]:
# Top 10 restaurants by rating.
# A restaurant can appear in the results of several bike stations, so rows are grouped by
# restaurant name and the numeric columns are averaged before sorting by 'Rating'.
# numeric_only is passed by keyword: the first positional argument of groupby(...).mean()
# is numeric_only, not a column name, so mean('Rating') only worked because the string is truthy.
yelp_top10 = (
    yelp_dataframe
    .groupby(['Restaurant Name'])
    .mean(numeric_only=True)
    .sort_values(['Rating'], ascending=False)
    .head(10)
)
yelp_top10

Unnamed: 0_level_0,Latitude,Longitude,Distance,Rating,Number of Reviews,Bike Station Lat,Bike Station Lon
Restaurant Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Shadi Shawarma,43.65102,-79.47575,868.66753,5.0,2.0,43.652615,-79.475123
Rani Fast Food,43.790339,-79.17421,1099.232821,5.0,1.0,43.784242,-79.184989
Stuu Stuu,43.666074,-79.380932,501.246381,5.0,2.0,43.667153,-79.379424
Stray,43.655947,-79.411008,296.564706,5.0,2.0,43.656296,-79.414663
Recette,43.64304,-79.42536,384.088133,5.0,1.0,43.641461,-79.42655
Hashiya,43.6604,-79.37896,92.098472,5.0,1.0,43.661205,-79.378981
Dumpling Dumpling,43.64971,-79.42023,198.220694,5.0,3.0,43.651111,-79.421389
The Smith House,43.658894,-79.393786,298.720909,5.0,1.0,43.659152,-79.393706
Casamiento,43.670582,-79.426069,743.094513,5.0,28.0,43.672282,-79.425591
The Stockyards Grind,43.66986,-79.48069,629.547871,5.0,4.0,43.666228,-79.486568


In [8]:
# To improve results, 'Number of Reviews' is used as a second sort key, so restaurants
# with the same rating are ordered by how many reviews they have.
# Now, the output shows the most highly rated and reviewed restaurants.
# numeric_only is passed by keyword: the first positional argument of groupby(...).mean()
# is numeric_only, not a column name, so mean('Rating') only worked because the string is truthy.
yelp_top10_improved = (
    yelp_dataframe
    .groupby(['Restaurant Name'])
    .mean(numeric_only=True)
    .sort_values(['Rating', 'Number of Reviews'], ascending=[False, False])
    .head(10)
)
yelp_top10_improved

Unnamed: 0_level_0,Latitude,Longitude,Distance,Rating,Number of Reviews,Bike Station Lat,Bike Station Lon
Restaurant Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
New Orleans Seafood & Steakhouse,43.67775,-79.50611,737.234341,5.0,172.0,43.68347,-79.510894
Zeal Burgers,43.700276,-79.517078,230.229184,5.0,137.0,43.698841,-79.519472
Gurume Sushi,43.661391,-79.380995,563.235423,5.0,53.0,43.660399,-79.381672
Papyrus,43.67709,-79.35324,696.098873,5.0,50.0,43.677733,-79.352
Mallo,43.664631,-79.410738,251.180954,5.0,44.0,43.666298,-79.410914
Hawker,43.65632,-79.4024,456.694693,5.0,38.0,43.657491,-79.400701
Haidilao Hot Pot,43.654633,-79.379839,508.836685,5.0,33.0,43.655756,-79.380578
Casamiento,43.670582,-79.426069,743.094513,5.0,28.0,43.672282,-79.425591
Chef Harwash,43.65179,-79.40485,375.455662,5.0,26.0,43.652281,-79.405799
Rikki Tikki,43.654081,-79.401486,785.194592,5.0,25.0,43.656424,-79.397131
