In [290]:
# Import modules
import os
import pandas as pd
import json
import requests
from IPython.display import JSON

# Foursquare

Send a request to Foursquare for all the bike stations in your city of choice within a 1000m radius.

In [239]:
# Load required coordinates from the 'sobi_bikes' CSV file (relative path, resolved against the notebook's working directory)
csv = pd.read_csv('sobi_bikes', sep= ',')
# Plain Python list of the 'coordinates' column; each entry is later concatenated
# directly into the Foursquare `ll=` query parameter, so entries must already be strings
cords = csv['coordinates'].values.tolist()

In [240]:
# Build one Foursquare Places search URL per bike-station coordinate string and collect them in `urls` for the request loop
urls = []

def json_load(list):
    """Append one Foursquare search URL to the module-level ``urls`` per entry of ``list``.

    Each entry is concatenated directly after ``ll=``, so it must already be a string
    (presumably "lat,long" -- TODO confirm against the 'coordinates' column).
    The search is fixed to a 1000 m radius, category 13065 and at most 50 results.

    Returns None; the result is the side effect on ``urls``.
    """
    prefix = "https://api.foursquare.com/v3/places/search?ll="
    suffix = "&radius=1000&categories=13065&fields=name%2Crating%2Ccategories%2Clocation&limit=50"
    for point in list:
        urls.append(prefix + point + suffix)
        
# Fill `urls` with one Foursquare search URL per loaded coordinate
json_load(cords)

# Verify function output: number of URLs built
len(urls)

143

In [241]:
# Create empty DataFrame to store JSON response results
fsq_df = pd.DataFrame()

# Request one Foursquare search URL, flatten its 'results' list and append the rows to fsq_df
def get_food(url):
    """Fetch ``url`` from the Foursquare Places API and append its results to ``fsq_df``.

    Parameters
    ----------
    url : str
        Fully built search URL (see ``json_load``).

    Returns
    -------
    None
        The flattened rows are appended to the module-level ``fsq_df``.

    Raises
    ------
    KeyError
        If the ``FSQ_API`` environment variable is not set, or a successful
        response has no ``'results'`` key.
    requests.HTTPError
        If the API answers with a 4xx/5xx status (bad key, rate limit, ...).

    Note: the Yelp section defines another function with this same name; whichever
    definition ran last is the one the kernel calls.
    """
    global fsq_df

    fsq_key = os.environ['FSQ_API']
    headers = {"accept": "application/json", "Authorization": fsq_key}
    # A timeout keeps one stalled connection from hanging the whole request loop.
    response = requests.get(url, headers=headers, timeout=30)
    # Surface API errors as HTTP errors instead of a confusing KeyError on 'results' below.
    response.raise_for_status()

    add_df = pd.json_normalize(response.json()['results'])

    fsq_df = pd.concat([fsq_df, add_df], ignore_index=True)
    return

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [242]:
# Request every station URL in turn; each get_food call appends its results to the global fsq_df
for url in urls:
    get_food(url)

# Verify function output
fsq_df.shape

(5616, 13)

Put your parsed results into a DataFrame

In [243]:
# Load into DataFrame
# NOTE: this head() call is not the last expression in the cell, so its result is not displayed
fsq_df.head()
# Shorten the flattened 'location.address' column name to 'address'
edit_fsq = fsq_df.rename(columns={'location.address':'address'})
# Keep only the POI details of interest
# NOTE: the name `subset` is also assigned by the Yelp section, so it refers to whichever of the two cells ran last
subset = edit_fsq[['address', 'name', 'rating', 'categories']]

# Display the DataFrame
print(subset.shape)
subset.head()

(5616, 4)


Unnamed: 0,address,name,rating,categories
0,4 Queen St S,Scottish Rite,7.9,"[{'id': 10032, 'name': 'Night Club', 'icon': {..."
1,24 Hess St S,Gown & Gavel,7.5,"[{'id': 13003, 'name': 'Bar', 'icon': {'prefix..."
2,112 George St,Smoke's Poutinerie,7.5,"[{'id': 13145, 'name': 'Fast Food Restaurant',..."
3,41 King William St,The Mule,8.7,"[{'id': 13306, 'name': 'Taco Restaurant', 'ico..."
4,193 James St N,Mulberry Street Coffeehouse,8.5,"[{'id': 13034, 'name': 'Café', 'icon': {'prefi..."


In [293]:
# Save the Foursquare results as a csv.
# The frame is built straight from fsq_df rather than read from the shared `subset` name:
# the Yelp section rebinds `subset`, so running this cell after the Yelp cells would
# otherwise write Yelp rows into the 'fsq' file.
fsq_out = fsq_df.rename(columns={'location.address': 'address'})[['address', 'name', 'rating', 'categories']]
# The row index is written as the first column, as before.
fsq_out.to_csv('fsq', sep=',')

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [245]:
# Load required coordinates from the 'sobi_bikes' CSV file (relative path, resolved against the notebook's working directory)
csv = pd.read_csv('sobi_bikes', sep= ',')
# Parallel lists: lat[i] and long[i] come from the same row of the file
lat = csv['latitude'].values.tolist()
long = csv['longitude'].values.tolist()

In [246]:
# Build one Yelp Fusion business-search URL per bike station and collect them in `urls` for the request loop
urls = []

def json_load(coord1, coord2):
    """Append one Yelp search URL to the module-level ``urls`` per (latitude, longitude) pair.

    Parameters
    ----------
    coord1 : sequence
        Latitudes; each value is converted with ``str()``.
    coord2 : sequence
        Longitudes, parallel to ``coord1``.

    Returns
    -------
    None
        The result is the side effect on ``urls``. Pairing stops at the shorter
        of the two inputs.

    The loop is driven by the arguments themselves, so the function no longer
    depends on the ``cords`` list created in the Foursquare section.
    Note: this definition replaces the Foursquare ``json_load`` defined earlier.
    """
    for lat, long in zip(coord1, coord2):
        urls.append(
            'https://api.yelp.com/v3/businesses/search?term=restaurants&latitude=' + str(lat)
            + '&longitude=' + str(long) + '&radius=1000'
        )

# Fill `urls` with one Yelp search URL per station
json_load(lat, long)

# Verify function output: number of URLs built
len(urls)

143

In [247]:
# Create empty DataFrame to store JSON response results
yelp_df = pd.DataFrame()

# Request one Yelp search URL, flatten its 'businesses' list and append the rows to yelp_df
def get_food(url):
    """Fetch ``url`` from the Yelp Fusion API and append its businesses to ``yelp_df``.

    Parameters
    ----------
    url : str
        Fully built business-search URL (see ``json_load``).

    Returns
    -------
    None
        The flattened rows are appended to the module-level ``yelp_df``.

    Raises
    ------
    KeyError
        If the ``YELP_API`` environment variable is not set, or a successful
        response has no ``'businesses'`` key.
    requests.HTTPError
        If the API answers with a 4xx/5xx status (bad key, rate limit, ...).

    Note: this definition replaces the Foursquare ``get_food`` defined earlier
    in the notebook.
    """
    global yelp_df

    yelp_key = os.environ['YELP_API']
    headers = {"accept": "application/json", "Authorization": 'bearer %s' % yelp_key}
    # A timeout keeps one stalled connection from hanging the whole request loop.
    response = requests.get(url, headers=headers, timeout=30)
    # Surface API errors as HTTP errors instead of a confusing KeyError on 'businesses' below.
    response.raise_for_status()

    add_df = pd.json_normalize(response.json()['businesses'])

    yelp_df = pd.concat([yelp_df, add_df], ignore_index=True)
    return

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [248]:
# Request every station URL in turn; each get_food call appends its results to the global yelp_df
for url in urls:
    get_food(url)

# Verify function output
yelp_df.shape

(2707, 24)

Put your parsed results into a DataFrame

In [284]:
# Load into a DataFrame
# NOTE: this head() call is not the last expression in the cell, so its result is not displayed
yelp_df.head()
# Shorten the flattened nested column names ('location.address1', 'coordinates.*') to plain ones
edit_yelp = yelp_df.rename(columns={'location.address1':'address', 'coordinates.latitude': 'latitude', 'coordinates.longitude': 'longitude'})
# Keep only the POI details of interest
# NOTE: this rebinds `subset`, which the Foursquare section also assigns
subset = edit_yelp[['address', 'name', 'rating', 'categories', 'latitude', 'longitude']]

# Display the DataFrame
print(subset.shape)
subset.head()

(2707, 6)


Unnamed: 0,address,name,rating,categories,latitude,longitude
0,258 Locke Street S,Earth To Table : Bread Bar,4.0,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",43.25284,-79.88702
1,96 George Street,Electric Diner,4.0,"[{'alias': 'diners', 'title': 'Diners'}, {'ali...",43.25845,-79.878
2,41 King William Street,The Mule,4.0,"[{'alias': 'mexican', 'title': 'Mexican'}]",43.25726,-79.86713
3,31 King William Street,Berkeley North,4.5,"[{'alias': 'bars', 'title': 'Bars'}, {'alias':...",43.257405,-79.867715
4,43 King William Street,Diplomat,4.0,"[{'alias': 'venues', 'title': 'Venues & Event ...",43.257244,-79.867082


In [292]:
# Save the DataFrame as a csv
# `subset` is expected to hold the Yelp selection at this point (the name is shared with the Foursquare section).
# index=False leaves the row index out of the file.
subset.to_csv('yelp', sep=',', index=False)

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

After requesting data from each API, I believe the Yelp Fusion API provided me with more complete data. Not only was Yelp's documentation very clear, but the formatting of its JSON responses was also concise and effective, and its restaurant category naming conventions (Chinese, Korean, Italian, etc.) were efficient. This was especially helpful, since I was trying to capture as much precise and relevant information as I could about restaurants relative to Hamilton's SoBi bike stations. Additionally, Yelp is a company specializing in points of interest that relate to food and drink, making it an easier choice in terms of results and data.

Get the top 10 restaurants according to their rating

In [291]:
# Several restaurants share the maximum rating (5.0), so rating alone cannot rank them.
# Ties are therefore broken by review count: among equally rated places, more reviews ranks higher.

# Intermediate names deliberately avoid `filter`, which would shadow the Python builtin
# for the rest of the kernel session.
yelp_ranked = yelp_df.rename(columns={'location.address1': 'address'})[
    ['name', 'address', 'rating', 'review_count']
]

top10 = (
    yelp_ranked
    # One row per restaurant name; first() keeps the first non-null value of each column.
    .groupby('name')
    .first()
    .sort_values(['rating', 'review_count'], ascending=False)
    .reset_index()
    .head(10)
)

top10

Unnamed: 0,name,address,rating,review_count
0,Maipai,631 Barton Street E,5.0,9
1,Famous Recipe,158 King Street W,5.0,7
2,Mancala Monk Board Game Cafe,1229 Cannon Street E,5.0,7
3,193 Bench Kitchen,193 King Street E,5.0,6
4,Bon Temps,61 Young Street,5.0,6
5,Chung Chun Rice Hot Dog,1051 King Street W,5.0,6
6,Tomah,132 Queen Street S,5.0,6
7,Cafe Baffico,1375 King Street E,5.0,5
8,Pita Pit,1056 King Street West,5.0,5
9,Manna Korean Takeout,1050 King Street W,5.0,4
