In [324]:
# imports
import requests 
import os 
import json
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

In [235]:
# get data from city bike api
df = pd.read_csv('station_info.tsv', sep='\t')
df.head()

Unnamed: 0.1,Unnamed: 0,city,station_name,empty_slots,slots,free_bikes,ebikes,latitude,longitude,timestamp
0,0,Toronto,Queen St E / Woodward Ave,12,19,5,0,43.665269,-79.319796,2023-06-05T15:38:08.527000Z
1,1,Toronto,Primrose Ave / Davenport Rd,12,15,3,0,43.67142,-79.445947,2023-06-05T15:38:08.527000Z
2,2,Toronto,Queen St. E / Rhodes Ave.,13,23,10,0,43.666224,-79.317693,2023-06-05T15:38:08.528000Z
3,3,Toronto,Bond St / Queen St E,8,25,16,0,43.653236,-79.376716,2023-06-05T15:38:08.533000Z
4,4,Toronto,Church St / Alexander St,12,15,2,0,43.663722,-79.380288,2023-06-05T15:38:08.534000Z


# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [233]:
api_key = os.environ["FOURSQUARE_API_KEY"]

In [254]:
results_list = []

def get_foursquare(api_key):
    for index, row in df.iterrows():
        lat = row['latitude']
        lng = row['longitude']
            
        url = "https://api.foursquare.com/v3/places/search?&term=restaurantsblatitude={}&longitude={}&radius=1000&limit=50".format(
                lat,
                lng
            )

        headers = {"Accept": "application/json"}

        headers['Authorization'] = api_key

        response = requests.get(url, headers=headers)

        results_list.append(response.json())


    return print(results_list)



In [None]:
get_foursquare(api_key)

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [256]:
 
len(results_list[0]['results'])

50

In [None]:
foursqr_list = []
for results in results_list:
    for result in results['results']:
    
        foursqr_info = {
            'id': result['fsq_id'],
            'latitude' : result['geocodes']['main']['latitude'],
            'longitude' : result['geocodes']['main']['longitude'],
            'address' : result['location']['formatted_address']
        }
        if 'categories' in result and len(result['categories']) != 0 and 'id' in result['categories'][0]:

            foursqr_info['categories'] = result['categories'][0]['id']

        else:

            foursqr_info['categories'] = '' 


        foursqr_list.append(foursqr_info)
    
foursqr_df = pd.DataFrame(foursqr_list)        


Put your parsed results into a DataFrame

In [245]:
foursqr_df.head()

Unnamed: 0,id,latitude,longitude,address,categories
0,4e6d17d71f6e84ca4cab2b7e,43.473657,-80.552997,"Columbia St W, Waterloo ON",16023
1,4f32d38ce4b09fde5f81a25e,43.474919,-80.552671,"Bearinger Rd (West mount), Waterloo ON",16019
2,4c2cf212ae6820a16d281a43,43.47416,-80.552344,"270 Columbia St W (University of Waterloo), Wa...",16000
3,5116a7ede4b07fc30751b131,43.475208,-80.551885,"Frank Tompa Dr (Hagey), Waterloo ON",16030
4,51267601e4b0fa3868af8180,43.476916,-80.552946,"275 Frank Tompa Dr, Waterloo ON N2L 0A1",10000


In [166]:
#foursqr_df.to_csv('foursqur_info.tsv', sep='\t')

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [259]:
api_key = os.environ["YELP_API_KEY"]

In [468]:
yelp_results_list = []

def get_yelp():

        for index, row in df.iterrows():
                lat = row['latitude']
                lng = row['longitude']

                yelp_url = "https://api.yelp.com/v3/businesses/search?latitude={}&longitude={}&radius=1000&limit=20".format(
                        lat,
                        lng
                )

                headers = {
                "accept": "application/json",
                "Authorization": "Bearer"+" "+ yelp_api_key
                }


                yelp_response = requests.get(yelp_url, headers=headers)

                yelp_results_list.append(yelp_response.text)

        return print(yelp_results_list)


In [353]:
len(yelp_results_list)


663

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [None]:
# parse text data to json format
temp_list = []
for item in yelp_results_list:

    new_1 = json.loads(item)
    temp_list.append(new_1)

In [464]:

def get_yelp_data():

    for item in yelp_results_list:

        return print(item)



Put your parsed results into a DataFrame

In [459]:
yelp_list = []
for item in temp_list:
    
    if 'businesses' in item:

        for item in item['businesses']:
            
            yelp_info = {
                'id': item['id'],
                'name': item['name'],
                'is_closed': item['is_closed'],
                'latitude': item['coordinates']['latitude'],
                'longitude': item['coordinates']['longitude'],
                'address': item['location']['address1'],
                'rating': item['rating'],
                'categories': item['categories'][0]['title']
            }
            yelp_list.append(yelp_info)

yelp_df = pd.DataFrame(yelp_list)

In [460]:
yelp_df.shape

(9811, 8)

In [463]:
yelp_df.head()

Unnamed: 0,id,name,is_closed,latitude,longitude,address,rating,categories
0,oWTn2IzrprsRkPfULtjZtQ,The Burger's Priest,False,43.6667,-79.315585,1636 Queen Street E,3.5,Burgers
1,2aIgbnGUg8VC0u9iXO-wnQ,O Sushi,False,43.66654,-79.31693,6 Coxwell Avenue,3.5,Sushi Bars
2,yP71_LfUPnGIRqA2g09RoQ,Lake Inez,False,43.67234,-79.32064,1471 Gerrard Street E,4.5,Bars
3,Ohengzi4viLEXqhK50aLxg,Jaclyn's,False,43.66627,-79.31802,1588 Queen Street E,4.5,Caribbean
4,NPHZkn1e-tSJAbo8Zm9rYw,Burrito Bandidos,False,43.666561,-79.316422,1614 Queen Street E,3.5,Mexican


In [462]:
#yelp_df.to_csv('yelp_info.tsv', sep='\t')

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

1. Foursquare API returns with more data (18565 rows) while Yelp API returns with less (9811 rows). However, from a sample size perspective, both of the APIs return with enough data for modelling.

2. In Yelp API, extra information such as close/open status of the location and rating can be accessed which may help for more exploratory analysis.

3. In Foursquare API, each of the category has a unique id so category of location can be easily accessed while method like regular expression may be adopted to get the same results from Yelp API.

Get the top 10 restaurants according to their rating

In [467]:
sorted_yelp_df = yelp_df.sort_values('rating',ascending=False)
sorted_yelp_df.head(10)

Unnamed: 0,id,name,is_closed,latitude,longitude,address,rating,categories
844,BPKtVKuEfubfU2LZFSlf1w,Gurume Sushi,False,43.661391,-79.380995,45 Carlton Street,5.0,Sushi Bars
5180,H5oLlt9G6ifU11n86oP5Fg,COBS Bread,False,43.651403,-79.475677,2204 Bloor Street W,5.0,Bakeries
8184,Ado06qsu31LI6ETUgSJq0Q,Kebab Kountry,False,43.762769,-79.215067,3750 Lawrence Avenue E,5.0,Turkish
8778,jwd7cmkQV0z-saVoOnXdIA,Rani Fast Food,False,43.790339,-79.17421,3600 Ellesmere Road,5.0,Fast Food
8777,d4buwaJL5br7NQbibXtIoA,2045 Spicyspace,False,43.7902,-79.19542,790 Military Trail,5.0,Comfort Food
5636,BPKtVKuEfubfU2LZFSlf1w,Gurume Sushi,False,43.661391,-79.380995,45 Carlton Street,5.0,Sushi Bars
5219,MEH-A03F0tSTrjLnAHh-PQ,The Cafe,False,43.6796,-79.357819,855 Broadview Avenue,5.0,Cafes
3028,8aTClyVNbDbmylGE93WSyw,Tacos Moras,False,43.682495,-79.420669,547 St Clair Avenue W,5.0,Mexican
1124,sg9k_AhqasHCma-_JnGLsw,Palace Pier Park,False,43.63123,-79.472008,,5.0,Parks
8055,17ukE9MpDvz2fEWQ1YKYkA,Pizza Nova,False,43.74684,-79.19933,123 Guildwood Pkwy,5.0,Pizza
