In [1]:
# imports
import os
import requests
from IPython.display import JSON
import pandas as pd

In [2]:
# retrieve bike station data from CityBikes
citybikes_df = pd.read_csv('../data/citybikes.csv')
citybikes_df

# extract latitude/longitude for each bike station
num_stations = citybikes_df.shape[0]
lat_lons = []
for i in range(num_stations):
    lat = citybikes_df.loc[i,'latitude']
    lon = citybikes_df.loc[i,'longitude']
    lat_lons.append(str(lat) + ',' + str(lon))
lat_lons[:5]

['-36.853235,174.770765',
 '-36.854561,174.763019',
 '-36.853544,174.757354',
 '-36.846182,174.76142',
 '-36.84771,174.767026']

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [3]:
# retrieve api key from local environment variables
foursquare_api_key = os.environ["FOURSQUARE_API_KEY"]
# create dictionary for headers
headers = {"Accept": "application/json"}
headers['Authorization'] = foursquare_api_key

# create dictionary for params, with placeholder for latitude/longitude
params = {}
params['ll'] = '0,0'
params['radius'] = '1000'
params['fields'] = 'rating,name,price,location,popularity,verified,distance,geocodes'
params['query'] = 'restaurant'

# set foursquare api url and placeholder for results
url = 'https://api.foursquare.com/v3/places/search'
foursquare_json = []

# query foursquare for each bike station and collect the results
for i in range(num_stations):
    params['ll'] = lat_lons[i]
    result = requests.get(url, params = params, headers = headers)
    result_json = result.json()['results']
    # record the station index for later use
    for poi in result_json:
        poi['station_idx'] = i
    foursquare_json.extend(result_json)
JSON(foursquare_json[:5])

<IPython.core.display.JSON object>

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [4]:
# placeholders for the foursquare parsing loop
details_json = []
details = {}

# parse through the foursquare response for desired details
for poi in foursquare_json:
    address = poi['location']['address']
    
    # price is scale from 1-4, we'll use -1 if data missing
    if 'price' not in poi:
        price = -1
    else:
        price = poi['price']
    
    # rating is scale from 0-10, we'll use -1 if data missing
    if 'rating' not in poi:
        rating = -1
    else:
        rating = poi['rating']
    
    lat = poi['geocodes']['main']['latitude']
    lon = poi['geocodes']['main']['longitude']
    
    details = {
        'name': poi['name'],
        'address': address,
        'rating': rating,
        'price': price,
        'popularity': poi['popularity'],
        'verified': poi['verified'],
        'station_idx': poi['station_idx'],
        'distance': poi['distance'],
        'lat_lon': str(lat) + ',' + str(lon)
        }
    details_json.append(details)
JSON(details_json[:5])

<IPython.core.display.JSON object>

Put your parsed results into a DataFrame

In [5]:
# convert parsed results in pandas DataFrame
foursquare_df = pd.json_normalize(details_json)
foursquare_df.head()

Unnamed: 0,name,address,rating,price,popularity,verified,station_idx,distance,lat_lon
0,Strata Cafe University of Auckland,2 Alfred Street,6.5,1,0.872042,False,0,179,"-36.851812,174.769457"
1,Relax a Lodge,30-38 Princes Street,6.7,1,0.847445,False,0,253,"-36.85209,174.768454"
2,Uni Sushi,30-38 Princes Street,6.3,2,0.782452,False,0,206,"-36.852262,174.768509"
3,Jin Hai Wan Chinese Seafood Restaurant,57-59 Wakefield St,7.9,1,0.862401,False,0,404,"-36.854518,174.765025"
4,Piko Cafe,55 Wellesley Street East,7.3,1,0.952997,True,0,400,"-36.852549,174.766527"


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [6]:
# retrieve api key from local environment variables
yelp_api_key = os.environ["YELP_API_KEY"]
# create dictionary for headers
headers = {"Accept": "application/json"}
headers['Authorization'] = 'Bearer ' + yelp_api_key

# create dictionary for params, with placeholder for latitude/longitude
params = {}
params['latitude'] = '0'
params['longitude'] = '0'
params['radius'] = '1000'
params['limit'] = '10' # limit of 10 to match foursquare
params['term'] = 'restaurant'

# query yelp for each bike station and collect the results
url = 'https://api.yelp.com/v3/businesses/search'
yelp_json = []
for i in range(num_stations):
    ll_split = lat_lons[i].split(',')
    params['latitude'] = ll_split[0]
    params['longitude'] = ll_split[1]
    result = requests.get(url, params = params, headers = headers)
    result_json = result.json()['businesses']
    # record the station index for later use
    for poi in result_json:
        poi['station_idx'] = i
    yelp_json.extend(result_json)
JSON(yelp_json[:5])

<IPython.core.display.JSON object>

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [7]:
# placeholders for the yelp parsing loop
details_json = []
details = {}

# parse through the yelp response for desired details
for poi in yelp_json:
    address = poi['location']['address1']
    
    # price is 1-4 $ symbols, we'll use -1 if data missing
    if 'price' not in poi:
        price = -1
    else:
        price = len(poi['price'])
    
    # rating is scale from 0-5 in 0.5 increments, we'll use -1 if data missing
    if 'rating' not in poi:
        rating = -1
    else:
        rating = poi['rating']
    
    lat = poi['coordinates']['latitude']
    lon = poi['coordinates']['longitude']
    
    details = {
        'name': poi['name'],
        'address': address,
        'rating': rating,
        'price': price,
        'review_count': poi['review_count'],
        'station_idx': poi['station_idx'],
        'distance': poi['distance'],
        'lat_lon': str(lat) + ',' + str(lon)
        }
    details_json.append(details)
JSON(details_json[:5])

<IPython.core.display.JSON object>

Put your parsed results into a DataFrame

In [8]:
# convert parsed results in pandas DataFrame
yelp_df = pd.json_normalize(details_json)
yelp_df.head()

Unnamed: 0,name,address,rating,price,review_count,station_idx,distance,lat_lon
0,Depot,86 Federal St,4.5,3,294,0,873.004288,"-36.8489969,174.7625058"
1,Federal Delicatessen,86 Federal St,4.5,2,201,0,873.004288,"-36.8489969,174.7625058"
2,Woodpecker Hill,196 Parnell Rd,4.5,3,26,0,859.929487,"-36.8546219,174.7801666"
3,Mamak,50 Kitchener St,4.5,1,60,0,612.381651,"-36.848316,174.76767"
4,Elliott Stables,39 Elliott St,4.5,2,72,0,717.377328,"-36.8502509,174.7636168"


# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

Yelp seemed to provide more complete data, as well as providing said data up front. Foursquare required rich data fields to be specified in order for them to be retrieved. Yelp provided everything without being explicitly told to do so. Additionally, Yelp contains fields for additional addresses, phone numbers, and review count. Finally, Yelp seemed to have orders of magnitude more places of interest in their database than Foursquare.

Get the top 10 restaurants according to their rating

In [9]:
# top 10 according to Foursquare
foursquare_df.nlargest(10, 'rating')

Unnamed: 0,name,address,rating,price,popularity,verified,station_idx,distance,lat_lon
143,Depot Eatery & Oyster Bar,86 Federal St,9.0,2,0.988079,False,14,136,"-36.849102,174.762433"
278,The Store,5B Gore Street,9.0,1,0.99219,False,27,203,"-36.844625,174.768839"
39,Federal Delicatessen,86 Federal Street,8.9,2,0.992908,False,3,303,"-36.849102,174.762433"
144,Federal Delicatessen,86 Federal Street,8.9,2,0.992908,False,14,152,"-36.849102,174.762433"
272,Ebisu,116-118 Quay Street,8.9,2,0.936389,False,27,146,"-36.844175,174.769787"
60,El Sizzling Chorizo,136-138 Ponsonby Road,8.8,2,0.945671,False,6,37,"-36.856306,174.746281"
123,Cassia,5 Fort Lane,8.8,2,0.963984,False,12,128,"-36.845763,174.766991"
141,Tony's Original Steak & Seafood Restaurant,27 Wellesley Street,8.8,4,0.983823,False,14,121,"-36.850359,174.763159"
259,Swashbucklers Restaurant,23 Westhaven Drive,8.8,3,0.957431,False,25,436,"-36.843723,174.751944"
8,Mrs Higgins Oven Fresh Cookies,268 Queen Street,8.7,1,0.950142,False,0,638,"-36.850198,174.76499"


In [10]:
# top 10 according to Yelp
yelp_df.nlargest(10, 'rating')

Unnamed: 0,name,address,rating,price,review_count,station_idx,distance,lat_lon
9,That's Amore,1 Courthouse Ln,5.0,2,30,0,652.170623,"-36.848159,174.767093"
24,Fondou Café,157 Hobson St,5.0,-1,13,2,353.675748,"-36.8511499,174.75997"
46,That's Amore,1 Courthouse Ln,5.0,2,30,4,50.281155,"-36.848159,174.767093"
47,King Made Noodles,48 fort St,5.0,-1,8,4,248.243417,"-36.84611,174.76898"
49,Culprit,12 Wyndham St,5.0,-1,6,4,173.953721,"-36.8472807369516,174.765146102108"
54,Pedro's House of Lamb,6/4 Williamson Ave,5.0,3,3,5,21.4306,"-36.8583585243259,174.748555253161"
88,Between Cafe & Eatery,82 Karangahape Rd,5.0,2,26,8,477.520635,"-36.8581,174.7619"
92,Between Cafe & Eatery,82 Karangahape Rd,5.0,2,26,9,78.434665,"-36.8581,174.7619"
109,Between Cafe & Eatery,82 Karangahape Rd,5.0,2,26,10,424.290095,"-36.8581,174.7619"
112,Between Cafe & Eatery,82 Karangahape Rd,5.0,2,26,11,153.40722,"-36.8581,174.7619"


In [11]:
# save POI data to csv for use in other notebooks
foursquare_df.to_csv('../data/foursquare.csv', index=False)
yelp_df.to_csv('../data/yelp.csv', index=False)