In [3]:
# imports
import requests
import os
import pandas as pd
import numpy as np

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [12]:
# Import df from Part 1 with Miami bike data
miami_bikes = pd.read_csv('../data/miami_bikes.csv')
miami_bikes.head()

Unnamed: 0,latitude,longitude,bikes
0,25.729275,-80.241581,3
1,25.732522,-80.233244,3
2,25.7511,-80.202641,4
3,25.760082,-80.196217,2
4,25.761884,-80.193101,1


In [3]:
#GET request to Foursquare API

url = 'https://api.foursquare.com/v3/places/search'

# Create dictionary for headers
headers = {"Accept": "application/json"}
# Add key with our API KEY
headers['Authorization'] = <<Foursquare_API_Key>>

In [4]:
#Check if the GET response is sucessful 
result = requests.get(url, headers=headers)
print(result)

<Response [200]>


In [22]:
#Iterate through each bike station from miami_bikes.csv
param= []
# Iterate through rows using iterrows()
for index, row in miami_bikes.iterrows():
    ll = str(row['latitude']) + "," + str(row['longitude'])
    param.append({
       "ll": ll,
       "radius": 1000,
    })
param

[{'ll': '25.729275,-80.241581', 'radius': 1000},
 {'ll': '25.732522,-80.233244', 'radius': 1000},
 {'ll': '25.7511,-80.202641', 'radius': 1000},
 {'ll': '25.760082,-80.196217', 'radius': 1000},
 {'ll': '25.761884,-80.193101', 'radius': 1000},
 {'ll': '25.768483,-80.190547', 'radius': 1000},
 {'ll': '25.761522,-80.1967748', 'radius': 1000},
 {'ll': '25.7604,-80.19186', 'radius': 1000},
 {'ll': '25.7603626,-80.1936702', 'radius': 1000},
 {'ll': '25.767414,-80.199166', 'radius': 1000},
 {'ll': '25.765769,-80.211373', 'radius': 1000},
 {'ll': '25.779881,-80.218424', 'radius': 1000},
 {'ll': '25.767497,-80.195732', 'radius': 1000},
 {'ll': '25.76258,-80.193948', 'radius': 1000},
 {'ll': '25.76589,-80.191042', 'radius': 1000},
 {'ll': '25.771428,-80.190815', 'radius': 1000},
 {'ll': '25.7717329,-80.1899646', 'radius': 1000},
 {'ll': '25.7725576,-80.1921467', 'radius': 1000},
 {'ll': '25.77369,-80.18896', 'radius': 1000},
 {'ll': '25.7733724,-80.1920071', 'radius': 1000},
 {'ll': '25.7870287,

In [23]:
## Filtering with rating, price, and location
url = "https://api.foursquare.com/v3/places/search?categories=13065%2C13032%2C12013&fields=name%2Crating%2Cprice%2Clocation%2Cpopularity%2Ccategories%2Cgeocodes"

#GET Response from url
#Create list to store Foursquare data from GET request
#Adding the latitude and longitude pair to to the json file to keep track of which bike stations yielded which results

Foursquare_data= []

for row in param:
    response = requests.request("GET", url, params=row, headers=headers)
    Foursquare_json = response.json()
    Foursquare_data.append([row['ll'], Foursquare_json])

In [24]:
#Confirm that for loop iterated through all bike stations
print(len(Foursquare_data))

166


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [25]:
#Create empty lists to add to dataframe
Name = []
Locality = []
Popularity =[]
Price = []
Rating = []
Category1 = []
Category2 = []
Category3 = []
Latitude = []
Longitude = []
Bike_Station_Coordinates = []

In [26]:
#Iterate through Foursquare_data made from GET request. 
#Add appropriate items to each empty list for each column 
#Try, except added so that missing values can be documented in the excel sheet

for store in Foursquare_data:
    businesses = store[1]['results']
    station_coordinates = store[0]
    for business in businesses:
        Name.append(business['name'])
        Locality.append(business['location']['locality'])
        Latitude.append(business['geocodes']['main']['latitude'])
        Longitude.append(business['geocodes']['main']['longitude'])
        Bike_Station_Coordinates.append(station_coordinates)
        try:
            Popularity.append(business['popularity'])
        except (KeyError, IndexError) as e:
            Popularity.append(None)
        try:
            Price.append(business['price'])
        except (KeyError, IndexError) as e:
            Price.append(None)
        try:
            Rating.append(business['rating'])
        except (KeyError, IndexError) as e:
            Rating.append(None)
        try:
            Category1.append(business['categories'][0]['name'])
        except IndexError as e:
            Category1.append(None)
        try:
            Category2.append(business['categories'][1]['name'])
        except IndexError as e:
            Category2.append(None)
        try:
            Category3.append(business['categories'][2]['name'])
        except IndexError as e:
            Category3.append(None)

In [27]:
#Check to see if all the lists are the same length

print(len(Name))
print(len(Locality))
print(len(Popularity))
print(len(Price))
print(len(Rating))
print(len(Category1))
print(len(Category2))
print(len(Category3))
print(len(Latitude))
print(len(Longitude))
print(len(Bike_Station_Coordinates))

1645
1645
1645
1645
1645
1645
1645
1645
1645
1645
1645


Put your parsed results into a DataFrame

In [28]:
#Create Foursquare dataframe with the parsed lists 

Foursquare_df = pd.DataFrame({
    'Name': Name,
    'Locality': Locality,
    'Popularity': Popularity,
    'Price': Price,
    'Rating': Rating,
    'Category1': Category1,
    'Category2': Category2,
    'Category3': Category3,
    'Latitude': Latitude,
    'Longitude': Longitude,
    'Bike_Station_Coordinates': Bike_Station_Coordinates
})

#Preview the dataframe
Foursquare_df.head()

Unnamed: 0,Name,Locality,Popularity,Price,Rating,Category1,Category2,Category3,Latitude,Longitude,Bike_Station_Coordinates
0,Bombay Darbar,Miami,0.977682,2.0,9.0,Indian Restaurant,Food and Beverage Service,Dining and Drinking,25.729251,-80.240721,"25.729275,-80.241581"
1,Jaguar Ceviche Spoon Bar,Miami,0.973,2.0,8.6,Latin American Restaurant,Seafood Restaurant,Dining and Drinking,25.72836,-80.242803,"25.729275,-80.241581"
2,Mister O1 Extraordinary Pizza,Miami,0.97436,1.0,8.2,Pizzeria,,,25.72874,-80.242009,"25.729275,-80.241581"
3,Le Pain Quotidien,Miami,0.952608,1.0,8.6,Café,,,25.727386,-80.24262,"25.729275,-80.241581"
4,Harry's Pizzeria,Coconut Grove,0.953315,2.0,8.4,New American Restaurant,Pizzeria,,25.727746,-80.242118,"25.729275,-80.241581"


In [29]:
#Check the dimensions of the dataframe
print(Foursquare_df.shape)

(1645, 11)


In [30]:
#Save the dataframe as a csv file
Foursquare_df.to_csv('../data/foursquare_data.csv', index=False)

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [31]:
#GET request to YELP API
#Filtering with rating, price, radius, and college/universites

url = 'https://api.yelp.com/v3/businesses/search?location=Miami%2C%20Florida&radius=1000&categories=food&categories=collegeuniv&categories=coffee&price=1&price=2&price=3&price=4'

# Create dictionary for headers
headers = {"Accept": "application/json"}
# Add key with our API KEY
headers['Authorization'] = <<Yelp_API_Key>>

In [32]:
#Iterate through each bike station from miami_bikes.csv
param=[]

#Iterate through rows using iterrows() with latitude, longitude as an interger pair.
#Format the latitude and longitude pair to add to yelp fetch url 
for index, row in miami_bikes.iterrows():
    latitude= str(row['latitude']) 
    longitude= str(row['longitude'])
    ll= '&latitude=' + latitude + '&longitude=' + longitude
    param.append(ll)

In [34]:
#Format urls with ll made from previous block including latitude and longitude from miami_bikes.csv. 
#Store formatted urls as list called new_url
new_url = []
for row in param:
    url_param = url + row
    new_url.append(url_param)

#Ensure that all urls were accounted for    
x= (len(new_url))
print(x)

166


In [36]:
#test case for 1 url 
response= requests.request("GET", url='https://api.yelp.com/v3/businesses/search?location=Miami%2C%20Florida&radius=1000&categories=food&categories=collegeuniv&categories=coffee&price=1&price=2&price=3&price=4&latitude=25.77662&longitude=-80.14191',headers=headers)
print(response)

<Response [200]>


In [37]:
#Check to see if all urls have unique latitude and longitude appended to the end. 
#See if new string has been formatted correctly

number = 0

for site in new_url:
    site = new_url[number]
    number+=1
#    print(site)
#    print("--------") #add a divider to separate each url 

In [99]:
#import regex to pull bike station latitude/longitude from the new_url
import re

#GET Response from url, create new string for yelp_data
yelp_data=[]

for site in new_url:
    response = requests.request("GET", url=site, headers=headers)
    yelp_json = response.json()
    latitude = re.search("(latitude=)(-?[0-9]*.[0-9]*)", site).group(2)
    longitude = re.search("(longitude=)(-?[0-9]*.[0-9]*)", site).group(2)
    coordinate_str = latitude + "," + longitude
#     print(coordinate_str), check to see if coordinates are being pulled correctly
    yelp_data.append([coordinate_str, yelp_json])

In [101]:
#Verify yelp_data looks correct
yelp_data

[['25.729275,-80.241581',
  {'businesses': [{'id': 'MRbRYp34p2gDdp8ufASEXg',
     'alias': 'asian-thai-kitchen-miami',
     'name': 'Asian Thai Kitchen',
     'image_url': 'https://s3-media1.fl.yelpcdn.com/bphoto/1MXSz-Z-0iMHAKaCkHi5dA/o.jpg',
     'is_closed': False,
     'url': 'https://www.yelp.com/biz/asian-thai-kitchen-miami?adjust_creative=M43YyNZ73L_PZpjiAxy23w&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=M43YyNZ73L_PZpjiAxy23w',
     'review_count': 237,
     'categories': [{'alias': 'fooddeliveryservices',
       'title': 'Food Delivery Services'},
      {'alias': 'thai', 'title': 'Thai'},
      {'alias': 'asianfusion', 'title': 'Asian Fusion'}],
     'rating': 4.0,
     'coordinates': {'latitude': 25.7280635, 'longitude': -80.244032},
     'transactions': ['pickup', 'delivery'],
     'price': '$$',
     'location': {'address1': '3135 Grand Ave',
      'address2': '',
      'address3': '',
      'city': 'Miami',
      'zip_code': '33133',
      'countr

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [125]:
#Make lists for each column that will be put in the dataframe. These will be parsed for in the yelp_data
yelp_name = []
yelp_category1 = []
yelp_category2 = []
yelp_category3 = []
yelp_rating = []
yelp_price = []
yelp_distance = []
yelp_review_count = []
yelp_bike_station_coordinates = []

In [126]:
#Parse the yelp_data to pull for specific parameters. Add them to appropriate lists made. 
for shop in yelp_data:
    businesses = shop[1]['businesses']
    station_coordinates = shop[0]
    for business in businesses:
        yelp_name.append(business['name'])
        yelp_rating.append(business['rating'])
        yelp_price.append(business['price'])
        yelp_distance.append(business['distance'])
        yelp_review_count.append(business['review_count'])
        yelp_bike_station_coordinates.append(station_coordinates)
        yelp_category1.append(business['categories'][0]['title'])
        try:
            yelp_category2.append(business['categories'][1]['title'])
        except (KeyError, IndexError) as e:
            yelp_category2.append(None)
        try:
            yelp_category3.append(business['categories'][1]['title'])
        except (KeyError, IndexError) as e:
            yelp_category3.append(None)

In [127]:
#Check to see if length of each list is the same 
print(len(yelp_name))
print(len(yelp_category1))
print(len(yelp_category2))
print(len(yelp_category3))
print(len(yelp_rating))
print(len(yelp_price))
print(len(yelp_distance))
print(len(yelp_review_count))
print(len(yelp_bike_station_coordinates))

3202
3202
3202
3202
3202
3202
3202
3202
3202


Put your parsed results into a DataFrame

In [151]:
#create dataframe with desired columns using the lists made
yelp_df = pd.DataFrame({
    'name': yelp_name,
    'review_count': yelp_review_count,
    'rating': yelp_rating,
    'price': yelp_price,
    'distance': yelp_distance,
    'category_1': yelp_category1,
    'category_2': yelp_category2,
    'category_3': yelp_category3,
    'yelp_bike_station_coordinates': yelp_coordinates
    })

#preview the dataframe 
yelp_df.head()

Unnamed: 0,name,review_count,rating,price,distance,category_1,category_2,category_3,yelp_bike_station_coordinates
0,Asian Thai Kitchen,237,4.0,$$,280.048258,Food Delivery Services,Thai,Thai,"25.729275,-80.241581"
1,Loretta & The Butcher,224,4.5,$$$,343.398505,Argentine,Breakfast & Brunch,Breakfast & Brunch,"25.729275,-80.241581"
2,Fireman Derek’s Bake Shop,254,4.5,$$,257.37049,Desserts,Bakeries,Bakeries,"25.729275,-80.241581"
3,Le Pain Quotidien,237,4.0,$$,237.898851,Cafes,Bakeries,Bakeries,"25.729275,-80.241581"
4,Salt and Straw,160,4.5,$$,115.219169,Ice Cream & Frozen Yogurt,Desserts,Desserts,"25.729275,-80.241581"


In [152]:
#Check the dimensions of the dataframe
print(yelp_df.shape)

(3202, 9)


In [153]:
#Save the yelp_df as a csv file 
yelp_df.to_csv('../data/yelp_data.csv', index=False)

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

For this question, the GET request was analyzed to see which API had provided more complete data and gave more information regarding the point of interest that was fetched.  

From one GET request to the Yelp API, more data can be obtained compared to the Foursquare API. From both of the APIs, Yelp and Foursquare, you can retrieve the name, address, categories, rating and price of each business/company/site of interest. Unique to the Yelp API, from one GET request you can also fetch the location's website, phone number/display phone, distance from the bus station that was put in our initial query, whether or not the location offers delivery/pickup services, if the point of interest is closed along with the review count. Contrary to this, Foursquare provides only one unique field different from Yelp which is the popularity.

However, when you look at the categories given by YELP, the categories are much more broad and un-specific. It is difficult to see which location was a resturant vs. a bar or a bakery. Foursquare provided less data but gave more specific categories/filters you can use on the data. 

Get the top 10 restaurants according to their rating

In [103]:
import pandas as pd

In [104]:
foursquare_df = pd.read_csv('../data/foursquare_data.csv')

In [105]:
#Remove duplicate rows from dataframe and drop bike_station_coordinates column.
foursquare_df_clean= foursquare_df.drop(columns=['Bike_Station_Coordinates'])

#Remove duplicate rows
foursquare_df_clean = foursquare_df_clean.drop_duplicates()
foursquare_df_clean.shape

(381, 10)

In [106]:
#Reset index
foursquare_df_clean.head()

Unnamed: 0,Name,Locality,Popularity,Price,Rating,Category1,Category2,Category3,Latitude,Longitude
0,Bombay Darbar,Miami,0.977682,2.0,9.0,Indian Restaurant,Food and Beverage Service,Dining and Drinking,25.729251,-80.240721
1,Jaguar Ceviche Spoon Bar,Miami,0.973,2.0,8.6,Latin American Restaurant,Seafood Restaurant,Dining and Drinking,25.72836,-80.242803
2,Mister O1 Extraordinary Pizza,Miami,0.97436,1.0,8.2,Pizzeria,,,25.72874,-80.242009
3,Le Pain Quotidien,Miami,0.952608,1.0,8.6,Café,,,25.727386,-80.24262
4,Harry's Pizzeria,Coconut Grove,0.953315,2.0,8.4,New American Restaurant,Pizzeria,,25.727746,-80.242118


In [107]:
#Filtering categories to include only restuarants
foursquare_top10 = foursquare_df_clean.loc[(foursquare_df_clean['Category1'].str.contains('Restuarant'))|(foursquare_df_clean['Category2'].str.contains('Restuarant'))|(foursquare_df_clean['Category3'].str.contains('Restuarant'))]

#Fetching the top 10 restuarants by Rating and then by popularity
foursquare_top10= foursquare_df_clean.sort_values(['Rating', 'Popularity'], ascending=False)
foursquare_top10.head(10)

Unnamed: 0,Name,Locality,Popularity,Price,Rating,Category1,Category2,Category3,Latitude,Longitude
260,Wynwood Art Walk,Miami,0.996289,,9.4,Music Venue,Restaurant,,25.79834,-80.198793
1132,Broken Shaker,Miami Beach,0.982221,2.0,9.4,Cocktail Bar,American Restaurant,,25.804592,-80.126445
1467,Mandolin Aegean Bistro,Miami,0.980616,3.0,9.4,Greek Restaurant,Mediterranean Restaurant,,25.815777,-80.191635
1470,Fiorito,Miami,0.988638,2.0,9.3,Argentinian Restaurant,,,25.827049,-80.191712
97,Doce Provisions,Miami,0.979538,2.0,9.3,Bar,Cuban Restaurant,Tapas Restaurant,25.767836,-80.214159
163,Il Gabbiano,Miami,0.972134,4.0,9.3,Italian Restaurant,Seafood Restaurant,,25.771692,-80.185383
709,Yardbird Southern Table & Bar,Miami Beach,0.99256,3.0,9.2,American Restaurant,Southern Food Restaurant,,25.789114,-80.1402
199,Vice City Bean,Miami,0.985475,1.0,9.2,Coffee Shop,Fast Food Restaurant,,25.791431,-80.194222
279,Lagniappe,Miami,0.984521,2.0,9.2,Wine Bar,Burger Joint,American Restaurant,25.809075,-80.191011
506,Macchialina Taverna Rustica,Miami Beach,0.983573,2.0,9.2,Italian Restaurant,,,25.778547,-80.141132


In [99]:
yelp_df = pd.read_csv('../data/yelp_data.csv')
yelp_df.shape

(3202, 9)

In [100]:
#drop Distance column and bike station coordinates column
yelp_df_clean= yelp_df.drop(columns=['distance', 'yelp_bike_station_coordinates'])

#Remove duplicate rows from dataframe
yelp_df_clean = yelp_df_clean.drop_duplicates()
yelp_df_clean.head()

Unnamed: 0,name,review_count,rating,price,category_1,category_2,category_3
0,Asian Thai Kitchen,237,4.0,$$,Food Delivery Services,Thai,Thai
1,Loretta & The Butcher,224,4.5,$$$,Argentine,Breakfast & Brunch,Breakfast & Brunch
2,Fireman Derek’s Bake Shop,254,4.5,$$,Desserts,Bakeries,Bakeries
3,Le Pain Quotidien,237,4.0,$$,Cafes,Bakeries,Bakeries
4,Salt and Straw,160,4.5,$$,Ice Cream & Frozen Yogurt,Desserts,Desserts


In [101]:
yelp_df_clean.shape

(424, 7)

In [102]:
#Filter out some categories to filter out non-restuarant locations
values_to_remove = ['Food Trucks','Beer Garden','Beer Hall','Cafes','Cafeteria','Delis',
                    'Dinner Theater','Food Court','Food Stands','Milk Bars','Open Sandwiches',
                    'Poutineries','Sandwiches','Grocery','Food Delivery Services',
                    'Convenience Stores', 'Bakeries', 'Farmers Market', 'Beer, Wine & Spirits',
                    'Specialty Food', 'Ice Cream & Frozen Yogurt', 'Coffee & Tea', 'Fruits & Veggies',
                    'Health Markets', 'Juice Bars & Smoothies', 'Bubble Tea', 'Imported Food',
                    'Seafood Markets', 'Drugstores', 'Guitar Stores', 'Breweries', 'Cupcakes',
                    'Gelato', 'Coffee Roasteries', 'Street Vendors'
                   ] 
indexes_to_drop = []

#concat Category 1, 2 and 3, then search for string in 1 column

# pattern = '|'.join(values_to_remove)
# yelp_top10_restaurants = yelp_top10.loc[(yelp_top10['Category 1'].str.contains(pattern)==False)|(yelp_top10['Category 2'].str.contains(pattern)==False)|(yelp_top10['Category 3'].str.contains(pattern)==False)]

for index, row in yelp_df_clean.iterrows():
    match1 = next((x for x in values_to_remove if x in str(row['category_1'])), None)
    match2 = next((x for x in values_to_remove if x in str(row['category_2'])), None)
    match3 = next((x for x in values_to_remove if x in str(row['category_3'])), None)
    final_match = match1 or match2 or match3

    if final_match:
        indexes_to_drop.append(index)

indexes_to_drop.sort(reverse=True)
yelp_df_clean_filtered = yelp_df_clean.drop(index=indexes_to_drop)

#Sort by Rating and then Review Count
yelp_top10_restaurants = yelp_df_clean_filtered.sort_values(['rating', 'review_count'], ascending=False)
yelp_top10_restaurants.head(10)

Unnamed: 0,name,review_count,rating,price,category_1,category_2,category_3
931,Meraki Gyro & Bakery,28,5.0,$$,Greek,Mediterranean,Mediterranean
36,La Burguesa,20,5.0,$$,Hot Dogs,Burgers,Burgers
2456,Café Prima Pasta,1201,4.5,$$,Italian,Wine Bars,Wine Bars
1109,Full Bloom Vegan,955,4.5,$$,Vegan,Vegetarian,Vegetarian
927,GoGo Fresh SOBE,642,4.5,$,Salad,Empanadas,Empanadas
985,Il Pastaiolo,504,4.5,$$,Italian,Pizza,Pizza
515,Crazy Poke,331,4.5,$$,Poke,Hawaiian,Hawaiian
2804,Luna Pasta e Dolci,261,4.5,$$,Pasta Shops,,
1,Loretta & The Butcher,224,4.5,$$$,Argentine,Breakfast & Brunch,Breakfast & Brunch
472,Ono Poké Shop,218,4.5,$$,Japanese,Poke,Poke


In [None]:
#potential point of interests may removed because of the keywords that was used to filter out data. 
#don't want false represented data. Focus on accuracy. 