In [1]:
# import sobre libraries to explore the Foursquare API:
import requests
from IPython.display import JSON 
import pandas as pd
import numpy as np
import os

In [2]:
YELP_API_KEY=os.environ['YELP_API_KEY']

In [3]:
FSQ_API_KEY=os.environ['FOURSQUARE_API_KEY']

# Foursquare

In [4]:
#  let's first extract a sample from the population of citybikes stations in mobi with a random function.
# But, for that, let's download our dataframe created during Task 1 and then use a for loop:

df_city_bikes_mobi = pd.read_json("http://api.citybik.es/v2/networks/mobibikes")
total_city_bikes_stations_mobi = df_city_bikes_mobi["network"]["stations"]

def array_of_Station_Information():
    array_of_elements = []

    for index, each_station in enumerate(total_city_bikes_stations_mobi):
        # print(index % 10 == 0)
        if index % 10 == 0:
            id = each_station["id"]
            uid = each_station["extra"]["uid"]
            name = each_station["name"]
            empty_slots = each_station["empty_slots"]
            free_bikes = each_station["free_bikes"]
            last_updated = each_station["extra"]["last_updated"]
            renting_bikes = each_station["extra"]["renting"]
            returning_bikes = each_station["extra"]["returning"]
            latitude = each_station["latitude"]
            longitude = each_station["longitude"]
            timestamp = each_station["timestamp"]
            parsed_station_obj = {"Id": id, "Uid": uid, "Name": name, "Empty Slots": empty_slots, "Free Bikes": free_bikes, "Last Updated": last_updated,  "Renting Bikes": renting_bikes, "Returning Bikes": returning_bikes, "Latitude": latitude, "Longitude": longitude, "Timestamp": timestamp} 
            array_of_elements.append(parsed_station_obj)
            
    stations_count = len(array_of_elements)
    print(stations_count, "stations returned")
    
    return array_of_elements

Station_Information = array_of_Station_Information()

25 stations returned


In [5]:
# Now, let's save the sample in a data frame in a .csv file:

df = pd.DataFrame(Station_Information)
df.to_csv("df_city_bikes_mobi_network_sample.csv", index = True)

In [6]:
first_station_lat = Station_Information[0]["Latitude"]
first_station_lon = Station_Information[0]["Longitude"]
# print(Station_Information)
# print(first_station_lon)

In [7]:
# let's examine the data structure of the requested object by attempting to retrieve a request from the station at index 0. To do this, we'll utilize the "ll" string with latitude and longitude keywords.

foursquare_url = "https://api.foursquare.com/v3/places/search?ll=" + str(first_station_lat) + "," + str(first_station_lon)

headers = {
    "accept": "application/json",
    "Authorization": FSQ_API_KEY
}

response = requests.get(foursquare_url, headers = headers)
foursquare_response = response.json()
# print(foursquare_response)

Task 2: Connect to the [Yelp]

Yelp provides easy-to-follow instructions and an example for using their API on their website (https://docs.developer.yelp.com/docs/fusion-intro). But, to access their data, you need to get an API key by creating an app. That's why I made a "python_test" app to get my API key and begin using Yelp's services.

In [8]:
yelp_url = "https://api.yelp.com/v3/businesses/search?sort_by=best_match&latitude="+ str(first_station_lat) + "&longitude=" + str(first_station_lon)

headers = { 
    "accept": "application/json", 
    "Authorization": YELP_API_KEY
}

response = requests.get(yelp_url, headers = headers)
yelp_response = response.json()
# print(yelp_response)

Task 3: For each of the bike stations in Part 1, query both APIs to retrieve information for the following in that location:

 - Restaurants or bars
 - Various POIs (points of interest) of your choice

In [9]:
# Let's search for places under the category "sort by rating" in Foursquare wich includes restaurants, coffee shops and more retrived by users rating:

def get_pois_from_foursquare():
    foursquare_pois = []
    for each_station in Station_Information:
    
        foursquare_url = "https://api.foursquare.com/v3/places/search?ll=" + str(each_station["Latitude"]) + "," + str(each_station["Longitude"]) + "&sort=RATING"

        headers = {
        "accept": "application/json",
        "Authorization": FSQ_API_KEY
        }

        response = requests.get(foursquare_url, headers = headers)
        foursquare_response = response.json()
        foursquare_pois.append(foursquare_response)
    return foursquare_pois

In [10]:
foursquare_pois = get_pois_from_foursquare()
# print(foursquare_pois)

In [11]:
# # Let's search for places under the category "sort by best" in Yelp which includes restaurantes, bars and bakeries:

def get_pois_from_yelp():
    yelp_pois = []
    for each_station in Station_Information:
        
        yelp_url = "https://api.yelp.com/v3/businesses/search?sort_by=best_match&latitude=" + str(each_station["Latitude"]) + "&longitude=" + str(each_station["Longitude"])
       
        headers = { 
        "accept": "application/json", 
        "Authorization": YELP_API_KEY
        }
        
        response = requests.get(yelp_url, headers = headers)
        yelp_response = response.json()
        yelp_pois.append(yelp_response)
    return yelp_pois


In [12]:
yelp_pois = get_pois_from_yelp()
# print(yelp_pois)

 Send a request to Foursquare and Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [13]:
#  send a request to Foursquare considering the small radius of 1000m 

radius = 1000

def get_pois_with_radius_foursquare():
    foursquare_radius_1000 = []
    for each_station in Station_Information:
    
        foursquare_url = "https://api.foursquare.com/v3/places/search?ll=" + str(each_station["Latitude"]) + "," + str(each_station["Longitude"]) + "&radius=" + str(radius) + "&sort=RATING"
        headers = {
        "accept": "application/json",
        "Authorization": FSQ_API_KEY
        }

        response = requests.get(foursquare_url, headers = headers)
        foursquare_response = response.json()
        foursquare_radius_1000.append(foursquare_response)
    return foursquare_radius_1000

In [14]:
foursquare_pois_with_radius = get_pois_with_radius_foursquare()
# print(foursquare_pois_with_radius)

In [15]:
#  send a request to Yelp considering the small radius of 1000m 
def get_pois_with_radius_yelp():
    yelp_radius_1000 = []
    for each_station in Station_Information:
    
        yelp_url = "https://api.yelp.com/v3/businesses/search?radius="+ str(radius) + "&latitude="+ str(each_station["Latitude"]) + "&longitude=" + str(each_station["Longitude"])
       
        headers = { 
        "accept": "application/json", 
        "Authorization": YELP_API_KEY
        }

        response = requests.get(yelp_url, headers = headers)
        yelp_response = response.json()
        yelp_radius_1000.append(yelp_response)
    return yelp_radius_1000

# print(get_pois_with_radius_yelp())

In [16]:
yelp_pois_with_radius = get_pois_with_radius_yelp()

 Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc.)

In [17]:

import pandas

def foursquare_array_of_pois():
    final_array = []
    foursquare_array_of_elements = []
    
    for each_poi in foursquare_pois_with_radius:
        results = each_poi["results"]
        for each_result in results:
            id = each_result["fsq_id"]
            name = each_result["name"]
            category = each_result["categories"]
            distance = each_result["distance"]
            address = each_result["location"]
            latitude = each_result["geocodes"]["main"]["latitude"]
            longitude = each_result["geocodes"]["main"]["longitude"]
            dict_of_each_poi = {"Id": id, "Name": name, "Category": category, "Distance": distance, "Address": address, "Latitude": latitude, "Longitude": longitude}
            foursquare_array_of_elements.append(dict_of_each_poi)
        final_array.append(foursquare_array_of_elements)
        
    return final_array

fsq_results_to_convert = foursquare_array_of_pois()
# print(fsq_results_to_convert)

In [18]:
# The function sum is used to flatten the nested list (https://stackoverflow.com/a/20113075): 
fsq_flattened = sum(fsq_results_to_convert, [])
# print(fsq_flattened)

fsq_dataframe = pd.DataFrame(fsq_flattened, columns=["Id", "Name", "Category", "Distance", "Address", "Latitude", "Longitude"])
display(fsq_dataframe)

Unnamed: 0,Id,Name,Category,Distance,Address,Latitude,Longitude
0,51b201d27dd249ae714ba728,33 Acres Brewing Co,"[{'id': 13003, 'name': 'Bar', 'short_name': 'B...",674,"{'address': '15 8th Ave W', 'country': 'CA', '...",49.263877,-123.105307
1,4aada430f964a5201f6120e3,Charleson Park,"[{'id': 16032, 'name': 'Park', 'short_name': '...",891,"{'address': '999 Charleson St', 'country': 'CA...",49.266831,-123.124538
2,5478ca03498e3bbc45e6ce54,Elysian Coffee,"[{'id': 13035, 'name': 'Coffee Shop', 'short_n...",724,"{'address': '2301 Ontario St', 'country': 'CA'...",49.264531,-123.105038
3,5674aec9498e6800815c3598,Vij's,"[{'id': 13199, 'name': 'Indian Restaurant', 's...",553,"{'address': '3106 Cambie St', 'country': 'CA',...",49.257598,-123.114829
4,4aec909ff964a520ccc821e3,Milano Coffee,"[{'id': 13035, 'name': 'Coffee Shop', 'short_n...",430,"{'address': '156 8th Ave W', 'country': 'CA', ...",49.263724,-123.108701
...,...,...,...,...,...,...,...
6095,4aa80589f964a520ca4e20e3,Guu with Garlic,"[{'id': 13263, 'name': 'Japanese Restaurant', ...",147,"{'address': '1698 Robson St', 'country': 'CA',...",49.290278,-123.133923
6096,4aad33ecf964a520fb5e20e3,HI-Vancouver Jericho Beach,"[{'id': 19013, 'name': 'Hostel', 'short_name':...",871,"{'address': '1515 Discovery St', 'country': 'C...",49.273017,-123.203242
6097,4aad7169f964a520576020e3,Burgoo Bistro,"[{'id': 13065, 'name': 'Restaurant', 'short_na...",243,"{'address': '4434 10th Ave W', 'country': 'CA'...",49.263678,-123.207137
6098,5264889711d205ec029d2e6a,Takumi Japanese Restaurant,"[{'id': 13263, 'name': 'Japanese Restaurant', ...",233,"{'address': '4422W 10th Ave W', 'country': 'CA...",49.263617,-123.206889


In [19]:
columns = ['Name', 'Distance', 'Latitude', 'Longitude']
new_fsq_df_parsed = fsq_dataframe[columns]
display(new_fsq_df_parsed)

Unnamed: 0,Name,Distance,Latitude,Longitude
0,33 Acres Brewing Co,674,49.263877,-123.105307
1,Charleson Park,891,49.266831,-123.124538
2,Elysian Coffee,724,49.264531,-123.105038
3,Vij's,553,49.257598,-123.114829
4,Milano Coffee,430,49.263724,-123.108701
...,...,...,...,...
6095,Guu with Garlic,147,49.290278,-123.133923
6096,HI-Vancouver Jericho Beach,871,49.273017,-123.203242
6097,Burgoo Bistro,243,49.263678,-123.207137
6098,Takumi Japanese Restaurant,233,49.263617,-123.206889


In [20]:
new_fsq_df_parsed_and_grouped_by_name = new_fsq_df_parsed.sort_values(by = 'Name', inplace = False)
print(new_fsq_df_parsed_and_grouped_by_name)

                         Name  Distance   Latitude   Longitude
0         33 Acres Brewing Co       674  49.263877 -123.105307
5612      33 Acres Brewing Co       674  49.263877 -123.105307
5124      33 Acres Brewing Co       674  49.263877 -123.105307
2440      33 Acres Brewing Co       674  49.263877 -123.105307
2480      33 Acres Brewing Co       309  49.263877 -123.105307
...                       ...       ...        ...         ...
975   West Point Grey Academy       116  49.266549 -123.200170
3415  West Point Grey Academy       116  49.266549 -123.200170
4635  West Point Grey Academy       116  49.266549 -123.200170
5611  West Point Grey Academy       116  49.266549 -123.200170
6099  West Point Grey Academy       116  49.266549 -123.200170

[6100 rows x 4 columns]


In [21]:
# identify duplicates
duplicates = new_fsq_df_parsed_and_grouped_by_name.duplicated(subset='Name')

# drop duplicates
new_fsq_df_parsed_and_grouped_by_name.drop_duplicates(subset='Name', keep='first', inplace=True)

In [22]:
#  let's create our fsq dataset of pois .csv file:

new_fsq_df_parsed_and_grouped_by_name.to_csv("new_fsq_df_parsed_and_grouped_by_name.csv")

In [23]:
# At Yelp, different quantity of points of interest per station were found.

def yelp_array_of_pois():
    final_array = []
    yelp_array_of_elements = []
    
    for each_poi in yelp_pois_with_radius:
        businesses = each_poi["businesses"]
        for each_business in businesses:
            id = each_business["id"]
            name = each_business["name"]
            category = each_business["categories"]
            distance = int(each_business["distance"])
            address = each_business["location"]["address1"]
            lat = each_business["coordinates"]["latitude"]
            lon = each_business["coordinates"]["longitude"]
            review_count = each_business["review_count"]
            rating = each_business["rating"]
            dict_of_each_poi = {"Id": id, "Name": name, "Address": address, "Latitude": lat, "Longitude": lon, "Review Count": review_count, "Rating": rating}
            yelp_array_of_elements.append(dict_of_each_poi)
            
        final_array.append(yelp_array_of_elements)
   
    return final_array

yelp_pois_to_convert = yelp_array_of_pois()
# print(yelp_pois)

In [24]:
yelp_flattened = sum(yelp_pois_to_convert, [])
# print(yelp_converted)

yelp_dataframe = pd.DataFrame(yelp_flattened, columns=["Id", "Name", "Category", "Distance", "Address", "Latitude", "Longitude", "Rating", "Review Count"])
display(yelp_dataframe)


Unnamed: 0,Id,Name,Category,Distance,Address,Latitude,Longitude,Rating,Review Count
0,nkDZY5xqihF3XtZMzzfqqg,Hokkaido Ramen Santouka,,,558 W Broadway,49.263127,-123.116892,4.0,226
1,6iOAgzJ0DRZNSKA3FSrrOg,La Taqueria Pinche Taco Shop,,,2450 Yukon Street,49.263559,-123.112736,4.0,685
2,XAH2HpuUUtu7CUO26pbs4w,Saku,,,548 W Broadway,49.263101,-123.116675,4.5,237
3,4118Aq9LbkvUr4s719uUkA,Uma Sushi,,,450 W 8th Avenue,49.263805,-123.113729,4.5,118
4,F5wxgIiZE7LYQxgqhI483A,Seaport City Seafood Restaurant,,,2425 Cambie Street,49.263921,-123.115152,4.0,37
...,...,...,...,...,...,...,...,...,...
12495,dN_NUGjCjAoEhJOWg9ivTQ,Jericho Beach,,,Wallace St & Discovery St,49.270997,-123.192315,4.5,26
12496,vnOhpYbHv7PZXD1iwKytXg,Everyday Sushi,,,4572W 10 Avenue W,49.263745,-123.211153,3.0,29
12497,O3axNLzllaYzTHv2WOTmrg,Trimble Park,,,2250 Trimble Street,49.266208,-123.205293,5.0,1
12498,zWxhbAf9ZLM5qNiJ0NwRDg,Ooh La La Donuts,,,4406 West 10th Avenue,49.263594,-123.206367,4.5,3


In [25]:
columns = ['Name', 'Latitude', 'Longitude', 'Rating', 'Review Count']
new_yelp_df_parsed = yelp_dataframe[columns]
display(new_yelp_df_parsed)

Unnamed: 0,Name,Latitude,Longitude,Rating,Review Count
0,Hokkaido Ramen Santouka,49.263127,-123.116892,4.0,226
1,La Taqueria Pinche Taco Shop,49.263559,-123.112736,4.0,685
2,Saku,49.263101,-123.116675,4.5,237
3,Uma Sushi,49.263805,-123.113729,4.5,118
4,Seaport City Seafood Restaurant,49.263921,-123.115152,4.0,37
...,...,...,...,...,...
12495,Jericho Beach,49.270997,-123.192315,4.5,26
12496,Everyday Sushi,49.263745,-123.211153,3.0,29
12497,Trimble Park,49.266208,-123.205293,5.0,1
12498,Ooh La La Donuts,49.263594,-123.206367,4.5,3


In [26]:
new_yelp_df_parsed_and_grouped_by_name = new_yelp_df_parsed.sort_values(by = 'Name', inplace = False)
print(new_yelp_df_parsed_and_grouped_by_name)

                                       Name   Latitude   Longitude  Rating  \
11138                  49th Parallel Coffee  49.268009 -123.154781     4.0   
11638                  49th Parallel Coffee  49.268009 -123.154781     4.0   
10688                  49th Parallel Coffee  49.259133 -123.100885     4.0   
1299                   49th Parallel Coffee  49.259133 -123.100885     4.0   
276                    49th Parallel Coffee  49.259133 -123.100885     4.0   
...                                     ...        ...         ...     ...   
8006   iDen & Quan Ju De Beijing Duck House  49.260210 -123.114845     3.5   
1506   iDen & Quan Ju De Beijing Duck House  49.260210 -123.114845     3.5   
7006   iDen & Quan Ju De Beijing Duck House  49.260210 -123.114845     3.5   
8685   iDen & Quan Ju De Beijing Duck House  49.260210 -123.114845     3.5   
3185   iDen & Quan Ju De Beijing Duck House  49.260210 -123.114845     3.5   

       Review Count  
11138           212  
11638           212

In [27]:
# identify duplicates
duplicates = new_yelp_df_parsed_and_grouped_by_name.duplicated(subset='Name')

# drop duplicates
new_yelp_df_parsed_and_grouped_by_name.drop_duplicates(subset='Name', keep='first', inplace=True)

In [28]:
# let's create our yelp dataset of pois .csv file:

new_yelp_df_parsed_and_grouped_by_name.to_csv("new_yelp_df_parsed_and_grouped_by_name.csv")


In [29]:
yelp_dataframe_grouped_by_lat_and_lon = yelp_dataframe.groupby(['Latitude', 'Longitude'], as_index=False).sum()
print(yelp_dataframe_grouped_by_lat_and_lon)

yelp_dataframe_grouped_by_lat_and_lon.to_csv("yelp_dataframe_grouped_by_lat_and_lon.csv", index=False)


      Latitude   Longitude                                                 Id  \
0    49.238028 -123.128161  A-ZcfRRw9ICNUihVa8FpbQA-ZcfRRw9ICNUihVa8FpbQA-...   
1    49.238689 -123.129515  dm9HU1uHgJqgZ4cJQKsCrAdm9HU1uHgJqgZ4cJQKsCrAdm...   
2    49.241424 -123.114755  dqXYgJKJXNsY8Owsa48kngdqXYgJKJXNsY8Owsa48kngdq...   
3    49.241671 -123.112535  l2f2PU8Rtr5TPtzwaGQ_sAl2f2PU8Rtr5TPtzwaGQ_sAl2...   
4    49.241764 -123.112625  QZMDi6zSshZPxSwoyF3lvwQZMDi6zSshZPxSwoyF3lvwQZ...   
..         ...         ...                                                ...   
337  49.293373 -123.132184  Ckna_iwDI_QrTlU5XIcqCwCkna_iwDI_QrTlU5XIcqCwCk...   
338  49.293785 -123.129781  WiATqbLHV6BDIDdRva9VpwWiATqbLHV6BDIDdRva9VpwWi...   
339  49.297884 -123.130809  kajMc2fkWKdzKJ1M4pm47QkajMc2fkWKdzKJ1M4pm47Qka...   
340  49.299708 -123.133901  WDiGEzCsUHGpyS1B4DZcpQWDiGEzCsUHGpyS1B4DZcpQWD...   
341  49.300799 -123.130927  7CAWOmR5P8JBo4LFNXYgAg7CAWOmR5P8JBo4LFNXYgAg7C...   

                           

In [30]:
yelp_dataframe_sorted_by_name = yelp_dataframe.sort_values(by = 'Name', inplace = False)
# print(yelp_dataframe_sorted_by_name)

yelp_counts_of_pois = yelp_dataframe_sorted_by_name['Name'].value_counts()
display(yelp_counts_of_pois)

Name
Hokkaido Ramen Santouka    200
Saku                       175
Cactus Club Cafe           150
Guu with Garlic            125
Kingyo                     125
                          ... 
Matchstick Coffee           25
Meat & Bread                25
MeeT in Gastown             25
Menya Raizo                 25
Maizal RMF                  25
Name: count, Length: 328, dtype: int64

 Put your parsed results into a DataFrame

In [31]:
df_fsq_and_yelp_merged_by_name = pd.merge(fsq_dataframe, yelp_dataframe, on = "Name", how = "inner")

fqs_and_yelp_merged_by_concat = pd.concat([fsq_dataframe, yelp_dataframe], ignore_index=True, sort=False)
display(df_fsq_and_yelp_merged_by_name)

Unnamed: 0,Id_x,Name,Category_x,Distance_x,Address_x,Latitude_x,Longitude_x,Id_y,Category_y,Distance_y,Address_y,Latitude_y,Longitude_y,Rating,Review Count
0,4e223f06d4c0d32590f80ff4,La Taqueria Pinche Taco Shop,"[{'id': 13306, 'name': 'Taco Restaurant', 'sho...",164,"{'address': '2450 Yukon St', 'address_extended...",49.263601,-123.112665,6iOAgzJ0DRZNSKA3FSrrOg,,,2450 Yukon Street,49.263559,-123.112736,4.0,685
1,4e223f06d4c0d32590f80ff4,La Taqueria Pinche Taco Shop,"[{'id': 13306, 'name': 'Taco Restaurant', 'sho...",164,"{'address': '2450 Yukon St', 'address_extended...",49.263601,-123.112665,6iOAgzJ0DRZNSKA3FSrrOg,,,2450 Yukon Street,49.263559,-123.112736,4.0,685
2,4e223f06d4c0d32590f80ff4,La Taqueria Pinche Taco Shop,"[{'id': 13306, 'name': 'Taco Restaurant', 'sho...",164,"{'address': '2450 Yukon St', 'address_extended...",49.263601,-123.112665,6iOAgzJ0DRZNSKA3FSrrOg,,,2450 Yukon Street,49.263559,-123.112736,4.0,685
3,4e223f06d4c0d32590f80ff4,La Taqueria Pinche Taco Shop,"[{'id': 13306, 'name': 'Taco Restaurant', 'sho...",164,"{'address': '2450 Yukon St', 'address_extended...",49.263601,-123.112665,6iOAgzJ0DRZNSKA3FSrrOg,,,2450 Yukon Street,49.263559,-123.112736,4.0,685
4,4e223f06d4c0d32590f80ff4,La Taqueria Pinche Taco Shop,"[{'id': 13306, 'name': 'Taco Restaurant', 'sho...",164,"{'address': '2450 Yukon St', 'address_extended...",49.263601,-123.112665,6iOAgzJ0DRZNSKA3FSrrOg,,,2450 Yukon Street,49.263559,-123.112736,4.0,685
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130620,5264889711d205ec029d2e6a,Takumi Japanese Restaurant,"[{'id': 13263, 'name': 'Japanese Restaurant', ...",233,"{'address': '4422W 10th Ave W', 'country': 'CA...",49.263617,-123.206889,BAqcMoKH2oRCo0rC8UdyuQ,,,4422 W 10th Avenue,49.263793,-123.206766,3.5,31
130621,5264889711d205ec029d2e6a,Takumi Japanese Restaurant,"[{'id': 13263, 'name': 'Japanese Restaurant', ...",233,"{'address': '4422W 10th Ave W', 'country': 'CA...",49.263617,-123.206889,BAqcMoKH2oRCo0rC8UdyuQ,,,4422 W 10th Avenue,49.263793,-123.206766,3.5,31
130622,5264889711d205ec029d2e6a,Takumi Japanese Restaurant,"[{'id': 13263, 'name': 'Japanese Restaurant', ...",233,"{'address': '4422W 10th Ave W', 'country': 'CA...",49.263617,-123.206889,BAqcMoKH2oRCo0rC8UdyuQ,,,4422 W 10th Avenue,49.263793,-123.206766,3.5,31
130623,5264889711d205ec029d2e6a,Takumi Japanese Restaurant,"[{'id': 13263, 'name': 'Japanese Restaurant', ...",233,"{'address': '4422W 10th Ave W', 'country': 'CA...",49.263617,-123.206889,BAqcMoKH2oRCo0rC8UdyuQ,,,4422 W 10th Avenue,49.263793,-123.206766,3.5,31


In [32]:
# Finally, let's save our merged data frame in a .csv file:

df_fsq_and_yelp_merged_by_name.to_csv("df_fsq_and_yelp_merged_by_name.csv", index = True)
# print(df_fsq_and_yelp_merged_by_name)

In [33]:
# Also, let's try to do a merge by grouping our pois information from fsq and yelp:

df_fsq_and_yelp_parsed_and_grouped_by_name = pd.merge(new_fsq_df_parsed_and_grouped_by_name, new_yelp_df_parsed_and_grouped_by_name, on = "Name", how = "outer")

fqs_and_yelp_merged_by_concat = pd.concat([fsq_dataframe, yelp_dataframe], ignore_index=True, sort=False)
display(df_fsq_and_yelp_parsed_and_grouped_by_name)

Unnamed: 0,Name,Distance,Latitude_x,Longitude_x,Latitude_y,Longitude_y,Rating,Review Count
0,33 Acres Brewing Co,674.0,49.263877,-123.105307,,,,
1,49th Parallel Coffee Roast,877.0,49.259164,-123.100851,,,,
2,A Bread Affair,735.0,49.272480,-123.135889,,,,
3,Angus T,716.0,49.275871,-123.119875,,,,
4,Anh and Chi,750.0,49.255019,-123.100904,49.255031,-123.100882,4.0,653.0
...,...,...,...,...,...,...,...,...
410,Yuji's From Japan,,,,49.268297,-123.186045,4.0,53.0
411,Zakkushi Dining On Main,,,,49.249101,-123.101546,4.0,222.0
412,Zakkushi on Denman,,,,49.291080,-123.136430,4.0,258.0
413,Zarak By Afghan Kitchen,,,,49.266224,-123.100609,4.5,86.0


In [34]:
df_fsq_and_yelp_parsed_and_grouped_by_name.to_csv("df_fsq_and_yelp_parsed_and_grouped_by_name.csv", index = False)

 Comparing results between Foursquare and Yelp

Which API provided you with more complete data? Provide an explanation. 

To determine which API provided more complete data, we need to consider the nature of the data each API offers and the context of their use cases. We analyzed data from three different sources: Yelp, Foursquare (FSQ), and CityBikes. Each serves a distinct purpose:

Yelp API: Offers data about business locations, including user-generated content such as ratings and review counts. It provides a comprehensive view of each location from both a geographical and a consumer perspective.

Foursquare (FSQ) API: Primarily focuses on geographic locations (latitude, longitude) and distance. While it provides complete geographical data, it lacks the user-generated content like ratings and review counts that Yelp offers.

CityBikes API: Provides real-time operational data of bike-sharing stations, including geographical location, bike availability, operational status, and temporal data. This API is tailored to urban mobility and bike-sharing services, presenting a different type of completeness compared to Yelp and FSQ.

Comparison of Completeness
Yelp vs. Foursquare: Yelp's data can be seen as more complete in the context of business listings because it includes both geographical information and user-generated content (ratings and reviews). This dual-layer of data makes Yelp particularly valuable for understanding user perceptions and popularity of locations. On the other hand, FSQ, while comprehensive in its geographical data, lacks this user-generated dimension.

CityBikes API: The completeness of the CityBikes API data is context-specific. It is comprehensive for its domain, providing detailed information about bike station locations, availability, and status. However, it serves a completely different purpose compared to Yelp and FSQ.

Conclusion
Which API is More Complete? The answer depends on the specific use case. For business listings and understanding consumer opinions, Yelp offers more complete data due to its inclusion of both geographical data and user-generated content. For geographical information alone, FSQ is equally comprehensive. For urban mobility and bike-sharing service data, CityBikes is the most complete, but it caters to a different need.
Therefore, the "completeness" of data is relative and must be assessed in the context of the specific application or analysis goals.

Get the top 10 restaurants according to their rating

In [35]:
# Below is a list of the top 10 rated POIs privded by the get request of the Yelp API:

yelp_dataframe_sorted_by_the_top_10 = new_yelp_df_parsed_and_grouped_by_name.sort_values(by = "Rating", ascending = False).head(10)

display(yelp_dataframe_sorted_by_the_top_10)

Unnamed: 0,Name,Latitude,Longitude,Rating,Review Count
8700,Stanley Park,49.297884,-123.130809,5.0,1094
2316,Viet Family- The Vegan House,49.271826,-123.06975,5.0,22
7879,Urban Tadka,49.27827,-123.07566,5.0,9
69,Arike Restaurant,49.286805,-123.140856,5.0,26
9749,Quizine Kitchen,49.262535,-123.080673,5.0,6
6639,NUTTEA,49.267933,-123.14931,5.0,33
9497,Trimble Park,49.266208,-123.205293,5.0,1
7111,Jam Cafe on Beatty,49.280259,-123.109638,4.5,1403
3622,Jam Cafe,49.26827,-123.153983,4.5,238
7137,Jackson's Meat & Deli,49.26806,-123.15577,4.5,64
