In [1]:
# imports

import requests
import pandas as pd
import os

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [2]:
df2 = pd.read_csv("../data/montreal_city_bikes.csv")

FOURSQUARE_API_KEY = os.environ['FOURSQUARE_API_KEY']

results = []

for index, row in df2.iterrows():
    latitude = row['Latitude']
    longitude = row['Longitude']

    url = 'https://api.foursquare.com/v3/places/search'
    params = {
        # 'location': ‘Montreal, Canada’,
        'll': f'{latitude},{longitude}',
        'categoryId': '13000',    # for restaurants and bars
        'radius': 1000,
        'fields': 'name,location,rating,categories,distance,description,popularity,price',
        'sort': 'RATING',
        'limit': 50
    }
    headers = {
        'Accept': 'application/json',
        'Authorization': FOURSQUARE_API_KEY
    }

    response = requests.get(url, params=params, headers=headers)
    if response.status_code != 200:
        print("Request failed. Status code:", response.status_code)
    else:
        data = response.json()

    res = data.get('results')
    frame = pd.DataFrame(res)
    results.append({
        'Latitude': latitude,
        'Longitude': longitude,
        'Foursquare Name': frame['name'],
        'Distance': frame['distance'],
        'Popularity': frame['popularity'],
        'Rating': frame['rating'],
        'Price': frame['price']
        })

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [3]:
# Parsing was done within the for loop for each station in cell above and the transformation and cleaning were done after.

Put your parsed results into a DataFrame

In [4]:
df_foursquare = pd.DataFrame(results)

df_foursquare

Unnamed: 0,Latitude,Longitude,Foursquare Name,Distance,Popularity,Rating,Price
0,45.516926,-73.564257,0 La Maison ...,0 749 1 741 2 675 3 851 4 ...,0 0.302754 1 0.981131 2 0.971504 3...,0 9.5 1 9.5 2 9.4 3 9.4 4 ...,0 1.0 1 NaN 2 3.0 3 NaN 4 ...
1,45.553219,-73.539782,0 La Boutiq...,0 712 1 162 2 926 3 5...,0 0.981981 1 0.954891 2 0.979212 3...,0 9.0 1 8.8 2 8.7 3 8.6 4 ...,0 2.0 1 3.0 2 2.0 3 NaN 4 ...
2,45.526890,-73.572640,0 Parc la Fontaine 1...,0 264 1 942 2 522 3 958 4 ...,0 0.999622 1 0.968422 2 0.957389 3...,0 9.4 1 9.3 2 9.2 3 9.1 4 ...,0 NaN 1 2.0 2 NaN 3 1.0 4 ...
3,45.553400,-73.662255,0 132 Ba...,0 810 1 511 2 784 3 837 4 ...,0 0.935309 1 0.955469 2 0.950923 3...,0 8.3 1 8.3 2 8.0 3 8.0 4 ...,0 3.0 1 NaN 2 1.0 3 NaN 4 ...
4,45.559842,-73.615447,0 Marché Aux Puces St-M...,0 859 1 757 2 749 3 812 4 ...,0 0.993078 1 0.982709 2 0.997652 3...,0 8.1 1 7.9 2 7.9 3 7.9 4 ...,0 NaN 1 NaN 2 1.0 3 1.0 4 ...
...,...,...,...,...,...,...,...
155,45.489525,-73.584458,0 Thali Cuisine Indienne 1 ...,0 497 1 935 2 890 3 460 4 ...,0 0.709264 1 0.973524 2 0.986448 3...,0 9.4 1 9.1 2 8.9 3 8.9 4 ...,0 2.0 1 NaN 2 2.0 3 NaN 4 ...
156,45.507885,-73.563151,0 La Mai...,0 364 1 286 2 339 3 396 4 ...,0 0.302754 1 0.981131 2 0.997574 3...,0 9.5 1 9.5 2 9.4 3 9.4 4 ...,0 1.0 1 NaN 2 NaN 3 3.0 4 ...
157,45.496496,-73.578704,0 Thali Cuisine...,0 404 1 229 2 396 3 805 4 ...,0 0.709264 1 0.993292 2 0.961827 3...,0 9.4 1 9.3 2 9.2 3 9.1 4 ...,0 2.0 1 NaN 2 4.0 3 NaN 4 ...
158,45.529337,-73.577953,0 Parc Sir-Wilfrid-L...,0 766 1 752 2 807 3 162 4 ...,0 0.995019 1 0.999622 2 0.968422 3...,0 9.5 1 9.4 2 9.3 3 9.2 4 ...,0 NaN 1 NaN 2 2.0 3 NaN 4 ...


In [5]:
# Specify the file path for the CSV
csv_file_path = "../data/foursquare_nested_places.csv"

# Save the DataFrame to CSV
df_foursquare.to_csv(csv_file_path, index=False)

In [6]:
# Transforming the data (collecting the averages for the distance, popularity, and ratings on Foursquare)

df_foursquare['Average_dist_F'] = df_foursquare['Distance'].apply(lambda x: sum(x) / len(x))
df_foursquare['Average_popularity_F'] = df_foursquare['Popularity'].apply(lambda x: sum(x) / len(x))
df_foursquare['Average_rating_F'] = df_foursquare['Rating'].apply(lambda x: sum(x) / len(x))

df_foursquare['Max_rating_F'] = df_foursquare['Rating'].apply(lambda x: max(x))
df_foursquare['Max_popularity_F'] = df_foursquare['Popularity'].apply(lambda x: max(x))

df_foursquare['Min_rating_F'] = df_foursquare['Rating'].apply(lambda x: min(x))
df_foursquare['Min_popularity_F'] = df_foursquare['Popularity'].apply(lambda x: min(x))

In [7]:
# Cleaning

df_foursquare.drop(columns=['Foursquare Name', 'Distance', 'Popularity', 'Rating', 'Price'], inplace=True)
df_foursquare

Unnamed: 0,Latitude,Longitude,Average_dist_F,Average_popularity_F,Average_rating_F,Max_rating_F,Max_popularity_F,Min_rating_F,Min_popularity_F
0,45.516926,-73.564257,689.060000,0.938463,8.606,9.5,0.997731,8.0,0.302754
1,45.553219,-73.539782,893.346939,0.804348,,9.0,0.992079,5.6,0.073790
2,45.526890,-73.572640,720.760000,0.941647,8.460,9.4,0.999622,7.3,0.475590
3,45.553400,-73.662255,696.040000,0.772542,,8.3,0.994869,6.1,0.009805
4,45.559842,-73.615447,883.535714,0.757223,,8.1,0.999065,6.2,0.042997
...,...,...,...,...,...,...,...,...,...
155,45.489525,-73.584458,649.179487,0.816851,,9.4,0.997845,6.4,0.059275
156,45.507885,-73.563151,662.480000,0.949826,8.950,9.5,0.999122,8.6,0.302754
157,45.496496,-73.578704,515.320000,0.976412,8.604,9.4,0.999565,8.1,0.709264
158,45.529337,-73.577953,770.120000,0.833648,,9.5,0.999622,6.3,0.002926


In [8]:
# Specify the file path for the CSV
csv_file_path = "../data/foursquare_features.csv"

# Save the DataFrame to CSV
df_foursquare.to_csv(csv_file_path, index=False)

In [9]:
duplicate_rows = df_foursquare[df_foursquare.duplicated()]

print("Duplicate Rows:")
duplicate_rows

Duplicate Rows:


Unnamed: 0,Latitude,Longitude,Average_dist_F,Average_popularity_F,Average_rating_F,Max_rating_F,Max_popularity_F,Min_rating_F,Min_popularity_F


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [11]:
YELP_API_KEY = os.environ['YELP_API_KEY']

results = []

for index, row in df2.iterrows():      
    latitude = row['Latitude']
    longitude = row['Longitude']

    url = 'https://api.yelp.com/v3/businesses/search'
    params = {
        # 'location': 'Montreal, Canada',
        'latitude': latitude,
        'longitude': longitude,
        'radius': 1000,
        'categories': 'restaurants',
        'sort_by': 'rating',
        'limit': 50
    }
    headers = {
        'Authorization': f'Bearer {YELP_API_KEY}'
    }

    response = requests.get(url, params=params, headers=headers)
    if response.status_code != 200:
        print("Request failed. Status code:", response.status_code)
        break
    else:
        data = response.json()

    yelp_results = data.get('businesses', [])
    frame = pd.DataFrame(yelp_results)
    results.append({
    'Latitude': latitude,
    'Longitude': longitude,
    'Yelp Name': frame['name'],
    'Distance': frame['distance'],
    'Review count': frame['review_count'],
    'Rating': frame['rating']
    })

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [None]:
# Parsing was done within the for loop for each station in cell above and the transformation and cleaning were done after.

Put your parsed results into a DataFrame

In [12]:
df_yelp = pd.DataFrame(results)

df_yelp

Unnamed: 0,Latitude,Longitude,Yelp Name,Distance,Review count,Rating
0,45.516926,-73.564257,0 Nouilles de Yunnan Chinatown 1 ...,0 1059.456813 1 1113.937266 2 1111...,0 7 1 6 2 6 3 15 4 ...,0 5.0 1 5.0 2 5.0 3 5.0 4 ...
1,45.553219,-73.539782,0 Hélicoptère 1 ...,0 143.831331 1 965.732694 2 134...,0 52 1 7 2 11 3 4 4 ...,0 4.5 1 4.5 2 4.5 3 5.0 4 ...
2,45.526890,-73.572640,0 Le Pontia...,0 741.347795 1 928.141750 2 738...,0 5 1 17 2 14 3 9 4 7 5...,0 5.0 1 5.0 2 5.0 3 5.0 4 ...
3,45.553400,-73.662255,0 Brama Usine à Bouffe 1 ...,0 944.263471 1 964.446189 2 691...,0 5 1 6 2 6 3 10 4 29 5...,0 4.5 1 4.5 2 4.5 3 4.0 4 ...
4,45.559842,-73.615447,0 Café Zezin 1 ...,0 1246.980153 1 856.307876 2 818...,0 9 1 7 2 4 3 3 4 2 5...,0 5.0 1 4.5 2 5.0 3 5.0 4 ...
...,...,...,...,...,...,...
155,45.489525,-73.584458,0 Gentile Pizza Parlour 1 ...,0 350.536376 1 622.891944 2 1030...,0 6 1 6 2 7 3 20 4 ...,0 5.0 1 5.0 2 4.5 3 4.5 4 ...
156,45.507885,-73.563151,0 Nouilles de Yunnan Chinatow...,0 298.253370 1 962.451351 2 405...,0 7 1 6 2 9 3 6 4 ...,0 5.0 1 5.0 2 5.0 3 5.0 4 ...
157,45.496496,-73.578704,0 Caffettier...,0 514.344697 1 272.674215 2 279...,0 10 1 6 2 20 3 20 4 ...,0 5.0 1 5.0 2 4.5 3 4.5 4 ...
158,45.529337,-73.577953,0 Le Pontia...,0 537.290653 1 795.551561 2 948...,0 5 1 17 2 14 3 7 4 ...,0 5.0 1 5.0 2 5.0 3 5.0 4 ...


In [13]:
# Specify the file path for the CSV
csv_file_path = "../data/yelp_nested_places.csv"

# Save the DataFrame to CSV
df_yelp.to_csv(csv_file_path, index=False)

In [14]:
# Transforming the data (collecting the averages for the distance, review count, and ratings on Yelp)

df_yelp['Average_distance_Y'] = df_yelp['Distance'].apply(lambda x: sum(x) / len(x))
df_yelp['Average_review_count_Y'] = df_yelp['Review count'].apply(lambda x: sum(x) / len(x))
df_yelp['Average_rating_Y'] = df_yelp['Rating'].apply(lambda x: sum(x) / len(x))

df_yelp['Max_rating_Y'] = df_yelp['Rating'].apply(lambda x: max(x))
df_yelp['Max_review_count'] = df_yelp['Review count'].apply(lambda x: max(x))

df_yelp['Min_rating_Y'] = df_yelp['Rating'].apply(lambda x: min(x))
df_yelp['Min_review_count'] = df_yelp['Review count'].apply(lambda x: min(x))

In [16]:
# Cleaning

df_yelp.drop(columns=['Yelp Name', 'Distance', 'Review count', 'Rating'], inplace=True)
df_yelp

Unnamed: 0,Latitude,Longitude,Average_distance_Y,Average_review_count_Y,Average_rating_Y,Max_rating_Y,Max_review_count,Min_rating_Y,Min_review_count
0,45.516926,-73.564257,768.125648,50.320000,4.670000,5.0,503,4.5,3
1,45.553219,-73.539782,604.619033,17.660000,4.240000,5.0,159,3.5,1
2,45.526890,-73.572640,809.326029,19.280000,4.610000,5.0,97,4.5,3
3,45.553400,-73.662255,716.837299,5.708333,3.468750,4.5,29,2.0,1
4,45.559842,-73.615447,807.769747,7.000000,3.153846,5.0,28,1.0,1
...,...,...,...,...,...,...,...,...,...
155,45.489525,-73.584458,773.859825,42.980000,4.560000,5.0,720,4.5,3
156,45.507885,-73.563151,740.716920,42.540000,4.650000,5.0,503,4.5,3
157,45.496496,-73.578704,542.955093,19.880000,4.580000,5.0,126,4.5,3
158,45.529337,-73.577953,658.622691,60.200000,4.590000,5.0,1270,4.5,3


In [17]:
# Specify the file path for the CSV
csv_file_path = "../data/yelp_features.csv"

# Save the DataFrame to CSV
df_yelp.to_csv(csv_file_path, index=False)

In [18]:
duplicate_rows = df_yelp[df_yelp.duplicated()]

print("Duplicate Rows:")
duplicate_rows

Duplicate Rows:


Unnamed: 0,Latitude,Longitude,Average_distance_Y,Average_review_count_Y,Average_rating_Y,Max_rating_Y,Max_review_count,Min_rating_Y,Min_review_count


# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

In [22]:
df_foursquare.info()

df_yelp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 160 entries, 0 to 159
Data columns (total 9 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Latitude              160 non-null    float64
 1   Longitude             160 non-null    float64
 2   Average_dist_F        160 non-null    float64
 3   Average_popularity_F  160 non-null    float64
 4   Average_rating_F      72 non-null     float64
 5   Max_rating_F          160 non-null    float64
 6   Max_popularity_F      160 non-null    float64
 7   Min_rating_F          160 non-null    float64
 8   Min_popularity_F      160 non-null    float64
dtypes: float64(9)
memory usage: 11.4 KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 160 entries, 0 to 159
Data columns (total 9 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Latitude                160 non-null    float64
 1   Longitude               16

In [24]:
df_foursquare.describe()

Unnamed: 0,Latitude,Longitude,Average_dist_F,Average_popularity_F,Average_rating_F,Max_rating_F,Max_popularity_F,Min_rating_F,Min_popularity_F
count,160.0,160.0,160.0,160.0,72.0,160.0,160.0,160.0,160.0
mean,45.521238,-73.579857,3716.972445,0.90782,8.649921,9.2075,0.997699,6.995,0.355111
std,0.024221,0.028689,15553.124076,0.069765,0.222209,0.352805,0.002683,1.058051,0.296504
min,45.467666,-73.667357,446.5,0.594126,7.81,7.9,0.989667,5.2,0.002376
25%,45.501837,-73.589594,643.615,0.867663,8.5785,9.1,0.996753,6.0,0.073032
50%,45.521267,-73.571923,694.28,0.940706,8.711,9.3,0.999122,6.8,0.302754
75%,45.54255,-73.56074,749.023571,0.959067,8.786,9.5,0.999565,8.1,0.545823
max,45.562219,-73.53025,104117.638889,0.981639,8.95,9.5,0.999879,8.6,0.956147


In [25]:
df_yelp.describe()

Unnamed: 0,Latitude,Longitude,Average_distance_Y,Average_review_count_Y,Average_rating_Y,Max_rating_Y,Max_review_count,Min_rating_Y,Min_review_count
count,160.0,160.0,160.0,160.0,160.0,160.0,160.0,160.0,160.0
mean,45.521238,-73.579857,767.829756,32.22524,4.394377,4.984375,374.9625,3.7125,2.21875
std,0.024221,0.028689,105.884471,20.284893,0.381715,0.162739,359.005175,1.183681,0.994778
min,45.467666,-73.667357,487.422054,4.0,3.0,3.0,4.0,1.0,1.0
25%,45.501837,-73.589594,702.030081,17.54,4.2975,5.0,97.0,3.5,1.0
50%,45.521267,-73.571923,769.16759,25.58,4.59,5.0,298.0,4.5,3.0
75%,45.54255,-73.56074,846.266652,48.07,4.64,5.0,503.0,4.5,3.0
max,45.562219,-73.53025,1051.097593,98.6,4.72,5.0,2202.0,4.5,5.0


Get the top 10 restaurants according to their rating

In [27]:
# Top 10 restaurants according to their rating.
df_yelp.sort_values('Average_rating_Y', ascending=False).head(10)

Unnamed: 0,Latitude,Longitude,Average_distance_Y,Average_review_count_Y,Average_rating_Y,Max_rating_Y,Max_review_count,Min_rating_Y,Min_review_count
82,45.521646,-73.593668,631.701648,21.36,4.72,5.0,134,4.5,3
49,45.542182,-73.622687,723.91031,16.8,4.71,5.0,77,4.5,2
33,45.51941,-73.58685,703.017479,18.08,4.71,5.0,134,4.5,3
13,45.516897,-73.589108,826.302606,21.54,4.7,5.0,134,4.5,3
12,45.533307,-73.620822,706.636098,25.74,4.7,5.0,270,4.5,3
11,45.53519,-73.615482,537.55286,25.12,4.7,5.0,270,4.5,3
63,45.527372,-73.603984,768.820021,29.22,4.7,5.0,432,4.5,4
30,45.509287,-73.571019,785.362117,34.3,4.7,5.0,503,4.5,3
36,45.530351,-73.624392,862.082189,32.0,4.69,5.0,432,4.5,2
35,45.536135,-73.622585,741.701996,24.24,4.69,5.0,270,4.5,3
