In [63]:
# imports
from sklearn.cluster import DBSCAN
from sklearn.cluster import KMeans
from geopy.distance import great_circle
from geopy.point import Point
import numpy as np
import pandas as pd
import requests
from dotenv import load_dotenv
import os

load_dotenv()

True

In [64]:
# Extract csv data for get the long and lat
df = pd.read_csv("../data/bike_stations.csv")

# Extract coordinates
coords = df[['latitude', 'longitude']].to_numpy()

# Convert meters to radians (for DBSCAN with haversine distance)
kms_per_radian = 6371.0088
epsilon = 0.5 / kms_per_radian  # 0.5 km = 500 meters

# Apply DBSCAN clustering
#  Apply clustering (e.g., 10 clusters). Applying cluster to decrease the number of API call. 
# Clustering groups the station e.g. for 100 station, instead of 100 API call it makes 10 api call if we do 10 cluster for 100 station

db = DBSCAN(eps=epsilon, min_samples=1, algorithm='ball_tree', metric='haversine')
df['cluster'] = db.fit_predict(np.radians(coords))

# Mean coordinates per cluster
cluster_centroids = df.groupby('cluster')[['latitude', 'longitude']].mean().reset_index()
print(cluster_centroids.head())

   cluster   latitude  longitude
0        0  43.684513 -79.373033
1        1  43.665939 -79.311547
2        2  43.691500 -79.294351
3        3  43.655026 -79.393131
4        4  43.762444 -79.500654


# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [None]:
# Create a function for the Foursquare API call and get the response
def get_foursquare_places(lat, lon, radius=500, category="13065"):  # Food & Drink category
    api_key = os.getenv('FOURSQUARE_API_KEY')
    url = "https://places-api.foursquare.com/places/search"
    # url = "https://api.foursquare.com/v3/places/search"
    headers = {
    "Accept": "application/json",
    "Authorization": f"Bearer {api_key}",
    "X-Places-API-Version": '2025-06-17'  
}
    params = {
        "ll": f"{lat},{lon}",
        "radius": radius,
        "categories": category,
        "limit": 100
    }
    
    response = requests.get(url, headers=headers, params=params)
    
    if response.status_code == 200:
        return response.json().get("results", [])
    else:
        print(f"Foursquare API error at ({lat},{lon}): {response.status_code}")
        return []

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [None]:
fsq_pois = []

kmeans = KMeans(n_clusters=10, random_state=0)
df['cluster'] = kmeans.fit_predict(coords)

# Get centroids
centroids = kmeans.cluster_centers_
cluster_centroids = pd.DataFrame(centroids, columns=['latitude', 'longitude'])
cluster_centroids['cluster'] = cluster_centroids.index

#  get the data and store the result in fsq_pois list
for _, row in cluster_centroids.iterrows():
    lat, lon = row['latitude'], row['longitude']
    results = get_foursquare_places(lat, lon)
    
    for r in results:
        fsq_pois.append({
            "cluster": row['cluster'],
            "name": r.get("name"),
            "category": r["categories"][0]["name"] if r.get("categories") else None,
            "lat": r["latitude"],
            "lon": r["longitude"]
        })

Put your parsed results into a DataFrame

In [156]:
df_fsq = pd.DataFrame(fsq_pois)
# Save Foursquare data to csv for another task
df_fsq.to_csv('../data/foursquare_data.csv', index=False)
df_fsq.head(10)

Unnamed: 0,cluster,name,category,lat,lon
0,0.0,St. Michael's Cathedral,Church,43.655007,-79.377061
1,0.0,Hokkaido Ramen Santouka らーめん山頭火,Ramen Restaurant,43.656435,-79.377586
2,0.0,Kyoto Katsugyu,Japanese Restaurant,43.65689,-79.376245
3,0.0,Mackenzie House,Monument,43.655678,-79.37825
4,0.0,Page One Cafe,Café,43.657243,-79.376021
5,0.0,Burrito Boyz,Burrito Restaurant,43.656331,-79.378541
6,0.0,SukoThai,Asian Restaurant,43.655528,-79.374907
7,0.0,Ali Basha Cafe,Hookah Bar,43.656728,-79.375378
8,0.0,Jazz Bistro,Music Venue,43.655678,-79.379276
9,0.0,The Senator Restaurant,Diner,43.655641,-79.379199


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [142]:
def get_yelp_data(lat, lon, radius=500):
    import requests
    api_key = os.getenv('YELP_API_KEY')
    YELP_API_KEY = "your_yelp_api_key"
    url = "https://api.yelp.com/v3/businesses/search"
    headers = {"Authorization": f"Bearer {api_key}"}
    params = {
        "latitude": lat,
        "longitude": lon,
        "radius": radius,
        "term": "restaurant",
        "limit": 50
    }

    response = requests.get(url, headers=headers, params=params)
    
    return response.json()

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [None]:
poi_data = []

#  Apply clustering (e.g., 10 clusters). Applying cluster to decrease the number of API call. 
# Clustering groups the station e.g. for 100 station, instead of 100 API call it makes 10 api call if we do 10 cluster for 100 station
kmeans = KMeans(n_clusters=10, random_state=0)
df['cluster'] = kmeans.fit_predict(coords)

# Get centroids
centroids = kmeans.cluster_centers_
cluster_centers = pd.DataFrame(centroids, columns=['latitude', 'longitude'])
cluster_centers['cluster'] = cluster_centers.index

for _, row in cluster_centers.iterrows():
    lat, lon = row['latitude'], row['longitude']
    result = get_yelp_data(lat, lon)
    
    for biz in result.get("businesses", []):
        
        poi_data.append({
            "cluster": row['cluster'],
            "name": biz['name'],
            "category": biz['categories'][0]['title'] if biz.get("categories") else None,
            "rating": biz.get("rating"),
            "review_count": biz.get("review_count"),
            "latitude": biz['coordinates']['latitude'],
            "longitude": biz['coordinates']['longitude'],
            "address": biz['location']['display_address']
        })

Put your parsed results into a DataFrame

In [157]:
df_ylp = pd.DataFrame(poi_data)
# df_ylp.shape[0]
# Save Yelp data to cst for another task
df_ylp.to_csv('../data/yelp_data.csv', index=False)
df_ylp.head()

Unnamed: 0,cluster,name,category,rating,review_count,latitude,longitude,address
0,0.0,Haidilao Hot Pot,Chinese,4.7,89,43.654633,-79.379839,"[237 Yonge Street, Toronto, ON M5B 1N8, Canada]"
1,0.0,The Rabbit Hole,Gastropubs,4.2,57,43.64995,-79.37976,"[21 Adelaide Street W, Toronto, ON M5H 1L6, Ca..."
2,0.0,Gyukatsu Kyoto Katsugyu,Japanese,4.0,186,43.65687,-79.37616,"[134 Dundas Street E, Toronto, ON M5B 1E2, Can..."
3,0.0,The Senator,Breakfast & Brunch,3.9,524,43.65574,-79.37896,"[249 Victoria Street, Toronto, ON M5B 1T8, Can..."
4,0.0,Adobar,Filipino,5.0,5,43.657322,-79.373885,"[200 Dundas Street E, Toronto, ON M5A 1Z4, Can..."


# Comparing Results

Which API provided you with more complete data? Provide an explanation. 


Both Four square and Yelp provide the category of the name pf places which is good. But when we compare all the data, Yelp provides more complete data as it includes 
1. review counts, user ratings of customers
2. distance of the location( coverage)
3. Detail information about the place like address and business hours 

Get the top 10 restaurants according to their rating

In [154]:
# As the YELP has detail data we are choosing YELP
# restaurants_df = df_ylp[df_ylp['category'].str.contains('Restaurant', case=False, na=False)]
# restaurants_df.shape[0]
top10_restaurants = df_ylp.sort_values(by='rating', ascending=False).head(10)
top10_restaurants.head(10)


Unnamed: 0,cluster,name,category,rating,review_count,latitude,longitude,address
66,1.0,Burger Land,Burgers,5.0,1,43.676041,-79.450698,"[1351 St Clair Avenue W, Toronto, ON M6E 1C5, ..."
33,0.0,Ikkousha,Ramen,5.0,10,43.658826,-79.382227,"[374A Yonge Street, Toronto, ON M5B 1S6, Canada]"
24,0.0,Bites,Indian,5.0,1,43.657306,-79.380806,"[335 Yonge Street, Unit 2, Toronto, ON M5B 2L3..."
99,1.0,BSTO.,Comfort Food,5.0,1,43.676949,-79.4485,"[1310 Saint Clair Avenue W, Toronto, ON M6E 1C..."
27,0.0,Tagpuan,Filipino,5.0,2,43.657381,-79.380785,"[335 Yonge Street, World Food Market, Toronto,..."
221,7.0,Meza Resto Bar,Modern European,5.0,1,43.627498,-79.498287,"[664 The Queensway, Etobicoke, ON M8Y 1K3, Can..."
29,0.0,Makilala,Filipino,5.0,1,43.6524,-79.375198,"[105 Church Street, Toronto, ON M5C 2G3, Canada]"
223,7.0,Mojo Cafe,Cafes,5.0,1,43.626366,-79.501319,"[715 The Queensway, Etobicoke, ON M8Y 1L2, Can..."
31,0.0,Box And Brew Cafe,Cafes,5.0,1,43.657817,-79.381287,"[351 Yonge Street, Toronto, ON M5B 1S1, Canada]"
227,7.0,Family Meat & Deli,Delis,5.0,1,43.62693,-79.49907,"[675 The Queensway, Etobicoke, ON M8Y 1K8, Can..."
