In [2]:
import pandas as pd
import requests
import os
import time

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [3]:
# Loading the city bike dataset for Dublin
df_dublin = pd.read_csv('/Users/ruu/documents/LHL/Project-Statistical-Modelling/data/dublin_bikes.csv')
coordinates = df_dublin

# Setting up url, api_key and headers for Foursquare API
url = 'https://places-api.foursquare.com/places/search'
api_key = os.environ['MYTOKEN']
headers = {
            "accept": "application/json",
            "Authorization": f"Bearer {api_key}",
            "X-Places-Api-Version": "2025-06-17"
          }
# Iterating through all coordinates found in df_dublin dataset for the Foursquare API
all_results = []
for idx, row in coordinates.iterrows():
    latlon = f"{row['latitude']},{row['longitude']}"
    params = {
        "ll": latlon,
        "radius": 350,   # Search radius was progressively reduced from 1000m to 350m to avoid consistently maximizing the API's result limit
        "fsq_category_ids": "4bf58dd8d48988d1e0931735",  # Category_id code for coffee shops
         "limit": 50   # Limit set to 50
            }
    response = requests.get(url=url, headers=headers, params=params)
    if response.status_code == 200:
        fs_data = response.json()
        for restaurant in fs_data['results']:
            restaurant['bike_lat'] = row['latitude']
            restaurant['bike_lon'] = row['longitude']
            all_results.append(restaurant)
    else:
        print(f"Error for {latlon}: {response.status_code}")
    time.sleep(0.2)

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [4]:
# Retrieving coffee shops details such as name, address, and distance in meters
# Using .get() in case a key doesnt exist and to prevent the code from crashing. 
name = [result.get('name')for result in all_results]
address = [result.get('location', {}).get('address')for result in all_results]
distance = [result.get('distance')for result in all_results] 
bike_lat = [result.get('bike_lat')for result in all_results]
bike_lon = [result.get('bike_lon')for result in all_results]

Put your parsed results into a DataFrame

In [37]:
df_fs = pd.DataFrame({
                        "name": name,
                        "address": address,
                        "distance": distance,
                        "bike_lat": bike_lat,
                        "bike_lon": bike_lon
                    })
df_fs.head()

Unnamed: 0,name,address,distance,bike_lat,bike_lon
0,The Lovinspoon,13 N Frederick Street,331.745696,53.354929,-6.269425
1,La Pausa Cafe,1-B Blessington Street,287.411461,53.354929,-6.269425
2,Candy Café,Gardiner Row,421.882505,53.354929,-6.269425
3,Bleecker Street Cafe Bar,68 Dorset Street,421.257967,53.354929,-6.269425
4,Crispers Coffee Shop,6 Berkeley Road,359.005151,53.354929,-6.269425


In [6]:
# Saving the df_fs dataframe as .csv
df_fs.to_csv('foursquare.csv')

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [27]:
df_dublin = pd.read_csv('/Users/ruu/documents/LHL/Project-Statistical-Modelling/data/dublin_bikes.csv')
coordinates = df_dublin

url = 'https://api.yelp.com/v3/businesses/search'
api_key = os.environ['YELPTOKEN']
headers = { "accept": "application/json",
            "Authorization": f"Bearer {api_key}"
          }

all_businesses = []

for idx, row in coordinates.iterrows():
    latitude = row['latitude']
    longitude = row['longitude']
    params = { "latitude": latitude,
               "longitude": longitude,
               "term": "coffee shops",
               "radius": 350,
               "limit": 50 
               
             }
    response = requests.get(url=url, headers=headers, params=params)
    if response.status_code == 200:
        yelp_data = response.json()
        for business in yelp_data['businesses']:
            business['bike_lat'] = latitude
            business['bike_lon'] = longitude
            all_businesses.append(business)
    else:
        print(f"Error for {latlon}: {response.status_code}")
    time.sleep(0.2)

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [28]:
name = [business.get('name')for business in all_businesses]
address = [business.get('location', {}).get('address1')for business in all_businesses]
distance = [business.get('distance')for business in all_businesses]
rating = [business.get('rating')for business in all_businesses]
bike_lat = [business.get('bike_lat')for business in all_businesses]
bike_lon = [business.get('bike_lon')for business in all_businesses]

Put your parsed results into a DataFrame

In [38]:
df_yelp = pd.DataFrame({
                        "name": name,
                        "address": address,
                        "distance": distance,
                        "rating": rating,
                        "bike_lat": bike_lat,
                        "bike_lon": bike_lon
                        })
df_yelp.head()

Unnamed: 0,name,address,distance,rating,bike_lat,bike_lon
0,The Lovinspoon,13 N Frederick Street,331.745696,4.6,53.354929,-6.269425
1,La Pausa Cafe,1-B Blessington Street,287.411461,5.0,53.354929,-6.269425
2,Candy Café,Gardiner Row,421.882505,3.6,53.354929,-6.269425
3,Bleecker Street Cafe Bar,68 Dorset Street,421.257967,4.6,53.354929,-6.269425
4,Crispers Coffee Shop,6 Berkeley Road,359.005151,3.0,53.354929,-6.269425


In [36]:
df_yelp.to_csv('yelp.csv')

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

The Yelp API returned a more detailed dataset compared to the Foursquare API. For each coffee shop, Yelp provided more meaningful attributes such as ratings, hours of operation, and price range — all of which add valuable context for analysis. In addition, using the same search parameters, the Yelp API returned a higher volume of results (4072 rows), indicating broader coverage and a more comprehensive capture of coffee shops within the specified area.

Get the top 10 restaurants according to their rating

In [39]:
top_10_rated = df_yelp.sort_values(by='rating',ascending=False) # There are duplicates because the same coffee shop might be within the 350m radius of every bike station 
top_10_rated_unique = top_10_rated.drop_duplicates(subset=['name', 'address']).head(10)
top_10_rated_unique

Unnamed: 0,name,address,distance,rating,bike_lat,bike_lon
2634,Café Jago's,59 Power's Court,591.055819,5.0,53.333653,-6.248345
3351,F Ronan,24 Eden Quay,111.752485,5.0,53.348279,-6.254662
777,Snax and the City,,571.224028,5.0,53.343105,-6.277167
1935,La Pausa Cafe,1-B Blessington Street,186.102933,5.0,53.357043,-6.263232
3284,Starbucks,Bishops Square,352.109064,5.0,53.337494,-6.26199
3323,BOWLS by Kwanghi Chan,56 Marlborough St,145.261442,5.0,53.352149,-6.260533
3336,O'Briens Irish Sandwich Bar,Unit 1,261.028374,5.0,53.352149,-6.260533
3355,Nicks Coffee Company,7 Tara St,130.688534,5.0,53.348279,-6.254662
807,Gerard’s,81A Sir John Rogerson's Quay,137.139013,5.0,53.347777,-6.244239
1872,Zagros,1b Blessington Street Dublin 1,505.582057,5.0,53.359624,-6.260348
