In [75]:
import pandas as pd
import requests
import os
import time

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [76]:
# Loading the city bike dataset for Dublin
df_dublin = pd.read_csv('/Users/ruu/documents/LHL/Project-Statistical-Modelling/data/dublin.csv')
coordinates = df_dublin

# Setting up url, api_key and headers for Foursquare API
url = 'https://places-api.foursquare.com/places/search'
api_key = os.environ['MYTOKEN']
headers = {
            "accept": "application/json",
            "Authorization": f"Bearer {api_key}",
            "X-Places-Api-Version": "2025-06-17"
          }
# Iterating through all coordinates found in df_dublin dataset in the API from foursquare
all_results = []
for idx, row in coordinates.iterrows():
    latlon = f"{row['latitude']},{row['longitude']}"
    params = {
        "ll": latlon,
        "radius": 350,   # search radius was progressively reduced from 1000m to 350m to avoid consistently reaching the Yelp API's result limit
        "fsq_category_ids": "4bf58dd8d48988d1e0931735",  # category_id code for coffee shops
         "limit": 50   # limit set to 50
            }
    response = requests.get(url=url, headers=headers, params=params)
    if response.status_code == 200:
        fs_data = response.json()
        for restaurant in fs_data['results']:
            restaurant['bike_lat'] = row['latitude']
            restaurant['bike_lon'] = row['longitude']
            all_results.append(restaurant)
    else:
        print(f"Error for {latlon}: {response.status_code}")
    time.sleep(0.2)

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [77]:
# Retrieving restaurant details such as name, address, and distance in meters
# Using .get() in case a key doesnt exist and to prevent the code from crashing. 
name = [result.get('name')for result in all_results]
address = [result.get('location', {}).get('address')for result in all_results]
distance = [result.get('distance')for result in all_results] 
bike_lat = [result.get('bike_lat')for result in all_results]
bike_lon = [result.get('bike_lon')for result in all_results]

Put your parsed results into a DataFrame

In [78]:
df_fs = pd.DataFrame({
                        "name": name,
                        "address": address,
                        "distance": distance,
                        "bike_lat": bike_lat,
                        "bike_lon": bike_lon
                    })
df_fs

Unnamed: 0,name,address,distance,bike_lat,bike_lon
0,La Pausa Caffe,2b Blessington St,300,53.354929,-6.269425
1,Bookafé,101 Dorset Street Upper Perto da Parnell St,240,53.354929,-6.269425
2,Jay's Café Broadstone,"No 42 Dominick Street Upper, Broadstone, Dubli...",183,53.354929,-6.269425
3,Delisuz,58A Blessington St,213,53.354929,-6.269425
4,Deli Suz,Blessington St.,266,53.354929,-6.269425
...,...,...,...,...,...
1418,Igor's Coffee Bar,,229,53.344115,-6.237153
1419,Grindstone,"Bolands Quay, Barrow St, Dublin 4",258,53.344115,-6.237153
1420,Groundstate Coffee,,329,53.344115,-6.237153
1421,Goya Lounge,172 North Strand Road,107,53.354845,-6.247579


In [79]:
# Saving the df_fs dataframe as .csv
df_fs.to_csv('foursquare.csv')

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [80]:
df_dublin = pd.read_csv('/Users/ruu/documents/LHL/Project-Statistical-Modelling/data/dublin.csv')
coordinates = df_dublin

url = 'https://api.yelp.com/v3/businesses/search'
api_key = os.environ['YELPTOKEN']
headers = { "accept": "application/json",
            "Authorization": f"Bearer {api_key}"
          }

all_businesses = []

for idx, row in coordinates.iterrows():
    latitude = row['latitude']
    longitude = row['longitude']
    params = { "latitude": latitude,
               "longitude": longitude,
               "term": "coffee shops",
               "radius": 350,
               "limit": 50 
               
             }
    response = requests.get(url=url, headers=headers, params=params)
    if response.status_code == 200:
        yelp_data = response.json()
        for business in yelp_data['businesses']:
            business['bike_lat'] = latitude
            business['bike_lon'] = longitude
            all_businesses.append(business)
    else:
        print(f"Error for {latlon}: {response.status_code}")
    time.sleep(0.2)

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [81]:
name = [business.get('name')for business in all_businesses]
address = [business.get('location', {}).get('address1')for business in all_businesses]
distance = [business.get('distance')for business in all_businesses]
rating = [business.get('rating')for business in all_businesses]
bike_lat = [business.get('bike_lat')for business in all_businesses]
bike_lon = [business.get('bike_lon')for business in all_businesses]

Put your parsed results into a DataFrame

In [82]:
df_yelp = pd.DataFrame({
                        "name": name,
                        "address": address,
                        "distance": distance,
                        "rating": rating,
                        "bike_lat": bike_lat,
                        "bike_lon": bike_lon
                        })
df_yelp

Unnamed: 0,name,address,distance,rating,bike_lat,bike_lon
0,The Lovinspoon,13 N Frederick Street,331.745696,4.6,53.354929,-6.269425
1,La Pausa Cafe,1-B Blessington Street,287.411461,5.0,53.354929,-6.269425
2,Candy Café,Gardiner Row,421.882505,3.6,53.354929,-6.269425
3,Blas Café,26 King's Inns Street,415.354471,4.3,53.354929,-6.269425
4,Crispers Coffee Shop,6 Berkeley Road,359.005151,3.0,53.354929,-6.269425
...,...,...,...,...,...,...
4067,Double Expresso,38 Amiens St,314.410366,4.0,53.354845,-6.247579
4068,Subway,14 Amiens Street,476.547523,4.0,53.354845,-6.247579
4069,Quirke,40 Amiens Street,303.083239,0.0,53.354845,-6.247579
4070,Steady Eddies,Connolly Station,421.097047,0.0,53.354845,-6.247579


In [83]:
df_yelp.to_csv('yelp.csv')

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

df_yelp provided more complete data as it captures the following:
1. has more meaningful attributes (for example, it has ratings, hours of operation and price range of the coffee shops).
2. returned more results (=4072 rows) for the same search parameters.

Get the top 10 restaurants according to their rating

In [86]:
top_10_rated = df_yelp.sort_values(by='rating',ascending=False).head(10)
top_10_rated

Unnamed: 0,name,address,distance,rating,bike_lat,bike_lon
493,Munchies IFSC,"Unit , IFSC",398.270327,5.0,53.346637,-6.246154
213,Bácús Bakery,Green Street,531.788384,5.0,53.347692,-6.278214
2961,Riggers d8,145 Emmet Rd,518.594641,5.0,53.342113,-6.310015
1611,Fox & Feather,27A Parkgate Street,111.208844,5.0,53.347972,-6.291804
2964,La Pausa Cafe,1-B Blessington Street,423.559553,5.0,53.359967,-6.264828
2965,Thunders Home Bakery,2 Lower Drumcondra Rd,423.831775,5.0,53.359967,-6.264828
1561,Project Black,3 Ranelagh,570.538865,5.0,53.330662,-6.260177
2977,Zagros,1b Blessington Street Dublin 1,414.977433,5.0,53.359967,-6.264828
211,Bounceback Cafe,80B Thomas Street,515.21131,5.0,53.347692,-6.278214
481,As One,"AS ONE 13- 18 CITY QUAY, DUBLIN 2, DUBLIN 2",303.20551,5.0,53.346637,-6.246154
