In [28]:
# imports
import requests
import os
import pandas as pd
from pprint import pprint # this will display the data in a structured, more readable manner

In [46]:
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Access the key
api_key = os.getenv("MY_API_KEY")
YELP_API_KEY = os.getenv("YELP_API_KEY")

# Example usage
print(f"Using API key: {api_key[:4]}****")  # Just to verify it's working
print(f"Using Yelp API key: {YELP_API_KEY[:4]}****")

Using API key: fsq3****
Using Yelp API key: Xb5n****


# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [57]:
df = pd.read_csv('../data/bike_stations.csv')

[{'distance': 27,
  'name': "Rosedale's Finest",
  'num_attributes': 0,
  'num_categories': 1,
  'query_latitude': np.float64(43.685924),
  'query_longitude': np.float64(-79.376304)},
 {'distance': 38,
  'name': 'Dolce Bakery',
  'num_attributes': 0,
  'num_categories': 3,
  'query_latitude': np.float64(43.685924),
  'query_longitude': np.float64(-79.376304)},
 {'distance': 876,
  'name': "Kirkwood's Rib Joint",
  'num_attributes': 0,
  'num_categories': 1,
  'query_latitude': np.float64(43.685924),
  'query_longitude': np.float64(-79.376304)},
 {'distance': 897,
  'name': 'Picnic Cafe & Wine Shop',
  'num_attributes': 0,
  'num_categories': 1,
  'query_latitude': np.float64(43.685924),
  'query_longitude': np.float64(-79.376304)},
 {'distance': 381,
  'name': 'Holly Berry Fair',
  'num_attributes': 0,
  'num_categories': 3,
  'query_latitude': np.float64(43.685924),
  'query_longitude': np.float64(-79.376304)},
 {'distance': 248,
  'name': 'Delina Restaurant',
  'num_attributes': 0,
 

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [76]:
sample_df = df[['latitude', 'longitude']].head(50)
headers = {"Accept": "application/json", "Authorization": api_key}

# Store results here
poi_list = []

# Loop over each lat/lon
for idx, row in sample_df.iterrows():
    lat = row['latitude']
    lon = row['longitude']
    
    url = f"https://api.foursquare.com/v3/places/search?ll={lat},{lon}&radius=1000&categories=13065,13003,13032&limit=50"
    
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    data = response.json()

    json_data = response.json()  # Convert response to Python dict

    for poi in json_data['results']:  # Loop through each place
        
        fsq_id = poi.get("fsq_id")

        url2 = f"https://api.foursquare.com/v3/places/{fsq_id}"
        
        response2 = requests.get(url2, headers=headers)
        response2.raise_for_status()
        data2 = response2.json()

        json_data2 = response2.json()  # Convert response to Python dict

        url3 = f"https://api.foursquare.com/v3/places/{fsq_id}"
            
        response3 = requests.get(url3, headers=headers)
        response3.raise_for_status()
        data3 = response3.json()

        json_data3 = response3.json()  # Convert response to Python dict

        features = json_data3.get("features", {})

        poi_list.append({
            'query_latitude': lat,
            'query_longitude': lon,
            'num_categories': len(poi.get("categories", [])),
            'name': poi.get('name'),
            'distance': poi.get("distance"),
            'fsqid': poi.get("fsq_id"),
            'num_attributes': sum(1 for v in features.values() if v is not None) if isinstance(features, dict) else 0
        })

pprint(poi_list)

[{'distance': 27,
  'fsqid': '5f6f6dc999b0676341c2edd0',
  'name': "Rosedale's Finest",
  'num_attributes': 0,
  'num_categories': 1,
  'query_latitude': np.float64(43.685924),
  'query_longitude': np.float64(-79.376304)},
 {'distance': 38,
  'fsqid': '5f710453c2dbe370d739275e',
  'name': 'Dolce Bakery',
  'num_attributes': 0,
  'num_categories': 3,
  'query_latitude': np.float64(43.685924),
  'query_longitude': np.float64(-79.376304)},
 {'distance': 876,
  'fsqid': '4c005620ad46c9b6e9568f66',
  'name': "Kirkwood's Rib Joint",
  'num_attributes': 0,
  'num_categories': 1,
  'query_latitude': np.float64(43.685924),
  'query_longitude': np.float64(-79.376304)},
 {'distance': 897,
  'fsqid': '645fd57a10a5633876bb2c8f',
  'name': 'Picnic Cafe & Wine Shop',
  'num_attributes': 0,
  'num_categories': 1,
  'query_latitude': np.float64(43.685924),
  'query_longitude': np.float64(-79.376304)},
 {'distance': 381,
  'fsqid': '396cd48d15ba4cac949057c4',
  'name': 'Holly Berry Fair',
  'num_attribu

Put your parsed results into a DataFrame

In [77]:
# Convert to a dataframe
results_df = pd.json_normalize(poi_list)

# Save or preview
results_df.to_csv(index=False, path_or_buf= '../data/foursquare_results.csv')
print(results_df.head())

   query_latitude  query_longitude  num_categories                     name  \
0       43.685924       -79.376304               1        Rosedale's Finest   
1       43.685924       -79.376304               3             Dolce Bakery   
2       43.685924       -79.376304               1     Kirkwood's Rib Joint   
3       43.685924       -79.376304               1  Picnic Cafe & Wine Shop   
4       43.685924       -79.376304               3         Holly Berry Fair   

   distance                     fsqid  num_attributes  
0        27  5f6f6dc999b0676341c2edd0               0  
1        38  5f710453c2dbe370d739275e               0  
2       876  4c005620ad46c9b6e9568f66               0  
3       897  645fd57a10a5633876bb2c8f               0  
4       381  396cd48d15ba4cac949057c4               0  


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [53]:
headers = {"accept": "application/json", 'Authorization': f"Bearer {YELP_API_KEY}"}

# Store results here
poi_list_yelp = []

# Loop over each lat/lon
for idx, row in sample_df.iterrows():
    lat = row['latitude']
    lon = row['longitude']
    
    url = f"https://api.yelp.com/v3/businesses/search?latitude={lat}&longitude={lon}&radius=1000&categories=restaurants&categories=bars&attributes=&sort_by=best_match&limit=50"

    response = requests.get(url, headers=headers)
    response.raise_for_status()
    data = response.json()

    json_data = response.json()  # Convert response to Python dict

    for poi in json_data['businesses']:  # Loop through each place
        poi_list_yelp.append({
            'query_latitude': lat,
            'query_longitude': lon,
            'num_categories': len(poi.get("categories", [])),
            'name': poi.get('alias'),
            'distance': poi.get("distance"),
            'num_attributes': sum(1 for v in poi.get("attributes", {}).values() if v is not None)
        })

pprint(poi_list_yelp)

[{'distance': 1302.4855439306064,
  'name': 'megumi-mazesoba-toronto',
  'num_attributes': 0,
  'num_categories': 1,
  'query_latitude': np.float64(43.685924),
  'query_longitude': np.float64(-79.376304)},
 {'distance': 1402.3273522236896,
  'name': 'mineral-toronto',
  'num_attributes': 0,
  'num_categories': 3,
  'query_latitude': np.float64(43.685924),
  'query_longitude': np.float64(-79.376304)},
 {'distance': 1701.0348687991896,
  'name': 'ryus-noodle-bar-toronto-7',
  'num_attributes': 1,
  'num_categories': 1,
  'query_latitude': np.float64(43.685924),
  'query_longitude': np.float64(-79.376304)},
 {'distance': 1426.5095641961218,
  'name': 'black-camel-toronto-2',
  'num_attributes': 0,
  'num_categories': 1,
  'query_latitude': np.float64(43.685924),
  'query_longitude': np.float64(-79.376304)},
 {'distance': 25.650316265688087,
  'name': 'dolce-bakery-toronto',
  'num_attributes': 1,
  'num_categories': 3,
  'query_latitude': np.float64(43.685924),
  'query_longitude': np.flo

Put your parsed results into a DataFrame

In [72]:
# Convert to a dataframe
results2_df = pd.json_normalize(poi_list_yelp)

# Save or preview
results2_df.to_csv(index=False, path_or_buf='../data/yelp_results.csv')
print(results2_df.head())

   query_latitude  query_longitude  num_categories                       name  \
0       43.685924       -79.376304               1    megumi-mazesoba-toronto   
1       43.685924       -79.376304               3            mineral-toronto   
2       43.685924       -79.376304               1  ryus-noodle-bar-toronto-7   
3       43.685924       -79.376304               1      black-camel-toronto-2   
4       43.685924       -79.376304               3       dolce-bakery-toronto   

      distance  num_attributes  
0  1302.485544               0  
1  1402.327352               0  
2  1701.034869               1  
3  1426.509564               0  
4    25.650316               1  


# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

More complete data initially seems to be provided by Yelp for the criteria that I was interested in extracting. I wanted to collect the distance from the points of interest (pois) to the query lat/long to inform how far the pois available from each API were to the query site. I also thought that the number of features available and number of categories used to classify each site could inform how detailed the information was from each API. Interestingly, none of the features from the FourSquare data were available for this set of restaurants/cafes/bars in Toronto. I thought it may be a issue with how I ran the API call initially as some resources only mentioned getting features from the call using a specific site ID, so I reran my code to embed an API call using the site ID but still was not able to gather any features. After spot-checking some of the json outputs I came to the conclusion that there were no features available for the pois in this dataset using the free version of Foursquare. Therefore, the Yelp API provided more complete data.

Get the top 10 restaurants according to their rating

In [75]:
headers = {"accept": "application/json", 'Authorization': f"Bearer {YELP_API_KEY}"}


# Store results here
poi_list_yelp_top10 = []

# Loop over each lat/lon
for idx, row in sample_df.iterrows():
    lat = row['latitude']
    lon = row['longitude']
    
    url = f"https://api.yelp.com/v3/businesses/search?latitude={lat}&longitude={lon}&radius=1000&categories=restaurants&sort_by=rating&limit=10"

    response = requests.get(url, headers=headers)
    response.raise_for_status()
    data = response.json()

    json_data_top = response.json()  # Convert response to Python dict

    for poi in json_data_top['businesses']:  # Loop through each place
        poi_list_yelp_top10.append({
            'query_latitude': lat,
            'query_longitude': lon,
            'name': poi.get('alias'),
            'rating': poi.get('rating')
        })

results2_df = pd.json_normalize(poi_list_yelp_top10)

[{'name': 'el-tenedor-toronto',
  'query_latitude': np.float64(43.685924),
  'query_longitude': np.float64(-79.376304),
  'rating': 5.0},
 {'name': 'el-inka-toronto-toronto',
  'query_latitude': np.float64(43.685924),
  'query_longitude': np.float64(-79.376304),
  'rating': 5.0},
 {'name': 'lil-e-coffee-cafe-toronto',
  'query_latitude': np.float64(43.685924),
  'query_longitude': np.float64(-79.376304),
  'rating': 4.8},
 {'name': 'aroma-espresso-bar-toronto-31',
  'query_latitude': np.float64(43.685924),
  'query_longitude': np.float64(-79.376304),
  'rating': 4.8},
 {'name': 'istanbul-döner-wrap-toronto',
  'query_latitude': np.float64(43.685924),
  'query_longitude': np.float64(-79.376304),
  'rating': 4.8},
 {'name': 'poppadum-toronto',
  'query_latitude': np.float64(43.685924),
  'query_longitude': np.float64(-79.376304),
  'rating': 4.8},
 {'name': 'imperfect-fresh-eats-toronto-5',
  'query_latitude': np.float64(43.685924),
  'query_longitude': np.float64(-79.376304),
  'rating'

Unnamed: 0,query_latitude,query_longitude,name,rating
0,43.685924,-79.376304,el-tenedor-toronto,5.0
132,43.647259,-79.379878,adobar-toronto-3,5.0
134,43.647259,-79.379878,bodega-burgers-toronto,5.0
135,43.647259,-79.379878,mazeh-toronto,5.0
136,43.647259,-79.379878,wanaag-toronto,5.0
137,43.647259,-79.379878,naan-kabob-toronto-3,5.0
368,43.67044,-79.453285,la-spesa-food-market-and-specialties-toronto,5.0
366,43.633473,-79.425679,top-gun-burgers-toronto-3,5.0
364,43.633473,-79.425679,logas-corner-toronto-2,5.0
146,43.6397,-79.44865,logas-corner-toronto-2,5.0


The names of the top 10 restaurants based on rating are below and were determined based on Yelp as I felt that they had more complete data (and based on their business would have a rich source of rating for pois):

In [82]:
print(results2_df.sort_values("rating", ascending = False).drop_duplicates(subset='name', keep="first").head(10)['name'])

0                                el-tenedor-toronto
132                                adobar-toronto-3
134                          bodega-burgers-toronto
135                                   mazeh-toronto
136                                  wanaag-toronto
137                            naan-kabob-toronto-3
368    la-spesa-food-market-and-specialties-toronto
366                       top-gun-burgers-toronto-3
364                          logas-corner-toronto-2
148                           hakka-wok-hei-toronto
Name: name, dtype: object
