In [1]:
#Importing Packages 
import pandas as pd
import numpy as np
import json
import requests
import os

In [2]:
FS_KEY = os.getenv('FOURSQUARE_API_KEY')
YELP_KEY = os.getenv('YELPPIES')

In [4]:
stations_df = pd.read_csv('gent_bike_stations.csv')

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [24]:
def get_nearby_venues_fs(latitude, longitude, radius, categories):
    """
    Get venues from foursquare with a specified place type and coordinates.
    Args:
        latitude (float): latitude for query (must be combined with longitude)
        longitude (float): longitude for query (must be combined with latitude)
        api_key (str): foursquare API to use for query
        categories (str) : Foursquare-recognized place type. If not passed no place_type will be specified. 
    Extra Args:
        Rating: A numerical rating (from 0.0 to 10.0) of the FSQ Place, based on user votes, likes/dislikes, tips sentiment, and visit data
        Popularity: A measure of the FSQ Place's popularity, by foot traffic. This score is on a 0 to 1 scale and uses a 6-month span of POI visits for a given geographic area.
    Returns:
        response: response object from the requests library.
    """
     
    url = "https://api.foursquare.com/v3/places/search"
    
    params = {"categories": categories, # below, we will search for four category types
              "radius":radius,
              "ll": f"{latitude},{longitude}",
              "limit": 50, #This is the upper limit 
              "fields": "fsq_id,name,rating,popularity,price"  
             }
    # Dict for headers 
    headers = {"Accept": "application/json"}
    #Add API Key
    headers['Authorization'] = FS_KEY
    responses = requests.get(url, params=params, headers=headers)
    fs_data = responses.json()
    return fs_data


In [58]:
#Testing the function(using categoryid for restaurants)
test_results = get_nearby_venues_fs(stations_df['latitude'][0], stations_df['longitude'][0], 1000, 13065)


In [59]:
test_results

{'results': [{'fsq_id': '4b55fe33f964a520dafa27e3',
   'name': 'Eetkaffee Multatuli',
   'popularity': 0.9953832734080803,
   'price': 2,
   'rating': 8.9},
  {'fsq_id': '4b406c73f964a52025b825e3',
   'name': 'Simon Says',
   'popularity': 0.9944684381847967,
   'price': 2,
   'rating': 9.1},
  {'fsq_id': '4b71a9e2f964a52084542de3',
   'name': 'Özgem',
   'popularity': 0.9859211114184201,
   'price': 1,
   'rating': 8.7},
  {'fsq_id': '56f40805498e52376bf468f3',
   'name': 'Café VenTura',
   'popularity': 0.9996489585771121,
   'price': 2,
   'rating': 9.0},
  {'fsq_id': '5593976d498e56ace6480f22',
   'name': 'Le Bal Infernal - used book café',
   'popularity': 0.9982421334808417,
   'price': 2,
   'rating': 9.2},
  {'fsq_id': '4c40def9520fa5939fc1c8ac',
   'name': 'Café Afsnis',
   'popularity': 0.99877667382933,
   'price': 2,
   'rating': 8.6},
  {'fsq_id': '5e207bf4e9050700086c006c',
   'name': 'Mission Masala',
   'popularity': 0.9971118864753313,
   'price': 2,
   'rating': 8.6},

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

## Functions for aggregating the responses for the different categories

After looking at the documentation on the foursquare website, I have decided that for the purpose of this project, I would like to query the following categories. I have obtained the catgeory IDs from the FSQ website. 
These are the following categories along with their category IDs
1. Restaurants : 13065
2. Bars : 13003
4. Museums : 10027

Source: https://docs.foursquare.com/data-products/docs/categories


In [14]:
test_df = stations_df.head(3)

As multiple API calls within the functions was taking a really long time, I decided t store all the restaurants, bars and museums for all the station points in three different dataframes. 
This is to prevent calling the API again and again as done previously, which takes an enormous amount of time as the number of stations increases. (referting to functions in yelp_foursquare_EDA_bhopal).
Also referring to that file, I figured out a way to have multiple categories in the category parameter of the API function using FSQ documentation, however, it was challenging for me to assign which category a particular POI belonged to in the dataset as the call was made all together. I wanted to have teh knowledge of categories for my analysis as I wanted to have a count of certain POIs around the bike stations. 

In [21]:
#Making the dataframe for restaurants
all_restos = {}
for row in stations_df.itertuples(index=False):
        fs_result = get_nearby_venues_fs(row.latitude, row.longitude, 1000, 13065)
        norm_result = pd.json_normalize(fs_result['results'])  
        all_restos[row.station_id] = norm_result


Unnamed: 0,station_id,venue_id,fsq_id,name,popularity,price,rating
0,fedd9729d9183b05bb4a3bd9da7d7de1,0,4b55fe33f964a520dafa27e3,Eetkaffee Multatuli,0.995383,2.0,8.9
1,fedd9729d9183b05bb4a3bd9da7d7de1,1,4b406c73f964a52025b825e3,Simon Says,0.994468,2.0,9.1
2,fedd9729d9183b05bb4a3bd9da7d7de1,2,4b71a9e2f964a52084542de3,Özgem,0.985921,1.0,8.7
3,fedd9729d9183b05bb4a3bd9da7d7de1,3,56f40805498e52376bf468f3,Café VenTura,0.999649,2.0,9.0
4,fedd9729d9183b05bb4a3bd9da7d7de1,4,5593976d498e56ace6480f22,Le Bal Infernal - used book café,0.998242,2.0,9.2
...,...,...,...,...,...,...,...
18349,62f39a1526ab1e751dde2f508439439f,45,574bc576cd10a714173b1764,Paul,0.998734,1.0,7.5
18350,62f39a1526ab1e751dde2f508439439f,46,4bd2d008a8b3a5931588685f,Korenlei Twee,0.998529,3.0,7.7
18351,62f39a1526ab1e751dde2f508439439f,47,4b87b80bf964a52093c831e3,Souplounge,0.995843,1.0,7.5
18352,62f39a1526ab1e751dde2f508439439f,48,5ca33b950868a2002c3ae2c6,Bavet,0.998939,2.0,7.6


In [24]:
restaurants_fsq = pd.concat(all_restos).reset_index(drop=False, names=['station_id', 'venue_id'])
restaurants_fsq.drop(columns=['venue_id'], inplace=True)

In [25]:
restaurants_fsq.to_csv('gent_restaurants_fsq.csv', index=False)

In [26]:
restaurants_fsq

Unnamed: 0,station_id,fsq_id,name,popularity,price,rating
0,fedd9729d9183b05bb4a3bd9da7d7de1,4b55fe33f964a520dafa27e3,Eetkaffee Multatuli,0.995383,2.0,8.9
1,fedd9729d9183b05bb4a3bd9da7d7de1,4b406c73f964a52025b825e3,Simon Says,0.994468,2.0,9.1
2,fedd9729d9183b05bb4a3bd9da7d7de1,4b71a9e2f964a52084542de3,Özgem,0.985921,1.0,8.7
3,fedd9729d9183b05bb4a3bd9da7d7de1,56f40805498e52376bf468f3,Café VenTura,0.999649,2.0,9.0
4,fedd9729d9183b05bb4a3bd9da7d7de1,5593976d498e56ace6480f22,Le Bal Infernal - used book café,0.998242,2.0,9.2
...,...,...,...,...,...,...
18349,62f39a1526ab1e751dde2f508439439f,574bc576cd10a714173b1764,Paul,0.998734,1.0,7.5
18350,62f39a1526ab1e751dde2f508439439f,4bd2d008a8b3a5931588685f,Korenlei Twee,0.998529,3.0,7.7
18351,62f39a1526ab1e751dde2f508439439f,4b87b80bf964a52093c831e3,Souplounge,0.995843,1.0,7.5
18352,62f39a1526ab1e751dde2f508439439f,5ca33b950868a2002c3ae2c6,Bavet,0.998939,2.0,7.6


In [27]:
#Making the dataframe for bars
all_bars = {}
for row in stations_df.itertuples(index=False):
        fs_result_1 = get_nearby_venues_fs(row.latitude, row.longitude, 1000, 13003)
        norm_result_1 = pd.json_normalize(fs_result_1['results'])  
        all_bars[row.station_id] = norm_result_1


In [28]:
bars_fsq = pd.concat(all_bars).reset_index(drop=False, names=['station_id', 'venue_id'])

In [29]:
bars_fsq.drop(columns=['venue_id'], inplace=True)

In [32]:
bars_fsq.to_csv('gent_bars_fsq.csv', index=False)

In [35]:
bars_fsq

Unnamed: 0,station_id,fsq_id,name,popularity,price,rating
0,fedd9729d9183b05bb4a3bd9da7d7de1,50c89ecae4b027054257d6f0,Ramen Noedelbar,0.990322,2.0,9.1
1,fedd9729d9183b05bb4a3bd9da7d7de1,56f40805498e52376bf468f3,Café VenTura,0.999649,2.0,9.0
2,fedd9729d9183b05bb4a3bd9da7d7de1,5593976d498e56ace6480f22,Le Bal Infernal - used book café,0.998242,2.0,9.2
3,fedd9729d9183b05bb4a3bd9da7d7de1,4c40def9520fa5939fc1c8ac,Café Afsnis,0.998777,2.0,8.6
4,fedd9729d9183b05bb4a3bd9da7d7de1,4bb7d25fb35776b0783dc801,Trollekelder,0.999503,3.0,8.7
...,...,...,...,...,...,...
16976,62f39a1526ab1e751dde2f508439439f,4e87363c7ee6109f08fe426d,A-Pluss,0.996293,2.0,7.4
16977,62f39a1526ab1e751dde2f508439439f,4d8c58fed1c7721e92a928da,Wally,0.997689,,7.8
16978,62f39a1526ab1e751dde2f508439439f,57777c90498e65da259568f3,De Post,0.999622,,7.8
16979,62f39a1526ab1e751dde2f508439439f,4d342543b6093704b81b08e0,De Croone,0.996960,2.0,7.5


In [46]:
#Making the dataframe for museums
all_museumzz = {}
for row in stations_df.itertuples(index=False):
        fs_result_4 = get_nearby_venues_fs(row.latitude, row.longitude, 1000, 10027)
        norm_result_4 = pd.json_normalize(fs_result_4['results'])  
        all_museumzz[row.station_id] = norm_result_4


In [47]:
museums_fsq2 = pd.concat(all_museumzz).reset_index(drop=False, names=['station_id', 'venue_id'])

In [48]:
museums_fsq2

Unnamed: 0,station_id,venue_id,fsq_id,name,popularity,rating
0,fedd9729d9183b05bb4a3bd9da7d7de1,0,5ba60d6212c8f0002c82173d,Industriemuseum,0.998319,7.5
1,fedd9729d9183b05bb4a3bd9da7d7de1,1,4d299c7cf7a9224bbb2b00a0,Huis van Alijn,0.998338,7.1
2,fedd9729d9183b05bb4a3bd9da7d7de1,2,4d9765d9744f37042e05d457,Flinxo,0.413410,
3,fedd9729d9183b05bb4a3bd9da7d7de1,3,50cc9b25e4b04f44de53bd3e,Modern Chinese Art Foundation vzw,0.316222,
4,fedd9729d9183b05bb4a3bd9da7d7de1,4,55aa741a498e079a2b987d39,Julien Reviszaal,0.479690,
...,...,...,...,...,...,...
3598,62f39a1526ab1e751dde2f508439439f,10,84dea0d6964f4f6a52c0aa06,Industriemuseum Gent,,
3599,62f39a1526ab1e751dde2f508439439f,11,e76b19b30de343c1d2b436d7,Museum voor Industriële Archeologie en Textiel...,,
3600,62f39a1526ab1e751dde2f508439439f,12,45df2e3ea043425d02796aa4,Adam - Brussels Design Museum,,
3601,62f39a1526ab1e751dde2f508439439f,13,197eae68797d45481ea8233c,Gravensteen - Gerechtsmuseum en Wapenmuseum,,


In [49]:
museums_fsq2.drop(columns=['venue_id'], inplace=True)

In [50]:
museums_fsq2.to_csv('gent_museums_fsq_final.csv', index=False)

In [51]:
test_bars = bars_fsq.head(3)
test_museums = museums_fsq2.head(3)
test_restaurants = restaurants_fsq.head(3)

In [67]:
pd.concat([test_bars, test_museums, test_restaurants], axis=0, keys=['bars', 'museums', 'restaurants']).reset_index(level=0, names=['category'])


Unnamed: 0,category,station_id,fsq_id,name,popularity,price,rating
0,bars,fedd9729d9183b05bb4a3bd9da7d7de1,50c89ecae4b027054257d6f0,Ramen Noedelbar,0.990322,2.0,9.1
1,bars,fedd9729d9183b05bb4a3bd9da7d7de1,56f40805498e52376bf468f3,Café VenTura,0.999649,2.0,9.0
2,bars,fedd9729d9183b05bb4a3bd9da7d7de1,5593976d498e56ace6480f22,Le Bal Infernal - used book café,0.998242,2.0,9.2
0,museums,fedd9729d9183b05bb4a3bd9da7d7de1,5ba60d6212c8f0002c82173d,Industriemuseum,0.998319,,7.5
1,museums,fedd9729d9183b05bb4a3bd9da7d7de1,4d299c7cf7a9224bbb2b00a0,Huis van Alijn,0.998338,,7.1
2,museums,fedd9729d9183b05bb4a3bd9da7d7de1,4d9765d9744f37042e05d457,Flinxo,0.41341,,
0,restaurants,fedd9729d9183b05bb4a3bd9da7d7de1,4b55fe33f964a520dafa27e3,Eetkaffee Multatuli,0.995383,2.0,8.9
1,restaurants,fedd9729d9183b05bb4a3bd9da7d7de1,4b406c73f964a52025b825e3,Simon Says,0.994468,2.0,9.1
2,restaurants,fedd9729d9183b05bb4a3bd9da7d7de1,4b71a9e2f964a52084542de3,Özgem,0.985921,1.0,8.7


In [68]:
comb_df_fsq = pd.concat([bars_fsq, museums_fsq2, restaurants_fsq], axis=0, keys=['bars', 'museums', 'restaurants']).reset_index(level=0, names=['category'])

In [69]:
comb_df_fsq

Unnamed: 0,category,station_id,fsq_id,name,popularity,price,rating
0,bars,fedd9729d9183b05bb4a3bd9da7d7de1,50c89ecae4b027054257d6f0,Ramen Noedelbar,0.990322,2.0,9.1
1,bars,fedd9729d9183b05bb4a3bd9da7d7de1,56f40805498e52376bf468f3,Café VenTura,0.999649,2.0,9.0
2,bars,fedd9729d9183b05bb4a3bd9da7d7de1,5593976d498e56ace6480f22,Le Bal Infernal - used book café,0.998242,2.0,9.2
3,bars,fedd9729d9183b05bb4a3bd9da7d7de1,4c40def9520fa5939fc1c8ac,Café Afsnis,0.998777,2.0,8.6
4,bars,fedd9729d9183b05bb4a3bd9da7d7de1,4bb7d25fb35776b0783dc801,Trollekelder,0.999503,3.0,8.7
...,...,...,...,...,...,...,...
18349,restaurants,62f39a1526ab1e751dde2f508439439f,574bc576cd10a714173b1764,Paul,0.998734,1.0,7.5
18350,restaurants,62f39a1526ab1e751dde2f508439439f,4bd2d008a8b3a5931588685f,Korenlei Twee,0.998529,3.0,7.7
18351,restaurants,62f39a1526ab1e751dde2f508439439f,4b87b80bf964a52093c831e3,Souplounge,0.995843,1.0,7.5
18352,restaurants,62f39a1526ab1e751dde2f508439439f,5ca33b950868a2002c3ae2c6,Bavet,0.998939,2.0,7.6


In [75]:
comb_df_fsq.to_csv('gent_combined_fsq.csv', index=False)

In [70]:
#Check for duplicates
comb_df_fsq.duplicated().sum()

0

In [71]:
#Use group by to count categories for the combined dataset 

comb_df_fsq.groupby('category').size()

category
bars           16981
museums         3603
restaurants    18354
dtype: int64

In [74]:
#USe group by to count categories for the unique station IDs
#Group by station id and category
count_data = comb_df_fsq.groupby(['station_id', 'category']).size()

In [77]:
count_data.dataframe = count_data.reset_index(name='count')

In [78]:
count_data.dataframe

Unnamed: 0,station_id,category,count
0,0169398b94b3eed7ace5cef57be40375,bars,12
1,0169398b94b3eed7ace5cef57be40375,restaurants,8
2,01a6766ca7d27d30daa6b6d3fd661680,bars,50
3,01a6766ca7d27d30daa6b6d3fd661680,museums,17
4,01a6766ca7d27d30daa6b6d3fd661680,restaurants,50
...,...,...,...
1157,fedd9729d9183b05bb4a3bd9da7d7de1,museums,17
1158,fedd9729d9183b05bb4a3bd9da7d7de1,restaurants,50
1159,fffa2ab1429abc74cb18206fa315d67a,bars,50
1160,fffa2ab1429abc74cb18206fa315d67a,museums,7


In [123]:
#Instead of the wide format, elongating the dataset
count_data_fsq = count_data.dataframe.pivot(index='station_id', columns='category', values='count').reset_index()
count_data_fsq.fillna(0, inplace=True)

In [124]:
count_data_fsq

category,station_id,bars,museums,restaurants
0,0169398b94b3eed7ace5cef57be40375,12.0,0.0,8.0
1,01a6766ca7d27d30daa6b6d3fd661680,50.0,17.0,50.0
2,01b098a044c60e31c642865cf0ce1b3e,50.0,7.0,50.0
3,039374c72295ea6698c3626d478b85d0,50.0,19.0,50.0
4,039822fb9f6d2d7b9fc69d31d31ddae7,50.0,23.0,50.0
...,...,...,...,...
401,fd9055f8ffc3d10246da2114b6aa2c39,28.0,1.0,49.0
402,fdd045998e6d644711170bf8238e8f36,50.0,4.0,50.0
403,fecd51be0c13ce728017a0127aae2152,50.0,20.0,50.0
404,fedd9729d9183b05bb4a3bd9da7d7de1,50.0,17.0,50.0


In [125]:
count_data_fsq.to_csv('gent_count_data.csv', index=False)

In [80]:
#count_bars = bars_fsq.groupby('station_id').size().reset_index(name='bar_count')
#count_museums = museums_fsq2.groupby('station_id').size().reset_index(name='museum_count')
#count_restaurants = restaurants_fsq.groupby('station_id').size().reset_index(name='restaurant_count')

In [110]:
#Calculating averages for numerical variables 
# Group the data by 'station_id' and 'category' and calculate the average rating, price, and popularity
aggregated_data = comb_df_fsq.groupby(['station_id', 'category']).agg({
    'rating': 'mean',  # Calculate the mean of 'rating'
    'price': 'median',   # Calculate the median of 'price'
    'popularity': 'mean' # Calculate the mean of 'popularity'
}).reset_index()

# Pivot the data so that each category is a column
pivoted_data = aggregated_data.pivot(index='station_id', columns='category', values=['rating', 'price', 'popularity']).reset_index()


In [120]:
#Flatten the column index 
pivoted_data.columns = [''.join(col).strip() for col in pivoted_data.columns.values]
#Fill missing values with 0
pivoted_data.fillna(0, inplace=True)


In [121]:
pivoted_data

Unnamed: 0,station_id,ratingbars,ratingmuseums,ratingrestaurants,pricebars,pricerestaurants,popularitybars,popularitymuseums,popularityrestaurants
0,0169398b94b3eed7ace5cef57be40375,6.920000,0.00,6.200000,1.0,0.0,0.957234,0.000000,0.829983
1,01a6766ca7d27d30daa6b6d3fd661680,8.236000,8.00,8.540000,2.0,2.0,0.996602,0.991628,0.997347
2,01b098a044c60e31c642865cf0ce1b3e,7.797619,0.00,8.308000,2.0,2.0,0.931755,0.493017,0.996990
3,039374c72295ea6698c3626d478b85d0,7.996000,7.95,8.234000,2.0,2.0,0.984462,0.857698,0.996307
4,039822fb9f6d2d7b9fc69d31d31ddae7,8.510000,7.65,8.666000,2.0,2.0,0.997573,0.724833,0.997722
...,...,...,...,...,...,...,...,...,...
401,fd9055f8ffc3d10246da2114b6aa2c39,7.200000,0.00,7.123077,2.0,1.0,0.755113,0.378566,0.753601
402,fdd045998e6d644711170bf8238e8f36,7.552000,8.00,7.788636,2.0,2.0,0.913627,0.992393,0.968438
403,fecd51be0c13ce728017a0127aae2152,8.498000,7.30,8.690000,2.0,2.0,0.997542,0.657442,0.997523
404,fedd9729d9183b05bb4a3bd9da7d7de1,8.392000,7.30,8.440000,2.0,2.0,0.997026,0.546150,0.997115


In [122]:
pivoted_data.to_csv('gent_avg_data_fsq.csv', index=False)

In [126]:
characteristics_fsq_gent = pd.merge(count_data_fsq, pivoted_data, on='station_id')

In [127]:
characteristics_fsq_gent

Unnamed: 0,station_id,bars,museums,restaurants,ratingbars,ratingmuseums,ratingrestaurants,pricebars,pricerestaurants,popularitybars,popularitymuseums,popularityrestaurants
0,0169398b94b3eed7ace5cef57be40375,12.0,0.0,8.0,6.920000,0.00,6.200000,1.0,0.0,0.957234,0.000000,0.829983
1,01a6766ca7d27d30daa6b6d3fd661680,50.0,17.0,50.0,8.236000,8.00,8.540000,2.0,2.0,0.996602,0.991628,0.997347
2,01b098a044c60e31c642865cf0ce1b3e,50.0,7.0,50.0,7.797619,0.00,8.308000,2.0,2.0,0.931755,0.493017,0.996990
3,039374c72295ea6698c3626d478b85d0,50.0,19.0,50.0,7.996000,7.95,8.234000,2.0,2.0,0.984462,0.857698,0.996307
4,039822fb9f6d2d7b9fc69d31d31ddae7,50.0,23.0,50.0,8.510000,7.65,8.666000,2.0,2.0,0.997573,0.724833,0.997722
...,...,...,...,...,...,...,...,...,...,...,...,...
401,fd9055f8ffc3d10246da2114b6aa2c39,28.0,1.0,49.0,7.200000,0.00,7.123077,2.0,1.0,0.755113,0.378566,0.753601
402,fdd045998e6d644711170bf8238e8f36,50.0,4.0,50.0,7.552000,8.00,7.788636,2.0,2.0,0.913627,0.992393,0.968438
403,fecd51be0c13ce728017a0127aae2152,50.0,20.0,50.0,8.498000,7.30,8.690000,2.0,2.0,0.997542,0.657442,0.997523
404,fedd9729d9183b05bb4a3bd9da7d7de1,50.0,17.0,50.0,8.392000,7.30,8.440000,2.0,2.0,0.997026,0.546150,0.997115


In [128]:
characteristics_fsq_gent.to_csv('gent_characteristics_fsq.csv', index=False)

Put your parsed results into a DataFrame

In [5]:
stations_df_fs = stations_df.copy()

In [27]:
#stations_df_fs = stations_df_fs.drop(columns=['renting', 'returning', 'last_updated', 'restaurants_avg_rating', 'bars_avg_rating', 'museums_avg_rating', 'restaurants_count', 'bars_count', 'museums_count'])

In [133]:
stations_df_fsq_data = pd.merge(stations_df_fs, characteristics_fsq_gent, on='station_id')

In [134]:
stations_df_fsq_data.to_csv('gent_bike_stations_fsq.csv', index=False)

In [138]:
#Check for duplicates
stations_df_fsq_data.duplicated().sum()

0

In [139]:
stations_df_fsq_data.isnull().sum()

station_id               0
station_name             0
latitude                 0
longitude                0
empty_slots              0
free_bikes               0
timestamp                0
station_uid              0
bars                     0
museums                  0
restaurants              0
ratingbars               0
ratingmuseums            0
ratingrestaurants        0
pricebars                0
pricerestaurants         0
popularitybars           0
popularitymuseums        0
popularityrestaurants    0
dtype: int64

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice.

Bhopal has no YELP coverage as we can see with only the NULL values and through my research online so I made a new notebook with a new city and did the project all over again. 

The Bhopal city issue can be seen at the bottom or the corresponding Bhopal file. 

In [13]:

def get_nearby_venues_yelp_pls(latitude, longitude, radius, categories):
    """
    Get venues from yelp with a specified place type and coordinates.
    Args:
        latitude (float): latitude for query (must be combined with longitude)
        longitude (float): longitude for query (must be combined with latitude)
        radius (int): search radius in meters
        categories (str or list): Yelp categories for the query (can be a string or list of strings)
    Returns:
        pd.DataFrame: DataFrame containing the results from the Yelp API
    """
    
    # Convert categories list to a comma-separated string if needed
    category_string = ','.join(categories) if isinstance(categories, list) else categories

    # Yelp API URL
    url = "https://api.yelp.com/v3/businesses/search"
    
    # API parameters
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "radius": radius,
        "limit": 50,  # Upper limit of results
        "categories": category_string
    }
    
    # API headers
    headers = {
        "Authorization": f"Bearer {YELP_KEY}",  # Add the Yelp API key to the headers
        "Accept": "application/json"
    }
    
    # Make the API request
    responses = requests.get(url, params=params, headers=headers)
    yelp_data = responses.json()
    
    # Check if 'businesses' key exists in the API response
    if 'businesses' in yelp_data:
        businesses = yelp_data['businesses']
        data = []
        
        # Loop through each business
        for business in businesses:
            # Check if 'categories' key exists
            if 'categories' in business:
                categories_list = business['categories']
                # Get category aliases and titles
                category_title = [cat['title'] for cat in categories_list]
            else:
                category_title = []
                
            # Collect necessary data from the business
            data.append({
                'ID': business.get('id'),
                'Name': business.get('name'),
                'Category Title': ', '.join(category_title),
                'Price': business.get('price', 'N/A'),  # Default value 'N/A' if price is missing
                'Rating': business.get('rating', 0.0),
                'Address': business.get('location', {}).get('address1', ''),
                'Latitude': business.get('coordinates', {}).get('latitude'),
                'Longitude': business.get('coordinates', {}).get('longitude'),
                'Review Count': business.get('review_count')
            })
        
        # Convert the list of dictionaries to a DataFrame
        venues_df = pd.DataFrame(data)
        return venues_df
    
    else:
        print(f"Failed to retrieve : {responses.status_code}")
        return None
        # Return an empty DataFrame if no businesses data
        #return pd.DataFrame()


In [7]:
#Test the function 
blah = get_nearby_venues_yelp_pls(stations_df['latitude'][0], stations_df['longitude'][0], 1000, 'restaurants')

In [8]:
blah

Unnamed: 0,ID,Name,Category Title,Price,Rating,Address,Latitude,Longitude,Review Count
0,ngMm4jw58dxnb8WRKrWmsA,De Stokerij,Belgian,€€,4.7,Tichelrei 2A,51.059839,3.72342,11
1,stCancn57gbNiPRolX2r7w,Simon Says,"Coffee & Tea, Sandwiches",€€,4.3,Sluizeken 8,51.059337,3.724773,12
2,xphaj9QeUAAGh-r_adlU2A,Balls & Glory,Belgian,€€,4.3,Jakobijnenstraat 6,51.05235,3.72022,92
3,nfKa-w4DfADriF842GMh_g,'t Oud Clooster,Belgian,€€,4.4,Zwartezustersstraat 5,51.05207,3.71913,40
4,JbBeYAvi4-O2FNqaen7FOg,Eetkaffee Multatuli,Belgian,€€€,4.0,Huidevetterskaai 40,51.06106,3.727819,12
5,whbwMHXf96IanX1JX47hng,Gruut,"Cafes, Breweries",€,4.4,Rembert Dodoensdreef,51.057669,3.730377,7
6,BK3Q9mppHB2hqVdLNwXISw,Uncle Babe's Burger Bar,"American, Burgers",€€,4.7,Sluizekenstraat 2,51.059392,3.725065,26
7,6TByL6jwSWcG8qXaBUb7sQ,Publiek,Modern European,,4.6,Ham 39,51.057784,3.732898,9
8,1Er36Yqfn9ayPLIIVJnXhQ,Seli's Noodlebar,"Dim Sum, Noodles",€€,4.7,Limburgstraat 28,51.05222,3.72745,28
9,qCTeIltMiyx98YfGLtCGPw,Amadeus Gent 1,Barbeque,€€,3.9,Plotersgracht 8,51.057892,3.722343,24


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

The categories (in line with the FourSquare POIs) are: 
1. 'restaurants'
2. 'bars'
3. 'museums'

Source: https://docs.developer.yelp.com/docs/resources-categories


In [9]:
def fetch_category_yelp(category, df, radius=1000):
    """
    Fetches data from Yelp API for a specified category.
    Args:
        category (str): category to search for
    Returns:
        dict: dictionary containing the data from the Yelp API
    """
    for row in df.itertuples(index=False):
        yelp_result_category = get_nearby_venues_yelp_pls(row.latitude, row.longitude, 1000, category)

    return yelp_result_category

In [12]:
all_rests_yelp = fetch_category_yelp('restaurants', stations_df[0:4])
#The calls ran out from 300 to 0 in one line to testing 

Failed to retrieve categories: 429
Failed to retrieve categories: 429
Failed to retrieve categories: 429
Failed to retrieve categories: 429


In [11]:
print(all_rests_yelp)

None


In [208]:
#all_rests_yelp = {}
#for row in test_df.itertuples(index=False):
        #yelp_result = get_nearby_venues_yelp_pls(row.latitude, row.longitude, 1000, 'restaurants')
        #all_rests_yelp[row.station_id] = yelp_result
        

In [None]:
#Using a similar method as the FSQ Data 
comb_df_yelp = pd.concat([bars_yelp, museums_yelp, restaurants_yelp], axis=0, keys=['bars', 'museums', 'restaurants']).reset_index(level=0, names=['category'])

In [None]:
#Code to get catgeory counts for yelp data
count_data_yelp = comb_df_yelp.groupby(['station_id', 'category']).size()
count_data_yelp.dataframe = count_data_yelp.reset_index(name='count')
count_data_category_y = count_data_yelp.dataframe.pivot(index='station_id', columns='category', values='count').reset_index()
count_data_category_y.fillna(0, inplace=True)

In [None]:
count_data_category_y

In [None]:
#Code to get averages for yelp data
agg_yelp = comb_df_yelp.groupby(['station_id', 'category']).agg({
    'rating': 'mean',  # Calculate the mean of 'rating'
    'price': 'median',   # Calculate the median of 'price'
    'review_count': 'mean' # Calculate the mean of review count
}).reset_index()
pivot_yelp = agg_yelp.pivot(index='station_id', columns='category', values=['rating', 'price', 'review_count']).reset_index()
#Flatten the column index 
pivot_yelp.columns = [''.join(col).strip() for col in pivoted_data.columns.values]
#Fill missing values with 0
pivot_yelp.fillna(0, inplace=True)

In [None]:
pivot_yelp

In [None]:
characteristics_yelp = pd.merge(count_data_category_y, pivot_yelp, on='station_id')

In [22]:
#Repeating functions created to loop through the YELP results and get counts, highest rated and most popular venues for each category
def category_count_yelp(category):
    """
    Loop through the stations_df dataframe and return a list of counts of the number of items returned in the get_venues_yelp function that corresponds with the input category
    
    Args:
        category (str): the category string of what POI we are searching for

    Returns:
        response: count_of_category: a list of the number of POI's which match the category string from the get_venues_yelp function
    """
    c_counts = []
    for row in stations_df.itertuples(index=False):

        yelp_result = get_nearby_venues_yelp(row.latitude, row.longitude, 1000, category)
        norm_result = pd.json_normalize(yelp_result['businesses'])    
        c_counts.append(len(norm_result))
    return c_counts

In [23]:
def highest_rated_yelp(category):
    """
    Loop through the stations_df dataframe and return a list of the highest rated item returned in the get_venues_yelp function that corresponds with the input category
    
    Args:
        category (str): the category string of what POI we are searching for

    Returns:
        response: highest_rated: a list of the highest rated POI's which match the category string from the get_venues_yelp function
    """
    highest_rated = []
    for row in stations_df.itertuples(index=False):
        yelp_result = get_nearby_venues_yelp(row.latitude, row.longitude, 1000, category)
        norm_result = pd.json_normalize(yelp_result['businesses'])
        highest_rated_dict = {}
        if len(norm_result) > 0 and 'rating' in norm_result.columns:
            # Find the maximum rating
            max_rating = norm_result['rating'].max()
            # Find all venues with the maximum rating
            highest_rated_venue = norm_result[norm_result['rating'] == max_rating].iloc[0]
            highest_rated_dict = {highest_rated_venue['name']: highest_rated_venue['rating']}

            highest_rated.append(highest_rated_dict) 
        else:
            highest_rated.append(0)


    return highest_rated

In [24]:
def most_reviews_yelp(category):
    """
    Loop through the stations_df dataframe and return a list of the most reviewed item returned in the get_venues_yelp function that corresponds with the input category
    
    Args:
        category (str): the category string of what POI we are searching for

    Returns:
        response: most_reviews: a list of the most reviewed POI's which match the category string from the get_venues_yelp function
    """
    most_reviews = []
    for row in stations_df.itertuples(index=False):
        yelp_result = get_nearby_venues_yelp(row.latitude, row.longitude, 1000, category)
        norm_result = pd.json_normalize(yelp_result['businesses'])
        most_popular_dict = {}
        if len(norm_result) > 0 and 'review_count' in norm_result.columns:
            # Find the maximum review count
            max_review_count = norm_result['review_count'].max()
            # Find all venues with the maximum review count
            most_popular_venue = norm_result[norm_result['review_count'] == max_review_count].iloc[0]
            most_popular_dict = {most_popular_venue['name']: most_popular_venue['review_count']}
            most_reviews.append(most_popular_dict) 
        else:
            most_reviews.append(0)

    return most_reviews

In [51]:
#Testing the function 
test_yelp_func = most_reviews_yelp('hotels')
print(test_yelp_func)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [None]:
#stations_df['highest_rated_restaurant_yelp'] = highest_rated_yelp('restaurants')
#stations_df['most_reviews_restaurant_yelp'] = most_reviews_yelp('restaurants')


In [None]:

#stations_df['highest_rated_bar_yelp'] = highest_rated_yelp('bars')
#stations_df['most_reviews_bar_yelp'] = most_reviews_yelp('bars')

In [None]:

#stations_df['highest_rated_museum_yelp'] = highest_rated_yelp('museums')
#stations_df['most_reviews_museum_yelp'] = most_reviews_yelp('museums')


In [None]:
#stations_df_fs_and_yelp = stations_df.to_csv('stations_df_fs_and_yelp.csv', index=False)

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

Get the top 10 restaurants according to their rating

In [11]:
top10 = pd.read_csv("gent_restaurants_fsq.csv")

In [18]:
top_10_1 = top10.groupby('name')['rating'].mean().sort_values(ascending=False)

In [21]:
top_10_restaurants = top_10_1.head(10)
print(top_10_restaurants)

name
Paul's Boutique                     9.4
Fou d'O                             9.4
Stefano's Place                     9.3
Kantien 13                          9.3
Eetoile                             9.2
De Frietketel                       9.2
Le Bal Infernal - used book café    9.2
Oats Day Long                       9.2
Soup'r                              9.2
De Kastart                          9.1
Name: rating, dtype: float64


In [47]:
#FROM BHOPAL DATASET 

top_10_restaurants = restaurants_fs.sort_values(by='rating', ascending=False).head(10)
#The top 10 restaurants within 1000 m of all city bike stations
top_10_restaurants

Unnamed: 0,name,rating
572,La Kuchina,8.1
1210,Hotel Jehanuma Palace,8.1
298,Wind and Waves,7.9
0,Sagar Gaire,7.5
2724,Machan,7.1
202,Indian Coffee House,6.8
399,Rainbow Treat,6.8
2435,Filfora,6.7
2584,Domino's Pizza,6.6
152,Domino's Pizza,6.5
