In [3]:
# imports
import requests
import os
import pandas as pd
import numpy as np

# Read the station data produced in part 1 into a DataFrame.
# The location can be overridden with the STATIONS_CSV environment variable so the
# notebook is not tied to one machine; the original absolute path stays the default.
STATIONS_CSV = os.environ.get(
    "STATIONS_CSV",
    '/Users/skylerwilson/Desktop/Lighthouse_Labs/Projects/statistical_modeling/data/stations_data.csv',
)
bike_stations_df = pd.read_csv(STATIONS_CSV)
bike_stations_df.head()

Unnamed: 0,name,latitude,longitude,free bikes
0,Lille Schous plass,59.920259,10.760629,6
1,Sjølyst,59.921673,10.67666,15
2,Tøyenparken,59.915667,10.777567,4
3,Lindern,59.935888,10.735006,0
4,Uelands gate,59.929545,10.748986,10


In [2]:
# Report how many rows the station DataFrame holds.
print(bike_stations_df.shape[0])

263


# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [117]:
def get_poi(latitude, longitude, radius=1000, limit=50, timeout=10):
    """Search the Foursquare Places v3 API for places around one coordinate.

    Parameters
    ----------
    latitude, longitude : float
        Centre of the search, sent as the ``ll`` query parameter.
    radius : int, optional
        Search radius sent to the API (default 1000, as in the original code).
    limit : int, optional
        Maximum number of places requested per call (default 50).
    timeout : float, optional
        Seconds to wait for the server before giving up (default 10). Without
        a timeout a stalled connection would block the notebook indefinitely.

    Returns
    -------
    dict or None
        The decoded JSON body on success. ``None`` when the request failed
        (connection error, timeout, non-2xx status) or the body was not valid
        JSON; in those cases the error is printed rather than raised.

    Raises
    ------
    KeyError
        If the ``FOURSQUARE_API_KEY`` environment variable is not set.
    """
    api_key = os.environ["FOURSQUARE_API_KEY"]
    base_url = "https://api.foursquare.com/v3/places/search"

    # Foursquare category ids to search for. The list covers more than
    # restaurants (e.g. 13006 comes back labelled "Beer Bar" in the results).
    category = [
        '13065',
        '10004',
        '13026',
        '13027',
        '13030',
        '13049',
        '13006',
    ]

    # Fields the API is asked to return for every place.
    fieldnames = [
        'fsq_id',
        'categories',
        'name',
        'location',
        'geocodes',
        'tel',
        'website',
        'rating',
        'price',
    ]
    headers = {
        "accept": "application/json",
        "Authorization": api_key
    }

    # Let requests build and percent-encode the query string (commas become
    # %2C) instead of assembling the URL by hand.
    params = {
        "ll": f"{latitude},{longitude}",
        "radius": radius,
        "categories": ",".join(category),
        "fields": ",".join(fieldnames),
        "limit": limit,
    }

    try:
        response = requests.get(base_url, headers=headers, params=params, timeout=timeout)
        # Turn 4xx/5xx answers into exceptions so they are reported below.
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Request Error: {e}")
    except ValueError as e:
        print(f"Error parsing response JSON: {e}")
    return None

def get_poi_data(data_frame):
    """Query Foursquare once per row of ``data_frame`` and pool the places found.

    ``data_frame`` must provide ``latitude`` and ``longitude`` columns. Each
    row's coordinates are handed to ``get_poi``; answers that are empty/None
    or that carry no ``'results'`` key are skipped.

    Returns a single flat list with the ``'results'`` entries of every
    successful answer, in row order. The entries are not de-duplicated and
    are not tagged with the row they were fetched for.
    """
    coordinates = data_frame[['latitude', 'longitude']]

    places = []
    for _, row in coordinates.iterrows():
        response = get_poi(row['latitude'], row['longitude'])
        if not response or 'results' not in response:
            continue
        places.extend(response['results'])

    return places

# Collect the Foursquare places for every station (one API call per row).
data = get_poi_data(data_frame=bike_stations_df)

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [130]:
# Take the first place returned by Foursquare to check the parsing logic.
results = data[0]

# A place may come back without categories; fall back to an empty dict so that
# indexing a missing or empty list does not raise.
categories = results.get('categories') or [{}]
primary_category = categories[0]
main_geocode = results.get('geocodes', {}).get('main', {})

# Parse the information stored in the result; absent fields become None.
business_id = results.get('fsq_id')
name = results.get('name')
category_id = primary_category.get('id')
category = primary_category.get('name')
address = results.get('location', {}).get('formatted_address')
latitude = main_geocode.get('latitude')
longitude = main_geocode.get('longitude')
# 'hours' is not among the fields requested in get_poi, so this stays None
# unless that field list is extended.
hours = results.get('hours', {}).get('regular')
tel = results.get('tel')
website = results.get('website')
rating = results.get('rating')

# Print the parsed info to ensure correctness.
print(f'ID: {business_id}\nName: {name}\nCategory: {category}\nLocation: {address}\nTelephone: {tel}\nRating: {rating}')



ID: 57f53ed7498e23279caaed8e
Name: Txotx
Category: Bar
Location: Trondheimsveien 2, 0560 Oslo
Telephone: 921 69 062
Rating: 9.1


Put your parsed results into a DataFrame

In [140]:
# Collect the parsed Foursquare fields column by column, then build a DataFrame.
data_list = {
    'business_id': [],
    'name': [],
    'category_id': [],
    'category': [],
    'address': [],
    'latitude': [],
    'longitude': [],
    'tel': [],
    'rating': []
}

for result in data:
    # A place may come back without categories; fall back to an empty dict so
    # that indexing a missing or empty list does not abort the whole loop.
    primary_category = (result.get('categories') or [{}])[0]
    main_geocode = result.get('geocodes', {}).get('main', {})

    # Missing values are stored as None. A dict default for the id would be
    # unhashable and make a later drop_duplicates() raise.
    data_list['business_id'].append(result.get('fsq_id'))
    data_list['name'].append(result.get('name'))
    data_list['category_id'].append(primary_category.get('id'))
    data_list['category'].append(primary_category.get('name'))
    data_list['address'].append(result.get('location', {}).get('formatted_address'))
    data_list['latitude'].append(main_geocode.get('latitude'))
    data_list['longitude'].append(main_geocode.get('longitude'))
    data_list['tel'].append(result.get('tel'))
    data_list['rating'].append(result.get('rating'))

foursquare_results_df = pd.DataFrame(data_list)

In [141]:
# Keep only the first copy of rows that are identical in every column, then
# preview the first five rows.
foursquare_results_df = foursquare_results_df.drop_duplicates(keep='first')
foursquare_results_df.head(n=5)


Unnamed: 0,business_id,name,category_id,category,address,latitude,longitude,tel,rating
0,57f53ed7498e23279caaed8e,Txotx,13003,Bar,"Trondheimsveien 2, 0560 Oslo",59.919337,10.759947,921 69 062,9.1
1,4d6803f3709bb60c63f9b014,Le Benjamin Bar & Bistro,13148,French Restaurant,Søndre gate 6 (mellom Markveien & Thorvald Mey...,59.91875,10.758048,22 35 79 44,9.4
2,547a5140498e356dc652cfb7,Territoriet,13025,Wine Bar,"Markveien 58, 0550 Oslo",59.918887,10.75756,950 23 894,8.8
3,4bd1eaae77b29c74bb8f8d82,Glød,13003,Bar,"Thorvald Meyers gate 70 A (Korsgata), 0552 Oslo",59.920272,10.759572,930 03 674,8.2
4,5547ba21498e3c063aa9a7ba,Bd57,13006,Beer Bar,"Markveien 57, 0550 Oslo",59.91991,10.757172,923 11 315,8.4


In [142]:
# Write the Foursquare results to a CSV file in the working directory,
# leaving out the DataFrame index.
foursquare_results_df.to_csv(path_or_buf='foursquare_results.csv', index=False)

In [18]:
# Small sample (first five stations) for trying out API calls cheaply.
test_subset = bike_stations_df.head(5)
test_subset

Unnamed: 0,name,latitude,longitude,free bikes
0,Lille Schous plass,59.920259,10.760629,6
1,Sjølyst,59.921673,10.67666,15
2,Tøyenparken,59.915667,10.777567,4
3,Lindern,59.935888,10.735006,0
4,Uelands gate,59.929545,10.748986,10


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [54]:
def yelp_poi(latitude, longitude, radius=1000, limit=50, timeout=10):
    """Search the Yelp Fusion v3 business search for restaurants around one coordinate.

    Parameters
    ----------
    latitude, longitude : float
        Centre of the search.
    radius : int, optional
        Search radius sent to the API (default 1000, as in the original code).
    limit : int, optional
        Maximum number of businesses requested per call (default 50).
    timeout : float, optional
        Seconds to wait for the server before giving up (default 10). Without
        a timeout a stalled connection would block the notebook indefinitely.

    Returns
    -------
    dict or None
        The decoded JSON body on success. ``None`` when the request failed
        (connection error, timeout, non-2xx status such as 429) or the body
        was not valid JSON; in those cases the error is printed, not raised.

    Raises
    ------
    KeyError
        If the ``YELP_API_KEY`` environment variable is not set.
    """
    yelp_api_key = os.environ["YELP_API_KEY"]
    base_url = "https://api.yelp.com/v3/businesses/search"

    headers = {
        "accept": "application/json",
        "Authorization": "Bearer " + yelp_api_key
    }

    # Let requests build and encode the query string instead of
    # concatenating it by hand.
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "radius": radius,
        "categories": "restaurants",
        "sort_by": "rating",
        "limit": limit,
    }

    try:
        response = requests.get(base_url, headers=headers, params=params, timeout=timeout)
        # Turn 4xx/5xx answers into exceptions so they are reported below.
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Request Error: {e}")
    except ValueError as e:
        print(f"Error parsing response JSON: {e}")
    return None

def yelp_poi_data(data_frame):
    """Query Yelp once per row of ``data_frame`` and pool the businesses found.

    ``data_frame`` must provide ``latitude`` and ``longitude`` columns. Each
    row's coordinates are handed to ``yelp_poi``; answers that are empty/None
    or that carry no ``'businesses'`` key are skipped.

    Returns a single flat list with the ``'businesses'`` entries of every
    successful answer, in row order, without de-duplication.
    """
    coordinates = data_frame[['latitude', 'longitude']]

    businesses = []
    for _, row in coordinates.iterrows():
        response = yelp_poi(row['latitude'], row['longitude'])
        if not response or 'businesses' not in response:
            continue
        businesses.extend(response['businesses'])

    return businesses

[{'id': '0_2TaPyt9IR2wfnSqqGSRw',
  'alias': 'gazakjøkken-oslo',
  'name': 'GazaKjøkken',
  'image_url': 'https://s3-media3.fl.yelpcdn.com/bphoto/l__KJdHZe4CMfj7hPfn1Kg/o.jpg',
  'is_closed': False,
  'url': 'https://www.yelp.com/biz/gazakj%C3%B8kken-oslo?adjust_creative=UDu3d2zjN5n7VuNbOisggA&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=UDu3d2zjN5n7VuNbOisggA',
  'review_count': 5,
  'categories': [{'alias': 'kebab', 'title': 'Kebab'},
   {'alias': 'mediterranean', 'title': 'Mediterranean'},
   {'alias': 'mideastern', 'title': 'Middle Eastern'}],
  'rating': 5.0,
  'coordinates': {'latitude': 59.9144524318312, 'longitude': 10.7471727932207},
  'transactions': [],
  'price': '$',
  'location': {'address1': 'Møllergata 10',
   'address2': '',
   'address3': '',
   'city': 'Oslo',
   'zip_code': '0179',
   'country': 'NO',
   'state': '03',
   'display_address': ['Møllergata 10', '0179 Oslo', 'Norway']},
  'phone': '+4796728610',
  'display_phone': '+47 967 28 61

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [41]:
# Fetch Yelp businesses for the small test subset and print the parsed fields
# of each one to check the parsing logic.
results = yelp_poi_data(test_subset)
for result in results:
    # Named business_id rather than id so the builtin id() is not shadowed
    # for the rest of the notebook session.
    business_id = result.get('id', None)
    name = result.get('name', None)
    address = result.get('location', {}).get('display_address', None)
    latitude = result.get('coordinates', {}).get('latitude', None)
    longitude = result.get('coordinates', {}).get('longitude', None)
    tel = result.get('phone', None)
    rating = result.get('rating', None)

    print(f'ID: {business_id}\nName: {name}\nLocation: {address}\nLatitude: {latitude}\nLongitude: {longitude}\nTelephone: {tel}\nRating: {rating}')
    print('---')

ID: 0_2TaPyt9IR2wfnSqqGSRw
Name: GazaKjøkken
Location: ['Møllergata 10', '0179 Oslo', 'Norway']
Latitude: 59.9144524318312
Longitude: 10.7471727932207
Telephone: +4796728610
Rating: 5.0
---
ID: cAG1s3UZosdF9e7WZzGMpg
Name: Picasso Pizza
Location: ['Sannergata 30', '0557 Oslo', 'Norway']
Latitude: 59.9288559
Longitude: 10.7651739
Telephone: 
Rating: 5.0
---
ID: _udObTLAN6eJe35wTzt0sA
Name: Stangeriet
Location: ['Vulkan 5', '0178 Oslo', 'Norway']
Latitude: 59.9223593
Longitude: 10.7516904
Telephone: +4790418454
Rating: 5.0
---
ID: aWTw_nG2eQmakTX_VumWrg
Name: Syverkiosken
Location: ['Maridalsveien 45 B', '0175 Oslo', 'Norway']
Latitude: 59.9283791
Longitude: 10.75243
Telephone: 
Rating: 5.0
---
ID: sEOoYxHVmxYN6_ZzZfkcJA
Name: Vesuvio Café
Location: ['Bjerregaards gate 29 B', '0172 Oslo', 'Norway']
Latitude: 59.92502
Longitude: 10.74574
Telephone: +4791878986
Rating: 5.0
---
ID: RpFugceRsP1bIUf-Rww5BA
Name: La Villa Restaurant
Location: ['Tøyengata 2', '0190 Oslo', 'Norway']
Latitude: 59

Put your parsed results into a DataFrame

In [55]:
# Fetch the Yelp businesses for every station (one API call per row).
results = yelp_poi_data(bike_stations_df)

# Lay the wanted fields out column by column; absent fields become None.
yelp_data_list = {
    'business_id': [business.get('id', None) for business in results],
    'name': [business.get('name', None) for business in results],
    'address': [
        business.get('location', {}).get('display_address', None)
        for business in results
    ],
    'latitude': [
        business.get('coordinates', {}).get('latitude', None)
        for business in results
    ],
    'longitude': [
        business.get('coordinates', {}).get('longitude', None)
        for business in results
    ],
    'tel': [business.get('phone', None) for business in results],
    'rating': [business.get('rating', None) for business in results],
}

# Build the DataFrame from the column dictionary.
yelp_results_df = pd.DataFrame(yelp_data_list)

Request Error: 429 Client Error: Too Many Requests for url: https://api.yelp.com/v3/businesses/search?latitude=59.919001&longitude=10.692321&radius=1000&categories=restaurants&sort_by=rating&limit=50
Request Error: 429 Client Error: Too Many Requests for url: https://api.yelp.com/v3/businesses/search?latitude=59.90294292465149&longitude=10.698048967006343&radius=1000&categories=restaurants&sort_by=rating&limit=50
Request Error: 429 Client Error: Too Many Requests for url: https://api.yelp.com/v3/businesses/search?latitude=59.929005&longitude=10.7496755&radius=1000&categories=restaurants&sort_by=rating&limit=50
Request Error: 429 Client Error: Too Many Requests for url: https://api.yelp.com/v3/businesses/search?latitude=59.919252&longitude=10.774712&radius=1000&categories=restaurants&sort_by=rating&limit=50
Request Error: 429 Client Error: Too Many Requests for url: https://api.yelp.com/v3/businesses/search?latitude=59.939025551395765&longitude=10.723002619643406&radius=1000&categories=

In [60]:
# Yelp delivers the display address as a list of lines; flatten it into one
# comma-separated string. Only lists/tuples are joined, so re-running the cell
# does not split an already-joined string into single characters, and a
# missing address (None) is left as is instead of raising TypeError.
yelp_results_df['address'] = yelp_results_df['address'].apply(
    lambda x: ', '.join(map(str, x)) if isinstance(x, (list, tuple)) else x
)
yelp_results_df.head()

In [63]:
# drop_duplicates() returns a new DataFrame, so the result has to be assigned;
# calling it without assignment leaves the duplicates in place.
# Duplicates are identified by the Yelp business id, which also keeps this
# working when the address column still holds (unhashable) lists.
yelp_results_df = yelp_results_df.drop_duplicates(subset='business_id')
yelp_results_df.head(20)

Unnamed: 0,business_id,name,address,latitude,longitude,tel,rating
0,0_2TaPyt9IR2wfnSqqGSRw,GazaKjøkken,"Møllergata 10, 0179 Oslo, Norway",59.914452,10.747173,4796728610.0,5.0
1,cAG1s3UZosdF9e7WZzGMpg,Picasso Pizza,"Sannergata 30, 0557 Oslo, Norway",59.928856,10.765174,,5.0
2,_udObTLAN6eJe35wTzt0sA,Stangeriet,"Vulkan 5, 0178 Oslo, Norway",59.922359,10.75169,4790418454.0,5.0
3,aWTw_nG2eQmakTX_VumWrg,Syverkiosken,"Maridalsveien 45 B, 0175 Oslo, Norway",59.928379,10.75243,,5.0
4,RpFugceRsP1bIUf-Rww5BA,La Villa Restaurant,"Tøyengata 2, 0190 Oslo, Norway",59.91278,10.76391,4722177111.0,5.0
5,sEOoYxHVmxYN6_ZzZfkcJA,Vesuvio Café,"Bjerregaards gate 29 B, 0172 Oslo, Norway",59.92502,10.74574,4791878986.0,5.0
6,U_ju2nIwYmcmGBOBJuSJHw,Girotondo,"Torggata 13, 0181 Oslo, Norway",59.915128,10.750021,4723653270.0,5.0
7,VwSOI__CV48zf39tj3OrDA,Champagneria Bodega,"Vulkan 5, 0178 Oslo, Norway",59.922345,10.752224,4721627575.0,4.5
8,syrg9fvCsKOVSHvty5xs3g,Tim Wendelboe,"Grüners Gate 1, 0552 Oslo, Norway",59.923394,10.755638,4794431627.0,4.5
9,_HU--ykNKoRUj2gRuUNlPA,Haralds Vaffel,"Olaf Ryes plass 3, 0552 Oslo, Norway",59.922218,10.758081,4792223230.0,4.5


In [64]:
# Write the Yelp results to a CSV file in the working directory,
# leaving out the DataFrame index.
yelp_results_df.to_csv(path_or_buf='yelp_results.csv', index=False)

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

In [None]:
# Compare how many rows each API's DataFrame holds.
yelp_row_count = len(yelp_results_df)
foursquare_row_count = len(foursquare_results_df)
print(f"length of yelp data frame: {yelp_row_count}\nlength of foursquare data frame: {foursquare_row_count}")

The Foursquare API provided more robust information and was much more targeted, because I could use the `fields` parameter to request exactly the data I wanted, without needing to parse the JSON response nearly as much as I had to with Yelp. The Foursquare request also returned more useful data to begin with. However, the format of Yelp's API response was much easier to parse, which made accessing the relevant information far less difficult. Also, as seen in the code cell above, the Yelp API returned far more data, even after removing the duplicates.

Get the top 10 restaurants according to their rating

In [83]:
# Ten highest-rated Foursquare places: sort by rating, best first, and keep
# the first ten rows.
foursquare = foursquare_results_df.sort_values('rating', ascending=False).head(10)
foursquare


Unnamed: 0,business_id,name,address,latitude,longitude,tel,website,rating,popularity
1,13148,Le Benjamin Bar & Bistro,Søndre gate 6 (mellom Markveien & Thorvald Mey...,59.91875,59.91875,22 35 79 44,http://lebenjamin.no,9.4,0.944264
30,13035,Supreme Roastworks,"Thorvald Meyers gate 18, 0555 Oslo",59.928147,59.928147,22 71 42 02,http://www.supremeroastworks.no,9.4,0.99869
33,13003,Oslo Mekaniske Verksted,"Tøyenbekken 34 (Joachim Nielsens Gang), 0188 Oslo",59.911306,59.911306,452 37 534,http://www.oslomekaniskeverksted.no,9.3,0.993123
810,13336,Cru,"Ingelbrecht Knudssøns gate 1 (Industrigata), 0...",59.927402,59.927402,23 98 98 98,http://cru.no,9.2,0.993949
256,13035,Paradis Gelateria,"Lille Stranden 4, 0252 Oslo",59.908351,59.908351,22 83 83 00,http://www.iskrembar.no,9.2,0.984421
11,13003,Crowbar & Bryggeri,"Torggata 32, 0183 Oslo",59.91711,59.91711,21 38 67 57,http://tealounge.no,9.2,0.996086
0,13003,Txotx,"Trondheimsveien 2, 0560 Oslo",59.919337,59.919337,921 69 062,http://www.txotx.no,9.1,0.959516
1048,13377,Nordvegan,"Kristian Iv's gate 15, 0164 Oslo",59.915649,59.915649,969 11 167,https://www.nordvegan.com,9.1,0.976234
802,13276,Izakaya Oslo,"St. Olavs gate 7, 0165 Oslo",59.918259,59.918259,463 45 679,http://izakayaoslo.com,9.1,0.95175
898,13034,Pust,"Slemdalsveien 1, 0369 Oslo",59.930398,59.930398,408 00 014,http://pustkaffebar.no,9.1,0.985637
