In [None]:
# imports
import requests
import os
import pandas as pd
import json

In [None]:
# load the stations data file, created in city_bikes.ipynb
citybike_df = pd.read_json('../data/van_stations.json')

In [None]:
# keep only the station id and its coordinates
station_locations = citybike_df.loc[:, ['id', 'latitude', 'longitude']]

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [None]:
# read the Foursquare API key from the environment (None when the variable is unset)
fs_key = os.getenv('FOURSQUARE_KEY')

In [None]:
# function to get venue information from foursquare api
def get_venues_fsq(latitude, longitude, radius, api_key, categories, limit=None, timeout=10):
    """Search the Foursquare Places v3 API for venues around a coordinate.

    Parameters
    ----------
    latitude, longitude : float
        Centre point of the search.
    radius : int
        Search radius passed to the API (the notebook uses metres).
    api_key : str
        Sent verbatim as the ``Authorization`` header.
    categories : str
        Comma-separated Foursquare category ids.
    limit : int, optional
        Maximum number of venues to request. When omitted the parameter is
        not sent, so the API's own default page size applies
        (presumably 10 — verify against the Foursquare docs).
    timeout : float, optional
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    dict or None
        The decoded JSON body for a 200 response; ``None`` for any other
        status that is not an HTTP error.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    """
    url = 'https://api.foursquare.com/v3/places/search'
    headers = {'Accept': 'application/json', 'Authorization': api_key}

    # let requests build and URL-encode the query string instead of an f-string
    params = {
        'll': f'{latitude},{longitude}',
        'radius': radius,
        'categories': categories,
    }
    if limit is not None:
        params['limit'] = limit

    response = requests.get(url, headers=headers, params=params, timeout=timeout)

    # raises requests.HTTPError on 4xx/5xx; returns nothing otherwise
    response.raise_for_status()
    if response.status_code != 200:
        return None
    return response.json()

In [None]:
# query Foursquare once per station and keep every raw response

radius = 1000  # search radius around each station, in metres
# Foursquare category ids ('Dining & Drinking', 'Arts & Entertainment')
categories = '13000,10000'

# one raw API response per station, in station order
fsq_venues_list = []
for station in station_locations.itertuples(index=False):
    # get_venues_fsq returns the decoded response for this station's coordinates
    fsq_venues_list.append(
        get_venues_fsq(station.latitude, station.longitude, radius, fs_key, categories)
    )

In [None]:
# build a dataframe from the collected responses (the list holds one entry per station)
fsq_response_df = pd.DataFrame(fsq_venues_list)

In [None]:
# save venues dataframe as json file to avoid having to make api calls
fsq_response_df.to_json('../data/fsq_response.json')

In [None]:
# reload the cached Foursquare responses from the file written by the to_json
# cell above, so the API does not have to be called again after a restart
fsq_response_df = pd.read_json('../data/fsq_response.json')

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [None]:
# flatten the raw Foursquare responses into one record per venue

# one dictionary per venue, collected across all stations
fsq_venue_data_list = []

# each entry of the 'results' column is the list of venues returned for one station
for index, venues in fsq_response_df['results'].items():
    # a missing value (e.g. NaN) cannot be iterated; such a station has no venues
    if not isinstance(venues, list):
        continue

    for venue in venues:
        # optional parts of the payload fall back to empty containers so a
        # sparse venue yields None/'' fields instead of a KeyError
        venue_categories = venue.get('categories') or []
        main_geocode = (venue.get('geocodes') or {}).get('main') or {}
        location = venue.get('location') or {}

        # append inside the inner loop so every venue is kept; appending after
        # the loop stored only the last venue of each station
        fsq_venue_data_list.append({
            'fsq_id': venue['fsq_id'],
            'name': venue['name'],
            'category': venue_categories[0].get('short_name') if venue_categories else None,
            'address': location.get('address', ''),
            'latitude': main_geocode.get('latitude'),
            'longitude': main_geocode.get('longitude'),
            'distance': venue.get('distance'),
        })

Put your parsed results into a DataFrame

In [None]:
# build the venue dataframe from the parsed records (default integer index)
fsq_df = pd.DataFrame(data=fsq_venue_data_list)

In [None]:
# export venue_info_df to json file
# fsq_df.to_json('../data/fsq_data.json')

# Yelp

In [None]:
# read the Yelp API key from the environment (None when the variable is unset)
yelp_key = os.getenv('YELP_KEY')

In [None]:
# load the stations data file, created in city_bikes.ipynb
# NOTE(review): an earlier cell loads stations from '../data/van_stations.json'
# into this same name — confirm which file is the intended source
citybike_df = pd.read_json('../data/citybikes_data.json')


# get venue information from yelp api
def get_venues(station_id, latitude, longitude, radius, api_key, category, timeout=10):
    """Search the Yelp businesses endpoint around one bike station.

    Parameters
    ----------
    station_id : hashable
        Identifier of the station; stamped onto every returned business.
    latitude, longitude : float
        Centre point of the search.
    radius : int
        Search radius passed to the API (the notebook uses metres).
    api_key : str
        Yelp API key, sent as a Bearer token.
    category : str
        Value for Yelp's ``categories`` filter.
    timeout : float, optional
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    list of dict or None
        The businesses from the response, each carrying a ``station_id`` key
        (an empty list if the response has no ``businesses`` entry);
        ``None`` when the API answers with anything other than 200.
    """
    url = 'https://api.yelp.com/v3/businesses/search'
    headers = {
        'Authorization': f'Bearer {api_key}'
    }
    parameters = {
        'latitude': latitude,
        'longitude': longitude,
        'radius': radius,
        'categories': category,
        'limit': 50
    }
    response = requests.get(url, headers=headers, params=parameters, timeout=timeout)

    if response.status_code != 200:
        # report the status code too, so failed stations can be diagnosed later
        print(f'Error for station: {station_id} (HTTP {response.status_code})')
        return None

    businesses = response.json().get('businesses', [])
    # tag each venue itself; setting the key on the response envelope was a
    # dead store because only the businesses list is returned
    for business in businesses:
        business['station_id'] = station_id
    return businesses



radius = 1000  # search radius around each station, in metres
category = 'restaurants'  # Yelp category filter
venue_list = []  # accumulates the venues found for every station

for station in citybike_df.itertuples():
    station_venues = get_venues(
        station.id, station.latitude, station.longitude, radius, yelp_key, category
    )
    if not station_venues:
        # None (request failed) or an empty result: nothing to add for this station
        continue
    # stamp every venue with the station it was found near
    for station_venue in station_venues:
        station_venue['station_id'] = station.id
    venue_list.extend(station_venues)

# errors for station_ids that did not return any venues:
# will remove these stations from the dataframe
'''
Error for station: b7dd37a7dd668d6d10024b7f18acc438
Error for station: cc25ae4f093b33ba0afd1dbc0dd20324
Error for station: db1e56a71a3279f51301d732ee5a055b
Error for station: 5726de7c5646b82f7aaeafba50aee03e
Error for station: cde61c70e5556ee2fe3ceb9d958b6482
Error for station: 852212d6d52b5f00544fad01d21c7db2
Error for station: 3dae5d936bb6b222d135eff52833e7f4
'''


In [None]:
# one row per venue dictionary collected in venue_list; each carries the
# 'station_id' attached while looping over the stations
yelp_response_df = pd.DataFrame(venue_list)

In [None]:
# save unfiltered dataframe to json to avoid having to call the API again
yelp_response_df.to_json('../data/yelp_response.json')

In [None]:
# create df from yelp_response.json to avoid calling api if notebook is restarted
yelp_response_df = pd.read_json('../data/yelp_response.json')

In [None]:
# derive the normalized venue table from the raw response in a single chain;
# assign() works on a copy, so yelp_response_df itself is left untouched
yelp_df = (
    yelp_response_df
    .assign(
        # list of category dicts -> list of their 'title' strings
        categories=lambda raw: raw['categories'].apply(
            lambda entries: [entry['title'] for entry in entries if 'title' in entry]
        ),
        # coordinates dict -> separate latitude / longitude columns
        latitude=lambda raw: raw['coordinates'].apply(lambda point: point['latitude']),
        longitude=lambda raw: raw['coordinates'].apply(lambda point: point['longitude']),
        # location dict -> its 'address1' string
        address=lambda raw: raw['location'].apply(lambda place: place['address1']),
    )
    # keep only the columns needed downstream, in this order
    .loc[:, [
        'id', 'name', 'rating', 'review_count', 'price', 'categories',
        'latitude', 'longitude', 'address', 'distance', 'station_id',
    ]]
    # rename columns for clarity
    .rename(columns={
        'id': 'yelp_venue_id',
        'name': 'venue_name',
        'station_id': 'nearest_station_id',
        'distance': 'venue_to_station_distance',
        'address': 'venue_address',
        'longitude': 'venue_longitude',
        'latitude': 'venue_latitude',
        'categories': 'restaurant_categories',
    })
)


In [None]:
# save normalized data to json for use in other notebooks
yelp_df.to_json('../data/yelp_data.json')

In [None]:
# restore yelp_df from json file if notebook is restarted
yelp_df = pd.read_json('../data/yelp_data.json')

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

In [None]:
# a cell only renders its last expression, so return both shapes together
# instead of two bare expressions (the first would be silently discarded)
{'yelp_df': yelp_df.shape, 'fsq_df': fsq_df.shape}

### explanation:
- yelp provided more complete data
- I was unable to get the fsq api to return a full list of venues; it only returned one venue for each station
    - yelp returned 11,828 results, fsq returned 248 (the same as the number of unique station coordinates)
    - Given more time, I would troubleshoot this until I was able to construct a proper dataset
- yelp provides more data per venue, this data is required for modeling
    - rating
    - review count
    - price
- moving forward, will be using yelp df for EDA, joining, database and modelling


Get the top 10 restaurants according to their rating

In [None]:
# yelp_df has one row per (station, venue) pair, so a restaurant near several
# stations can occur more than once; de-duplicate on the Yelp id before ranking.
# review_count breaks ties between equally rated restaurants.
(
    yelp_df
    .drop_duplicates(subset='yelp_venue_id')
    .sort_values(by=['rating', 'review_count'], ascending=False)
    .loc[:, ['venue_name', 'rating', 'review_count']]
    .head(10)
)