In [None]:
# imports
import pybikes
import json
import pandas as pd
import requests
import copy
import os
import scipy

##### Setting up for success

In [None]:

# Load the City Bike stations CSV into the raw frame `data`; the file's first column becomes the index
data = pd.read_csv(filepath_or_buffer='City_Bike.csv', index_col=0)

In [None]:
# Build the 'latitude,longitude' string column that the API requests take as a coordinate
data['ll'] = data['latitude'].astype(str).str.cat(data['longitude'].astype(str), sep=',')

In [None]:
# Work on an independent deep copy so the original frame `data` stays available to revert to
df = data.copy(deep=True)

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

> setting up for success

In [None]:
# Collect the 'latitude,longitude' strings of every bike station as a plain list for the request function
location = df['ll'].tolist()

In [None]:
# Foursquare lookup: one "Bar" search per coordinate, flattened into a single DataFrame
def locate_point_of_I(some_list):
    '''Search Foursquare for bars around each coordinate and tabulate the places found.

    One GET request is sent per coordinate to the Foursquare places search
    endpoint (query "Bar", radius 1000, sorted by popularity).

    Parameters
    ----------
    some_list : iterable of str
        Coordinates formatted as 'latitude,longitude'; each one is inserted
        verbatim into the request's ``ll`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned place with the columns ``ll``, ``fsq_id``,
        ``cat_id``, ``category_name``, ``categories``, ``name``, ``address``,
        ``city``, ``country``, ``rating``, ``popularity`` and ``price``.
        Empty when no places are returned.

    Raises
    ------
    KeyError
        If the ``FOURSQUARE_API_KEY`` environment variable is not set.
    requests.HTTPError
        If Foursquare answers a request with a 4xx/5xx status.
    '''
    # The credential is read from the environment so it never lives in the notebook source.
    headers = {
        "accept": "application/json",
        "Authorization": os.environ["FOURSQUARE_API_KEY"]
    }
    list_of_places = []
    for ll1 in some_list:
        url = f'https://api.foursquare.com/v3/places/search?query=Bar&ll={ll1}&radius=1000&fields=fsq_id%2Cname%2Clocation%2Ccategories%2Cpopularity%2Cprice%2Crating&sort=POPULARITY'
        response = requests.get(url, headers=headers)
        # Fail loudly on an error status instead of a confusing KeyError further down.
        response.raise_for_status()

        for poi in response.json().get('results', []):
            # A place may come back without categories; treat a missing/None value as an empty list.
            categories = poi.get('categories') or []
            first_category = categories[0] if categories else {}
            location = poi.get('location') or {}

            places = {
                'll': f'{ll1}'
                , 'fsq_id': poi['fsq_id']
                , 'cat_id': first_category.get('id')
                , 'category_name': first_category.get('name')
                , 'categories': categories
                , 'name': poi.get('name', None)
                , 'address': location.get('formatted_address', None)
                , 'city': location.get('locality', None)
                , 'country': location.get('country', None)
                , 'rating': poi.get('rating', None)
                , 'popularity': poi.get('popularity', None)
                , 'price': poi.get('price', None)
            }
            list_of_places.append(places)
    return pd.DataFrame(list_of_places)

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [None]:
# This step is already included in the function above

Put your parsed results into a DataFrame

In [None]:
# Build the Foursquare dataframe `dataf` by passing the station coordinates (`location`)
# to `locate_point_of_I`; the function already returns a DataFrame, so no extra wrapping is needed
dataf = locate_point_of_I(some_list=location)

In [None]:
# Keep an independent deep copy of the Foursquare results for export
FourSquare_df = dataf.copy(deep=True)

In [None]:
# Persist the Foursquare results to CSV in the working directory
FourSquare_df.to_csv(path_or_buf='FourSquare_df.csv')

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

> setting up for success

In [None]:
# Read the Yelp credential from the YELPAPI environment variable (None when it is not set)
YelpAPI = os.getenv('YELPAPI')

In [None]:
# Two-column frame (latitude, longitude) of the bike stations; the Yelp function reads it by row position

bike_stations_for_yelp = df[['latitude', 'longitude']]

In [None]:
# Row positions of every bike station, derived from the frame itself so the range
# always matches the number of stations actually loaded from the CSV
bike_station_index = range(len(bike_stations_for_yelp))

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [None]:
# Yelp lookup: one business search per bike-station row position, flattened into a single DataFrame
def get_yelp_poi(data_frame_index):
    '''Query the Yelp business search API around bike stations and tabulate the results.

    For every position in ``data_frame_index`` the latitude (column 0) and
    longitude (column 1) are read from the module-level ``bike_stations_for_yelp``
    frame and one GET request is sent, using the module-level ``YelpAPI`` value
    as the ``Authorization`` header.

    Parameters
    ----------
    data_frame_index : iterable of int
        Row positions (``iloc`` style) into ``bike_stations_for_yelp``.

    Returns
    -------
    pandas.DataFrame
        One row per returned business with the columns ``lat``, ``lon``,
        ``business_id``, ``category_name``, ``name``, ``address``, ``city``,
        ``state``, ``country``, ``transactions``, ``reviews``, ``rating``,
        ``price_scale``, ``price`` and ``distance_4rm_bike``. ``lat``/``lon``
        are the station coordinates rendered as strings. Empty when no
        businesses are returned.

    Raises
    ------
    requests.HTTPError
        If Yelp answers a request with a 4xx/5xx status.
    '''
    # NOTE(review): the header value is sent exactly as stored in YelpAPI; presumably it
    # already carries any required scheme prefix (e.g. "Bearer ") - confirm.
    headers = {
        "accept": "application/json",
        'Authorization': YelpAPI
    }
    list_of_places = []

    for index in data_frame_index:
        lat = bike_stations_for_yelp.iloc[index, 0]
        lon = bike_stations_for_yelp.iloc[index, 1]

        # NOTE(review): query string kept unchanged. "%2Coordinates" decodes to ",oordinates",
        # "Resturant" is misspelled, and Yelp's sort parameter may be "sort_by" rather than
        # "sort" - verify against the Yelp docs before editing, as it changes what is returned.
        url = f'https://api.yelp.com/v3/businesses/search?latitude={lat}&longitude={lon}&radius=500&categories=Bar&categories=Resturant&categories=Coffee&categories=Park&price=1&price=2&price=3&price=4&fields=id%2Cname%2Coordinates%2Ctransactions%2Ccategories%2Clocation%2Cprice%2Crating%2Creview_count&sort=review_count'

        response = requests.get(url, headers=headers)
        # Fail loudly on an error status instead of a confusing KeyError further down.
        response.raise_for_status()

        for poi in response.json().get('businesses', []):
            # A business may have no categories; treat a missing/None value as an empty list.
            categories = poi.get('categories') or []
            category_name = categories[0].get('title') if categories else None
            # Fall back to the alias when Yelp returns no display name.
            business_name = poi.get('name') or poi.get('alias')
            # Price is a string of currency symbols; its length is the scale, 0 when absent.
            price = poi.get('price')
            price_value = len(price) if price else 0
            distance = poi.get('distance')
            location = poi.get('location') or {}

            places = {
                'lat': f'{lat}'
                , 'lon': f'{lon}'
                , 'business_id': poi.get('id')
                , 'category_name': category_name
                , 'name': business_name
                , 'address': location.get('display_address', None)
                , 'city': location.get('city', None)
                , 'state': location.get('state', None)
                , 'country': location.get('country', None)
                , 'transactions': poi.get('transactions', None)
                , 'reviews': poi.get('review_count', None)
                , 'rating': poi.get('rating', None)
                , 'price_scale': price_value
                , 'price': price
                , 'distance_4rm_bike': round(distance) if distance is not None else None
            }
            list_of_places.append(places)
    return pd.DataFrame(list_of_places)

Put your parsed results into a DataFrame

In [None]:
# Build the Yelp dataframe by passing the bike-station row positions to `get_yelp_poi`;
# the function already returns a DataFrame, so no extra wrapping is needed

df_yelp = get_yelp_poi(data_frame_index=bike_station_index)

In [None]:
# Keep an independent deep copy so the fetched Yelp frame can be restored if a later step goes wrong
Yelp_df = df_yelp.copy(deep=True)

In [None]:
# Persist the Yelp results to CSV in the working directory
Yelp_df.to_csv(path_or_buf='Yelp_df.csv')

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

> Overall, the Yelp dataframe is richer: it has more observations, additional data like price rating and reviews, and no null/NaN values. (Yelp appears to put a lot of effort into maintaining its API, judging from the documentation and the different types of queries it supports.) This is especially important depending on the use case of the data. If further analysis such as regression modeling is going to be done, rich data is important: having to drop or fill a large number of observations is taxing, and it might also introduce a lot of bias or noise into the dataset.

Get the top 10 restaurants according to their rating

In [None]:
# Four Square: ten highest-rated places (descending sort; rows without a rating sort last)
dataf.sort_values(by='rating', ascending=False).head(10)

In [None]:
# Yelp: ten highest-rated businesses (descending sort; rows without a rating sort last)
Yelp_df.sort_values(by='rating', ascending=False).head(10)