In [165]:
import os
from dotenv import load_dotenv
import requests
import pandas as pd
import ast

## City Bikes Data

In [166]:
# Load the bike-station table that was previously saved to CSV
city_bikes = pd.read_csv('../data/city_bikes.csv')
# Sanity check: print the coordinates of the first station only
for i, station in city_bikes.iloc[:1].iterrows():
    print(
        'latitude :', station['latitude'],
        'longitude :', station['longitude'],
    )

latitude : 53.033019 longitude : 18.599727


In [167]:
# Load environment variables from a .env file, if one can be found
# NOTE(review): the recorded output of this cell is False, which presumably means
# no .env values were loaded - confirm the API keys come from the shell environment
load_dotenv()

False

# Foursquare

In [168]:
# Set FourSquare URL
fsq_url = 'https://api.foursquare.com/v3/places/search'
# Read the API key from the environment (None when the variable is not set)
fsq_api_key = os.environ.get('FOURSQUARE_API_KEY')
# Create dictionary for headers and add API KEY
fsq_headers = {
    'Accept': 'application/json',
    'Authorization': fsq_api_key,
}


### Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [169]:
# Getting parameters for the api query string
def get_fsq_params(lat, lon):
    """Build the query parameters for a Foursquare place search around a point.

    Args:
        lat: Latitude of the search centre.
        lon: Longitude of the search centre.

    Returns:
        dict with the keys 'll', 'radius', 'categories' and 'fields'.
    """
    category_ids = (
        '4d4b7105d754a06374d81259',  # Restaurant
        '4bf58dd8d48988d116941735',  # Bar
        '4bf58dd8d48988d1fa931735',  # Hotel
    )
    params = {'ll': f'{lat},{lon}'}
    # Small search radius (1000m) around the station
    params['radius'] = 1000
    params['categories'] = ','.join(category_ids)
    # No 'limit' is sent on purpose (it was tried with 10; max 50)
    params['fields'] = 'name,categories,distance,rating,price'
    return params

In [170]:
# Send API requests
def get_fsq_response(url, headers, params, timeout=10):
    """Send a GET request to the Foursquare API and return the raw response.

    Args:
        url: Endpoint URL.
        headers: Request headers (including the Authorization entry).
        params: Query-string parameters.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook indefinitely.

    Returns:
        The response object returned by ``requests.get``.
    """
    return requests.get(url, headers=headers, params=params, timeout=timeout)

### Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [171]:
# FourSquare DataFrame: collect one frame per station, then combine them once
fsq_frames = []
# Loop once to save on api usage
for i, station in city_bikes.head(1).iterrows():
    # Set params
    latitude, longitude = station['latitude'], station['longitude']
    fsq_params = get_fsq_params(latitude, longitude)
    # Get response
    fsq_response = get_fsq_response(fsq_url, fsq_headers, fsq_params)
    # Surface a failed request as an HTTP error here; otherwise the failure would
    # presumably show up later as a confusing KeyError on the 'results' key
    fsq_response.raise_for_status()
    # Normalize response JSON to fit DataFrame
    df = pd.json_normalize(fsq_response.json(), record_path='results')
    # Keep only the columns used later; reindex fills any absent column with NaN
    df = df.reindex(columns=['name', 'price', 'rating', 'distance'])
    # Tag every place with the coordinates of the station that was queried
    df['latitude'] = latitude
    df['longitude'] = longitude
    fsq_frames.append(df)

# Concatenate once after the loop; pd.concat rejects an empty list, so fall back
# to an empty frame when no station was queried
fsq_df = pd.concat(fsq_frames, ignore_index=True) if fsq_frames else pd.DataFrame()
# Write the data to CSV - IDEALLY SHOULD APPEND
fsq_df.to_csv('../data/fsq_search_data.csv', sep=',', index=False)

In [172]:
# Preview the first rows of the Foursquare results
fsq_df.head()

Unnamed: 0,name,price,rating,distance,latitude,longitude
0,Montenegro,1.0,8.0,670,53.033019,18.599727
1,Weranda Chełmińska,2.0,7.5,842,53.033019,18.599727
2,Stara Paczkarnia,1.0,,25,53.033019,18.599727
3,KFC,1.0,6.3,803,53.033019,18.599727
4,Klif. Bar,2.0,,181,53.033019,18.599727


### Put your parsed results into a DataFrame

In [173]:
# Reload the Foursquare results from the CSV written by the search cell
fsq_df = pd.read_csv('../data/fsq_search_data.csv')
# fsq_df.head()

# Yelp

In [174]:
# Set Yelp URL
yelp_url = 'https://api.yelp.com/v3/businesses/search'
# Get Yelp API Key (raises KeyError when the variable is not set)
yelp_api_key = os.environ['YELP_API_KEY']
# Create dictionary for headers and add API KEY
yelp_headers = {'accept': 'application/json', 'Authorization': f'Bearer {yelp_api_key}'}

### Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [175]:
# Getting parameters for the api query string
def get_yelp_params(lat, lon):
    """Build the query parameters for a Yelp business search around a point.

    Args:
        lat: Latitude of the search centre.
        lon: Longitude of the search centre.

    Returns:
        dict with the keys 'latitude', 'longitude', 'radius', 'limit'
        and 'categories'.
    """
    params = {'latitude': lat, 'longitude': lon}
    # Small search radius (1000m) around the station
    params['radius'] = 1000
    params['limit'] = 10
    # NOTE(review): Yelp category aliases are presumably lowercase (e.g. 'bars');
    # confirm these capitalised values are accepted by the API
    params['categories'] = 'Bars,Restaurants,Hotels'
    return params

In [176]:
# Send API requests
def get_yelp_response(url, headers, params, timeout=10):
    """Send a GET request to the Yelp API and return the raw response.

    Args:
        url: Endpoint URL.
        headers: Request headers (including the Authorization entry).
        params: Query-string parameters.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook indefinitely.

    Returns:
        The response object returned by ``requests.get``.
    """
    return requests.get(url, headers=headers, params=params, timeout=timeout)

### Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [177]:
# Yelp DataFrame: collect one frame per station, then combine them once
yelp_frames = []

# Only the first station is queried, which keeps API usage low
for i, station in city_bikes.head(1).iterrows():
    # Set params
    latitude, longitude = station['latitude'], station['longitude']
    yelp_params = get_yelp_params(latitude, longitude)
    # Get response
    yelp_response = get_yelp_response(yelp_url, yelp_headers, yelp_params)
    # Surface a failed request as an HTTP error here; otherwise the failure would
    # presumably show up later as a confusing KeyError on the 'businesses' key
    yelp_response.raise_for_status()
    # Normalize response JSON to fit DataFrame
    df = pd.json_normalize(yelp_response.json(), record_path='businesses')
    # Keep only the columns used later; reindex fills any absent column with NaN
    df = df.reindex(columns=['name', 'price', 'rating', 'distance'])
    # Tag every place with the coordinates of the station that was queried
    df['latitude'] = latitude
    df['longitude'] = longitude
    yelp_frames.append(df)

# Concatenate once after the loop; pd.concat rejects an empty list, so fall back
# to an empty frame when no station was queried
yelp_df = pd.concat(yelp_frames, ignore_index=True) if yelp_frames else pd.DataFrame()

# Write the data to CSV - IDEALLY SHOULD APPEND
yelp_df.to_csv('../data/yelp_search_data.csv', sep=',', index=False)

In [178]:
# Preview the first rows of the Yelp results
yelp_df.head()

Unnamed: 0,name,price,rating,distance,latitude,longitude
0,Cafe Molus,$,4.0,595.066046,53.033019,18.599727
1,Bar Klif,$,3.0,184.713994,53.033019,18.599727
2,La Grande,,4.0,328.194576,53.033019,18.599727
3,Presto,,4.5,516.357592,53.033019,18.599727
4,Azzurro,,4.0,278.106448,53.013489,18.571125


### Put your parsed results into a DataFrame

## FourSquare Data 

In [179]:
#Read from CSV
fsq_df = pd.read_csv('../data/fsq_search_data.csv')
# NOTE: the category-expansion steps below are disabled. The search cell writes only
# name, price, rating, distance, latitude and longitude to this CSV, so there is no
# 'categories' column for them to read.
# # Safely evaluate the string as a Python dictionary
# fsq_df['categories'] = fsq_df['categories'].apply(ast.literal_eval)
# # Explode the data by categories
# fsq_df = fsq_df.explode('categories')
# # Explode the dictionary
# categories = fsq_df['categories'].apply(pd.Series)
# # Rename the categories columns
# categories.columns = ['category.id', 'category.short', 'category.name', 'category.plural', 'category.img_url']
# # categories.head()
# # Replace categories column by columns
# fsq_df = pd.concat([fsq_df.drop(columns=['categories']), categories], axis=1)
# print(fsq_df.head(1))
# # Choose columns for EDA
# fsq_df = fsq_df[['name', 'price', 'rating', 'distance', 'category.plural', 'category.name']]
# fsq_df.head()

## YELP Data

In [180]:
#Read from CSV
yelp_df = pd.read_csv('../data/yelp_search_data.csv')
# NOTE: the category-expansion steps below are disabled. The search cell writes only
# name, price, rating, distance, latitude and longitude to this CSV, so the
# 'categories', 'review_count' and 'business_hours' columns they use are not present.
# # Safely evaluate the string as a Python dictionary
# yelp_df['categories'] = yelp_df['categories'].apply(ast.literal_eval)
# # Explode the data by categories
# yelp_df = yelp_df.explode('categories')
# # Explode the dictionary
# categories = yelp_df['categories'].apply(pd.Series)
# # Rename the categories columns
# categories.columns = ['category.alias', 'category.title']
# # Replace categories column by columns
# yelp_df = pd.concat([yelp_df.drop(columns=['categories']), categories], axis=1)
# # Choose columns for EDA
# yelp_df = yelp_df[['name', 'review_count', 'rating', 'price', 'distance', 'business_hours', 'category.title']]
# yelp_df.head()

# Comparing Results

### Which API provided you with more complete data? Provide an explanation.

#### Each API has a different approach to handling the data. The outcome of a query depends on various parameters. I wish I could test all the possible options.

### Get the top 10 restaurants according to their rating

### FourSquare Top 10 by rating

In [181]:
# Remove duplicate rows and sort by rating, best first.
# sort_values returns a new frame, so fsq_df itself is left untouched.
df = fsq_df.drop_duplicates().sort_values(by='rating', ascending=False)
# Write the data to CSV - for EDA
df.to_csv('../data/fsq_for_EDA.csv', index=False)
# Display top 10 - this must be the cell's last expression to be rendered
df.head(10)

### YELP Top 10 by rating

In [182]:
# Remove duplicate rows and sort by rating, best first.
# sort_values returns a new frame, so yelp_df itself is left untouched.
df = yelp_df.drop_duplicates().sort_values(by='rating', ascending=False)
# Write the data to CSV - for EDA
df.to_csv('../data/yelp_for_EDA.csv', index=False)
# Display top 10 - this must be the cell's last expression to be rendered
df.head(10)

## Testing Area

In [183]:
# Open and read the JSON file
# with open('fsq_search_response.json', 'r') as file:
#     data = json.load(file)
# data = pd.read_json('fsq_search_columns.json')
# data = data.explode('categories').reset_index(drop=True)
# data['categories'] = data['categories'].apply(lambda x: x['short_name'])
# data.drop([
#     'geocodes.drop_off.latitude',
#     'geocodes.drop_off.longitude',
#     'geocodes.roof.latitude',
#     'geocodes.roof.longitude',
# ], axis=1, inplace=True)
# data