### Import libraries

In [None]:
import pandas as pd 
import numpy as np
import json
from bs4 import BeautifulSoup
import requests

### Get NYC Neighborhoods data

In [None]:
# Load the NYC neighborhoods file; its 'features' list holds one entry per
# neighborhood. The encoding is given explicitly so the read does not depend
# on the platform's default.
with open('./Data/newyork_data.json', encoding='utf-8') as json_data:
    nyc_data = json.load(json_data)
nyc_neighborhoods_data = nyc_data['features']

# Transform to a pandas dataframe
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude']

# Collect one record per feature and build the dataframe in a single step.
# DataFrame.append was removed in pandas 2.0, and calling it inside a loop
# re-copied the whole frame on every iteration.
neighborhood_rows = []
for data in nyc_neighborhoods_data:
    borough = data['properties']['borough']
    neighborhood_name = data['properties']['name']
    # coordinates are read as [longitude, latitude]
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]

    neighborhood_rows.append({'Borough': borough,
                              'Neighborhood': neighborhood_name,
                              'Latitude': neighborhood_lat,
                              'Longitude': neighborhood_lon})

# Passing columns= keeps the expected layout even when there are no features.
nyc_neighborhoods = pd.DataFrame(neighborhood_rows, columns=column_names)

print(nyc_neighborhoods.shape)
nyc_neighborhoods.head()

### Get Toronto Neighborhoods data

In [None]:
# Get Canada neighborhoods data from website
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
r = requests.get(url, timeout=30)  # a timeout keeps the cell from hanging forever
r.raise_for_status()               # stop here instead of parsing an error page
canada_neighborhoods_data = r.text

soup = BeautifulSoup(canada_neighborhoods_data, 'html.parser')
table = soup.find('table').find_all('tr')

# Transform to a pandas dataframe
column_names = ['PostalCode', 'Borough', 'Neighborhood']

# Gather the rows first and build the frame once: DataFrame.append was removed
# in pandas 2.0 and was quadratic when used in a loop.
canada_rows = []
for item in table[1:]:  # the first row is skipped (presumably the header row)
    data = item.find_all('td')
    if len(data) < 3:
        # Not a full PostalCode / Borough / Neighborhood row; nothing to read.
        continue
    # Strip surrounding whitespace so postal codes match the coordinates file.
    postalcode = data[0].text.strip()
    borough = data[1].text.strip()
    neighborhood_name = data[2].text.replace('\n', '').strip()
    if neighborhood_name == 'Not assigned':
        # An unnamed neighborhood takes the name of its borough.
        neighborhood_name = borough

    canada_rows.append({'PostalCode': postalcode,
                        'Borough': borough,
                        'Neighborhood': neighborhood_name})
canada_neighborhoods = pd.DataFrame(canada_rows, columns=column_names)

# Drop postal codes that have no borough, then merge the neighborhoods that
# share a postal code into one comma-separated entry. Only 'Neighborhood' is
# aggregated: placeholder NaN Latitude/Longitude columns are no longer created,
# because ','.join cannot process them (pandas 2.x raises instead of silently
# dropping such columns) and the real coordinates come from the join below.
canada_neighborhoods = canada_neighborhoods[canada_neighborhoods['Borough'] != 'Not assigned']
canada_neighborhoods = (
    canada_neighborhoods
    .groupby(['PostalCode', 'Borough'])['Neighborhood']
    .agg(','.join)
    .reset_index()
)

# Add coordinates columns
coords = pd.read_csv('./Data/Geospatial_Coordinates.csv')
canada_neighborhoods = canada_neighborhoods.set_index('PostalCode').join(coords.set_index('Postal Code'), how='inner').reset_index()

# Get Toronto neighborhoods data: keep boroughs whose name contains 'Toronto'
toronto_neighborhoods = canada_neighborhoods[canada_neighborhoods['Borough'].str.contains('Toronto')].reset_index(drop=True)

print(toronto_neighborhoods.shape)
toronto_neighborhoods.head()

### Get venues data

In [None]:
# Define Foursquare Credentials and Version
import os

# Environment variables take precedence so credentials can be supplied without
# editing this file.
# NOTE(review): the fallback literals are kept only so existing runs keep
# working. They are committed in source, so they should be rotated and then
# removed from this cell.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '5NMOAVIALBOSTNQVCJJRVWT0ZHT1URBDTWHOYBLIENYSQQDD') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', 'R3NSNU03XXZQLAFGUG2JYKEYL1MM3Y5AX2O2X53HGJXY4ECW') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
# The secret is masked: printed cell output is stored with the notebook.
print('CLIENT_SECRET:' + '*' * len(CLIENT_SECRET))

In [None]:
LIMIT = 100  # default maximum number of venues requested per neighborhood


def getNearbyVenues(names, latitudes, longitudes, radius=500, limit=None):
    """Query the Foursquare explore endpoint around each location.

    Args:
        names: Iterable of neighborhood names.
        latitudes: Iterable of latitudes, aligned with ``names``.
        longitudes: Iterable of longitudes, aligned with ``names``.
        radius: Value sent as the ``radius`` query parameter.
        limit: Value sent as the ``limit`` query parameter. ``None`` (the
            default) uses the module-level ``LIMIT``.

    Returns:
        A DataFrame with one row per returned venue and the columns
        'Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude',
        'Venue', 'Venue Latitude', 'Venue Longitude', 'Venue Category'.
        The frame is empty, with those columns, when nothing is returned.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    if limit is None:
        limit = LIMIT

    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # Let requests assemble and escape the query string.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': limit,
        }

        # make the GET request; the timeout keeps one stalled call from
        # blocking the whole run, and raise_for_status reports API errors
        # directly instead of as a KeyError on the missing payload.
        response = requests.get('https://api.foursquare.com/v2/venues/explore',
                                params=params,
                                timeout=30)
        response.raise_for_status()

        groups = response.json()['response'].get('groups') or []
        results = groups[0]['items'] if groups else []

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            # A venue may carry no category; record None instead of failing.
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing columns= here (rather than assigning them afterwards) also works
    # when no venue was found at all.
    nearby_venues = pd.DataFrame(rows, columns=columns)

    return nearby_venues

In [None]:
# Look up the venues around every NYC neighborhood
nyc_venues = getNearbyVenues(
    nyc_neighborhoods['Neighborhood'],
    nyc_neighborhoods['Latitude'],
    nyc_neighborhoods['Longitude'],
)
# Persist the result as a csv file
nyc_venues.to_csv('./Data/nyc_venues.csv')

# Report the size of the result and preview its first rows
print(nyc_venues.shape)
nyc_venues.head()

In [None]:
# Look up the venues around every Toronto neighborhood
toronto_venues = getNearbyVenues(
    toronto_neighborhoods['Neighborhood'],
    toronto_neighborhoods['Latitude'],
    toronto_neighborhoods['Longitude'],
)
# Persist the result as a csv file
toronto_venues.to_csv('./Data/toronto_venues.csv')

# Report the size of the result and preview its first rows
print(toronto_venues.shape)
toronto_venues.head()