In [74]:

import lxml
from bs4 import BeautifulSoup
import folium  # map rendering library
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
import matplotlib.colors as colors
import matplotlib.cm as cm
# tranform JSON file into a pandas dataframe
from pandas.io.json import json_normalize
import requests  # library to handle requests
# convert an address into latitude and longitude values
from geopy.geocoders import Nominatim
import json  # library to handle JSON files
import pandas as pd  # library for data analsysis
import numpy as np  # library to handle data in a vectorized manner
print('Importing Libraries')
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
print('.')
print('.')
# Matplotlib and associated plotting modules
# import k-means from clustering stage
print('.')
print('Done')


Importing Libraries
.
.
.
Done


In [75]:
# downloads and parses data (uses older version of wiki page for proper formatting)
r = requests.get(
    'https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&oldid=876823784', timeout=5.00)
soup = BeautifulSoup(r.text, 'html.parser')
table = soup.find('table', attrs={'class': 'wikitable sortable'})

# retrieves page headers
headers = table.find_all('th')
for i, head in enumerate(headers):
    headers[i] = str(headers[i]).replace(
        "<th>", "").replace("</th>", "").replace("\n", "")


# finds all elements and skips first in table
rows = table.find_all('tr')
rows = rows[1:len(rows)]

# used to skip meta symbols and line feeds between rows
for i, row in enumerate(rows):
    rows[i] = str(rows[i]).replace(
        "\n</td></tr>", "").replace("<tr>\n<td>", "")

# creates dataframe, expands rows, and drops old row
df = pd.DataFrame(rows)
df[headers] = df[0].str.split("</td>\n<td>", n=2, expand=True)
df.drop(columns=[0], inplace=True)

# skip not assigned boroughs:
df = df.drop(df[(df.Borough == "Not assigned")].index)

# give "Not assigned" Neighborhoods same name as Borough:
df.Neighbourhood.replace("Not assigned", np.nan, inplace=True)


# copy Borough value to Neighborhood if NaN:
df.Neighbourhood.fillna(df.Borough, inplace=True)
# drop duplicate rows:
df = df.drop_duplicates()

# extract titles from columns
df.update(
    df.Neighbourhood.loc[
        lambda x: x.str.contains('title')
    ].str.extract('title=\"([^\"]*)', expand=False))

df.update(
    df.Borough.loc[
        lambda x: x.str.contains('title')
    ].str.extract('title=\"([^\"]*)', expand=False))

# delete Toronto annotation from Neighbourhood:
df.update(
    df.Neighbourhood.loc[
        lambda x: x.str.contains('Toronto')
    ].str.replace(", Toronto", ""))
df.update(
    df.Neighbourhood.loc[
        lambda x: x.str.contains('Toronto')
    ].str.replace("\(Toronto\)", ""))

# combine multiple neighborhoods with the same post code
df2 = pd.DataFrame({'Postcode': df.Postcode.unique()})
df2['Borough'] = pd.DataFrame(list(set(
    df['Borough'].loc[df['Postcode'] == x['Postcode']])) for i, x in df2.iterrows())
df2['Neighborhood'] = pd.Series(list(set(
    df['Neighbourhood'].loc[df['Postcode'] == x['Postcode']])) for i, x in df2.iterrows())
df2['Neighborhood'] = df2['Neighborhood'].apply(lambda x: ', '.join(x))
df2.dtypes

df2.head()


  df.Neighbourhood.loc[


Unnamed: 0,Postcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park (Toronto),Queen's Park


In [76]:
# add Geo-spatial data
dfll = pd.read_csv("TO_geospatial_data.csv")
dfll.rename(columns={'Postal Code': 'Postcode'}, inplace=True)
dfll.set_index("Postcode")
df2.set_index("Postcode")
toronto_data = pd.merge(df2, dfll)
toronto_data.head()


Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park (Toronto),Queen's Park,43.662301,-79.389494


In [77]:
address = 'Toronto, ON, Canada'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geographical coordinates for Toronto ON are latitude: {}, longitute: {}.'.format(
    latitude, longitude))


The geographical coordinates for Toronto ON are latitude: 43.6534817, longitute: -79.3839347.


In [78]:
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Borough'], toronto_data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)

map_toronto


In [79]:
CLIENT_ID = 'GA5LNQMSFSRYYWKSU3FX4ITIVOFNSXXACIHKE3PAHSYI3DNB'  # your Foursquare ID
# your Foursquare Secret
CLIENT_SECRET = 'SFXBL03XIKKFYFMSBZTK1Q0GE2OXEGBZWWN4QIMMN32S3PT5'
VERSION = '20220409'  # Foursquare API version

print('Foursquare API Credentials:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)


Foursquare API Credentials:
CLIENT_ID: GA5LNQMSFSRYYWKSU3FX4ITIVOFNSXXACIHKE3PAHSYI3DNB
CLIENT_SECRET:SFXBL03XIKKFYFMSBZTK1Q0GE2OXEGBZWWN4QIMMN32S3PT5


In [80]:
toronto_data.loc[0, 'Neighborhood']


'Parkwoods'

In [81]:
# neighborhood latitude value
neighborhood_latitude = toronto_data.loc[0, 'Latitude']
# neighborhood longitude value
neighborhood_longitude = toronto_data.loc[0, 'Longitude']

neighborhood_name = toronto_data.loc[0, 'Neighborhood']  # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name,
                                                               neighborhood_latitude,
                                                               neighborhood_longitude))


Latitude and longitude values of Parkwoods are 43.7532586, -79.3296565.


In [82]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
url = 'https://api.foursquare.com/v3/places/search?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL

response = requests.request("GET", url)

print(response.text)

{"meta":{"code":410,"errorType":"deprecated","errorDetail":"Usage of the V2 Places API has been deprecated for new Projects. Please see our updated documentation for V3 for more details: https:\/\/docs.foursquare.com\/reference","requestId":"6252323d10c69b605073d202"},"response":{}}
