# Assignment week 3 - part 2 - neighbourhoods in Toronto

In [3]:
import os
from urllib.request import urlopen

import numpy as np
import pandas as pd
from bs4 import BeautifulSoup

## Part 1 (as previously submitted)

Get HTML and create BeautifulSoup object. Find the table within the page and strip the strings therein.

In [4]:
# Wikipedia page listing every Canadian postal code that starts with "M".
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# Open the response in a context manager so the HTTP connection is released
# even if parsing fails. BeautifulSoup consumes the whole response while the
# object is being constructed, so `soup` stays usable after the block exits.
with urlopen(url) as html:
    soup = BeautifulSoup(html, 'html')

In [5]:
# Flatten the first table on the page into a list of its text fragments,
# with surrounding whitespace already stripped by BeautifulSoup.
raw_postcodes = list(soup.table.stripped_strings)

Create a dataframe with the right column names.

In [6]:
# Column layout of the postcode table, in display order.
column_names = [
    'PostalCode',
    'Borough',
    'Neighborhood',
]

# Start from an empty frame that already carries those column headers.
df_postcodes = pd.DataFrame(data=None, columns=column_names)

Fill each column with the key data from the scraped website. Remove rows where Borough is not assigned.

In [7]:
# Build the frame from scratch on every run so this cell is idempotent:
# assigning the full-length lists onto the already-filtered frame left behind
# by a previous run would fail with a length mismatch.
#
# The first three scraped strings are skipped (presumably the table's header
# row - confirm against the page); after that every third string starting at
# offsets 3, 4 and 5 is taken as postcode, borough and neighbourhood.
all_postcodes = pd.DataFrame(
    {
        'PostalCode': raw_postcodes[3::3],
        'Borough': raw_postcodes[4::3],
        'Neighborhood': raw_postcodes[5::3],
    },
    columns=column_names,
)

# Keep only the postcodes that have actually been assigned to a borough.
df_postcodes = all_postcodes[all_postcodes.Borough != 'Not assigned']

Combine rows where postcodes are repeated. Aggregate data in the Neighborhood column. Replace any Neighborhoods which show as "Not assigned" with the corresponding Borough name.

In [8]:
# One row per (postcode, borough) pair, keeping the order in which the
# postcodes first appear; neighbourhood names are merged into a single
# comma-separated string.
by_postcode = df_postcodes.groupby(['PostalCode', 'Borough'], sort=False)
grouped_postcodes = by_postcode.agg(', '.join).reset_index()

# Where the neighbourhood is still the placeholder, fall back to the borough name.
is_unnamed = grouped_postcodes['Neighborhood'] == 'Not assigned'
grouped_postcodes.loc[is_unnamed, 'Neighborhood'] = grouped_postcodes.loc[is_unnamed, 'Borough']

grouped_postcodes.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,Queen's Park
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Rouge, Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens, Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson, Garden District"


In [9]:
# Show the (rows, columns) dimensions of the grouped postcode table.
grouped_postcodes.shape

(103, 3)

## Part 2 (as previously submitted)

Getting the latitude and the longitude coordinates of each neighborhood - and creating a new dataframe with combined data.

In [10]:
# import geocoder

Couldn't reliably get geocoder to work - using csv file instead (per instructions).

In [11]:
# Load the postcode -> latitude/longitude lookup from the CSV that sits one
# directory above the notebook, then preview its first five rows.
coords = pd.read_csv(filepath_or_buffer='../Geospatial_Coordinates.csv')
coords.head(5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [12]:
# Attach coordinates to every postcode row. A left join keeps all rows of
# grouped_postcodes; the duplicate key column coming from the CSV is then
# dropped so only 'PostalCode' remains.
grouped_postcodes_ll = (
    grouped_postcodes
    .merge(coords, how='left', left_on='PostalCode', right_on='Postal Code')
    .drop(columns=['Postal Code'])
)
grouped_postcodes_ll.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937


## Part 3

Explore and cluster the neighborhoods in Toronto. You can decide to work with only boroughs that contain the word Toronto and then replicate the same analysis we did to the New York City data. It is up to you.

Just make sure:

* to add enough Markdown cells to explain what you decided to do and to report any observations you make.
* to generate maps to visualize your neighborhoods and how they cluster together.

In [15]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import folium

What's the lat and long for Toronto? Find using geopy.

In [2]:
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)

# geocode() returns None when the service finds no match; fail here with a
# clear message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Nominatim returned no result for {!r}'.format(address))

# Centre point reused by the map cells further down.
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


Create map of Toronto showing all postcodes. Using latitude + 0.05 to ensure all postcodes show and are centered nicely.

In [32]:
# Base map, centred slightly north of the geocoded city centre.
map_toronto = folium.Map(location=[latitude+0.05, longitude], zoom_start=11)

# Draw one circle marker per postcode; the popup shows the postcode itself.
marker_rows = grouped_postcodes_ll[['Latitude', 'Longitude', 'PostalCode']]
for lat, lng, postcode in marker_rows.itertuples(index=False, name=None):
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup('{}'.format(postcode), parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_toronto)

map_toronto

Decided to focus on boroughs that contain the word Toronto - so create new df on that basis and replot map.

In [35]:
# Keep only rows whose borough name contains "Toronto". reset_index() without
# drop=True keeps the original row labels in an 'index' column.
in_toronto_borough = grouped_postcodes_ll['Borough'].str.contains('Toronto')
focus_postcodes = grouped_postcodes_ll.loc[in_toronto_borough].reset_index()
focus_postcodes.head()

Unnamed: 0,index,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
1,9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
2,15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
3,19,M4E,East Toronto,The Beaches,43.676357,-79.293031
4,20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306


In [37]:
# Show the (rows, columns) dimensions of the Toronto-only subset.
focus_postcodes.shape

(38, 6)

In [38]:
# Fresh, more zoomed-in map for the Toronto-only subset.
map_centre = [latitude+0.02, longitude]
map_toronto = folium.Map(location=map_centre, zoom_start=12)

# Appearance shared by every marker on this map.
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# add markers to map: one per postcode, labelled with the postcode
marker_data = zip(focus_postcodes['Latitude'],
                  focus_postcodes['Longitude'],
                  focus_postcodes['PostalCode'])
for lat, lng, postcode in marker_data:
    label = folium.Popup('{}'.format(postcode), parse_html=True)
    folium.CircleMarker([lat, lng], popup=label, **marker_style).add_to(map_toronto)

map_toronto

In [40]:
# @hidden_cell

# Foursquare credentials are read from the environment so that secrets never
# live in the notebook source or in its saved output. Set
# FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): the key pair that used to be hardcoded in this cell was saved
# with the notebook and should be regenerated in the Foursquare developer
# console.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

# Report only whether each value is present; never echo the values themselves.
print('Foursquare credentials:')
print('CLIENT_ID set: {}'.format(bool(CLIENT_ID)))
print('CLIENT_SECRET set: {}'.format(bool(CLIENT_SECRET)))

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>
