# Exploring Neighborhoods in Toronto

In [1]:
import pandas as pd #import pandas library as pd
import numpy as np #import numpy library as np

In [2]:
# Print a greeting for the capstone project course.
print("Hello Capstone Project Course!")

Hello Capstone Project Course!


In [3]:
# pd.read_html downloads the page and returns every HTML table it finds as a list of dataframes.
# The postal-code table is taken to be the first entry of that list.
wiki_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wiki_tables = pd.read_html(wiki_url)
df = wiki_tables[0]

In [4]:
# Use 'PostalCode' (no space) as the name of the postal-code column.
df = df.rename(columns={'Postal code': 'PostalCode'})
# Preview the first five rows.
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


In [5]:
# Keep only the rows whose borough has been assigned.
# .copy() turns the filtered result into an independent dataframe, so the in-place edits and
# column assignments made on `df` further down do not operate on a slice of the raw table
# (which is what triggers pandas' SettingWithCopyWarning).
df = df[df['Borough'] != 'Not assigned'].copy()

In [6]:
# Preview the first five rows after dropping the 'Not assigned' boroughs.
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
5,M6A,North York,Lawrence Manor / Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


In [7]:
# The source table already lists all neighborhoods of a postal code in one cell, separated by '/'.
# Switch that separator to a comma. Only the Neighborhood column is rewritten: the earlier
# frame-wide replace would also have altered any '/' found in Borough or PostalCode.
# Whitespace around the separator is left exactly as it is in the source.
# assign() returns a new dataframe, so nothing is mutated in place.
df = df.assign(Neighborhood=df['Neighborhood'].str.replace('/', ',', regex=False))

In [8]:
# Preview the first five rows after switching the neighborhood separator to a comma.
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park , Harbourfront"
5,M6A,North York,"Lawrence Manor , Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government"


In [9]:
# A neighborhood that is missing (NaN) or literally 'Not assigned' falls back to its borough name.
# Series.replace('Not assigned', <Series>) is not a row-by-row fill (recent pandas versions reject a
# Series/dict `value` combined with a scalar `to_replace`), and it never matched NaN cells anyway,
# so the rows to fix are selected with an explicit boolean mask instead.
needs_borough = df['Neighborhood'].isna() | df['Neighborhood'].eq('Not assigned')
# mask() takes the index-aligned Borough value wherever the condition is True.
df = df.assign(Neighborhood=df['Neighborhood'].mask(needs_borough, df['Borough']))

In [10]:
# Preview the first five rows after the neighborhood fallback step.
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park , Harbourfront"
5,M6A,North York,"Lawrence Manor , Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government"


In [11]:
# (rows, columns) of the cleaned postal-code table.
df.shape

(103, 3)

In [12]:
# Download the geospatial CSV and load it into a dataframe.
latlan = 'http://cocl.us/Geospatial_data'
lldf = pd.read_csv(filepath_or_buffer=latlan)

In [13]:
# Name the key column 'PostalCode', the same spelling used for the postal-code column of df.
lldf = lldf.rename(columns={'Postal Code': 'PostalCode'})
# Preview the first five coordinate rows.
lldf.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [14]:
# Attach the latitude and longitude of each postal code to its borough/neighborhood row.
# The join key is spelled out instead of relying on "every column the two frames share",
# so an unrelated column with a matching name can never silently become part of the key.
# The inner join keeps only postal codes present in both frames.
df = df.merge(lldf, on='PostalCode', how='inner')

In [15]:
# Preview the first five rows of the merged table, now including Latitude and Longitude.
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.662301,-79.389494


In [16]:
# Environment setup, kept commented out; uncomment to install folium 0.5.0 from conda-forge.
#!conda install -c conda-forge folium=0.5.0 --yes
import folium
# Reaching this line means the import above succeeded.
print('Folium installed and imported!')

Folium installed and imported!


In [17]:
# Latitude and longitude used as the centre point of the Toronto maps.
toronto_latitude, toronto_longitude = 43.651070, -79.347015

In [18]:
# Build a folium map centred on the Toronto coordinates at zoom level 12.
toronto_map = folium.Map(
    location=[toronto_latitude, toronto_longitude],
    zoom_start=12,
)
# Leaving the map as the cell's last expression renders it.
toronto_map

In [19]:
# Draw one blue circle marker per row of df on the existing Toronto map.
marker_rows = zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood'])
for lat, lng, borough, neighborhood in marker_rows:
    # Popup text shown on click: "<neighborhood>, <borough>".
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    # parse_html is an option of folium.Popup (set above); it is not a CircleMarker style
    # option, so the stray parse_html=False that used to be passed here has been removed.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(toronto_map)

# Render the map with the markers.
toronto_map

In [20]:
# Group the markers into clusters. Each cluster can be read as a pocket of Toronto
# that can then be analyzed separately.
from folium import plugins

# Rebuild the base map so this cell starts from a map without the earlier circle markers.
toronto_map = folium.Map(
    location=[toronto_latitude, toronto_longitude],
    zoom_start=12,
)

# Marker-cluster layer that holds one marker per row of the dataframe.
neigh = plugins.MarkerCluster()
neigh.add_to(toronto_map)

# Add every data point to the cluster layer, with a "<neighborhood>, <borough>" popup.
cluster_rows = zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood'])
for lat, lng, borough, neighborhood in cluster_rows:
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    folium.Marker(location=[lat, lng], icon=None, popup=label).add_to(neigh)

# Render the clustered map.
toronto_map