In [138]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize
from pandas.io.html import read_html
!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

Solving environment: | ^C
failed

CondaError: KeyboardInterrupt



Scraping the Toronto Postal Codes table from wikipedia

In [163]:
page = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wikitables = read_html(page, attrs= {"class":"wikitable"})

In [164]:
df_toronto = wikitables[0]

In [165]:
df_toronto.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


Dropping the rows that do not have a Borough assigned

In [166]:
indices = df_toronto[df_toronto["Borough"] == "Not assigned"].index

In [167]:
df_toronto.drop(indices, inplace =True)

In [168]:
df_toronto.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


We need to assign values to Neighbourhoods that have a Not assigned as their values to their Boroughs

In [169]:
i = 0 
lim = df_toronto.shape[0]
while i < lim:
    if df_toronto['Neighbourhood'].iloc[i] == 'Not assigned':
        df_toronto['Neighbourhood'].iloc[i] = df_toronto['Borough'].iloc[i]
    i = i + 1

We group the data by Postcode and Borough so that all the neighbourhoods with the same postcode can be in the same row

In [170]:
toronto = df_toronto.groupby(['Postcode','Borough'],as_index=False, sort=False).agg( ','.join)

In [171]:
toronto.head(12)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Downtown Toronto,Queen's Park
5,M9A,Queen's Park,Queen's Park
6,M1B,Scarborough,"Rouge,Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens,Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson,Garden District"


In [172]:
print('The number of rows in the new DataFrame is' ,toronto.shape[0])

The number of rows in the new DataFrame is 103


We read in the longitude and latitude values of each postal code and merge it with our existing dataframe 

In [173]:
longlats = pd.read_csv('https://cocl.us/Geospatial_data')

In [174]:
longlats.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [175]:
longlats.rename(columns={'Postal Code':'Postcode'}, inplace = True)

In [176]:
result = pd.merge(toronto, longlats, on='Postcode')

In [177]:
result.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M6A,North York,"Lawrence Heights,Lawrence Manor",43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494


In [178]:
result.shape

(103, 5)

We group our Boroughs together to analyse the number of Neighbourhoods in each Borough

In [179]:
r_1 = result.groupby('Borough', as_index = True).count()

In [180]:
r_1

Unnamed: 0_level_0,Postcode,Neighbourhood,Latitude,Longitude
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Central Toronto,9,9,9,9
Downtown Toronto,19,19,19,19
East Toronto,5,5,5,5
East York,5,5,5,5
Etobicoke,11,11,11,11
Mississauga,1,1,1,1
North York,24,24,24,24
Queen's Park,1,1,1,1
Scarborough,17,17,17,17
West Toronto,6,6,6,6


In [181]:
print('There are {} Boroughs in Toronto, Canada'.format(r_1.shape[0]))

There are 11 Boroughs in Toronto, Canada


In [182]:
print("The Borough with the most Neighbourhoods is North York and so let us explore it further")

The Borough with the most Neighbourhoods is North York and so let us explore it further


We make a dataframe that contains the largest Borough's neighbourhoods so that we can map it out and put markers on the map

In [183]:
North_Y = result[result['Borough'] == 'North York'][['Borough','Neighbourhood','Latitude','Longitude']]

In [184]:
North_Y.head()

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
0,North York,Parkwoods,43.753259,-79.329656
1,North York,Victoria Village,43.725882,-79.315572
3,North York,"Lawrence Heights,Lawrence Manor",43.718518,-79.464763
7,North York,Don Mills North,43.745906,-79.352188
10,North York,Glencairn,43.709577,-79.445073


We find the coordinates of the Borough so that we can map it and put markers of each neighbourhood on it 

In [185]:
address = 'North York, Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of North York are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of North York are 43.7543263, -79.44911696639593.


In [186]:
# create map of Manhattan using latitude and longitude values
northyork_map = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(North_Y['Latitude'],North_Y['Longitude'], North_Y['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(northyork_map)  
    
northyork_map