# preparing dataset
## 1. load table from Wikipedia using pandas' read_html

In [1]:
pip install lxml

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
from pandas.io.html import read_html
page = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
wikitable = read_html(page, attrs={"class": "wikitable"})
print(wikitable)

[    Postcode           Borough  \
0        M1A      Not assigned   
1        M2A      Not assigned   
2        M3A        North York   
3        M4A        North York   
4        M5A  Downtown Toronto   
5        M6A        North York   
6        M6A        North York   
7        M7A  Downtown Toronto   
8        M8A      Not assigned   
9        M9A         Etobicoke   
10       M1B       Scarborough   
11       M1B       Scarborough   
12       M2B      Not assigned   
13       M3B        North York   
14       M4B         East York   
15       M4B         East York   
16       M5B  Downtown Toronto   
17       M5B  Downtown Toronto   
18       M6B        North York   
19       M7B      Not assigned   
20       M8B      Not assigned   
21       M9B         Etobicoke   
22       M9B         Etobicoke   
23       M9B         Etobicoke   
24       M9B         Etobicoke   
25       M9B         Etobicoke   
26       M1C       Scarborough   
27       M1C       Scarborough   
28       M1C 

## 2. remove all rows that contain "Not assigned"

In [15]:
type(wikitable[0])
df = pd.DataFrame(wikitable[0])

In [16]:
df2 = df[~df["Borough"].str.contains("Not assigned")]
print(df2["Borough"])

2            North York
3            North York
4      Downtown Toronto
5            North York
6            North York
7      Downtown Toronto
9             Etobicoke
10          Scarborough
11          Scarborough
13           North York
14            East York
15            East York
16     Downtown Toronto
17     Downtown Toronto
18           North York
21            Etobicoke
22            Etobicoke
23            Etobicoke
24            Etobicoke
25            Etobicoke
26          Scarborough
27          Scarborough
28          Scarborough
30           North York
31           North York
32            East York
33     Downtown Toronto
34                 York
37            Etobicoke
38            Etobicoke
             ...       
234           Etobicoke
235         Scarborough
238    Downtown Toronto
239    Downtown Toronto
242           Etobicoke
243           Etobicoke
244           Etobicoke
245         Scarborough
248    Downtown Toronto
249    Downtown Toronto
250    Downtown 

## 3. group the data

In [17]:
grouped = df2.groupby(['Postcode', 'Borough'])['Neighbourhood'].apply(lambda Neighbourhood: ','.join(Neighbourhood)).reset_index()
df3 = pd.DataFrame(grouped)
print(df3)

    Postcode           Borough  \
0        M1B       Scarborough   
1        M1C       Scarborough   
2        M1E       Scarborough   
3        M1G       Scarborough   
4        M1H       Scarborough   
5        M1J       Scarborough   
6        M1K       Scarborough   
7        M1L       Scarborough   
8        M1M       Scarborough   
9        M1N       Scarborough   
10       M1P       Scarborough   
11       M1R       Scarborough   
12       M1S       Scarborough   
13       M1T       Scarborough   
14       M1V       Scarborough   
15       M1W       Scarborough   
16       M1X       Scarborough   
17       M2H        North York   
18       M2J        North York   
19       M2K        North York   
20       M2L        North York   
21       M2M        North York   
22       M2N        North York   
23       M2P        North York   
24       M2R        North York   
25       M3A        North York   
26       M3B        North York   
27       M3C        North York   
28       M3H  

In [18]:
df3.shape

(103, 3)

## 4. add longitude and latitude to dataframe

In [19]:
lat_long = pd.read_csv("Geospatial_Coordinates.csv")
lat_long = lat_long.rename(columns={'Postal Code': 'Postcode'})
print(lat_long)

    Postcode   Latitude  Longitude
0        M1B  43.806686 -79.194353
1        M1C  43.784535 -79.160497
2        M1E  43.763573 -79.188711
3        M1G  43.770992 -79.216917
4        M1H  43.773136 -79.239476
5        M1J  43.744734 -79.239476
6        M1K  43.727929 -79.262029
7        M1L  43.711112 -79.284577
8        M1M  43.716316 -79.239476
9        M1N  43.692657 -79.264848
10       M1P  43.757410 -79.273304
11       M1R  43.750072 -79.295849
12       M1S  43.794200 -79.262029
13       M1T  43.781638 -79.304302
14       M1V  43.815252 -79.284577
15       M1W  43.799525 -79.318389
16       M1X  43.836125 -79.205636
17       M2H  43.803762 -79.363452
18       M2J  43.778517 -79.346556
19       M2K  43.786947 -79.385975
20       M2L  43.757490 -79.374714
21       M2M  43.789053 -79.408493
22       M2N  43.770120 -79.408493
23       M2P  43.752758 -79.400049
24       M2R  43.782736 -79.442259
25       M3A  43.753259 -79.329656
26       M3B  43.745906 -79.352188
27       M3C  43.725

In [24]:
df4 = pd.concat([df3, lat_long], axis=1)
print(df4)

    Postcode           Borough  \
0        M1B       Scarborough   
1        M1C       Scarborough   
2        M1E       Scarborough   
3        M1G       Scarborough   
4        M1H       Scarborough   
5        M1J       Scarborough   
6        M1K       Scarborough   
7        M1L       Scarborough   
8        M1M       Scarborough   
9        M1N       Scarborough   
10       M1P       Scarborough   
11       M1R       Scarborough   
12       M1S       Scarborough   
13       M1T       Scarborough   
14       M1V       Scarborough   
15       M1W       Scarborough   
16       M1X       Scarborough   
17       M2H        North York   
18       M2J        North York   
19       M2K        North York   
20       M2L        North York   
21       M2M        North York   
22       M2N        North York   
23       M2P        North York   
24       M2R        North York   
25       M3A        North York   
26       M3B        North York   
27       M3C        North York   
28       M3H  

## 5. create map with all Neighbourhoods

In [25]:
pip install geopy

Note: you may need to restart the kernel to use updated packages.


In [26]:
from geopy.geocoders import Nominatim
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [29]:
pip install folium

Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/fd/a0/ccb3094026649cda4acd55bf2c3822bb8c277eb11446d13d384e5be35257/folium-0.10.1-py2.py3-none-any.whl (91kB)
[K    100% |████████████████████████████████| 92kB 1.8MB/s ta 0:00:011
[?25hCollecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/81/6d/31c83485189a2521a75b4130f1fee5364f772a0375f81afff619004e5237/branca-0.4.0-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.4.0 folium-0.10.1
Note: you may need to restart the kernel to use updated packages.


In [30]:
import folium

In [42]:
# create map of Manhattan using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(df4['Latitude'], df4['Longitude'], df4['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

## 6. create map with markers for "Downtown Toronto"

In [43]:
df5 = df4[df4["Borough"]=="Downtown Toronto"]
print(df5)

   Postcode           Borough  \
50      M4W  Downtown Toronto   
51      M4X  Downtown Toronto   
52      M4Y  Downtown Toronto   
53      M5A  Downtown Toronto   
54      M5B  Downtown Toronto   
55      M5C  Downtown Toronto   
56      M5E  Downtown Toronto   
57      M5G  Downtown Toronto   
58      M5H  Downtown Toronto   
59      M5J  Downtown Toronto   
60      M5K  Downtown Toronto   
61      M5L  Downtown Toronto   
66      M5S  Downtown Toronto   
67      M5T  Downtown Toronto   
68      M5V  Downtown Toronto   
69      M5W  Downtown Toronto   
70      M5X  Downtown Toronto   
75      M6G  Downtown Toronto   
85      M7A  Downtown Toronto   

                                        Neighbourhood Postcode   Latitude  \
50                                           Rosedale      M4W  43.679563   
51                         Cabbagetown,St. James Town      M4X  43.667967   
52                               Church and Wellesley      M4Y  43.665860   
53                             

In [46]:
# create map of Manhattan using latitude and longitude values
map_downtown_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(df5['Latitude'], df5['Longitude'], df5['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_downtown_toronto)  
    
map_downtown_toronto