# Part 1: Import Postal Code dataset

In [1]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup

In [2]:
# Read every HTML table on the Wikipedia page and keep the first one.
df = pd.read_html(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
)[0]

In [3]:
# Preview the first five rows of the scraped table.
df.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [4]:
# Keep only the postal codes that have a borough assigned. A boolean mask
# builds a new frame rather than mutating the existing one in place; the
# surviving rows keep their original index labels.
df = df[df['Borough'] != 'Not assigned'].copy()

### Explanation and Assumptions
* This notebook uses `pandas.read_html` to read the postal code table from the Wikipedia page.
* After removing all rows whose Borough is 'Not assigned', no 'NaN' values remain in the Neighborhood column.
* Assumption: every postal code that has an assigned borough has at least one neighborhood.

In [5]:
# (rows, columns) of the frame after the 'Not assigned' boroughs were removed.
df.shape

(103, 3)

# Part 2: Import geographical coordinates

In [6]:
# Download the CSV of geographical coordinates into a DataFrame; it is joined
# to the postal code table on 'Postal Code' in the next step.
geo_coor = pd.read_csv(
    "http://cocl.us/Geospatial_data"
)

In [7]:
# Attach the coordinate columns to each postal code (left join keeps every
# postal code row, even if no coordinates are found for it).
# Any coordinate columns left over from a previous run of this cell are dropped
# first, so re-running the cell does not produce suffixed duplicates such as
# Latitude_x / Latitude_y.
df = (
    df.drop(columns=geo_coor.columns.difference(['Postal Code']), errors='ignore')
      .merge(geo_coor, how='left', on='Postal Code')
)

In [8]:
# Order the rows by postal code, then borough. Rebinding the sorted result
# avoids in-place mutation; the index labels are left as they were.
df = df.sort_values(by=['Postal Code', 'Borough'])

In [9]:
# Show only the first ten rows instead of rendering the whole frame.
df.head(10)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
12,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
18,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
22,M1G,Scarborough,Woburn,43.770992,-79.216917
26,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
32,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
38,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
44,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
51,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
58,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


# Part 3: Analysis Ideas

* Most boroughs have postal codes that share a common prefix. For example, Scarborough's postal codes all look like 'M1#'.
* Some boroughs have more than one postal code prefix; for example, North York has both 'M6#' and 'M9#'. We assume the geographical division has changed in recent years.
* Plot all borough centroids on the map, analyze the venues in each borough, and then group the boroughs into different clusters.

In [10]:
!pip install folium



In [11]:
from geopy.geocoders import Nominatim
import folium

In [12]:
# Look up the coordinates of Toronto; they are used to centre the map below.
address = 'Toronto, CA'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the address cannot be resolved; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [13]:
# Base map centred on the geocoded Toronto coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# The left merge can leave postal codes without coordinates; such rows cannot
# be placed on the map, so they are skipped here.
plottable = df.dropna(subset=['Latitude', 'Longitude'])

# One circle marker per postal code, labelled "<neighborhood(s)>, <borough>".
for lat, lng, borough, neighborhood in zip(
    plottable['Latitude'],
    plottable['Longitude'],
    plottable['Borough'],
    plottable['Neighborhood'],
):
    label = '{}, {}'.format(neighborhood, borough)
    # parse_html is an option of Popup (not of CircleMarker), so it is set here only.
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

map_toronto