In [1]:
from bs4 import BeautifulSoup

In [2]:
import urllib.request
import csv
import pandas as pd

In [3]:
# Wikipedia page "List of postal codes of Canada: M"; this is the page that
# is downloaded and scraped for its postal-code table.
url2 = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

In [4]:
# Download the page and parse it into a BeautifulSoup tree.
# The response is opened in a `with` block so the underlying HTTP connection
# is closed as soon as parsing is done instead of being left open.
with urllib.request.urlopen(url2) as page:
    soup = BeautifulSoup(page, 'html.parser')

In [5]:
# Take the first <table> on the page; it is assumed to be the postal-code
# listing. find() returns None when no match exists, so fail early with a
# readable message rather than an AttributeError on the next line.
table_address = soup.find('table')
if table_address is None:
    raise ValueError('No <table> element found on the page; its layout may have changed.')

# Every <td> cell of that table, in document order.
info = table_address.find_all('td')

# Total number of cells, used to walk the cells row by row.
count = len(info)

In [6]:
# The table cells are consumed in groups of three:
# (postcode, borough, neighborhood). If the cell count is not a multiple of
# three the grouping is wrong, so stop with a clear error instead of hitting
# an IndexError halfway through and leaving the three lists misaligned.
if count % 3 != 0:
    raise ValueError(
        'Expected the number of table cells to be a multiple of 3, got {}.'.format(count)
    )

# Strip surrounding whitespace/newlines from each cell's text once.
cell_text = [cell.text.strip() for cell in info]

# De-interleave the flat cell list into one list per column.
postcode = cell_text[0::3]
borough = cell_text[1::3]
neighborhood = cell_text[2::3]

In [7]:
# Stack the three scraped lists as labelled rows, then flip the frame so each
# label becomes a column and each table entry becomes a row.
column_labels = ['Postcode', 'Borough', 'Neighborhood']
df1 = pd.DataFrame([postcode, borough, neighborhood], index=column_labels).T

In [8]:
# Remove rows whose borough is 'Not assigned' (df1 is modified in place).
unassigned_rows = df1[df1['Borough'] == 'Not assigned'].index
df1.drop(index=unassigned_rows, inplace=True)

# Where only the neighborhood is 'Not assigned', reuse the borough name.
# The right-hand side is aligned on the index, so each row gets its own borough.
needs_name = df1['Neighborhood'] == 'Not assigned'
df1.loc[needs_name, 'Neighborhood'] = df1['Borough']

In [9]:
# Collapse rows that share a postcode and borough into a single row whose
# neighborhood field is the comma-separated list of the group's neighborhoods.
neighborhoods_by_code = df1.groupby(['Postcode', 'Borough'])['Neighborhood']
df2 = neighborhoods_by_code.apply(', '.join).reset_index(name='Neighborhood')

In [10]:
# Preview only the first rows instead of rendering the whole frame.
df2.head(10)

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [11]:
# (rows, columns) of the grouped frame: one row per postcode/borough pair.
df2.shape

(103, 3)

In [12]:
import pandas as pd
# Load the postcode -> coordinates lookup table and give its three columns
# the names used in this notebook (assigned by position).
coordinate_columns = ['Postcode', 'Latitude', 'Longitude']
df_coordinate = pd.read_csv('http://cocl.us/Geospatial_data')
df_coordinate.columns = coordinate_columns

# Attach coordinates to each postcode row; the inner join keeps only
# postcodes that appear in both frames.
df_add = df2.merge(df_coordinate, how='inner', on='Postcode')

In [13]:
# Preview only the first rows instead of rendering the whole frame.
df_add.head(10)

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [14]:
# Each row of df_add is one postal code (its neighborhoods were joined into a
# single comma-separated string), so the row count is the number of postal
# codes, not the number of neighborhoods.
print('The dataframe has {} boroughs and {} postal codes.'.format(
        df_add['Borough'].nunique(),
        len(df_add)
    )
)

The dataframe has 11 boroughs and 103 neighborhoods.


In [15]:
!conda install -c conda-forge geopy --yes
!conda install -c conda-forge folium=0.5.0 --yes

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    openssl-1.1.1c             |       h516909a_0         2.1 MB  conda-forge
    geopy-1.20.0               |             py_0          57 KB  conda-forge
    geographiclib-1.49         |             py_0          32 KB  conda-forge
    certifi-2019.6.16          |           py36_1         149 KB  conda-forge
    ca-certificates-2019.6.16  |       hecc5488_0         145 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.49-py_0         conda-forge
    geopy:           1.20.0-py_0       conda-forge

The following packages will be UPDATED:

    ca-

In [16]:
from geopy.geocoders import Nominatim

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

import folium

In [17]:
# Keep only the rows whose borough name contains the word 'Toronto'.
is_toronto_borough = df_add['Borough'].str.contains('Toronto')
df_explore = df_add.loc[is_toronto_borough]
df_explore

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
37,M4E,East Toronto,The Beaches,43.676357,-79.293031
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
42,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
43,M4M,East Toronto,Studio District,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
45,M4P,Central Toronto,Davisville North,43.712751,-79.390197
46,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
47,M4S,Central Toronto,Davisville,43.704324,-79.38879
48,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
49,M4V,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",43.686412,-79.400049


In [18]:
# Geocode the city name to get a centre point for the map.
address = 'Toronto, Canada'
geolocator = Nominatim(user_agent="trt_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() yields None when the service finds no match; fail with a
    # readable message instead of an AttributeError on `.latitude`.
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude

map_1 = folium.Map(location=[latitude, longitude], zoom_start=12)

# Add one circle marker per row of df_explore, labelled "<neighborhood>, <borough>".
# The loop variables are deliberately NOT called `borough` / `neighborhood`:
# those names already hold the scraped lists the first DataFrame is built
# from, and reusing them here would silently overwrite those lists.
for lat, lng, marker_borough, marker_neighborhood in zip(
        df_explore['Latitude'], df_explore['Longitude'],
        df_explore['Borough'], df_explore['Neighborhood']):
    label = '{}, {}'.format(marker_neighborhood, marker_borough)
    # parse_html is a Popup option, so it is set here only.
    popup = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='green',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_1)
map_1