In [2]:
import pandas as pd

In [3]:
# Scrape the Toronto ("M") postal-code table from Wikipedia; read_html returns
# every <table> on the page as a list and the first one is the table used here.
wiki_tables = pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
df = wiki_tables[0]

# Discard postal codes whose borough is the placeholder "Not assigned".
has_borough = df["Borough"].ne("Not assigned")
df = df.loc[has_borough]
df

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [5]:
# Load the postal-code coordinate table from a CSV file in the working directory.
geo = pd.read_csv(filepath_or_buffer="Geospatial_Coordinates.csv")

# Preview the first five rows.
geo.head(5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Use `merge` to add the latitude and longitude from the geospatial dataframe, matching rows on "Postal Code".

In [6]:
# Inner join on "Postal Code": only postal codes present in both tables are kept,
# and each kept row gains the columns of `geo`.
# NOTE(review): this rebinds `df`, so re-running this cell on its own merges the
# coordinates a second time -- re-run from the loading cell instead.
df = pd.merge(df, geo, how="inner", on="Postal Code")
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


Keep only the boroughs whose name contains "Toronto" (e.g. Downtown Toronto, East Toronto).

In [26]:
# Boolean mask: True for rows whose borough name contains the substring "Toronto".
is_toronto = df["Borough"].str.contains("Toronto")

# NOTE(review): the later cells visible in this notebook cluster and plot `df`,
# not `df_toronto` -- confirm which frame is intended downstream.
df_toronto = df.loc[is_toronto]
df_toronto.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031


Group the postal codes into 8 clusters with k-means, using latitude and longitude as the features.

In [50]:
from sklearn.cluster import KMeans

# Number of k-means clusters; the colour-palette cell also reads this value.
kclusters = 8

# Seed the centroid initialisation so the cluster numbering -- and therefore the
# marker colours on the map -- is identical on every Restart & Run All.
kmeans = KMeans(n_clusters=kclusters, random_state=0)

# Cluster on geographic position only. The fit uses every row of `df`, so
# `labels_` has one entry per row of `df`, in row order.
kmeans.fit(df[["Latitude", "Longitude"]])
kmeans.labels_

array([4, 1, 5, 0, 5, 2, 7, 3, 1, 5, 0, 2, 7, 1, 1, 5, 0, 2, 7, 1, 5, 0,
       7, 1, 5, 5, 4, 3, 3, 1, 5, 0, 4, 3, 6, 1, 5, 5, 4, 3, 0, 1, 5, 5,
       1, 3, 6, 1, 5, 0, 6, 4, 3, 6, 1, 3, 0, 6, 1, 3, 6, 3, 3, 0, 6, 4,
       3, 3, 5, 0, 6, 4, 3, 3, 5, 0, 2, 6, 4, 5, 5, 0, 4, 5, 5, 4, 5, 5,
       2, 6, 4, 5, 5, 2, 6, 7, 5, 5, 2, 5, 1, 2, 2])

Create one hex colour per cluster, sampled evenly from the rainbow colormap.

In [48]:
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np

# Sample the rainbow colormap at `kclusters` evenly spaced points in [0, 1],
# giving one RGBA row per cluster. (The sample count was previously taken from
# the length of an otherwise unused helper list, which always equalled kclusters.)
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))

# Convert each RGBA row to a "#rrggbb" string for use as a marker colour.
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

Now create the map and the markers

In [49]:
import folium

# Centre the map on the coordinates of postal code M5A (Downtown Toronto).
# NOTE(review): the name `map` shadows Python's built-in map(); it is kept here
# because cells outside this view may reference it -- consider renaming.
map = folium.Map(location=[43.654260, -79.360636], zoom_start=11)

# labels_ is ordered like the rows the model was fitted on, so pair rows and
# labels by position instead of using the DataFrame index label as an offset.
assert len(kmeans.labels_) == len(df), "kmeans must be fitted on the same rows as df"

for (_, row), cluster in zip(df.iterrows(), kmeans.labels_):
    # Cluster ids run 0..kclusters-1 and index the palette directly; the former
    # `cluster - 1` only worked because index -1 wrapped around for cluster 0.
    marker_color = rainbow[cluster]
    folium.CircleMarker(
        [row["Latitude"], row["Longitude"]],
        radius=5,
        popup=row["Postal Code"],
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7,
    ).add_to(map)

# Last expression of the cell: render the map inline.
map