In [4]:
# importing all the required libraries
import urllib.request
from io import StringIO

import bs4 as bs
import lxml.html as lh
import numpy as np
import pandas as pd
import requests

In [5]:
# Download the Wikipedia page listing the Canadian postal codes that start
# with "M" and load its first HTML table into a DataFrame.
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
res = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page as data.
res.raise_for_status()
soup = bs.BeautifulSoup(res.content, 'lxml')
table = soup.find_all('table')[0]
# read_html returns a list of DataFrames. The markup is wrapped in StringIO
# because passing a literal HTML string is deprecated in recent pandas.
df = pd.read_html(StringIO(str(table)))
# Use the parsed frame directly: the former to_json/read_json round trip relied
# on the same deprecated literal-string input and produced the same table.
data = df[0]


In [6]:
# Show the first rows of the scraped table to check that it parsed as expected.
data.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [7]:
# Keep only the postal codes that have a borough assigned. The explicit copy
# makes Data an independent frame, so writing columns to it later neither
# raises SettingWithCopyWarning nor depends on whether pandas returned a view.
Data = data[data['Borough'] != 'Not assigned'].copy()

In [8]:
# Show the first rows after dropping entries whose borough is 'Not assigned'.
Data.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [28]:
# If a cell has a borough but a 'Not assigned' neighbourhood, the neighbourhood
# becomes the same as the borough.
# assign() builds a new frame instead of writing a column into a frame that was
# produced by boolean filtering, which avoids SettingWithCopyWarning; the
# column keeps its position and re-running the cell gives the same result.
Data = Data.assign(
    Neighbourhood=np.where(
        Data['Neighbourhood'] == 'Not assigned',
        Data['Borough'],
        Data['Neighbourhood'],
    )
)

In [10]:
# Show the first rows after replacing 'Not assigned' neighbourhoods with the borough name.
Data.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [11]:
# Collapse rows that share a postal code and borough into a single row,
# joining the remaining text columns of each group with commas.
Data = (
    Data
    .groupby(by=['Postal Code', 'Borough'], as_index=False)
    .aggregate(','.join)
)

In [12]:
# Show the first rows after grouping by postal code and borough.
Data.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [13]:
# Row and column counts of the grouped frame.
Data.shape

(103, 3)

In [19]:
# Load the latitude and longitude recorded for each postal code.
url1 = "http://cocl.us/Geospatial_data"
coordinates = pd.read_csv(filepath_or_buffer=url1)

In [20]:
# Show the first rows of the coordinates table.
coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [22]:
# Attach the coordinates to each row of Data by matching on the shared
# 'Postal Code' column (inner join: only codes present in both frames are kept).
Locationdata = Data.merge(coordinates, how='inner', on='Postal Code')

In [23]:
# Show the first rows of the merged frame (boroughs and neighbourhoods with coordinates).
Locationdata.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [26]:
# importing the required libraries
import folium
import matplotlib.colors as colors
from sklearn.cluster import KMeans

In [27]:
# Cluster the postal codes by location and plot them on a map of Toronto.
toronto_map = folium.Map(location=[43.65, -79.4], zoom_start=12)

# Build one (latitude, longitude) row per postal code for the clustering.
X = Locationdata['Latitude']
Y = Locationdata['Longitude']
Z = np.stack((X, Y), axis=1)

# One fill colour per cluster. The list has its own name so it no longer
# overwrites the `colors` module imported from matplotlib, and the number of
# clusters is derived from it so every cluster label is guaranteed a colour.
cluster_colors = ['red', 'green', 'blue', 'yellow']

# n_init is set explicitly because its default differs between scikit-learn
# releases; together with random_state this keeps the clustering reproducible.
kmeans = KMeans(n_clusters=len(cluster_colors), n_init=10, random_state=0).fit(Z)

clusters = kmeans.labels_
Locationdata['Cluster'] = clusters

# Draw one circle per postal code: the popup shows the borough and the fill
# colour identifies the cluster the point was assigned to.
for latitude, longitude, borough, cluster in zip(
    Locationdata['Latitude'],
    Locationdata['Longitude'],
    Locationdata['Borough'],
    Locationdata['Cluster'],
):
    label = folium.Popup(borough, parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup=label,
        color='black',
        fill=True,
        fill_color=cluster_colors[cluster],
        fill_opacity=0.7,
    ).add_to(toronto_map)

# Leaving the map as the last expression makes the notebook render it.
toronto_map