<h1>Part 1: Creating the Data Frame of Neighborhoods.


In [1]:
!pip install beautifulsoup4
import pandas as pd
import requests
import numpy as np


from bs4 import BeautifulSoup



In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
text = requests.get(url).text
soup = BeautifulSoup(text,'html')

In [3]:
neigh = []
table = soup.find('table')
for row in table.find_all('td'):
    if row.span.text == 'Not assigned':
        pass
    else:
        x = {}
        x['Postal Code'] = row.p.text[:3]
        x['Borough'] = row.span.text.split('(')[0]
        x['Neighborhood'] = row.span.text.split('(')[1].strip(')').replace(' /',',')
        neigh.append(x)
df = pd.DataFrame(neigh)
df



Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East TorontoBusiness reply mail Processing Cen...,Enclave of M4L
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [4]:
df.shape

(103, 3)

<h1>Part 2: Getting the Coordinates of Postal Codes

In [5]:
link = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs_v1/Geospatial_Coordinates.csv'
coord = pd.read_csv(link)
coord.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [9]:

df2 = pd.merge(df,coord,on = 'Postal Code')
df2.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494


<h1>Part 3: Exploring and Clustering

Using Boroughs that contain "Toronto"

In [7]:
df3 = df2[df2['Borough'].str.contains('Toronto')]
df3.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306


In [12]:
!pip install folium
import folium
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors



In [20]:
map_toronto = folium.Map(location=[43.651070,-79.347015],zoom_start=10)
for ln, lt, b ,n in zip(df3['Longitude'],df3['Latitude'],df3['Borough'],df3['Neighborhood']):
    label = '{}, {}'.format(n, b)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lt,ln],
        fill=True,
        radius=5,
        popup = label).add_to(map_toronto)
map_toronto

In [51]:
k = 4
cluster = KMeans(n_clusters = k).fit(df3[['Latitude','Longitude']])
df3['Cluster'] = cluster.labels_
df3.head()

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  dtype = np.float
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  dtype = np.float
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  dtype = np.float
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  dtype = np.float
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  dtype = np.float
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  max_iter=max_iter, verbose=verbose)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  dtype = np.

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0
19,M4E,East Toronto,The Beaches,43.676357,-79.293031,3
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,0


In [58]:
map_c = folium.Map(location=[43.651070,-79.347015],zoom_start=10)

#x = np.arange(k)
#ys = [i + x + (i*x)**2 for i in range(k)]
#colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
#rainbow = [colors.rgb2hex(i) for i in colors_array]

for ln, lt, b ,n ,col in zip(df3['Longitude'],df3['Latitude'],df3['Borough'],df3['Neighborhood'],df3['Cluster']):
    label = '{}, {}'.format(n, col)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lt,ln],
        fill=True,
        radius=5,
        popup=label).add_to(map_c)
map_c

TypeError: 'module' object is not subscriptable