### Import the necessary libraries

In [234]:
import requests 
import pandas as pd 
import numpy as np
from bs4 import BeautifulSoup

### Scrap the URL and get the table data 

In [174]:
URL = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
res = requests.get(URL).text
soup = BeautifulSoup(res,'lxml')
df_list = []
for items in soup.find('table', class_= 'wikitable sortable').find_all('tr')[1::]:
    data = items.find_all(['td'])
    try:
        postcode = data[0].get_text()       
        borough = data[1].get_text()
        neighbourhood = data[2].get_text().rstrip('\n')
    except IndexError:pass
    df_list.append((postcode,borough,neighbourhood))

print(df_list[0:3])

[('M1A', 'Not assigned', 'Not assigned'), ('M2A', 'Not assigned', 'Not assigned'), ('M3A', 'North York', 'Parkwoods')]


### Convert the above list data into PANDAS DataFrame

In [198]:
df_data = pd.DataFrame(df_list, columns=['Postal Code', 'Borough', 'Neighbourhood'])

In [199]:
df_data.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


### Check the shape of the DataFrame

In [200]:
df_data.shape

(289, 3)

### Get the count of Column Borough = Not assigned

In [201]:
df_data['Borough'].eq('Not assigned').sum()

77

### Get the count of Column Neighbourhood = Not assigned

In [202]:
df_data['Neighbourhood'].eq('Not assigned').sum()

78

### Remove the rows where Borough = Not assigned 

In [203]:
df_data = df_data[df_data.Borough != 'Not assigned']

### Shape of the DataFrame after removing rows, where Borough column equal to Not assigned values

In [204]:
df_data.shape

(212, 3)

### Get the rows where Neighbourhood is equal to Not assigned values

In [205]:
df_data['Neighbourhood'].eq('Not assigned').sum()

1

In [206]:
df_data.loc[df_data['Neighbourhood'] == 'Not assigned']

Unnamed: 0,Postal Code,Borough,Neighbourhood
8,M7A,Queen's Park,Not assigned


### Copy the value of Borough column to the Neighbourhood column, if Neighbourhood == Not assigned 

In [207]:
df_data['Neighbourhood'] = np.where(df_data['Neighbourhood'] == 'Not assigned', df_data['Borough'], df_data['Borough'])

In [208]:
df_data['Neighbourhood'].eq('Not assigned').sum()

0

### Merge the Neighbourhoods for the same Postal Code 

In [210]:
df_data = df_data.groupby(['Postal Code', 'Borough'])['Neighbourhood'].apply(', '.join).reset_index()

## Final Shape of the Notebook

In [211]:
df_data.shape

(103, 3)

## Upload the Geo Co-Ordinates CSV File 

In [213]:
# The code was removed by Watson Studio for sharing.

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [195]:
df_cor.shape

(103, 3)

## Merge the 2 DataFrames - df_data and df_cor

In [214]:
df_final = pd.merge(df_data, df_cor, on='Postal Code')

In [216]:
df_final.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Scarborough, Scarborough",43.806686,-79.194353
1,M1C,Scarborough,"Scarborough, Scarborough, Scarborough",43.784535,-79.160497
2,M1E,Scarborough,"Scarborough, Scarborough, Scarborough",43.763573,-79.188711
3,M1G,Scarborough,Scarborough,43.770992,-79.216917
4,M1H,Scarborough,Scarborough,43.773136,-79.239476


# Exploring and Clustering the Neighbourhoods in Toronto

In [221]:
df_final['Borough'].value_counts()

North York          24
Downtown Toronto    18
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
East Toronto         5
East York            5
York                 5
Queen's Park         1
Mississauga          1
Name: Borough, dtype: int64

## Considering only those Boroughs which contains the word Toronto

In [250]:
df_toronto = df_final[df_final['Borough'].str.contains('Toronto')]

In [251]:
df_toronto.shape

(38, 5)

In [252]:
df_toronto.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
37,M4E,East Toronto,East Toronto,43.676357,-79.293031
41,M4K,East Toronto,"East Toronto, East Toronto",43.679557,-79.352188
42,M4L,East Toronto,"East Toronto, East Toronto",43.668999,-79.315572
43,M4M,East Toronto,East Toronto,43.659526,-79.340923
44,M4N,Central Toronto,Central Toronto,43.72802,-79.38879


### Import the necessary libraries

In [253]:
from geopy.geocoders import Nominatim

### Get the latitude and longitude values of Toronto City.

In [236]:
address = 'Toronto'
geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


### Create map of Toronto using latitude and longitude values

In [262]:
# install and import folium library 
!pip -q install folium
import folium 

In [264]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

## Superimpose the neighborhoods on the Toronto map 

In [266]:
# add markers to map
for lat, lng, borough, neighborhood in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Borough'], df_toronto['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
            [lat, lng],radius=5, popup=label, color='blue', fill=True, fill_color='#3186cc', fill_opacity=0.7).add_to(map_toronto) 

map_toronto    