### Import Necessary Libraries & Packages

In [1]:
import numpy as np # For vector data
import pandas as pd # For data analsysis
import requests # For web requests
from bs4 import BeautifulSoup # For Web-Scraping

### Web-Scraping to Fetch the Desired Data

In [2]:
# Fetch the Wikipedia page that lists the postal codes starting with "M".
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
page = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
page.raise_for_status()
soup = BeautifulSoup(page.text, 'html.parser')

# The first <tr> on the page supplies the column headers and the first
# <tbody> supplies the data rows; stop with a clear message if either is gone.
header_row = soup.find('tr')
table_body = soup.find('tbody')
if header_row is None or table_body is None:
    raise ValueError(
        'No table rows found at {}; the page layout may have changed.'.format(url)
    )

# Column names, taken from the <th> cells of the header row.
headers = [th.text.strip() for th in header_row.find_all('th')]

# One list of stripped cell texts per <tr>. A row that contains no <td> cells
# yields an empty list, so the row count of the table body is preserved.
fsa_data = [
    [td.text.strip() for td in row.find_all('td')]
    for row in table_body.find_all('tr')
]

### Transform the fetched data into a pandas DataFrame with 3 columns

In [3]:
# Build the Toronto table from the scraped rows. The first scraped row is
# blank, so the data starts at the second entry of fsa_data.
toronto_df = pd.DataFrame(
    data=fsa_data[1:],
    columns=headers,
)
# Preview with: toronto_df.head()

### Drop the rows which do not have any Borough value

In [4]:
# Keep only the rows whose Borough is assigned, then renumber the index from 0.
toronto_df = (
    toronto_df
    .loc[toronto_df['Borough'].ne('Not assigned')]
    .reset_index(drop=True)
)
# Preview with: toronto_df.head()

### Concatenate the Neighbourhoods for each Postcode

In [5]:
# Collapse to one row per (Postcode, Borough) pair, keeping first-seen order,
# with that pair's neighbourhood names joined by commas.
df_aggr = (
    toronto_df
    .groupby(['Postcode', 'Borough'], sort=False)['Neighbourhood']
    .apply(lambda names: ','.join(names))
    .reset_index()
)
# Preview with: df_aggr.head()

### Replace 'Not assigned' Neighbourhood with the corresponding Borough

In [6]:
# Wherever the Neighbourhood is exactly 'Not assigned', use the row's Borough
# instead; every other Neighbourhood value is left untouched.
df_aggr['Neighbourhood'] = df_aggr['Neighbourhood'].mask(
    df_aggr['Neighbourhood'].eq('Not assigned'),
    df_aggr['Borough'],
)
# Preview with: df_aggr.head()

### Print the Shape of the DataFrame

In [7]:
# Show the (rows, columns) tuple of the aggregated frame as the cell output.
df_aggr.shape

(103, 3)

### Load the CSV of Geo-Coordinates into a DataFrame

In [8]:
# Read the coordinates file from the working directory.
df_geo = pd.read_csv(filepath_or_buffer='Geospatial_Coordinates.csv')
# Relabel the columns by position so the key column is named 'Postcode',
# matching the key column of df_aggr.
df_geo.columns = pd.Index(['Postcode', 'Latitude', 'Longitude'])
# Preview with: df_geo.head()

### Merge the two DataFrames to get a DataFrame having Lat-Long for each Postcode

In [9]:
# Attach Latitude/Longitude to each aggregated row by matching on Postcode.
# Inner join: only postcodes present in both frames are kept.
df_aggr_geo = df_aggr.merge(df_geo, how='inner', on='Postcode')
# Preview with: df_aggr_geo.head()

# Mapping & Analysis : Part3

### Import the necessary Libraries

In [12]:
import folium # For map rendering
from geopy.geocoders import Nominatim # For address to lat-long conversion
import matplotlib.cm as cm  # For Mapping
import matplotlib.colors as colors # For Mapping
from sklearn.cluster import KMeans # For Clustering

### Fetch Geo-Coordinates of Toronto

In [13]:
# Place name to resolve into latitude/longitude.
city = 'Toronto, Canada'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(city)
# geocode() returns None when the service finds no match; stop here with a
# clear message rather than an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode {!r}: no result returned.'.format(city))

# Coordinates used as the centre of the base map.
lat = location.latitude
long = location.longitude
print('Geograpical coordinate of Toronto are {}, {}.'.format(lat, long))

Geograpical coordinate of Toronto are 43.653963, -79.387207.


### Create a Toronto BaseMap

In [18]:
# Base map centred on the geocoded Toronto coordinates.
map_toronto = folium.Map(location=[lat, long], zoom_start=10)

# Walk the four columns in lockstep: one tuple per row of df_aggr_geo.
marker_rows = zip(
    df_aggr_geo['Latitude'],
    df_aggr_geo['Longitude'],
    df_aggr_geo['Borough'],
    df_aggr_geo['Neighbourhood'],
)

# Add one small circle per row, with "<neighbourhood>, <borough>" as its popup.
for lt, lng, borough, neighborhood in marker_rows:
    label = folium.Popup(
        '{}, {}'.format(neighborhood, borough),
        parse_html=True,
    )
    marker = folium.CircleMarker(
        [lt, lng],
        radius=3,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        # NOTE(review): parse_html does not look like a CircleMarker option
        # (it belongs to Popup) -- confirm whether it can be dropped here.
        parse_html=False,
    )
    marker.add_to(map_toronto)

# Last expression of the cell: renders the map as the cell output.
map_toronto