## Initial Part taken from Previous Exercise | Geo part has been added Later

### Import Necessary Libraries & Packages

In [1]:
import numpy as np # For vector data
import pandas as pd # For data analysis
import requests # For web requests
from bs4 import BeautifulSoup # For Web-Scraping

### Web-Scraping to Fetch the Desired Data

In [2]:
# Fetch the webpage having data
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# The timeout keeps this cell from hanging indefinitely on a stalled
# connection; raise_for_status() aborts on an HTTP error response instead of
# silently parsing an error page as if it were the data table.
page = requests.get(url, timeout=30)
page.raise_for_status()
soup = BeautifulSoup(page.text, 'html.parser')

# Find the table headers: stripped text of every <th> in the first <tr> found
headers = [th.text.strip() for th in soup.find('tr').find_all('th')]

# Scrape the Toronto FSA Data: one list of stripped <td> texts per <tr> of the
# first <tbody> found. A row that contains no <td> cells yields an empty list.
fsa_data = [
    [td.text.strip() for td in row.find_all('td')]
    for row in soup.find('tbody').find_all('tr')
]

### Transform the fetched data into Pandas DataFrame with 3 columns

In [3]:
# Build the frame from every scraped row except the first (blank) one, using
# the scraped table headers as the column labels.
data_rows = fsa_data[1:]
toronto_df = pd.DataFrame(data=data_rows, columns=headers)

### Drop the rows which do not have any Borough value

In [4]:
# Keep only the rows whose Borough is not 'Not assigned', then renumber the
# index from 0 so it has no gaps.
has_borough = toronto_df['Borough'] != 'Not assigned'
toronto_df = toronto_df.loc[has_borough].reset_index(drop=True)

### Concatenate the Neighbourhoods for each Postcode

In [5]:
# Group by (Postcode, Borough), keeping groups in order of first appearance,
# and join each group's Neighbourhood values into one comma-separated string.
neighbourhoods_by_code = toronto_df.groupby(['Postcode', 'Borough'], sort=False)['Neighbourhood']
df_aggr = neighbourhoods_by_code.apply(lambda names: ','.join(names)).reset_index()

### Replace 'Not assigned' Neighbourhood with the corresponding Borough

In [6]:
# For rows whose Neighbourhood is 'Not assigned', copy that same row's Borough
# into the Neighbourhood column (the assignment aligns on the row index).
unnamed = df_aggr['Neighbourhood'] == 'Not assigned'
df_aggr.loc[unnamed, 'Neighbourhood'] = df_aggr.loc[unnamed, 'Borough']

### Print the Shape of the DataFrame

In [7]:
# Display the (rows, columns) dimensions of the aggregated frame.
df_aggr.shape

(103, 3)

# Geo-coordinates Part for this Exercise Added Here 

### Load the CSV of Geo-Coordinates into a DataFrame

In [8]:
# Read the coordinates file, then relabel its columns positionally so the key
# column is called 'Postcode' (the name used by the merge below).
geo_columns = ['Postcode', 'Latitude', 'Longitude']
df_geo = pd.read_csv('Geospatial_Coordinates.csv')
df_geo.columns = geo_columns
df_geo.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Merge the two DataFrames to get a DataFrame having Lat-Long for each Postcode

In [9]:
# Inner-join the aggregated frame with the coordinates on 'Postcode'; only
# postcodes present in both frames appear in the result.
df_aggr_geo = df_aggr.merge(df_geo, how='inner', on='Postcode')
df_aggr_geo.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront,Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights,Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494


In [10]:
# Spot-check a single postcode: show the merged row(s) for 'M5G'.
df_aggr_geo.loc[df_aggr_geo['Postcode'] == 'M5G']

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383


In [11]:
# Display the (rows, columns) dimensions of the merged frame.
df_aggr_geo.shape

(103, 5)

In [12]:
# Display the first 20 rows of the merged frame.
df_aggr_geo.head(20)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront,Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights,Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens,Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson,Garden District",43.657162,-79.378937
