## Importing the Libraries

In [3]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import csv 
import json
import xml
import numpy as np
 



## Downloading the Wikipedia Page

In [4]:
# Fetch the raw HTML of the Wikipedia article listing the "M" postal codes.
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# A timeout keeps the cell from hanging forever if the server never answers.
response = requests.get(url, timeout=30)
# Stop here on a 4xx/5xx reply instead of handing an error page to the parser.
response.raise_for_status()
WikiToronto = response.text

## Parsing the Downloaded HTML into a BeautifulSoup Object

In [5]:
# Build the parse tree from the downloaded HTML using the lxml parser.
Toronto_soup = BeautifulSoup(WikiToronto, features='lxml')

## Finding the Table that is required

In [6]:
# find() yields only the first <table> in the document, or None when there is none.
Toronto_table = Toronto_soup.find('table')
# Fail with a clear message here rather than with an AttributeError on None later.
if Toronto_table is None:
    raise ValueError('No <table> element was found in the downloaded page')


## Extracting All of the Table Cells

In [7]:
 # Collect every <td> cell of the selected table, in document order.
fields = Toronto_table.find_all('td')
# Number of cells found; the next cell consumes them in groups of three.
len(fields)

864

## Creating the DataFrame

In [8]:
  
 
# Whitespace-stripped text of every table cell, in document order.
cell_text = [cell.text.strip() for cell in fields]

# The cells are consumed three at a time (postal code, borough, neighbourhood),
# so a count that is not a multiple of three means the columns would be misaligned.
if len(cell_text) % 3 != 0:
    raise ValueError(
        'Expected the number of table cells to be a multiple of 3, got %d' % len(cell_text)
    )

# Strided slices pick out each column of the flattened table.
postcode = cell_text[0::3]
borough = cell_text[1::3]
neighbourhood = cell_text[2::3]

# Build the frame column by column; the dict keys become the column names.
df_tor = pd.DataFrame({
    'Postalcode': postcode,
    'Borough': borough,
    'Neighborhood': neighbourhood,
})
df_tor.head()

Unnamed: 0,Postalcode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


## Removing the "Not assigned" Boroughs from the DataFrame

In [11]:
# Keep only rows that have a real borough: neither the 'Not assigned'
# placeholder nor a missing value.
has_borough = df_tor['Borough'].ne('Not assigned') & df_tor['Borough'].notna()
# Rebind df_tor to a filtered copy instead of mutating a column selection in
# place; the original row labels are preserved and re-running the cell is harmless.
df_tor = df_tor.loc[has_borough].copy()
df_tor.head()

Unnamed: 0,Postalcode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


### Replacing the "Not assigned" Neighborhoods with Borough Name

In [12]:
# Show the rows whose Neighborhood value starts with the 'Not assigned' placeholder.
unassigned_mask = df_tor['Neighborhood'].str.match('Not assigned')
df_tor.loc[unassigned_mask]


Unnamed: 0,Postalcode,Borough,Neighborhood
8,M7A,Queen's Park,Not assigned


In [14]:
# Give every row whose neighbourhood is the 'Not assigned' placeholder the
# borough name of that same row, rather than a hard-coded value.
unassigned = df_tor['Neighborhood'] == 'Not assigned'
# Assign through .loc on the frame itself so the change is written to df_tor
# and not to a temporary column selection.
df_tor.loc[unassigned, 'Neighborhood'] = df_tor.loc[unassigned, 'Borough']


In [15]:
# Re-run the placeholder check; an empty result means no neighbourhood is left unassigned.
still_unassigned = df_tor['Neighborhood'].str.match('Not assigned')
df_tor.loc[still_unassigned]


Unnamed: 0,Postalcode,Borough,Neighborhood


In [16]:
 # Preview the first ten rows of the cleaned frame.
df_tor.head(n=10)

Unnamed: 0,Postalcode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


### Grouping the Neighborhoods together for the same Postalcode and Borough 

In [17]:
# Collect the neighbourhood names of each (Postalcode, Borough) pair ...
neighborhoods_by_code = df_tor.groupby(['Postalcode', 'Borough'])['Neighborhood']
# ... join them into one comma-separated string per pair ...
joined_neighborhoods = neighborhoods_by_code.apply(', '.join)
# ... and turn the group keys back into ordinary columns.
tor_df = joined_neighborhoods.reset_index()


In [18]:
# Preview the first ten rows of the grouped frame.
tor_df.head(n=10)


Unnamed: 0,Postalcode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


## Displaying the shape of the dataframe 

In [19]:
# (rows, columns) of the grouped frame.
tor_df.shape


(103, 3)