## Analyzing Canada's Neighbourhoods

### 1- Import the libraries

In [167]:
from bs4 import BeautifulSoup
import pandas as pd
import numpy
import requests

### 2- Scraping the web page 

In [168]:
# Download the Wikipedia "List of postal codes of Canada: M" page.
# The explicit timeout stops the cell from hanging forever when the server
# never responds (requests applies no timeout by default).
site = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)

#### A status code starting with 2 (e.g. 200) means the page was accessed successfully

In [169]:
# Show the HTTP status code of the response stored in `site`.
site.status_code

200

Create a BeautifulSoup object

In [170]:
# Parse the raw response bytes with Python's built-in HTML parser.
soup = BeautifulSoup(
    markup=site.content,
    features='html.parser',
)

Check the title of the web page

In [171]:
# Print the page's <title> element to confirm the expected page was parsed.
print(soup.title)

<title>List of postal codes of Canada: M - Wikipedia</title>


Find the table and build a list of its cells

In [363]:
# Take the first <table> element of the page and print its header cells.
table_html = soup.find('table')
print(table_html.find_all('th'))

[<th>Postcode</th>, <th>Borough</th>, <th>Neighbourhood
</th>]


In [364]:
# Flatten every <td> cell of the table into a single list of text values.
table = [cell.get_text() for cell in table_html.find_all('td')]

In [365]:
# Preview the first three cell values of the flattened table.
table[:3]

['M1A', 'Not assigned', 'Not assigned\n']

Create separate lists for the Postcode, Borough and Neighbourhood columns

In [366]:
# The cells are laid out three per row, so every third value starting at
# position 0 belongs to the Postcode column.
postcode = table[0::3]
postcode[:3]

['M1A', 'M2A', 'M3A']

In [367]:
# Every third value starting at position 1 belongs to the Borough column.
borough = table[1::3]
borough[:3]

['Not assigned', 'Not assigned', 'North York']

In [368]:
# Every third value starting at position 2 belongs to the Neighbourhood column.
# Strip the newline characters from each cell, then substitute the row's
# borough wherever the text 'Not assigned' appears.
neighbourhood = [
    cell.replace('\n', '').replace('Not assigned', district)
    for cell, district in zip(table[2::3], borough)
]
neighbourhood[:3]

['Not assigned', 'Not assigned', 'Parkwoods']

Create a dictionary

In [369]:
# Map each column name to its list of values.
dict_table = dict(
    Postcode=postcode,
    Borough=borough,
    Neighbourhood=neighbourhood,
)

### 3- Creating a Pandas DataFrame

In [370]:
# Build the frame from the column dictionary with an explicit column order.
df_table = pd.DataFrame(
    data=dict_table,
    columns=['Postcode', 'Borough', 'Neighbourhood'],
)

Drop the rows whose Borough is 'Not assigned'

In [371]:
# Index labels of the rows whose Borough equals 'Not assigned'.
not_assign = df_table.loc[df_table['Borough'].eq('Not assigned')].index

In [372]:
# Remove the rows whose labels are listed in `not_assign`.
# Rebinding the name (instead of inplace=True) together with errors='ignore'
# keeps this cell re-runnable: a second run no longer raises KeyError for
# labels that were already removed.
df_table = df_table.drop(not_assign, errors='ignore')

Group the neighbourhoods by Postcode and Borough, joining them into one comma-separated string

In [399]:
# For each (Postcode, Borough) pair, join its neighbourhood names into a
# single comma-separated string.
grouped = (
    df_table
    .groupby(['Postcode', 'Borough'])['Neighbourhood']
    .apply(', '.join)
)

In [400]:
# Turn the grouped Series into a single-column DataFrame.
df = grouped.to_frame()

Displaying the final result

In [406]:
# Display the grouped frame through its Styler object.
df.style

Unnamed: 0_level_0,Unnamed: 1_level_0,Neighbourhood
Postcode,Borough,Unnamed: 2_level_1
M1B,Scarborough,"Rouge, Malvern"
M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
M1E,Scarborough,"Guildwood, Morningside, West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae
M1J,Scarborough,Scarborough Village
M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
M1N,Scarborough,"Birch Cliff, Cliffside West"


In [407]:
# Show the (rows, columns) dimensions of the final frame.
df.shape

(103, 1)