# Step 1: Import libraries


In [109]:
from bs4 import BeautifulSoup as bs
import requests
import pandas as pd
import numpy as np

## Step 2: Scrape data using BeautifulSoup and insert into pandas DF

In [110]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# Fetch the page. The timeout keeps the cell from hanging on a stalled
# connection, and raise_for_status() turns an HTTP error into an exception
# here instead of letting an error page be parsed as if it were the article.
response = requests.get(url, timeout=30)
response.raise_for_status()
source = response.text

# Parse the HTML with the lxml parser.
soup = bs(source, 'lxml')

# The table to extract is the first one whose class is 'wikitable sortable'.
table = soup.find('table', class_='wikitable sortable')
if table is None:
    raise ValueError("No 'wikitable sortable' table found at " + url)

# Collect one [postcode, borough, neighborhood] record per data row.
# Rows that do not have exactly three <td> cells are skipped.
records = []
for row in table.find_all('tr'):
    cells = row.find_all('td')
    if len(cells) == 3:
        # get_text() joins every text node in the cell, so a cell that mixes
        # links and plain text is kept whole, and an empty cell yields ''
        # rather than None.
        records.append([cell.get_text().strip() for cell in cells])

# Build the DataFrame directly from the records.
Canada_Postal_Codes = pd.DataFrame(records,
                                   columns=['Postcode', 'Borough', 'Neighborhood'])

## Step 3: Drop rows with an unassigned borough, then group neighborhoods by postcode

In [111]:
# Keep only the rows whose Borough is something other than 'Not assigned'.
has_borough = Canada_Postal_Codes['Borough'].ne('Not assigned')
Canada_Postal_Codes = Canada_Postal_Codes.loc[has_borough]

In [112]:
# Collapse rows that share a (Postcode, Borough) pair into a single row whose
# Neighborhood is the ', '-joined list of that pair's neighborhoods.
neighborhoods_by_code = Canada_Postal_Codes.groupby(['Postcode', 'Borough'])['Neighborhood']
Canada_Postal_Codes = neighborhoods_by_code.apply(', '.join).reset_index()

## Step 4: Use the Borough as the Neighborhood where no neighborhood is assigned

In [113]:
# Wherever the Neighborhood is 'Not assigned', substitute that row's Borough;
# every other row keeps its existing Neighborhood.
is_unassigned = Canada_Postal_Codes['Neighborhood'] == 'Not assigned'
New_Neighborhood = np.where(is_unassigned,
                            Canada_Postal_Codes['Borough'],
                            Canada_Postal_Codes['Neighborhood'])
Canada_Postal_Codes['New Neighborhood'] = New_Neighborhood

In [114]:
# Remove the original Neighborhood column now that 'New Neighborhood' replaces it.
# The guard makes this cell safe to re-run: once 'New Neighborhood' has been
# renamed to 'Neighborhood', an unconditional drop would delete the finished column.
if 'New Neighborhood' in Canada_Postal_Codes.columns:
    Canada_Postal_Codes = Canada_Postal_Codes.drop(columns='Neighborhood')

In [115]:
# Give the replacement column its final name, 'Neighborhood'.
Canada_Postal_Codes = Canada_Postal_Codes.rename({'New Neighborhood': 'Neighborhood'}, axis='columns')

## Step 5: Show DF head

In [119]:
# Preview the first five rows of the table.
Canada_Postal_Codes.head(n=5)

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


## Step 6: Printing DF shape

In [117]:
# Report the frame's dimensions as a (rows, columns) tuple.
Canada_Postal_Codes.shape

(103, 3)