# 1. Importing libraries

In [66]:
import numpy as np
import pandas as pd

# 2. Importing Wikipedia table to DataFrame
### Found how to do it here:
#### https://stackoverflow.com/questions/15724034/how-to-convert-wikipedia-wikitable-to-python-pandas-dataframe
#### https://stackoverflow.com/questions/39120853/converting-an-html-table-in-pandas-dataframe

In [69]:
# Page holding the table of Toronto ("M") postal codes.
wiki_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# read_html returns a list of every matching <table>; restrict the match to
# "wikitable" elements and skip the first row, so the columns come back as
# plain integer positions (renamed in a later cell).
wiki_tables = pd.read_html(wiki_url, attrs={"class": "wikitable"}, skiprows=1)
df = wiki_tables[0]
df

Unnamed: 0,0,1,2
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


## 3. Renaming columns

In [70]:
# The frame currently has positional integer column labels (0, 1, 2);
# map each one to a descriptive name.
column_names = {0: 'PostalCode', 1: 'Borough', 2: 'Neighborhood'}
df = df.rename(columns=column_names)
df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


## 4. Removing rows with 'Not assigned' borough

In [71]:
# Keep only the rows whose borough has actually been assigned.
has_borough = df['Borough'] != 'Not assigned'
df = df[has_borough]
df

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


## 5. Grouping neighborhoods by postal codes

In [72]:
# One output row per (postal code, borough) pair; the neighborhoods that
# share a pair are collapsed into a single comma-separated string.
group_keys = ['PostalCode', 'Borough']
by_postal_code = df.groupby(group_keys)
df = by_postal_code[['Neighborhood']].agg(', '.join)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Neighborhood
PostalCode,Borough,Unnamed: 2_level_1
M1B,Scarborough,"Rouge, Malvern"
M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
M1E,Scarborough,"Guildwood, Morningside, West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae
M1J,Scarborough,Scarborough Village
M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
M1N,Scarborough,"Birch Cliff, Cliffside West"


## 6. Searching for 'Not assigned' neighborhoods that have an assigned borough

In [73]:
# Rows that still carry the placeholder neighborhood after grouping.
is_unassigned = df['Neighborhood'] == 'Not assigned'
df.loc[is_unassigned]

Unnamed: 0_level_0,Unnamed: 1_level_0,Neighborhood
PostalCode,Borough,Unnamed: 2_level_1
M7A,Queen's Park,Not assigned


## 7. Replacing not assigned neighborhood with corresponding borough name

In [77]:
# Rows whose neighborhood is still the 'Not assigned' placeholder. The mask is
# captured before the assignment so the same rows can be displayed afterwards.
unassigned = (df['Neighborhood'] == 'Not assigned').values

# Borough is an index level of the grouped frame; take each row's own borough
# rather than a hard-coded name, so every affected row gets its matching value.
borough_names = df.index.get_level_values('Borough')

# Target only the Neighborhood column instead of overwriting whole rows.
df.loc[unassigned, 'Neighborhood'] = borough_names[unassigned].values

# Show the rows that were just filled in.
df[unassigned]

Unnamed: 0_level_0,Unnamed: 1_level_0,Neighborhood
PostalCode,Borough,Unnamed: 2_level_1
M7A,Queen's Park,Queen's Park


## 8. Printing shape of dataframe

In [78]:
# Final size of the cleaned frame as (rows, columns).
n_rows, n_cols = df.shape
(n_rows, n_cols)

(103, 1)