### Reading the Toronto FSA table from the postal codes of Canada wiki and assigning it to a dataframe

In [126]:
import pandas as pd

headings = ['Postcode', 'Borough', 'Neighbourhood']
tables_in_wiki = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', header=0)

toronto_fsa_table = []
for each_table in tables_in_wiki:
    current_headings = each_table.columns.values[:3]
    if current_headings.tolist()==headings:
        toronto_fsa_table = each_table

print("Shape of Toronto FSA dataframe is " + str(toronto_fsa_table.shape))
toronto_fsa_table.head()

Shape of Toronto FSA dataframe is (288, 3)


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


### Removing the cells from Toronto FSA dataframe with a Borough that is `Not assigned`

In [127]:
toronto_fsa_table = toronto_fsa_table[toronto_fsa_table['Borough'] != 'Not assigned']
print("Shape of dataframe after droppping cells with Borough = 'Not assigned' is " + str(toronto_fsa_table.shape))
toronto_fsa_table.head()

Shape of dataframe after droppping cells with Borough = 'Not assigned' is (211, 3)


Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


## Combining multiple rows having same postal code area into one row with the neighborhoods separated by comma

In [137]:
unique_postal_code = set(list(toronto_fsa_table['Postcode']))
dict_of_postal_code = { i : ['',''] for i in unique_postal_code }

# Creating a dictonary (hash_map) in format {"Postalcode":["Borough","Neighbourhood_1,Neighbourhood_2"]}
for _,row in toronto_fsa_table.iterrows():
    if dict_of_postal_code[row['Postcode']][0] == '':
        dict_of_postal_code[row['Postcode']][0] = row['Borough']
    if dict_of_postal_code[row['Postcode']][1] == '':
        dict_of_postal_code[row['Postcode']][1] = row['Neighbourhood']
    else:
        dict_of_postal_code[row['Postcode']][1] += (", " + row['Neighbourhood'])

# Converting {"Postalcode":["Borough","Neighbourhood_1,Neighbourhood_2"]} 
#               to
# [["Postalcode_1","Borough_1","Neighbourhood_1,Neighbourhood_2"],
#  ["Postalcode_2","Borough_3","Neighbourhood_1,Neighbourhood_2"],]
list_of_rows = []        
for key, value in dict_of_postal_code.items():
    temp_row = [key,value[0],value[1]]
    list_of_rows.append(temp_row)

# Converting list of rows into dataframe
final_dataframe = pd.DataFrame(listOfUniquePostalCode, columns = ['Postcode','Borough','Neighbourhood']) 
print("Shape of dataframe after merging the rows with same postal code: " + str(final_dataframe.shape))
final_dataframe.head()

Shape of dataframe after merging the rows with same postal code: (103, 3)


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M7R,Mississauga,Canada Post Gateway Processing Centre
1,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv..."
2,M5V,Downtown Toronto,"CN Tower, Bathurst Quay, Island airport, Harbo..."
3,M4B,East York,"Woodbine Gardens, Parkview Hill"
4,M2R,North York,Willowdale West


## Setting neighborhood = borough, when neighborhood = `Not assigned`

In [138]:
for _,row in final_dataframe.iterrows():
    if row['Neighbourhood'] == 'Not assigned':
        row['Neighbourhood'] = row['Borough']
        print("The Postal_Code = {} with row having neighborhood = 'Not assigned'".format(row["Postcode"]))

The Postal_Code = M7A with row having neighborhood = 'Not assigned'


## Printing the shape of final dataframe

In [139]:
final_dataframe.shape

(103, 3)