# Exploring the Neighborhoods of Toronto with Pandas

This notebook contains the code to read and process the Toronto neighborhoods table from https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M.

In [1]:
import pandas as pd # for reading and processing tabular data

## Read the Toronto neighborhoods table from Wikipedia

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# read_html returns one DataFrame per HTML table found on the page;
# the first one is taken as the postal-code table, with its first row used as header.
wiki_tables = pd.read_html(url, header=0)
df = wiki_tables[0]
df.columns = ['PostalCode', 'Borough', 'Neighborhood']  # replace the scraped headers with fixed names
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [3]:
# Keep only the rows whose 'Borough' is something other than 'Not assigned'
df = df.loc[df['Borough'].ne('Not assigned')]
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


## Merge neighborhoods of the same postal code into a single row

In [4]:
# Collapse rows that share a (PostalCode, Borough) pair into one row whose
# 'Neighborhood' is the comma-separated list of the group's neighborhoods.
df = (
    df.groupby(by=['PostalCode', 'Borough'])
      .agg(list)  # collect each group's values into a Python list
      .assign(Neighborhood=lambda grouped: grouped['Neighborhood'].str.join(', '))
      .reset_index()  # turn the group keys back into ordinary columns
)
df.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


## Find neighborhoods that are 'Not assigned' and replace them with the borough name

Neighborhoods with a 'Not assigned' value are replaced with their borough's name.

In [5]:
# Replace 'Not assigned' neighborhoods with the name of their borough.
# A boolean mask combined with label-based .loc is used so the assignment does not
# depend on the index being a default RangeIndex or on the positional order of the columns.
na_mask = df['Neighborhood'].str.contains(
    'Not assigned',
    regex=False,  # match the text literally rather than as a regular expression
    na=False,     # a missing neighborhood value counts as "no match" instead of breaking the mask
)
na_indices = df.index[na_mask].tolist()  # index labels of the affected rows
df.loc[na_mask, 'Neighborhood'] = df.loc[na_mask, 'Borough']

df.loc[na_indices]  # display the rows that were updated

Unnamed: 0,PostalCode,Borough,Neighborhood
85,M7A,Queen's Park,Queen's Park


In [6]:
# Report how many rows (postal codes) the final DataFrame holds
print('The Toronto neighborhoods table consists of %i rows' % len(df.index))

The Toronto neighborhoods table consists of 103 rows
