# 1 Neighborhoods in Toronto

In [31]:
# Import required libraries
import urllib.request

import pandas as pd


In [33]:
# Scrape the Wikipedia list of Toronto ("M") postal codes.
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# read_html parses the page's tables into a list of DataFrames; the first
# row of each table is used as its header.
data = pd.read_html(url, header=0)

# Take the first table and use the column name "Postal Code" instead of
# "Postcode".
df = data[0].rename(columns={"Postcode": "Postal Code"})

print('shape:', df.shape)
df.head()


shape: (287, 3)


Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [34]:
# Discard rows whose borough is 'Not assigned': the placeholder is first
# turned into a missing value, then every row containing a missing value
# is dropped. The original row labels are kept (no index reset).
unassigned_borough = {'Borough': {'Not assigned': None}}
df = df.replace(unassigned_borough).dropna()

print('shape:', df.shape)
df.head()


shape: (210, 3)


Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


In [35]:
# Fix 'Not assigned' in the Neighborhood column by falling back to the
# row's Borough.
#
# The row object yielded by DataFrame.iterrows() may be a copy, so assigning
# into it is not guaranteed to change `df`. The affected row labels are
# therefore collected up front and each value is written through df.loc,
# which always updates the frame itself.
unassigned_labels = df.index[df['Neighborhood'] == "Not assigned"]
for index in unassigned_labels:
    print(df.loc[index])  # row before the fix
    df.loc[index, 'Neighborhood'] = df.loc[index, 'Borough']
    print(df.loc[index])  # row after the fix, re-read from df to confirm the write


Postal Code              M7A
Borough         Queen's Park
Neighborhood    Not assigned
Name: 7, dtype: object
Postal Code              M7A
Borough         Queen's Park
Neighborhood    Queen's Park
Name: 7, dtype: object


In [36]:
# Collapse rows sharing the same postal code and borough into a single row
# whose Neighborhood is the comma-separated list of the group's values.
group_keys = ['Postal Code', 'Borough']
neighborhoods_by_code = df.groupby(group_keys)['Neighborhood']
df_grp = neighborhoods_by_code.apply(lambda names: ', '.join(names)).reset_index()

df_grp.head()


Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [37]:
# Display the (rows, columns) shape of the grouped frame as the cell output.
df_grp.shape


(103, 3)

# 2 Geocode the data

In [26]:
# Get the geographical coordinates data.
# The file is fetched with the standard library rather than `!wget -q`:
# a shell command that fails does not raise, so the success message below
# would be printed even when nothing was downloaded. urlretrieve raises on
# network/HTTP errors, so the message is only reached after a successful
# download, and no external `wget` binary is required.
urllib.request.urlretrieve('https://cocl.us/Geospatial_data', 'Geospatial_Coordinates.csv')
print('Data downloaded!')

Data downloaded!


In [38]:
# Load the downloaded coordinates CSV into a dataframe.
coordinates_csv = 'Geospatial_Coordinates.csv'
geocodes = pd.read_csv(coordinates_csv)

geocodes.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [40]:
# Attach latitude/longitude to each grouped postal-code row by joining on
# the shared 'Postal Code' column. This is an inner join, so only postal
# codes present in both frames are kept.
toronto_geo = df_grp.merge(geocodes, how='inner', on='Postal Code')

toronto_geo.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


# 3 Analysis