In [1]:
import pandas as pd 
import numpy as np

## Part I: Preprocessing

In [2]:
# Load the Toronto postal-code table from a local CSV export.
# Source page the data was copied from:
# https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M
toronto_raw = pd.read_csv(filepath_or_buffer="toronto.csv")
toronto_raw.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [3]:
# Shape of the raw Toronto table as a (rows, columns) tuple
toronto_raw.shape

(180, 3)

In [4]:
# Check the dtype pandas inferred for each column of the raw table
toronto_raw.dtypes

Postal Code     object
Borough         object
Neighborhood    object
dtype: object

In [5]:
# List the distinct borough labels to spot placeholder values
toronto_raw["Borough"].unique()

array(['Not assigned', 'North York', 'Downtown Toronto', 'Etobicoke',
       'Scarborough', 'East York', 'York', 'East Toronto', 'West Toronto',
       'Central Toronto', 'Mississauga'], dtype=object)

In [6]:
# Drop the rows whose Borough is the literal placeholder "Not assigned".
# An exact comparison is used rather than `str.contains`, which performs a
# regex substring match (it would also drop any borough that merely contains
# that text) and returns NaN for missing values, which breaks the `~` negation.
# Note: rows with a missing (NaN) Borough are kept by this comparison.
toronto = toronto_raw[toronto_raw["Borough"] != "Not assigned"]
toronto

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [7]:
# Collapse the table to one row per postal code (the postal code becomes the index).
# `.sum()` on string columns concatenates values with no separator, so a postal
# code listed more than once would yield e.g. "North YorkNorth York" and fused
# neighborhood names. Aggregate each column explicitly instead:
#   - Borough: keep the first value of the group
#   - Neighborhood: join all (non-missing) names with ", "
toronto_grouped = toronto.groupby("Postal Code").agg(
    {
        "Borough": "first",
        "Neighborhood": lambda names: ", ".join(names.dropna()),
    }
)

In [8]:
# Display the grouped table; "Postal Code" is now the index
toronto_grouped

Unnamed: 0_level_0,Borough,Neighborhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Malvern, Rouge"
M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
M1E,Scarborough,"Guildwood, Morningside, West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae
...,...,...
M9N,York,Weston
M9P,Etobicoke,Westmount
M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


## Part II - Adding Geographical Information | Latitude and Longitude
<b>Tasks</b>: 
<ol>
    <li>Download Geospatial Coordinates Data</li>
    <li>Merge Latitude and Longitude with the Neighborhoods Data</li>
</ol>

In [9]:
# Read the geospatial coordinates from the local CSV file
geocoordinates = pd.read_csv(filepath_or_buffer="Geospatial_Coordinates.csv")

In [12]:
# Attach the geospatial coordinates to the grouped neighborhoods by postal code.
# The left join keeps every row of toronto_grouped even when no coordinates match.
toronto_df = toronto_grouped.merge(geocoordinates, how="left", on="Postal Code")

In [18]:
# Display the merged table: borough, neighborhood, latitude and longitude per postal code
toronto_df

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437
