In [1]:
import numpy as np  # vectorized numerical arrays

import pandas as pd  # tabular data analysis

# Lift the display limits so DataFrame output is never truncated
pd.options.display.max_columns = None
pd.options.display.max_rows = None


In [2]:
# Read the Toronto postal-code table from a local CSV file
csv_file = 'Toronto_postcode.csv'
df = pd.read_csv(filepath_or_buffer=csv_file)
df.head(n=5)

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


In [3]:
# Remove, in place, every row whose borough is 'Not assigned'
df.drop(index=df.index[df['Borough'] == 'Not assigned'], inplace=True)
df.shape

(103, 3)

In [4]:
# Count the distinct postal codes (a missing value, if any, counts once)
print('There are {} uniques postal codes.'.format(df['Postal code'].nunique(dropna=False)))

There are 103 uniques postal codes.


In [5]:
# Merge rows that share a postal code into a single row.
# groupby().agg() returns a NEW frame; the result must be assigned back to df,
# otherwise the merge is computed and thrown away and df is left unchanged.
def _join_names(names):
    """Join the non-missing names with ', '; return NaN when there are none."""
    present = names.dropna().astype(str)
    return ', '.join(present) if len(present) else np.nan


df = (
    df.groupby('Postal code', sort=False)  # sort=False keeps the original row order
      .agg(Borough=('Borough', 'first'),   # take one borough per code rather than repeating its name
           Neighborhood=('Neighborhood', _join_names))
      .reset_index()                       # 'Postal code' back as the first column, fresh 0..n-1 index
)
df.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park / Harbourfront
3,M6A,North York,Lawrence Manor / Lawrence Heights
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


In [6]:
# Separate neighborhoods with a comma instead of a slash
df['Neighborhood'] = df['Neighborhood'].str.replace(pat='/', repl=',')
# Rename the columns (names are assigned by position)
df.columns = ['PostalCode', 'Borough', 'Neighborhood']
df.head(n=5)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government"


In [7]:
# Give every row without a neighborhood the name of its borough.
# A missing neighborhood is matched both as the literal text 'Not assigned' and as NaN
# (blank CSV fields are loaded as NaN by pandas' default settings).
# A boolean mask is used instead of a positional loop, so this works for any index,
# not only a 0..n-1 RangeIndex.
unassigned = df['Neighborhood'].isna() | (df['Neighborhood'] == 'Not assigned')
df.loc[unassigned, 'Neighborhood'] = df.loc[unassigned, 'Borough']

In [8]:
# Report how many rows the data frame holds
print('There are {} lines in the data frame.'.format(len(df.index)))

There are 103 lines in the data frame.


In [9]:
## Part 2: Use google Maps Geocoding to get the latitude and longitude values of each postal code of Toronto city.##
########### google geocoder never returns any results, so skip this part  ##############
 !conda install -c conda-forge geocoder --yes 
 import geocoder # import geocoder

for i in range(len(df)):
    # get current postal code
    postal_code=df.loc[i,'PostalCode']
    print(postal_code)
    # initialize your variable to None
    lat_lng_coords = None

    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
        lat_lng_coords = g.latlng   
    latitude = lat_lng_coords[0]
    longitude = lat_lng_coords[1]

    df.loc[i,'Latitude'] = latitude
    df.loc[i,'Longitude'] = longitude
    print('{} : {}, {}'.format(postal_code,latitude,longitude))

Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs:
    - geocoder


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    chardet-3.0.4              |py36h9f0ad1d_1006         188 KB  conda-forge
    click-7.1.1                |     pyh8c360ce_0          64 KB  conda-forge
    cryptography-2.8           |   py36h45558ae_2         628 KB  conda-forge
    future-0.18.2              |   py36h9f0ad1d_1         714 KB  conda-forge
    geocoder-1.38.1            |             py_1          53 KB  conda-forge
    pysocks-1.7.1              |   py36h9f0ad1d_1          27 KB  conda-forge
    ratelim-0.1.6              |             py_2           6 KB  conda-forge
    urllib3-1.25.8             |   py36h9f0ad1d_1         160 KB  conda-forge
    ---------------------

In [9]:
## Read latitude and longitude from a local CSV file, since the google geocoder never returns results
csv_geo_file = 'Toronto_Geospatial_Coordinates.csv'
df_geo = pd.read_csv(filepath_or_buffer=csv_geo_file)
df_geo.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [12]:
# Attach the coordinates of each postal code from df_geo.
# A keyed lookup replaces the nested row-by-row scan: it does one pass over each table
# and does not depend on either frame having a 0..n-1 index.
# drop_duplicates keeps the first row per code, matching "stop at the first match".
# Postal codes absent from df_geo end up with NaN coordinates.
geo_by_code = df_geo.drop_duplicates(subset='Postal Code').set_index('Postal Code')
df['Latitude'] = df['PostalCode'].map(geo_by_code['Latitude'])
df['Longitude'] = df['PostalCode'].map(geo_by_code['Longitude'])
df.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Malvern , Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill , Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
