<h1>Adding latitude & longitude to Canada Postal Code DataFrame</h1>

Import Required Libraries

In [1]:
import pandas as pd
import geocoder

<ol>
    <li>Read Canada's postal code table from the URL.</li>
    <li>Remove any rows with an unassigned Borough.</li>
    <li>Replace any unassigned Neighborhood with its Borough name.</li>
    <li>Merge rows that share a postal code, joining their Neighborhoods with ",".</li>
 </ol>

In [2]:
# Read Canada's postal code table from the URL (first table on the page).
df_Canada = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')[0]
# Normalise the column names; keys that are not present are simply ignored by rename().
df_Canada = df_Canada.rename(columns={"Neighbourhood": "Neighborhood", "Community": "Borough"})

# Remove rows whose Borough is unassigned.
# reset_index() returns a new frame, so its result must be assigned back for
# the index to actually be renumbered.
df_Canada = df_Canada[df_Canada.Borough != 'Not assigned'].reset_index(drop=True)

# Replace an unassigned Neighborhood with the Borough name of the same row.
# Done column-wise by name rather than with a fixed-length positional loop, so
# it works for any number of rows and does not depend on column order.
unassigned_neighborhood = df_Canada['Neighborhood'] == 'Not assigned'
df_Canada['Neighborhood'] = df_Canada['Neighborhood'].mask(
    unassigned_neighborhood, df_Canada['Borough']
)

# Any repeated postal code is merged and its Neighborhoods are combined with a ","
df_Canada = (
    df_Canada
    .groupby(['Postal Code', 'Borough'])['Neighborhood']
    .apply(lambda x: ','.join(x.astype(str)))
    .reset_index()
)
df_Canada

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


Option 1: Add latitude and longitude to the Canada postal code DataFrame using geocoder

In [3]:
# Look up the coordinates of every postal code through geocoder's ArcGIS
# provider and store them in a copy of the postal code frame (df_Canada2),
# leaving df_Canada itself untouched.

# Upper bound on lookups per postal code. Without a bound, a code that the
# service never resolves would keep the cell retrying forever.
MAX_GEOCODE_ATTEMPTS = 10

lat_long = None
lat = []
long = []
df_Canada2 = df_Canada.copy()
postal_code = df_Canada2['Postal Code'].tolist()
for code in postal_code:
    # Retry while the lookup yields no coordinates (g.latlng is None).
    for _ in range(MAX_GEOCODE_ATTEMPTS):
        g = geocoder.arcgis('{},Toronto,Ontario'.format(code))
        lat_long = g.latlng
        if lat_long is not None:
            break
    if lat_long is None:
        # Fail loudly instead of hanging or silently recording a missing value.
        raise RuntimeError(
            'No coordinates returned for postal code {} after {} attempts'.format(
                code, MAX_GEOCODE_ATTEMPTS
            )
        )
    lat.append(lat_long[0])
    long.append(lat_long[1])
    # Reset so the next postal code starts without a stale result.
    lat_long = None
df_Canada2["Latitude"] = lat
df_Canada2["Longitude"] = long

Option 2: Read the latitude and longitude CSV file provided by Coursera, then merge it with the Canada postal code DataFrame

In [4]:
# Load the coordinates table from the CSV behind the short link.
df_geocode = pd.read_csv("https://cocl.us/Geospatial_data")

# Attach the coordinate columns to the postal code frame by matching rows on
# the shared 'Postal Code' column (merge's default join type is used).
df_Canada = df_Canada.merge(df_geocode, on='Postal Code')

# Display the merged frame as the cell output.
df_Canada

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437
