# Toronto Project (Part 2 of 3)

*Stefano Magarotto*

The second part consists of obtaining the latitude and longitude coordinates of each neighborhood so that we can use the Foursquare location data.

### Step 0. Recreate Part 1

In [2]:
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import json # library to handle JSON files
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium # map rendering library
# Scrape the Toronto postal-code table from Wikipedia (first table parsed from the page).
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
df = pd.read_html(url, header=0)[0]
df = df.rename(columns={"Postcode": "PostalCode", "Neighbourhood": "Neighborhood"})

# Keep only rows with an assigned borough. The explicit .copy() makes the
# filtered frame independent, so the column assignment below does not write
# to a slice of the original (which raises SettingWithCopyWarning).
df = df[df['Borough'] != 'Not assigned'].copy()

# Merge all neighborhoods that share a postal code into one comma-separated
# string, then collapse the now-identical rows into a single row per code.
df["Neighborhood"] = df.groupby("PostalCode")["Neighborhood"].transform(', '.join)
df = df.drop_duplicates()
if df.index.name != 'PostalCode':
    df = df.set_index('PostalCode')

# A neighborhood that is still "Not assigned" takes the name of its borough.
# The result is assigned back explicitly: an in-place replace on the selected
# column is a chained operation that does not update `df` under pandas
# Copy-on-Write, and mask() aligns the Borough values row by row.
df['Neighborhood'] = df['Neighborhood'].mask(
    df['Neighborhood'] == 'Not assigned', df['Borough']
)
df.head()

Unnamed: 0_level_0,Borough,Neighborhood
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,"Harbourfront, Regent Park"
M6A,North York,"Lawrence Heights, Lawrence Manor"
M7A,Queen's Park,Queen's Park


### Step 1. Getting the latitude and longitude from geocoder, even though it is unreliable

Geocoder sometimes returns None instead of the latitude and longitude, so we need a loop that retries until we receive the coordinates. If it proves too unreliable, we can use a prepared CSV file instead (Geospatial_Coordinates.csv).

In [4]:
# Option1
import geocoder
def get_geocoder(postal_code_from_df, max_attempts=10):
    """Return the (latitude, longitude) of a Toronto postal code.

    Queries ``geocoder.google`` with "<postal code>, Toronto, Ontario" and
    retries while the response carries no coordinates (``latlng`` is None).

    Parameters
    ----------
    postal_code_from_df
        Postal code to look up; it is formatted into the query string.
    max_attempts : int, optional
        Maximum number of lookups before giving up (default 10). The bound
        prevents an endless loop when the service never returns coordinates.

    Returns
    -------
    tuple
        ``(latitude, longitude)``, the first two entries of ``latlng``.

    Raises
    ------
    RuntimeError
        If no coordinates were returned after ``max_attempts`` lookups.
    """
    # The query does not change between attempts, so build it once.
    query = '{}, Toronto, Ontario'.format(postal_code_from_df)
    for _ in range(max_attempts):
        lat_lng_coords = geocoder.google(query).latlng
        if lat_lng_coords is not None:
            latitude = lat_lng_coords[0]
            longitude = lat_lng_coords[1]
            return latitude, longitude
    raise RuntimeError(
        'No coordinates returned for {!r} after {} attempts'.format(query, max_attempts)
    )

# Example usage (PostalCode is the index of df, and a single pass over it is enough):
# df['Latitude'], df['Longitude'] = zip(*map(get_geocoder, df.index))
        
#Option 2
# Load the prepared coordinates file, replacing its header row with our own
# column names, and index the result by postal code.
df_ll = pd.read_csv(
    "Geospatial_Coordinates.csv",
    header=0,
    names=["PostalCode", "Latitude", "Longitude"],
).set_index("PostalCode")

df_ll.head()

Unnamed: 0_level_0,Latitude,Longitude
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,43.806686,-79.194353
M1C,43.784535,-79.160497
M1E,43.763573,-79.188711
M1G,43.770992,-79.216917
M1H,43.773136,-79.239476


### Step 2. Requested results

In [5]:
# Attach the coordinates to each postal code; both frames are indexed by
# PostalCode, so the join aligns on the index. Any coordinate columns left
# over from a previous run of this cell are dropped first, because joining
# again onto a frame that already has Latitude/Longitude raises on the
# overlapping column names.
df = df.drop(columns=list(df_ll.columns), errors="ignore").join(df_ll, how="left")
df.head()

Unnamed: 0_level_0,Borough,Neighborhood,Latitude,Longitude
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M3A,North York,Parkwoods,43.753259,-79.329656
M4A,North York,Victoria Village,43.725882,-79.315572
M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
M7A,Queen's Park,Queen's Park,43.662301,-79.389494
