## Get the latitude and longitude of Toronto's neighbourhoods

In [1]:
import pandas as pd
from opencage.geocoder import OpenCageGeocode
from pprint import pprint
import os

In [2]:
# Read the OpenCage API key from the environment so the secret is never stored
# in the notebook. Export OPENCAGE_KEY before starting the kernel; a missing
# variable raises KeyError here rather than failing later on the first request.
# NOTE(review): the key that was previously committed in this cell should be
# revoked/rotated, since it remains in the file history.
key = os.environ['OPENCAGE_KEY']

# Single client instance reused by every geocoding cell below.
geocoder = OpenCageGeocode(key)

#### Test that reverse geocoding works

In [3]:
# Smoke test: reverse-geocode one fixed (lat, lng) pair and pretty-print the
# raw response returned by the client.
results = geocoder.reverse_geocode(44.8303087, -0.5761911)
pprint(results)

[{'annotations': {'DMS': {'lat': "44° 49' 49.11132'' N",
                          'lng': "0° 34' 34.28796'' E"},
                  'MGRS': '30TXQ9159666958',
                  'Maidenhead': 'IN94rt09ug',
                  'Mercator': {'x': -64141.3, 'y': 5564710.789},
                  'OSM': {'edit_url': 'https://www.openstreetmap.org/edit?node=2754853042#map=17/44.83031/-0.57619',
                          'url': 'https://www.openstreetmap.org/?mlat=44.83031&mlon=-0.57619#map=17/44.83031/-0.57619'},
                  'UN_M49': {'regions': {'EUROPE': '150',
                                         'FR': '250',
                                         'WESTERN_EUROPE': '155',
                                         'WORLD': '001'},
                             'statistical_groupings': ['MEDC']},
                  'callingcode': 33,
                  'currency': {'alternate_symbols': [],
                               'decimal_mark': ',',
                               'html_entity': 

#### Test that geocoding works 

In [4]:
# Smoke test: forward-geocode one known postcode and print
# latitude;longitude;country code;timezone name from the first match.
postal_code = 'M5G'
results = geocoder.geocode('{}, Toronto, Ontario'.format(postal_code))

first = results[0]
fields = (
    first['geometry']['lat'],
    first['geometry']['lng'],
    first['components']['country_code'],
    first['annotations']['timezone']['name'],
)
print(u'%f;%f;%s;%s' % fields)


43.656400;-79.386000;ca;America/Toronto


#### Read the data containing the postcodes, borough and neighbourhood information for Toronto

In [5]:
# Load the semicolon-delimited postcode / borough / neighbourhood table and
# preview the first rows.
df = pd.read_csv('./data/toronto_neighbourhoods.csv', delimiter=';')
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


Define a function that returns the coordinates of a location given its postal code, knowing that we are in the city of Toronto.

In [6]:
def get_latlng(row, geocoder):
    """Geocode a row's postal code within Toronto and return ``'lat;lng'``.

    Parameters
    ----------
    row
        Object exposing a ``Postcode`` attribute (e.g. a DataFrame row passed
        by ``DataFrame.apply(..., axis=1)``).
    geocoder
        Object exposing ``geocode(query)`` that returns a sequence of result
        dicts, each carrying ``['geometry']['lat']`` and ``['geometry']['lng']``.

    Returns
    -------
    str
        ``'<lat>;<lng>'`` taken from the first result, or ``'nan;nan'`` when
        the geocoder returns no result, so one unresolved postcode does not
        abort a whole batch and the value still splits on ``';'``.
    """
    query = '{}, Toronto, Ontario'.format(row.Postcode)
    results = geocoder.geocode(query)

    # An empty (or missing) result set means the postcode could not be
    # resolved; report it as NaN coordinates instead of raising IndexError.
    if not results:
        return 'nan;nan'

    geometry = results[0]['geometry']
    return '{};{}'.format(geometry['lat'], geometry['lng'])

In [7]:
# Geocode every row (get_latlng issues one geocoder request per row) and keep
# the resulting 'lat;lng' string in a new column.
# NOTE(review): in the captured output, distinct postcodes (M1B, M1C) received
# identical coordinates - possibly a city-level fallback match; worth verifying.
df['latlng'] = df.apply(lambda row: get_latlng(row, geocoder), axis=1)
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,latlng
0,M1B,Scarborough,"Rouge,Malvern",43.653963;-79.387207
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.653963;-79.387207
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.7678;-79.1866
3,M1G,Scarborough,Woburn,43.7657168;-79.2218984
4,M1H,Scarborough,Cedarbrae,43.7686;-79.2389


#### Split the latitude and longitude into separate columns

In [8]:
# Break each 'lat;lng' string on ';' - first piece is the latitude, second the
# longitude. Both columns hold the pieces as strings.
df['Latitude'] = df['latlng'].map(lambda pair: pair.split(';')[0])
df['Longitude'] = df['latlng'].map(lambda pair: pair.split(';')[1])

In [9]:
# Preview the first rows to check the new Latitude / Longitude columns.
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,latlng,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.653963;-79.387207,43.653963,-79.387207
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.653963;-79.387207,43.653963,-79.387207
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.7678;-79.1866,43.7678,-79.1866
3,M1G,Scarborough,Woburn,43.7657168;-79.2218984,43.7657168,-79.2218984
4,M1H,Scarborough,Cedarbrae,43.7686;-79.2389,43.7686,-79.2389


In [10]:
# Spot-check: the M5G row should match the coordinates printed by the
# single-postcode geocoding test earlier in the notebook.
df.loc[df['Postcode'] == 'M5G'].head()

Unnamed: 0,Postcode,Borough,Neighbourhood,latlng,Latitude,Longitude
57,M5G,Downtown Toronto,Central Bay Street,43.6564;-79.386,43.6564,-79.386


# Backup for further use

In [11]:
# Persist the enriched table as a semicolon-delimited CSV without the index.
# NOTE(review): this file name is spelled "neighborhoods" while the input file
# is "neighbourhoods" - confirm the separate output file is intentional.
df.to_csv('./data/toronto_neighborhoods.csv', sep=';', index=False)