# Segmenting and Clustering Neighborhoods in Toronto pt. 2

Install required packages.

In [1]:
%%capture
!pip install -r requirements.txt

Load packages

In [2]:
import pandas as pd
import geocoder
import pickle
import requests
import os

Set constants

In [3]:
# Pickled postal-code table: read as input and overwritten with the geocoded result.
POST_CODES_FILE = 'out/postal_codes.pickle'
# Pickle file that caches the geocoding responses between runs.
GEOCODE_CACHE = 'data/geodata.pickle'

Define a function that uses the `geocoder` package to retrieve the geocoding response from the Google Maps API and checks that it contains the expected data.

In [4]:
def geocode(post_code):
    """Geocode a Toronto post code and return the response as GeoJSON.

    The request is sent through ``geocoder.google`` using the module-level
    ``session`` object, so this function must be called while that name is
    bound (the geocoding loop binds it with ``requests.Session()``).

    Parameters
    ----------
    post_code : str
        Post code to look up; it is sent as ``'<post_code>, Toronto, Ontario'``
        restricted to country ``CA``.

    Returns
    -------
    dict or None
        The GeoJSON response when its first feature carries both a ``lat`` and
        a ``lng`` property, otherwise ``None``.
    """
    g = geocoder.google(f'{post_code}, Toronto, Ontario', components='country:CA', session=session)
    geojson = g.geojson
    if not geojson:
        return None

    features = geojson.get('features') or []
    if not features:
        return None

    # Callers read both coordinates, so a response missing either one is unusable.
    properties = features[0].get('properties') or {}
    if 'lat' in properties and 'lng' in properties:
        return geojson
    return None

Create a list to store the geocoding results. Load previously cached results if the cache file exists.

In [5]:
# Start with an empty list and replace it with the pickled responses from an
# earlier run when the cache file is present.
# NOTE: pickle.load can execute arbitrary code; only load a cache file you trust.
cache_present = os.path.isfile(GEOCODE_CACHE)
geojson_collection = []
if cache_present:
    with open(GEOCODE_CACHE, 'rb') as cache_file:
        geojson_collection = pickle.load(cache_file)

Define a function to look up geocoding results stored in the list.

In [6]:
def lookup_postcode(post_code, geojson_collection):
    """Return the first cached geocoding result recorded for ``post_code``.

    Parameters
    ----------
    post_code : str
        Post code to search for.
    geojson_collection : list of dict
        Cached GeoJSON responses; each is matched on the ``postal`` property
        of its first feature.

    Returns
    -------
    dict or None
        The first matching GeoJSON response, or ``None`` when nothing matches.
        Entries that are empty, have no features, or carry no ``postal``
        property are skipped instead of raising.
    """
    for geojson in geojson_collection:
        if not geojson:
            continue
        features = geojson.get('features') or []
        if not features:
            continue
        properties = features[0].get('properties') or {}
        # Require the key explicitly so a missing postal never matches post_code=None.
        if 'postal' in properties and properties['postal'] == post_code:
            return geojson
    return None

Loop through the post codes, look up the geodata for each one either in the list or with the geocoder package, and store any newly fetched result back in the list.

In [7]:
# Read the pickled postal-code table from POST_CODES_FILE.
postal_codes = pd.read_pickle(POST_CODES_FILE)

In [8]:
# `session` is deliberately bound at module level: geocode() reads it as a global.
with requests.Session() as session:
    try:
        # Visit each distinct post code once; the boolean mask below already
        # updates every row that shares the code, so duplicates never trigger
        # repeated lookups or API calls.
        for post_code in postal_codes.Postcode.unique():
            geojson = lookup_postcode(post_code, geojson_collection)
            if not geojson:
                geojson = geocode(post_code)
                if geojson:
                    geojson_collection.append(geojson)
            if geojson:
                properties = geojson['features'][0]['properties']
                matching_rows = postal_codes.Postcode == post_code
                postal_codes.loc[matching_rows, 'latitude'] = properties['lat']
                postal_codes.loc[matching_rows, 'longitude'] = properties['lng']
    finally:
        # Persist the cache even if the loop is interrupted, so responses that
        # were already fetched are not requested again on the next run.
        cache_dir = os.path.dirname(GEOCODE_CACHE)
        if cache_dir:
            os.makedirs(cache_dir, exist_ok=True)
        with open(GEOCODE_CACHE, 'wb') as handle:
            pickle.dump(geojson_collection, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Only reached when geocoding finished without an error.
postal_codes.to_pickle(POST_CODES_FILE)
postal_codes

Unnamed: 0,Borough,Postcode,Neighbourhood,latitude,longitude
0,Central Toronto,M4N,Lawrence Park,43.728020,-79.388790
1,Central Toronto,M4P,Davisville North,43.712751,-79.390197
2,Central Toronto,M4R,North Toronto West,43.715383,-79.405678
3,Central Toronto,M4S,Davisville,43.704324,-79.388790
4,Central Toronto,M4T,"Moore Park, Summerhill East",43.689574,-79.383160
...,...,...,...,...,...
98,York,M6C,Humewood-Cedarvale,43.693781,-79.428191
99,York,M6E,Caledonia-Fairbanks,43.689026,-79.453512
100,York,M6M,"Del Ray, Keelesdale, Mount Dennis, Silverthorn",43.691116,-79.476013
101,York,M6N,"The Junction North, Runnymede",43.673185,-79.487262
