In [94]:
import googlemaps
import pandas as pd
import re
import numpy as np

In [95]:
import os
gmaps = googlemaps.Client(key=os.environ.get('GOOGLEMAPS_API_KEY'))

In [96]:
dtype = {year: np.int64 for year in range(2001,2020)}
df = pd.read_csv('data/census/1710013501-eng.csv', thousands=",", dtype=dtype)
df.head()

Unnamed: 0,Geography,2001,2002,2003,2004,2005,2006,2007,2008,2009,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,All census metropolitan areas and census agglo...,25121190,25464948,25745344,26035077,26332444,26666157,26972594,27313943,27682785,...,28365332,28733706,29086709,29425513,29680350,30059970,30482398,30979142,31485410,31869048
1,"All census metropolitan areas, Canada",21174905,21500787,21753380,22011568,22277962,22584463,22857870,23163156,23502329,...,24124407,24454805,24776585,25084157,25315512,25660482,26044849,26495277,26959203,27306305
2,"St. John's (CMA), Newfoundland and Labrador",176468,177626,179026,181059,181720,183847,186173,189575,193867,...,202376,204537,206257,208228,209191,211003,212144,212344,213204,214014
3,"Halifax (CMA), Nova Scotia",369252,374445,378091,380752,381940,384658,386402,389919,393688,...,402074,404376,405598,407061,408017,414015,421713,429895,439529,448544
4,"Moncton (CMA), New Brunswick",123093,124560,126103,127702,128823,130507,132060,133860,136211,...,140698,142871,144217,145708,146367,148467,150438,152560,155870,158695


In [97]:
# remove areas outside census areas + canada as a whole
not_cities = df[df["Geography"].str.contains(r"census", regex=True)]
cities = df.drop(index=not_cities.index)

In [98]:
# remove "(CA)" and "(CMA)" for city name
cities['city_name'] = cities['Geography'].map(lambda x: re.sub(r' \(CM?A\)', '', x))

In [100]:
# Feed each city name into googlemaps geocoding and store lat/lon results in data frame
def geocode(city):
    ''' Calls googlemaps geocode API using the formatted city_name row, 
        returns lat or lng from API call, as specified by direction'''
    results = gmaps.geocode(city)
    if results:
        return (results[0]['geometry']['location']['lat'], results[0]['geometry']['location']['lng'])
    else:
        print('No results for city: ', city)
        return (np.nan, np.nan)
    
# geocode each city and store lat/lon in data frame
cities['lat'], cities['lon'] = zip(*cities['city_name'].map(lambda x: geocode(x)))

# drop the cities we could not geocode (lat/lon will be np.nan)
cities.dropna(axis=0, inplace=True)

No results for city:  Campbellton, New Brunswick/Quebec
No results for city:  Hawkesbury, Ontario/Quebec


In [101]:
# Find the year-to-year change in population for each city
for year in range(2002, 2020): 
    cities['diff_' + str(year)] = cities[str(year)] - cities[str(year-1)] 

In [102]:
# Find the biggest increase and decrease and create a new column scaled between max ranges of -1 and 1
max_diff = cities.filter(regex='diff_').max(axis=1).max()
min_diff = cities.filter(regex='diff_').min(axis=1).min()
max_change = max(abs(min_diff), max_diff)
for year in range(2002, 2020): 
    cities['scaled_diff_' + str(year)] = cities["diff_"+ str(year)] / max_change

In [87]:
def sigmoid(x):
    return 1/(1 + np.exp(-x))

for year in range(2002, 2020):
    cities['sig_diff_' + str(year)] = cities['scaled_diff_' + str(year)].apply(sigmoid)

In [104]:
# Create a new column that chooses a colour on a spectrum between red->light red for popl decline, light green->green based on pop growth

# The relative population change is broken up over 20 shades between -1 -> 0 and 20 shades between 0 -> 1
# We  find where each city's relative population change is between -1 and 1 and assign the appropriate shade of colour

from colour import Color
color_steps = 20

red = Color("red")
light_red = Color(rgb=(255/255,204/255,203/255))
neg_colors = list(red.range_to(light_red,color_steps))

light_green = Color(rgb=(144/255, 238/255, 144/255))
green = Color("green")
pos_colors = list(light_green.range_to(green, color_steps))

def get_color_spectrum(row, year):
    x = row['scaled_diff_' + str(year)]
    if x>=0.:
        relative_change = np.linspace(0.,1.,color_steps)
        spectrum = pos_colors
    else:
        relative_change = np.linspace(-1.,0.,color_steps)
        spectrum = neg_colors
    
    idx = list(map(lambda i: i >= x, relative_change)).index(True)
    color = [int(c*255) for c in spectrum[idx].rgb]
        
    return color
    
for year in range(2002, 2020):
    cities['color_spec_' + str(year)] = cities.apply(get_color_spectrum, args=(year,), axis=1)

In [105]:
cities.to_csv('data/cities.csv')

In [2]:
#from ast import literal_eval
def load_data():
    dtype['lat'] = np.float64
    dtype['lon'] = np.float64
    data =  pd.read_csv('data/cities.csv', dtype=dtype)
    return data

cities = load_data()

In [106]:
import geopandas as geo

gdf = geo.read_file('data/gis/census_boundaries/lcma000b16a_e.shp')

In [114]:
cities['CMANAME'], cities['PRNAME'] = zip(*cities['city_name'].map(lambda x: x.split(",")))

In [115]:
cities['province']

2       Newfoundland and Labrador
3                     Nova Scotia
4                   New Brunswick
5                   New Brunswick
6                          Quebec
                  ...            
158              British Columbia
159              British Columbia
160              British Columbia
161                         Yukon
162         Northwest Territories
Name: province, Length: 158, dtype: object

In [113]:
gdf

Unnamed: 0,CMAUID,CMAPUID,CMANAME,CMATYPE,PRUID,PRNAME,geometry
0,512,35512,Brockville,D,35,Ontario,"MULTIPOLYGON (((7494625.237 1128257.371, 74950..."
1,442,24442,Trois-Rivières,B,24,Quebec / Québec,"MULTIPOLYGON (((7685035.983 1380936.549, 76866..."
2,444,24444,Shawinigan,D,24,Quebec / Québec,"POLYGON ((7634181.689 1409252.829, 7634554.614..."
3,447,24447,Drummondville,K,24,Quebec / Québec,"POLYGON ((7705178.249 1332485.077, 7705186.949..."
4,450,24450,Granby,K,24,Quebec / Québec,"POLYGON ((7703435.440 1270659.020, 7703893.906..."
...,...,...,...,...,...,...,...
151,845,48845,Cold Lake,D,48,Alberta,"POLYGON ((5048702.580 2200109.577, 5048763.483..."
152,850,48850,Grande Prairie,K,48,Alberta,"POLYGON ((4566556.417 2468181.671, 4565974.171..."
153,860,48860,Wood Buffalo,K,48,Alberta,"POLYGON ((5154537.534 2795507.449, 5155183.874..."
154,865,48865,Wetaskiwin,D,48,Alberta,"POLYGON ((4804472.494 2108080.594, 4804278.609..."


In [139]:
df_with_geo = cities.merge(gdf, on='CMANAME', indicator=True, how='left')
df_with_geo[df_with_geo._merge == 'left_only']


Unnamed: 0,Geography,2001,2002,2003,2004,2005,2006,2007,2008,2009,...,province,CMANAME,PRNAME_x,CMAUID,CMAPUID,CMATYPE,PRUID,PRNAME_y,geometry,_merge
9,"Ottawa - Gatineau (CMA), Ontario/Quebec",1127684,1144036,1159243,1170640,1181489,1189979,1205632,1225487,1246116,...,Ontario/Quebec,Ottawa - Gatineau,Ontario/Quebec,,,,,,,left_only
10,"Ottawa - Gatineau (CMA), Ontario part, Ontario",854593,867412,877683,884435,891423,896678,907573,921850,936513,...,Ontario part,Ottawa - Gatineau,Ontario part,,,,,,,left_only
11,"Ottawa - Gatineau (CMA), Quebec part, Quebec",273091,276624,281560,286205,290066,293301,298059,303637,309603,...,Quebec part,Ottawa - Gatineau,Quebec part,,,,,,,left_only
25,"Greater Sudbury (CMA), Ontario",164210,163810,164213,164622,165278,166751,167308,168016,168148,...,Ontario,Greater Sudbury,Ontario,,,,,,,left_only
51,"Campbellton (CA), New Brunswick part, New Brun...",15668,15372,15240,14998,14816,14592,14444,14325,14187,...,New Brunswick part,Campbellton,New Brunswick part,,,,,,,left_only
52,"Campbellton (CA), Quebec part, Quebec",3008,3004,3012,3015,3057,3091,3148,3206,3276,...,Quebec part,Campbellton,Quebec part,,,,,,,left_only
75,"Hawkesbury (CA), Ontario part, Ontario",10702,10829,10986,11103,11189,11201,11098,11015,10931,...,Ontario part,Hawkesbury,Ontario part,,,,,,,left_only
76,"Hawkesbury (CA), Quebec part, Quebec",1341,1356,1362,1344,1345,1415,1386,1399,1478,...,Quebec part,Hawkesbury,Quebec part,,,,,,,left_only
117,"Lloydminster (CA), Alberta/Saskatchewan",24777,25339,26064,26720,27380,28087,29142,29909,30737,...,Alberta/Saskatchewan,Lloydminster,Alberta/Saskatchewan,,,,,,,left_only
118,"Lloydminster (CA), Alberta part, Alberta",13608,14175,14732,15205,15815,16560,17297,17769,18189,...,Alberta part,Lloydminster,Alberta part,,,,,,,left_only


In [128]:
gdf.loc[gdf['CMANAME']=='Thunder Bay']

Unnamed: 0,CMAUID,CMAPUID,CMANAME,CMATYPE,PRUID,PRNAME,geometry
110,595,35595,Thunder Bay,B,35,Ontario,"MULTIPOLYGON (((6444029.840 1406040.931, 64441..."


In [129]:
cities.loc[cities['CMANAME']=='Thunder Bay']

Unnamed: 0,Geography,2001,2002,2003,2004,2005,2006,2007,2008,2009,...,color_spec_2014,color_spec_2015,color_spec_2016,color_spec_2017,color_spec_2018,color_spec_2019,city,province,CMANAME,PRNAME
28,"Thunder Bay (CMA), Ontario",126696,126776,127369,127619,127570,127090,125977,125440,125043,...,"[255, 203, 202]","[255, 203, 202]","[131, 237, 131]","[131, 237, 131]","[131, 237, 131]","[131, 237, 131]",Thunder Bay,Ontario,Thunder Bay,Ontario
