In [94]:
import googlemaps
import pandas as pd
import re
import numpy as np

In [95]:
import os
# The API key is read from the environment so it never appears in the notebook
gmaps = googlemaps.Client(key=os.getenv('GOOGLEMAPS_API_KEY'))

In [96]:
# The year columns in the CSV header are strings ("2001", "2002", ...), so the
# dtype mapping has to be keyed by strings to match them by name; integer keys
# never match a column name and leave the int64 request without effect.
# The 2020 column is not listed here and keeps its inferred dtype.
dtype = {str(year): np.int64 for year in range(2001, 2020)}
df = pd.read_csv('data/census/1710013501-eng.csv', thousands=",", dtype=dtype)
df.head()

Unnamed: 0,Geography,2001,2002,2003,2004,2005,2006,2007,2008,2009,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,All census metropolitan areas and census agglo...,25121190,25464948,25745344,26035077,26332444,26666157,26972594,27313943,27682785,...,28365332,28733706,29086709,29425513,29680350,30059970,30482398,30979142,31485410,31869048
1,"All census metropolitan areas, Canada",21174905,21500787,21753380,22011568,22277962,22584463,22857870,23163156,23502329,...,24124407,24454805,24776585,25084157,25315512,25660482,26044849,26495277,26959203,27306305
2,"St. John's (CMA), Newfoundland and Labrador",176468,177626,179026,181059,181720,183847,186173,189575,193867,...,202376,204537,206257,208228,209191,211003,212144,212344,213204,214014
3,"Halifax (CMA), Nova Scotia",369252,374445,378091,380752,381940,384658,386402,389919,393688,...,402074,404376,405598,407061,408017,414015,421713,429895,439529,448544
4,"Moncton (CMA), New Brunswick",123093,124560,126103,127702,128823,130507,132060,133860,136211,...,140698,142871,144217,145708,146367,148467,150438,152560,155870,158695


In [97]:
# Rows whose Geography mentions "census" are aggregates (e.g. "All census
# metropolitan areas, Canada") rather than individual cities: set them aside
not_cities = df.loc[df["Geography"].str.contains(r"census", regex=True)]
cities = df.drop(labels=not_cities.index, axis="index")

In [98]:
# Strip the " (CA)" / " (CMA)" tag from each Geography value to get the city name
cities['city_name'] = cities['Geography'].apply(
    lambda geography: re.sub(r' \(CM?A\)', '', geography)
)

In [100]:
# Look up each city name with the googlemaps geocoding API to obtain its lat/lon
def geocode(city):
    '''Geocode a city name through the module-level googlemaps client.

    Returns a (lat, lng) tuple taken from the first result, or
    (np.nan, np.nan) after printing a notice when the lookup is empty.
    '''
    results = gmaps.geocode(city)
    if not results:
        print('No results for city: ', city)
        return (np.nan, np.nan)

    # Only the first (top) match is used
    location = results[0]['geometry']['location']
    return (location['lat'], location['lng'])
    
# Geocode every city; the (lat, lon) pairs are unzipped into two new columns
cities['lat'], cities['lon'] = zip(*cities['city_name'].map(geocode))

# Drop every row that contains a NaN, which removes the cities that could not
# be geocoded (their lat/lon is np.nan)
cities.dropna(axis='index', inplace=True)

No results for city:  Campbellton, New Brunswick/Quebec
No results for city:  Hawkesbury, Ontario/Quebec


In [101]:
# Year-to-year population change per city: each year minus the year before it
for year in range(2002, 2020):
    this_year, last_year = str(year), str(year - 1)
    cities['diff_' + this_year] = cities[this_year].sub(cities[last_year])

In [102]:
# Find the biggest increase and decrease and create a new column scaled between max ranges of -1 and 1
# The diff columns are selected by exact name: an unanchored 'diff_' regex would
# also pick up the 'scaled_diff_*' / 'sig_diff_*' columns when this cell is re-run.
diff_columns = ['diff_' + str(year) for year in range(2002, 2020)]
yearly_diffs = cities[diff_columns]
max_diff = yearly_diffs.max(axis=1).max()
min_diff = yearly_diffs.min(axis=1).min()
# Largest absolute change in either direction; dividing by it bounds the result to [-1, 1]
max_change = max(abs(min_diff), max_diff)
for year in range(2002, 2020):
    cities['scaled_diff_' + str(year)] = cities['diff_' + str(year)] / max_change

In [87]:
def sigmoid(x):
    '''Logistic function 1 / (1 + e^-x), evaluated with np.exp.'''
    decay = np.exp(-x)
    return 1 / (1 + decay)

# Pass every scaled difference through the sigmoid, one new column per year
for year in range(2002, 2020):
    suffix = str(year)
    cities['sig_diff_' + suffix] = cities['scaled_diff_' + suffix].map(sigmoid)

In [104]:
# Create a new column that picks a colour on a spectrum: red -> light red for population decline, light green -> green for population growth

# The relative population change is split into 20 shades between -1 -> 0 and 20 shades between 0 -> 1
# We find where each city's relative population change falls between -1 and 1 and assign the matching shade of colour

from colour import Color
# Number of shades in each half of the spectrum
color_steps = 20

# Decline ramp: red fading to light red
red = Color("red")
light_red = Color(rgb=(255/255, 204/255, 203/255))
neg_colors = [shade for shade in red.range_to(light_red, color_steps)]

# Growth ramp: light green deepening to green
green = Color("green")
light_green = Color(rgb=(144/255, 238/255, 144/255))
pos_colors = [shade for shade in light_green.range_to(green, color_steps)]

def get_color_spectrum(row, year):
    '''Pick the colour shade for a row's scaled population change in `year`.

    Reads row['scaled_diff_<year>'], chooses the growth ramp (pos_colors) for
    values >= 0 and the decline ramp (neg_colors) otherwise, and returns the
    shade of the first evenly spaced step that is >= the value, as a list of
    three ints obtained by truncating each rgb channel times 255.
    '''
    x = row['scaled_diff_' + str(year)]
    if x >= 0.:
        lower, upper, spectrum = 0., 1., pos_colors
    else:
        lower, upper, spectrum = -1., 0., neg_colors

    # Evenly spaced step boundaries for the chosen half of the spectrum
    relative_change = np.linspace(lower, upper, color_steps)

    # Position of the first boundary at or above x
    reached = (relative_change >= x).tolist()
    idx = reached.index(True)

    return [int(channel * 255) for channel in spectrum[idx].rgb]
    
# Attach one colour column per year, computed row by row
for year in range(2002, 2020):
    cities['color_spec_' + str(year)] = cities.apply(
        lambda row: get_color_spectrum(row, year), axis=1
    )

In [105]:
# Persist the enriched table; the index is written as the first column (the to_csv default)
cities.to_csv('data/cities.csv', index=True)

In [2]:
#from ast import literal_eval
def load_data(path='data/cities.csv'):
    '''Load the saved cities table and add a log-scaled magnitude column per year.

    Parameters
    ----------
    path : str, optional
        CSV file to read. Defaults to 'data/cities.csv'.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with `lat`/`lon` read as float64 and every
        `diff_<year>` column (2002-2019) read as int64, plus one
        `abs_<year>` column per year holding log2(|diff_<year>|).
    '''
    years = range(2002, 2020)

    # The diff columns start at 2002, so the dtype mapping covers exactly those years
    dtype = {'diff_' + str(year): np.int64 for year in years}
    dtype['lat'] = np.float64
    dtype['lon'] = np.float64
    data = pd.read_csv(path, dtype=dtype)

    # NOTE(review): the colour columns are not parsed here; presumably they come
    # back from the CSV as strings and would need ast.literal_eval to become lists — confirm
    for year in years:
        # NOTE(review): a diff of 0 yields log2(0) = -inf — confirm consumers handle it
        data['abs_' + str(year)] = np.log2(data['diff_' + str(year)].abs())
    return data

# Rebuild `cities` from the CSV on disk via load_data(); this rebinds the name
cities = load_data()

In [4]:
# Scratch cell: builds a single 20-step red -> green gradient
from colour import Color
red = Color("red")
colors = list(red.range_to(Color("green"),20))
    
# NOTE(review): a, b, c and d are not used anywhere in the visible cells — confirm whether they are still needed
a = -0.5
b = 0
c = 0.5
d = 1

import numpy as np

# NOTE(review): `index` is never assigned in the visible cells, so this line appears to rely on
# leftover kernel state and would raise NameError on a fresh run — confirm
index



10

In [19]:
# Red expressed as integer channels: each rgb component times 255, truncated
[int(channel * 255) for channel in red.rgb]

[255, 0, 0]