### Choropleth Map Maker with Tooltips
This leverages the other notebooks to produce a maps that highlight key datapoints.
At the moment, the highest payoff is in using geojson because of the tooltip functionality
the geojson source file has income related dictionaries that can drawn up a level in the data structure
and can be accessed direclty to produce maps that highlight different income sources and their geographic distribution

In [1]:
import folium
import pandas as pd
import json

In [2]:
from branca.colormap import linear

In [3]:
import geopandas as gpd

In [4]:
print(folium.__version__)

0.7.0


In [None]:
%ls

### Open the source file

In [5]:
# source file is produced by the 'Neighbourhoods from tinydb to Geojson' notebook
source_file = 'Kitchener_Waterloo_Neighbourhoods_2.geojson'

In [6]:
# open the json source file created by using the output from 
# 'Kitchener Waterloo Cambridge Neighbourhoods from tinydb to Geojson' notebook

def open_source(s_file):
    try:
        with open(s_file) as f:
            wrf = json.load(f)
            return wrf
    except:
        print('did not open!')
wr = open_source(source_file)

##### explore the geojson file

In [7]:
wr.keys()

dict_keys(['type', 'features'])

In [8]:
type(wr['features'])

list

In [9]:
wr['features'][0].keys()

dict_keys(['type', 'id', 'geometry', 'properties'])

In [10]:
wr['features'][0]['id']

'Central'

In [11]:
wr['features'][0]['properties']

{'City': 'Waterloo',
 'Federal': {'Average_Family_Size': '2.0',
  'Average_Number_of_Visits': '4.388888888888889',
  'Number_of_Families': '18',
  'Number_of_People': '36',
  'Total_Hampers': '79'},
 'Neighbourhood': 'Central',
 'Other': {'Average_Family_Size': '4.375',
  'Average_Number_of_Visits': '4.0',
  'Number_of_Families': '8',
  'Number_of_People': '35',
  'Total_Hampers': '32'},
 'Partial or full time employment': {'Average_Family_Size': '2.3636363636363638',
  'Average_Number_of_Visits': '2.787878787878788',
  'Number_of_Families': '33',
  'Number_of_People': '78',
  'Total_Hampers': '92'},
 'Provincial': {'Average_Family_Size': '1.6031746031746033',
  'Average_Number_of_Visits': '2.9682539682539684',
  'Number_of_Families': '126',
  'Number_of_People': '202',
  'Total_Hampers': '374'},
 'Unknown or None': {'Average_Family_Size': '1.736842105263158',
  'Average_Number_of_Visits': '2.736842105263158',
  'Number_of_Families': '19',
  'Number_of_People': '33',
  'Total_Hampers':

In [12]:
# the file contains summary data for the # of HH served with different sources of income
wr['features'][0]['properties']['Partial or full time employment']

{'Average_Family_Size': '2.3636363636363638',
 'Average_Number_of_Visits': '2.787878787878788',
 'Number_of_Families': '33',
 'Number_of_People': '78',
 'Total_Hampers': '92'}

### collections of points that can be useful landmarks

In [None]:
# useful landmarks
com_centre_dct = {
    'sunnydale' : [43.4875544, -80.5492773],
    'kingsdale' : [43.4288341, -80.4592296],
    'chandler' : [43.4225421, -80.4997861],
    'courtland shelley' : [43.422374, -80.4621208],
    'paulander' : [43.4404586, -80.5178292],
    'vic hills' : [43.4434202, -80.5226071],
    'Breithaupt' : [43.4649031, -80.496113],
    'Bridgeport' : [43.4827653, -80.4778566],
    'Centreville Chicopee' : [43.4293085, -80.4301851],
    'Country Hills' : [43.4112366, -80.48266389999999],
    'Doon Pioneer Park' : [43.3939, -80.43689499999999],
    'Downtown' : [43.452483, -80.48968099999999],
    'Forest Heights' : [43.4274383, -80.5276236],
    'Mill Courtland' : [43.4376378, -80.487627],
    'Rockway Centre' : [43.4397346, -80.466707],
    'Stanley Park' : [43.4479761, -80.4505083]
    }

In [37]:
# important values for initializing a map
kw = {'location': [43.451413, -80.492713], 'zoom_start': 11} # values to initialize folium object

### Geojson mapping

We can use the Geojson file to paint a map of the basic shape.  This is a good step to validate and make sure the file is formatted properly

In [None]:
# create a function that we can use format the appearance of different shapes based on "feature" properties
def city_style_parser(feature):
    '''
    This function is used to return a hex colour code 
    for a folium.Geojson style_function
    (refer to https://htmlcolorcodes.com/ for colour codes)
    it takes a json 'feature' parsed by folium.GeoJson
    it has keys ['type', 'geometry', 'properties']
    properties has keys
    ['City', 'Neibourhood', 'highlight', 'style']
    '''
    if feature['properties']['City'] == 'Waterloo':
        return '#ffff00'
    elif feature['properties']['City'] == 'Kitchener':
        return '#85C1E9'  
    else:
        return '#F5B7B1'

In [None]:

m = folium.Map(**kw) # initalize the map
folium.GeoJson(wr,
              style_function=lambda feature: {
        'fillColor': city_style_parser(feature),
        'color': 'black',
        'weight': 2,
        'dashArray': '5, 5'
    }).add_to(m) # add the geojson data
m # make sure it looks correct

## create  maps

### load reference data that will be used to create the range of colours

#### load # of households served by neighbourhood in 2018 and use that to create a colour scale

In [13]:
n_hoods = pd.read_csv('~/datascience/mapping_notebooks/output_files/2018_caseload_neighbourhood_totals.csv')

In [14]:
n_hoods.head()

Unnamed: 0.1,Unnamed: 0,Neighbourhood,Implied_City,Neighbourhood HH Total
0,0,VANIER,Kitchener,538
1,1,VICTORIA HILLS,Kitchener,488
2,2,CITY COMMERCIAL CORE,Kitchener,393
3,3,LAURENTIAN HILLS,Kitchener,363
4,4,CENTREVILLE CHICOPEE,Kitchener,304


In [15]:
# drop the index that was inserted and the city
n_hoods.drop(n_hoods.columns[[0,2]], axis=1, inplace=True)

In [16]:
n_hoods.head()

Unnamed: 0,Neighbourhood,Neighbourhood HH Total
0,VANIER,538
1,VICTORIA HILLS,488
2,CITY COMMERCIAL CORE,393
3,LAURENTIAN HILLS,363
4,CENTREVILLE CHICOPEE,304


In [17]:
# make a colour scale for use in a choropleth if we only want to key off of Number of Households served.  Otherwise, in
# later cells, we can use different criteria
# for references on the different colour scales
# https://nbviewer.jupyter.org/github/python-visualization/folium/blob/master/examples/Colormaps.ipynb
colormap = linear.YlGn_09.scale(
    n_hoods['Neighbourhood HH Total'].min(),
    n_hoods['Neighbourhood HH Total'].max())

print(colormap(5.0))

colormap

#ffffe3


In [18]:
# create a dictionary that can be used to create a chloropleth map
n_hood_dictionary = n_hoods.set_index('Neighbourhood')['Neighbourhood HH Total']

n_hood_dictionary['VANIER']

538

#### load # of households served by with different income sources and key data features of those households

In [19]:
nh_data = pd.read_csv('~/datascience/mapping_notebooks/output_files/2018_EFHP_Caseload_HH_NH_summary.csv')
# because the csv is made from a group by object there is some extra clutter and the header names are hard to parse
# so, rename the columns and drop the unneccesary layer of subcolumns that are an artifact of the group by
# the major columns are Neighbourhood, City,Total Number of Households,Household Primary Income Source,Number of Households
# Average family size,total number of people, Average number of visits, total number of visits

nh_data.columns = ['o_index', 'Neighbourhood', 'City', 
                 'Total_Number_of_Families', 'SOI', 'Number_of_Families',
                'Average_Family_Size', 'Number_of_People', 'Average_Number_of_Visits', 'Total_Hampers']
nh_data.drop(nh_data.index[0], inplace=True)
nh_data.drop(columns=['o_index'], inplace=True)
nh_data.Total_Number_of_Families = nh_data.Total_Number_of_Families.astype(int)
nh_data.drop(nh_data.columns[[1,2]], axis=1, inplace=True)

In [20]:
nh_data.head()

Unnamed: 0,Neighbourhood,SOI,Number_of_Families,Average_Family_Size,Number_of_People,Average_Number_of_Visits,Total_Hampers
1,VANIER,Provincial,287,2.770034843205575,795,3.4599303135888504,993
2,VANIER,Federal,96,3.1354166666666665,301,4.239583333333333,407
3,VANIER,Partial or full time employment,84,2.607142857142857,219,2.6904761904761907,226
4,VANIER,Unknown or None,45,2.8666666666666667,129,2.688888888888889,121
5,VANIER,Other,26,2.8076923076923075,73,3.576923076923077,93


In [21]:
sois = ['Provincial','Federal','Partial or full time employment','Unknown or None','Other']
soi_vs = ['Number_of_Families','Average_Family_Size','Number_of_People','Average_Number_of_Visits','Total_Hampers']
cheaders = ['Neighbourhood', 'SOI'] + soi_vs
h_names = set(n_hoods['Neighbourhood'])

In [22]:
nh_data[(nh_data['Neighbourhood']== 'Erbsville') & (nh_data['SOI'] == 'Federal')].empty

True

In [23]:
# insert default values if they are missing
for place in h_names:
    for source in sois:
        #tdf = nh_data[(nh_data['Neighbourhood']== place) & (nh_data['SOI'] == source)]
        if nh_data[(nh_data['Neighbourhood']== place) & (nh_data['SOI'] == source)].empty:
            df2a = pd.DataFrame([[place, source, 0, 0, 0, 0, 0]], columns=cheaders)
            nh_data = nh_data.append(df2a, ignore_index=True)
        

In [24]:
nh_data[(nh_data['Neighbourhood']== 'Erbsville') & (nh_data['SOI'] == 'Federal')].empty

False

In [25]:
nh_data.tail()


Unnamed: 0,Neighbourhood,SOI,Number_of_Families,Average_Family_Size,Number_of_People,Average_Number_of_Visits,Total_Hampers
380,HIDDEN VALLEY,Partial or full time employment,0,0,0,0,0
381,HIDDEN VALLEY,Unknown or None,0,0,0,0,0
382,HIDDEN VALLEY,Other,0,0,0,0,0
383,Laurelwood,Partial or full time employment,0,0,0,0,0
384,Laurelwood,Other,0,0,0,0,0


In [26]:
a = nh_data[(nh_data['Neighbourhood']== 'Rural East') & (nh_data['SOI'] == 'Other')]
a.empty

False

In [27]:
# v = soi_totals[(soi_totals['Neighbourhood'] == 'VANIER') & (soi_totals['SOI'] == 'Other')]
#nh_data[(nh_data['SOI'] == 'Provincial')]
income_dfs = {}
for soi in sois:
    income_dfs[soi]= (nh_data[(nh_data['SOI'] == soi)])
income_dfs['Provincial'].head()

Unnamed: 0,Neighbourhood,SOI,Number_of_Families,Average_Family_Size,Number_of_People,Average_Number_of_Visits,Total_Hampers
0,VANIER,Provincial,287,2.770034843205575,795,3.4599303135888504,993
5,VICTORIA HILLS,Provincial,290,2.9172413793103447,846,3.4827586206896552,1010
10,CITY COMMERCIAL CORE,Provincial,232,1.9267241379310345,447,3.2974137931034484,765
15,LAURENTIAN HILLS,Provincial,212,3.1226415094339623,662,3.3632075471698117,713
20,CENTREVILLE CHICOPEE,Provincial,179,2.7094972067039107,485,2.888268156424581,517


In [30]:
# filter out the unwanted cells that do not match the criteria we need.  In this case, all the SOI's that do not equal
# 'Provincial'
# change the datatypes of the df to numeric values, otherwise the next cell will fail
soi_criteria = 'Provincial'
soi_col = 'Number_of_Families'


soi_df = income_dfs[soi_criteria]
soi_df[soi_vs] = soi_df[soi_vs].apply(pd.to_numeric)
soi_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


Unnamed: 0,Neighbourhood,SOI,Number_of_Families,Average_Family_Size,Number_of_People,Average_Number_of_Visits,Total_Hampers
0,VANIER,Provincial,287,2.770035,795,3.45993,993
5,VICTORIA HILLS,Provincial,290,2.917241,846,3.482759,1010
10,CITY COMMERCIAL CORE,Provincial,232,1.926724,447,3.297414,765
15,LAURENTIAN HILLS,Provincial,212,3.122642,662,3.363208,713
20,CENTREVILLE CHICOPEE,Provincial,179,2.709497,485,2.888268,517


#### create an alternate colour scale to use in the custom geojson layer map below (m4)

In [31]:
# make a colour scale for use in a choropleth
# for references on the different colour scales
# https://nbviewer.jupyter.org/github/python-visualization/folium/blob/master/examples/Colormaps.ipynb


soi_df = income_dfs[soi_criteria]

soi_colormap = linear.YlGn_09.scale(
    soi_df[soi_col].min(),
    soi_df[soi_col].max())

print(soi_colormap(5.0))

soi_colormap

#feffdf


#### create a dictionary for use in m4 to key off of the number of households relying on the criteria chosen.  In this case  SOI= Provincial

In [33]:
# n_hood_dictionary = n_hoods.set_index('Neighbourhood')['Neighbourhood HH Total']
soi_dictionary = soi_df.set_index('Neighbourhood')[soi_col]
soi_dictionary

Neighbourhood
VANIER                             287
VICTORIA HILLS                     290
CITY COMMERCIAL CORE               232
LAURENTIAN HILLS                   212
CENTREVILLE CHICOPEE               179
MT. HOPE HURON PARK                188
MILL COURTLAND WOODSIDE PARK       160
Columbia                           131
SOUTHDALE                          129
Central                            126
CHERRY HILL                        134
STANLEY PARK                        89
FOREST HILL                         89
Lakeshore                           77
Willowdale                          83
Beechwood                           79
HERITAGE PARK                       74
KING EAST                           90
FOREST HEIGHTS                      65
WESTMOUNT                           77
LAURENTIAN WEST                     52
PIONEER PARK                        56
HIGHLAND WEST                       49
COUNTRY HILLS                       56
Lincoln                             67
CENTRAL FRE

In [34]:
# update the features dictionary with data that we want to use to make tooltips to show up on the map
# the folium Geojson method isn't able to dig down into dictionaries (or I don't know how to make it dig down)
# so we will need to bring the data up a level
# for an example of the structure of teh data refer to this:

wr['features'][0]['properties']['Provincial']

{'Average_Family_Size': '1.6031746031746033',
 'Average_Number_of_Visits': '2.9682539682539684',
 'Number_of_Families': '126',
 'Number_of_People': '202',
 'Total_Hampers': '374'}

In [35]:
# move the feature we want to use up a level in the geojson so we can use it as a tool tip value
# if we want to create different layers, you will need to add code to alter the labels of the data features in a way so that 
# they do not collide, i.e. if you want a layer for provincial and federal soi, add code that changes the keys from 
# 'Average_Family_Size' to 'Average_Family_Size_Provincial' so that they can be called up and used to add multiple layers
list_of_targets = ['Provincial', 'Federal', 'Partial or full time employment']


for nh in wr['features']: # for neighbourhood in list of features - see cell above for example of output dictionary
    for tgt in list_of_targets:
        d = nh['properties'].get(tgt, False) # i.e. 'Provincial'
        # pull the dictionary out and then seed the values one level up in the properties dictionary so we can access
        # them directly later with the geojson tooltip method.  this is used in m4 below

        if d:
            new_dictionary = {}
            for key in d.keys()
                new_key_string = '{}_{}'.format(tgt, key)
                new_dictionary[new_key_string] = d[key]
            nh['properties'].update(new_dictionary)

In [None]:
# alternatively, if we only want to use one feature execute this cell

for nh in wr['features']: # for neighbourhood in list of features - see cell above for example of output dictionary
    d = nh['properties'].get('Provincial', False) # i.e. 'Provincial'
    # pull the dictionary out and then seed the values one level up in the properties dictionary so we can access
    # them directly later with the geojson tooltip method.  this is used in m4 below
    if d:
        nh['properties'].update(d)

### Make a map showing number of HH living in neighbourhoods using geojson data

In [39]:
m = folium.Map(**kw)

folium.GeoJson(
    wr,
    name='households',
    style_function=lambda feature: {
        'fillColor': colormap(n_hood_dictionary.get(feature['properties']['Neighbourhood'], 0)),
        'color': 'black',
        'weight': 1,
        'dashArray': '5, 5',
        'fillOpacity': 0.9,
    }
).add_to(m)

folium.LayerControl().add_to(m)

colormap.caption = 'Number of Households Accessing EFHP in 2018'
colormap.add_to(m)


#m.save('~/datascience/mapping_notebooks/output_files/HH_total_neighbourhood_map.html')

m

### make a map with neighbourhood name's that popup when you hover over them

In [40]:
m3 = folium.Map(**kw)

''''Provincial': {'Average_Family_Size': '1.6031746031746033',
  'Average_Number_of_Visits': '2.9682539682539684',
  'Number_of_Families': '126',
  'Number_of_People': '202',
  'Total_Hampers': '374'}'''


folium.GeoJson(
    wr,
    name='households',
    style_function=lambda feature: {
        'fillColor': colormap(n_hood_dictionary.get(feature['id'], 0)),
        'color': 'black',
        'weight': 1,
        'dashArray': '5, 5',
        'fillOpacity': 0.9,
        'line_opacity':0.2},
    tooltip=folium.features.GeoJsonTooltip(fields=['Neighbourhood'],
                                              labels=False,
                                              sticky=False)
).add_to(m3)

folium.LayerControl().add_to(m)

colormap.caption = 'Number of Households Accessing EFHP in 2018'
colormap.add_to(m3)


#m3.save('~/datascience/mapping_notebooks/output_files/HH_total_neighbourhood_map_popup.html')
m3

In [41]:
m4 = folium.Map(**kw)
# ['Number_of_Families','Average_Family_Size','Number_of_People','Average_Number_of_Visits','Total_Hampers']
folium.GeoJson(
    wr,
    name='households with OW/ODSP as primary SOI',
    style_function=lambda feature: {
        'fillColor': soi_colormap(soi_dictionary.get(feature['id'], 0)),
        'color': 'black',
        'weight': 1,
        'dashArray': '5, 5',
        'fillOpacity': 0.9,
        'line_opacity':0.2},
    tooltip=folium.features.GeoJsonTooltip(fields=['Neighbourhood',
                                                  'Number_of_Families',
                                                  'Average_Family_Size',
                                                  'Number_of_People',
                                                  'Average_Number_of_Visits',
                                                  'Total_Hampers'],
                                             aliases=['<div style="background-color: lightyellow; color: black; padding: 3px; border: 2px solid black; border-radius: 3px;">'+item+'</div>' for item in ['Neighbourhood','Number_of_Families','Average_Family_Size','Number_of_People','Average_Number_of_Visits','Total_Hampers']],
                                             style="font-family: san serif;",
                                            localize=True)
).add_to(m4)

folium.LayerControl().add_to(m4)

soi_colormap.caption = 'Number of Households Accessing EFHP in 2018 on OW/ODSP'
soi_colormap.add_to(m4)


#m4.save('~/datascience/mapping_notebooks/output_files/HH_total_neighbourhood_map_popup_data_Prov.html')
m4

### Make a map using Choropleth Method, not Geojson

In [42]:
m5 = folium.Map(**kw)

folium.Choropleth(
    geo_data=wr,
    data=n_hoods,
    columns=['Neighbourhood', 'Neighbourhood HH Total'],
    key_on='feature.id',
    nan_fill_color='purple',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='# of HH accessing EFHP in 2018',
    highlight=True
    ).add_to(m5)


#m5.save('chloro_test.html')

m5