### Create geojson files for mapping 
This script takes a database of shape coordinates and builds a geojson file that can be used to generate maps with folium.
The first step is to find the shapefile containing the coordinates and convert it to geojson, either with the help of http://gipong.github.io/shp2geojson.js/ or with some of the helper scripts that use Fiona.
Step two is to merge the files we want to use with the help of the "geojson explorer and tinydb neighbourhood geometry database creator" notebook.
Step three is this notebook: it attaches the neighbourhood summary statistics to each shape and writes out the final geojson file.


In [None]:
from tinydb import TinyDB, Query
import geojson
from geojson import Polygon, FeatureCollection, Feature

In [None]:
import pandas as pd

In [None]:
from collections import defaultdict

In [None]:
%ls *.json

#### Open the tinydb of shapes and basic labels

In [None]:
# Open the TinyDB file of shapes (previously created by another notebook)
db = TinyDB('Waterloo_Kitchener_Cambridge.json')

# Pull every record out of the database
WR = db.all()

# Report whether anything actually came back
if not WR:
    print('check file - nothing loaded')
else:
    print('data loaded!')

In [None]:
%ls ~/datascience/mapping_notebooks/source_files/

### prep additional data to add to the geojson file

In [None]:
# Read in the summary of neighbourhoods and the households (HH) that live in
# them, so that more details can be added to the geojson file for later use
# in map making.
# This source file is created by the Neighbourhood Discovery Tool.
nh_data = pd.read_csv('~/datascience/mapping_notebooks/output_files/2018_EFHP_Caseload_HH_NH_summary.csv')

In [None]:
# The csv was made from a group-by object, so it carries some extra clutter
# and header names that are hard to parse: give every column a flat name.
nh_data.columns = [
    'o_index',
    'Neighbourhood',
    'City',
    'Total_Number_of_Families',
    'SOI',
    'Number_of_Families',
    'Average_Family_Size',
    'Number_of_People',
    'Average_Number_of_Visits',
    'Total_Hampers',
]
# Remove the first row (the unnecessary layer of sub-columns that is an
# artifact of the group by) and the old index column in a single call.
nh_data.drop(index=nh_data.index[0], columns=['o_index'], inplace=True)
nh_data['Total_Number_of_Families'] = nh_data['Total_Number_of_Families'].astype(int)
nh_data.head()

In [None]:
# Create a dictionary mapping neighbourhood to total hh served.
# This can be changed or expanded by keying off of other statistics such as
# Number of People, Total Hampers, etc.
# drop_duplicates is chained so that it returns a new frame; running it with
# inplace=True on a column selection of nh_data triggers pandas'
# SettingWithCopyWarning.
fam_totals = nh_data[['Neighbourhood', 'Total_Number_of_Families']].drop_duplicates(keep='last')
ftd = fam_totals.set_index('Neighbourhood')['Total_Number_of_Families'].to_dict()

In [None]:
# Keep only the columns needed for the source-of-income (SOI) breakdown;
# everything else in nh_data is left behind.
soi_totals = nh_data.loc[:, [
    'Neighbourhood',
    'SOI',
    'Number_of_Families',
    'Average_Family_Size',
    'Number_of_People',
    'Average_Number_of_Visits',
    'Total_Hampers',
]]
soi_totals.head()

In [None]:
soi_totals.tail()

In [None]:
# Containers used to drive the loops below:
# hds  - each neighbourhood name once, as a list
# sois - the distinct source of income types, as a set
hds = soi_totals['Neighbourhood'].drop_duplicates(keep='last').tolist()
sois = set(soi_totals['SOI'])

In [None]:
# Example values demonstrating the structure of what we need: the rows for one
# neighbourhood / source of income pair, converted to a list of lists.
v = soi_totals[(soi_totals['Neighbourhood'] == 'VANIER') & (soi_totals['SOI'] == 'Other')].values.tolist()
v[0][1:]  # first matching row without its leading Neighbourhood entry

In [None]:
# Use soi_totals to create a tree of dictionaries that can be written into the
# geojson file for use later:
#   nvaldct[neighbourhood][source of income][sub heading] -> value
sub_headings = [
    'Number_of_Families',
    'Average_Family_Size',
    'Number_of_People',
    'Average_Number_of_Visits',
    'Total_Hampers',
]
zeros = ['0'] * 5  # placeholder values, one per sub heading

nvaldct = {}  # 'Neighbourhood' : dictionary of SOIs that return values, or zeros

for hood in hds:
    # rows belonging to this neighbourhood
    hood_rows = soi_totals[soi_totals['Neighbourhood'] == hood]
    by_source = {}
    for source in sois:
        # rows for this source of income, as a list of lists
        matches = hood_rows[hood_rows['SOI'] == source].values.tolist()
        # skip the Neighbourhood and SOI entries of the first matching row;
        # fall back to the zero placeholders when there is no such row
        stats = matches[0][2:] if matches else zeros
        by_source[source] = dict(zip(sub_headings, stats))
    # neighbourhood name -> {source of income -> {sub heading -> value}}
    nvaldct[hood] = by_source

In [None]:
nvaldct['VANIER']['Federal'] # example of a full set of info

In [None]:
nvaldct['VANIER']['Federal']['Average_Family_Size']

In [None]:
nvaldct['HIDDEN VALLEY'] # example of default values written in 

In [None]:
len(nvaldct)

In [None]:
# https://nbviewer.jupyter.org/github/jtbaker/folium/blob/geojsonmarker/examples/GeoJsonMarkersandTooltips.ipynb              

In [None]:
def create_feature_list(tiny_db_file, exclusions=None, xtra_properties=None):
    '''
    iterate through tiny_db_file and build a list of features
    that can be used to create a feature collection object

    tiny_db_file: an iterable of records (for example the list returned
    by TinyDB's all()); each record is read through its 'name'
    (neighbourhood name), 'city' (city name) and 'coords' (sequence of
    coordinate pairs) keys

    exclusions: a city name string, or a list of city name strings, whose
    neighbourhoods should not be included in the feature list in situations
    where it is helpful to create maps with only specific cities on them.
    None (the default) excludes nothing

    xtra_properties: a dictionary keyed off neighbourhood names
    that contains additional properties to include; 'City' and
    'Neighbourhood' always take precedence over keys of the same name.
    None (the default) adds nothing

    returns: a list of Feature objects, one per record that was kept
    '''
    # Normalise exclusions to a set of exact city names.  A bare string is
    # wrapped rather than searched, so that 'Cambridge' is not treated as a
    # substring pattern or as a sequence of characters.
    if exclusions is None:
        excluded_cities = set()
    elif isinstance(exclusions, str):
        excluded_cities = {exclusions}
    else:
        excluded_cities = set(exclusions)

    if xtra_properties is None:
        xtra_properties = {}

    feature_list = []  # container to hold onto the features

    # iterate over the argument that was passed in, not a notebook global
    for community in tiny_db_file:
        community_name = community['city']  # city name
        if community_name in excluded_cities:
            continue  # this city was asked to be left off the map

        hood_name = community['name']  # neighbourhood name

        # Swap the order of every coordinate pair.  NOTE(review): presumably
        # the database stores (lat, lon) while geojson expects (lon, lat) -
        # confirm against the notebook that creates the database.
        coords = [(x[1], x[0]) for x in community['coords']]
        poly = Polygon([coords])

        properties_dct = {**xtra_properties.get(hood_name, {}),  # if there are some
                          'City': community_name,
                          'Neighbourhood': hood_name}

        feature_list.append(Feature(geometry=poly,
                                    properties=properties_dct,
                                    id=hood_name))
    return feature_list
    

In [None]:
# Build the features for every city except Cambridge.  exclusions is given as
# a list of city names, as create_feature_list documents; a bare string would
# be tested with substring matching by the `in` operator.
feat_list = create_feature_list(WR, exclusions=['Cambridge'], xtra_properties=nvaldct)
len(feat_list) # make sure there is data

In [None]:
feat_list[0]

In [None]:
# Wrap the list of features in a single FeatureCollection object
kwc_fc = FeatureCollection(feat_list) # create the feature collection

In [None]:
# Serialise the feature collection into a geojson file on disk
with open('Kitchener_Waterloo_Neighbourhoods_2.geojson', 'w') as geojson_file:
    geojson.dump(kwc_fc, geojson_file)

print('all done')