<h1><span style="color:red">Generate Aggregate Maps</span></h1>

### This notebook reads numeric and categorical variables from the survey dataset and lets users compute a choropleth map of a variable of interest aggregated by a spatial grouping, adds it to a new survey version, and publishes the survey to the user's surveys gallery.

## 1. Retrieve survey parameters from the URL

In [None]:
%%javascript
// Return the decoded value of the query-string parameter `key` taken from
// window.location.search, or an empty string when the parameter is absent.
// NOTE(review): escape()/unescape() are deprecated JavaScript globals.
function getQueryStringValue (key)
{  
    // The pattern matches the entire search string and captures the text that
    // follows "key=" up to the next "&" (case-insensitive), so replace() leaves
    // only that capture. The characters . + * in the key are backslash-escaped
    // before the key is embedded in the pattern.
    return unescape(window.location.search.replace(new RegExp("^(?:.*[&\\?]" + escape(key).replace(/[\.\+\*]/g, "\\$&") + "(?:\\=([^&]*))?)?.*$", "i"), "$1"));
}
// Copy each URL parameter into a Python variable in the notebook kernel.
// JSON.stringify produces a double-quoted, fully escaped literal that Python
// also accepts as a string literal, so a quote or backslash in the URL can no
// longer end the string early and run arbitrary Python in the kernel.
function setKernelString(name, value) {
    IPython.notebook.kernel.execute(name + "=" + JSON.stringify(String(value)));
}
setKernelString("survey_url", getQueryStringValue("surveyurl"));
setKernelString("views", getQueryStringValue("views"));
setKernelString("view", getQueryStringValue("view"));
setKernelString("user", getQueryStringValue("user"));
setKernelString("csv_file", getQueryStringValue("csv"));
setKernelString("dzc_file", getQueryStringValue("dzc"));
setKernelString("params", getQueryStringValue("params"));
setKernelString("active_object", getQueryStringValue("activeobject"));
setKernelString("full_notebook_url", window.location);

In [None]:
# common imports
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import Markdown, display
from ipyfilechooser import FileChooser

import geopandas as gpd
import pandas as pd    
import numpy as np
import panel as pn
import fiona
import re
import json
import io
import os

# load the panel notebook extension before any panel widget is displayed
pn.extension()


def printmd(string):
    """Render `string` as Markdown in the cell output."""
    rendered = Markdown(string)
    display(rendered)


# directory holding the survey CSV files, relative to this notebook
absolutePath = "../../temp_csvs/"

# local imports
import sys
sys.path.insert(1, '../../helpers')
import panel_libs as panellibs
import suave_integration as suaveint

## 2. Read the survey file

In [None]:
# load the survey data from the CSV named in the URL parameters
survey_csv_path = absolutePath + csv_file
df = panellibs.extract_data(survey_csv_path)

# keep the column names in a one-column frame and list them for the user
variables_df = pd.DataFrame({'varname': df.columns})
printmd("<b><span style='color:red'>All variables in the survey file:</span></b>")
print(variables_df['varname'].values)

## 3. Select variable of interest and spatial grouping

In [None]:
# every survey column is a candidate for both column selectors
survey_columns = df.columns.to_list()
options = ['numerical', 'categorical']

# choose the variable to aggregate and how SuAVE should treat it
var_selector = pn.widgets.Select(name='Variable of Interest', options=list(survey_columns))
tag_selector = pn.widgets.Select(name='SuAVE Variable Type', options=options)

# choose the survey column that identifies the spatial unit
# (zip codes, states, counties, tracts, etc.)
grouping_selector = pn.widgets.Select(name='Spatial Grouping Variable', options=list(survey_columns))

pn.Column(pn.Row(var_selector, tag_selector), grouping_selector)

In [None]:
# read the current widget selections into plain variables
selected = var_selector.value            # variable of interest
suave_tag = tag_selector.value           # 'numerical' or 'categorical'
grouping_var = grouping_selector.value   # spatial grouping column

# echo the choices so the user can confirm them before continuing
selection_summary = '\n'.join([
    'Selected Variable of Interest: ' + selected,
    'Selected Variable Type: ' + suave_tag,
    'Selected Spatial Grouping Variable: ' + grouping_var,
])
print(selection_summary)

## 4. Select spatial grouping geometry mapping file

Locally upload a geometry mapping file (up to 10 MB) or select one from the working directory.

In [None]:
# extensions accepted for the geometry mapping file, shared by both pickers
mapping_extensions = ['.csv', '.xlsx', '.json', '.zip', '.geojson']

# widget for uploading a file from the local machine
file_input = pn.widgets.FileInput(accept=', '.join(mapping_extensions))

# widget for browsing the working directory, limited to the same extensions
fc = FileChooser()
fc.filter_pattern = ['*' + ext for ext in mapping_extensions]

<span style="color:red">Skip the next cell if selecting a mapping file from the working directory</span>

In [None]:
# locally upload a geometry mapping file
# (the widget is the cell's last expression, so the notebook renders it)
file_input

In [None]:
# load the locally uploaded mapping file into geo_df, choosing the reader by file extension
if file_input.filename is not None:
    # splitext takes the text after the LAST dot, so a name such as
    # "my.regions.csv" resolves to "csv" and a name without any dot yields ""
    # instead of raising; lower() accepts ".CSV" as well
    extension = os.path.splitext(file_input.filename)[1].lstrip('.').lower()
    payload = file_input.value  # raw bytes of the uploaded file
    if extension == 'csv':
        geo_df = pd.read_csv(io.StringIO(payload.decode('utf-8')))
    elif extension == 'xlsx':
        geo_df = pd.read_excel(io.BytesIO(payload))
    elif extension == 'json':
        # parse the decoded text directly; uploaded content is never passed to eval()
        geo_df = pd.read_json(io.StringIO(payload.decode('utf-8')))
    elif extension == 'zip':
        # zipped dataset read through fiona, keeping the file's CRS
        with fiona.BytesCollection(payload) as file:
            geo_df = gpd.GeoDataFrame.from_features(file, crs=file.crs)
    else:
        # any other accepted format (e.g. .geojson) is handed to geopandas
        geo_df = gpd.read_file(io.BytesIO(payload))

<span style="color:red">Skip the next cell if mapping file has been uploaded locally</span>

In [None]:
# select geometry mapping file from working directory
# (renders the FileChooser created earlier; the chosen path is read from fc.selected in the next cell)
display(fc)

In [None]:
# load the mapping file picked in the file chooser into geo_df, choosing the reader by extension
if fc.selected is not None:
    # splitext reads the extension from the end of the path, so dots elsewhere
    # in the path (e.g. "../data/map.csv") no longer confuse the lookup
    file_extension = os.path.splitext(fc.selected)[1].lstrip('.').lower()
    if file_extension == 'csv':
        geo_df = pd.read_csv(fc.selected)
    elif file_extension == 'xlsx':
        geo_df = pd.read_excel(fc.selected)
    elif file_extension == 'json':
        geo_df = pd.read_json(fc.selected)
    else:
        geo_df = gpd.read_file(fc.selected)

# remove the placeholder column before dropna(): it is entirely null, so when this
# cell is run a second time on the same frame every row would otherwise be dropped
geo_df = geo_df.drop(columns='None', errors='ignore')
geo_df = geo_df.dropna(axis=0) # drop NaNs if they exist in the mapping file
geo_df['None'] = None # placeholder column for no selection
geo_df.head()

In [None]:
# select relevant geometry column(s) from mapping file - (label col, merge col, geometry col)
def _column_selector(title):
    """Build a dropdown over all mapping-file columns, preset to the 'None' placeholder."""
    return pn.widgets.Select(name=title, value='None', options=geo_df.columns.to_list())


label_selector = _column_selector('Select Geometry Label Column')
merge_selector = _column_selector('Select Merging Column')
geo_selector = _column_selector('Select Geometry Column')
pn.Column(label_selector, merge_selector, geo_selector)

In [None]:
# capture the three column choices made in the selectors above
merge_col = merge_selector.value
geometry_col = geo_selector.value
cols = [label_selector.value, merge_col, geometry_col]

# echo the choices so the user can confirm them before continuing
print('\n'.join([
    'Selected Geometry Label Column: ' + label_selector.value,
    'Selected Merging Column: ' + merge_col,
    'Selected Geometry Column: ' + geometry_col,
]))

In [None]:
# every row of the mapping file must describe a distinct spatial entity
distinct_entities = geo_df[merge_col].unique()
unique_entries = len(distinct_entities)
total_entries = geo_df.shape[0]
if unique_entries != total_entries:
    raise Exception(
        'Non-unique spatial entities found in mapping file. Clean or engineer '
        'merging column to contain a unique spatial entity per row in the mapping file.'
    )

<h2><span style="color:red">4a. Optional: Clean spatial grouping variable and merging column</span></h2>


Replace this code block with your own cleaning functions prior to map creation. Skip this cell if the spatial grouping variable is cleaned in the original survey dataframe and the merging column is cleaned in the mapping file.

In [None]:
# sample cleaning functions
def clean_zip(data):
    """
    Cleans zip codes separated by '-' and returns as float

    Numeric input (int, float, numpy scalar) is converted with float().
    Text such as '92093-0505' is cut at the first '-' before conversion.
    Raises ValueError when the remaining text is not a number.
    """
    if not isinstance(data, str):
        # already numeric: comparing type(data) to float let ints and numpy
        # scalars fall through to the string handling, where they raised TypeError
        return float(data)
    # split() returns the whole text when there is no '-', so one path covers both cases
    return float(data.split('-')[0])

def clean_spaces(data):
    """
    Removes unnecessary whitespace from data

    Runs of whitespace (spaces, tabs, newlines) collapse to a single space and
    leading/trailing whitespace is removed.
    """
    # collapse first, then trim: stripping only ' ' beforehand left a stray
    # space when the text started or ended with a tab or newline
    collapsed = re.sub(r"\s+", " ", data)
    return collapsed.strip(" ")

# Ex. cleaning spatial grouping variable and merging column

# df[grouping_var] = df[grouping_var].apply(clean_zip)

# df[grouping_var] = df[grouping_var].apply(clean_spaces)
# geo_df[merge_col] = geo_df[merge_col].apply(clean_spaces)

# Ex. engineering merging column to contain all unique entries

# add col to ukraine mapping file
# geo_df[merge_col] = geo_df[merge_col] + ' -' + geo_df[cols[0]]
# geo_df.head()

## 5. Generate aggregate map CSV file

In [None]:
# generate aggregate data by spatial grouping from variable mapping 
def map_numerical(df, column, group_col=None):
    """
    Returns a dataframe of a numeric variable mean aggregated by spatial grouping

    df: survey dataframe
    column: name of the numeric variable, optionally followed by a '#tag'
    group_col: column to group by; when omitted, the notebook-level
               grouping_var is used

    The result has one row per group. The mean column is named
    '<name>_mean#<tag>', or '<name>_mean' when the variable carries no '#tag'.
    Missing means are filled with 0 and values are rounded to 3 decimals.
    """
    if group_col is None:
        group_col = grouping_var
    out = df.groupby(group_col)[column].mean().to_frame()
    name_parts = column.split('#')
    mean_name = name_parts[0] + '_mean'
    if len(name_parts) > 1:
        # keep the variable's first tag; untagged names used to raise IndexError here
        mean_name += '#' + name_parts[1]
    out.columns = [mean_name]
    out = out.reset_index().fillna(0)
    out = out.rename_axis(None, axis=1)
    return out.round(3)

def map_categorical(df, column, group_col=None):
    """
    Returns a dataframe of a categorical variable's category proportions aggregated by spatial grouping

    df: survey dataframe
    column: name of the categorical variable, optionally followed by a '#tag'
    group_col: column to group by; when omitted, the notebook-level
               grouping_var is used

    The result has one row per group and one column per category, named
    '<name>_<category>#number', holding the share of the group's answers that
    fall in that category. Categories absent from a group get 0 and values are
    rounded to 3 decimals.
    """
    if group_col is None:
        group_col = grouping_var
    out = df.groupby(group_col)[column].value_counts(normalize=True).unstack()
    base_name = column.split('#')[0]
    # str() so integer- or boolean-coded categories do not break the concatenation
    out.columns = [base_name + '_' + str(category) + '#number' for category in out.columns]
    out = out.reset_index().fillna(0)
    out = out.rename_axis(None, axis=1)
    return out.round(3)

# aggregate the selected variable with the mapper that matches the chosen SuAVE type
try:
    if suave_tag == 'numerical':
        suave_out = map_numerical(df, selected) # mapping for numerical variables
    else:
        suave_out = map_categorical(df, selected) # mapping for categorical (radio/checkbox) variables
except Exception:
    # Exception rather than a bare except, so a kernel interrupt is not
    # reported as a wrong variable type; the original error is still re-raised
    print("Incorrect SuAVE variable type selected. Restart the kernel, " +
          "reselect the SuAVE variable type, and rerun all cells.")
    raise
# prevent displaying aggregations with no data; the grouping column is left
# untouched so a spatial key equal to 0 can still be merged with the mapping file
value_cols = [col for col in suave_out.columns if col != grouping_var]
suave_out = suave_out.replace({col: 0 for col in value_cols}, '')
suave_out.head()

In [None]:
# add geometry data to output
# keep each selected column once (the same column can be picked in more than one
# selector) and work on a copy, so the assignments below do not write into a
# slice of the full mapping frame
geo_df = geo_df[list(dict.fromkeys(cols))].copy()
geo_df[merge_col] = geo_df[merge_col].astype(str)

# set merge column data type to numeric if it is a numeric variable -- but only when
# the survey key is numeric as well: merging a text key against floats raises in pandas
all_numeric = geo_df[merge_col].str.isnumeric().all()
survey_key_is_numeric = pd.api.types.is_numeric_dtype(suave_out[grouping_var])
if all_numeric and survey_key_is_numeric:
    geo_df[merge_col] = geo_df[merge_col].astype(float)

# merge geometry data to input dataframe
suave_geo = suave_out.merge(geo_df, left_on=grouping_var, right_on=merge_col)
if grouping_var != merge_col:
    # when both keys share one name the merge keeps a single key column,
    # and the following cell still reads suave_geo[merge_col]
    suave_geo = suave_geo.drop(columns=grouping_var)
suave_geo = suave_geo.rename(columns={geometry_col: 'geometry#hiddenmore'})

In [None]:
# compare the spatial keys on both sides of the merge before showing the result
survey_keys = set(suave_out[grouping_var])
shared_entries = len(survey_keys & set(geo_df[merge_col]))
different_entries = survey_keys - set(suave_geo[merge_col])

# stop when nothing could be merged at all
nothing_merged = shared_entries == 0 and suave_geo.shape[0] == 0
if nothing_merged:
    raise Exception(
        'Empty dataframe. No shared entries found to merge survey and mapping file. '
        'Restart the kernel and ensure both the merging column in the mapping file '
        'and the spatial grouping variable in the original survey are cleaned and have '
        'shared entries to merge.'
    )

# otherwise warn about survey entities that found no geometry
if different_entries:
    printmd(
        "<b><span style='color:red'>USER WARNING: The following spatial entities could not be "
        "matched " + str(different_entries) + ". Ensure these entries are found in the merging "
        "column in the geometry mapping file.</span></b>"
    )

suave_geo.head()

## 6. Save the new version of CSV file, and give a name to new survey

In [None]:
# hand the merged dataframe, the CSV directory and the original CSV name to the
# suave_integration helper; its result is passed on to create_survey in the last cell
# NOTE(review): presumably new_file is the name/path of the written CSV -- confirm in suave_integration
new_file = suaveint.save_csv_file(suave_geo, absolutePath, csv_file)

In [None]:
# Input survey name

from IPython.display import display

# box the user types into, and a second box that mirrors the submitted name
input_text = widgets.Text(placeholder='Enter Survey Name...')
output_text = widgets.Text()


def bind_input_to_output(sender):
    """Copy the text currently in the input box into the output box."""
    submitted_name = input_text.value
    output_text.value = submitted_name


# run bind_input_to_output() when the input box is submitted
input_text.on_submit(bind_input_to_output)

printmd("<b><span style='color:red'>Input survey name here, press Enter, and then run the next cell:</span></b>")

# show the input box first, then the box holding the submitted name
display(input_text, output_text)

In [None]:
# read back the submitted survey name and echo it
survey_name = output_text.value
survey_name_label = "<b><span style='color:red'>Survey Name is: </span></b>"
printmd(survey_name_label + survey_name)

## 7. Generate the survey and create survey URL

In [None]:
# create the survey through the suave_integration helper, passing the saved CSV,
# the chosen survey name and the parameters captured from the notebook URL
suaveint.create_survey(survey_url,new_file, survey_name, dzc_file, user, csv_file, view, views)