<h1><span style="color:red">Converting a #multi variable into a series of binary variables</span></h1>

Using this notebook, you can select a multiple-response (#multi) variable and convert it into a series of binary variables with the values "Selected" or empty, optionally dropping the original #multi variable. You can either process the survey file received from the current SuAVE application or import a local CSV file. The notebook then lets you create a new SuAVE survey from the updated survey file.

Each binary variable is named after the corresponding value in the original #multi variable.


## 1. Retrieve survey parameters from the URL

In [None]:
%%javascript
// Return the value of query-string parameter `key` from the current page URL,
// or "" when the parameter is absent. The key is matched case-insensitively
// (the "i" flag) after its regex metacharacters . + * are backslash-escaped;
// the captured value runs up to the next "&" and is decoded with unescape().
function getQueryStringValue (key)
{  
    return unescape(window.location.search.replace(new RegExp("^(?:.*[&\\?]" + escape(key).replace(/[\.\+\*]/g, "\\$&") + "(?:\\=([^&]*))?)?.*$", "i"), "$1"));
}

// Assign `value` (coerced to a string) to the Python variable `name` in the
// notebook kernel. JSON.stringify emits a double-quoted literal with quotes,
// backslashes and control characters escaped, so text taken from the URL
// cannot terminate the string early or inject extra Python statements.
function setKernelVariable (name, value)
{
    IPython.notebook.kernel.execute(name + "=" + JSON.stringify(String(value)));
}

setKernelVariable("survey_url", getQueryStringValue("surveyurl"));
setKernelVariable("views", getQueryStringValue("views"));
setKernelVariable("view", getQueryStringValue("view"));
setKernelVariable("user", getQueryStringValue("user"));
setKernelVariable("csv_file", getQueryStringValue("csv"));
setKernelVariable("dzc_file", getQueryStringValue("dzc"));
setKernelVariable("params", getQueryStringValue("params"));
setKernelVariable("active_object", getQueryStringValue("activeobject"));
// window.location is an object; setKernelVariable stringifies it to the full URL.
setKernelVariable("full_notebook_url", window.location);

## 2. Import libraries

In [None]:
# common imports
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import Markdown, display

import pandas as pd
pd.set_option('display.max_colwidth', 0)
    
import numpy as np
import panel as pn

pn.extension()
def printmd(string):
    """Display ``string`` in the notebook output, rendered as Markdown."""
    rendered = Markdown(string)
    display(rendered)

absolutePath = "../../temp_csvs/"

# local imports
import sys
sys.path.insert(1, '../../helpers')
import panel_libs as panellibs
import suave_integration as suaveint

# Split the notebook URL at the first '/operations'. str.partition returns
# (head, separator, tail), so element 0 is everything before '/operations'
# (or the whole URL when that marker does not occur).
url_partitioned = full_notebook_url.partition('/operations')
base_url = url_partitioned[0];

In [None]:
# this function will create a set of binary variables given one #multi variable

def binarize(df, column, true_value=1, false_value=0, separator='|'):
    """
    Return a new dataframe: ``df`` plus one indicator column per distinct
    value found in the multiple-response column ``column``.

    parameters:
        * df: original dataframe (not modified)
        * column: name of column that has multiple values per cell
        * true_value: what value dummy variables will take if present, e.g. 1 or 'Yes'
        * false_value: what value dummy variables will take if absent, e.g. 0 or 'No' or np.nan
        * separator: how the values are separated in the original column, e.g. '|' or ','
    returns merged dataframe with the new columns appended; it has exactly the
    same rows and index as ``df``. One progress line is printed per new column.
    """
    dummies = df[column].str.get_dummies(sep=separator)
    for col in dummies.columns:
        # get_dummies yields 1/0 indicators; translate them to the requested labels.
        dummies[col] = dummies[col].map({1: true_value, 0: false_value})
        print("Creating binary variable : '"+ col + "' from multiple-response variable : '" + column +"'")

    # Join by row position, not by index label: merging on labels multiplies
    # rows whenever df carries duplicate index labels. Both frames have the
    # same row order, so a positional join is exact; the original index is
    # restored afterwards.
    merged = pd.merge(
        df.reset_index(drop=True),
        dummies.reset_index(drop=True),
        left_index=True,
        right_index=True,
        how='inner',
    )
    merged.index = df.index
    return merged

# this function calls binarize for all defined #multi variables

def binarize_list(df, multi_columns, true_value=1, false_value=0, separator='|'):
    """
    Apply ``binarize`` once per column named in ``multi_columns``, feeding the
    result of each call into the next, and return the final dataframe.
    With no columns to process, ``df`` itself is returned.
    """
    options = dict(true_value=true_value, false_value=false_value, separator=separator)
    result = df
    for multi_col in multi_columns:
        result = binarize(result, multi_col, **options)
    return result

# this function deletes the #multi variables after processing

def delete_multies(df, list_columns):
    """
    Return ``df`` without the columns named in ``list_columns``.

    Columns are dropped one at a time; the input dataframe is left untouched,
    and when ``list_columns`` is empty the same object is returned.
    """
    trimmed = df
    for name in list_columns:
        trimmed = trimmed.drop(columns=name)
    return trimmed


## 3. Select a survey file from SuAVE or import a local CSV file

In [None]:
# Radio buttons for choosing the data source. The two option strings are
# compared verbatim later in the notebook (check_selection and step 6),
# so they must stay in sync with those comparisons.
data_select = pn.widgets.RadioBoxGroup(name='Select notebook', options=['Load survey file from SuAVE', 
                                                                        'Import a local CSV file'], 
                                       inline=False)
# Last expression of the cell: renders the widget.
data_select

In [None]:
# Upload widget; it is shown only when a local CSV file is to be imported.
data_input = pn.widgets.FileInput()


def check_selection():
    """
    Act on the data-source choice made in ``data_select``.

    For the SuAVE option, set the global ``fname`` to the survey CSV path and
    print a notice (returns None). For any other choice, return a panel
    holding instructions and the upload widget.
    """
    global fname
    if data_select.value != 'Load survey file from SuAVE':
        message = pn.pane.HTML("<b><span style='color:red; font-size: 200%;'>Upload data and continue to step 4.</span><br><span style='font-size: 150%;'>IMPORTANT: The local CSV file should not have SuAVE-specific variable names!</span></b>", width=700)
        return pn.Column(message, data_input)

    fname = absolutePath + csv_file
    printmd("<b><span style='color:red; font-size: 200%;'>Current SuAVE survey will be loaded. Continue to step 4.</span></b>")


check_selection()

## 4. Visualize the data and select #multi variables to process

In [None]:
# If the upload widget has a filename, store the uploaded file under the
# temporary CSV directory and use it as the input file. Otherwise fname keeps
# the value assigned in check_selection().
if not pd.isnull(data_input.filename):
    fname = absolutePath + data_input.filename
    data_input.save(fname)

# Load the input file via the project helper
# (presumably returns a pandas DataFrame; verify in panel_libs).
df = panellibs.extract_data(fname)

# Preview the data (helper defined in panel_libs).
panellibs.slider(df)

In [None]:
# Multi column selector
# Heading shown above the selector
left_text = pn.Row("####Select #multi Variables to Process", margin=(0,0,-15,210))

# Offer only the columns whose name contains the '#multi' tag
multi_options = [col for col in df.columns if '#multi' in col]
multi_select = pn.widgets.CrossSelector(options=multi_options, height=130)

# Filled in by save_multi() when the button is clicked
multi_cols = []


def save_multi(click):
    """Button callback: copy the current selector choice into ``multi_cols``."""
    global multi_cols
    multi_cols = multi_select.value


# Button that confirms the selection
process_button = pn.widgets.Button(
    name='End Selection',
    button_type='primary',
    width=200,
    margin=(80, 0, 10, 240),
)
process_button.param.watch(save_multi, ['clicks'])

# Assemble and show the widgets
full_display = pn.Column(
    left_text,
    pn.Row(multi_select),
    pn.Row(process_button, width=200),
    css_classes=['widget-box'],
).servable()
full_display

## 5. Generate binary variables from #multi, and optionally remove the #multi

In [None]:
# run to create a new df with added binary variables

# Add one binary column per value of every selected #multi column:
# 'Selected' where the value is present, NaN where it is absent.
# '|' is the separator between values inside a #multi cell.
df_new = binarize_list(df, 
         multi_cols,
         true_value='Selected',
         false_value=np.nan,
         separator='|')

In [None]:
# delete the original #multi vars if needed
# Drop the processed #multi columns, then replace every missing value
# (including the NaN "not selected" markers) with an empty string.
# df now holds the processed data used in the following steps.
df_new = delete_multies(df_new,multi_cols)
df = df_new.copy().fillna('')

In [None]:
# Preview the processed dataframe (helper defined in panel_libs).
panellibs.slider(df)

## 6. Generate a new survey and open it in SuAVE

In [None]:
# For a locally imported CSV, name the output after the uploaded file and
# blank out dzc_file, so the values taken from the SuAVE URL are not reused.
if data_select.value == 'Import a local CSV file':
    csv_file = data_input.filename
    dzc_file = ''

# Save the processed dataframe. `df` is the frame produced in step 5; the
# previously referenced `updated_df` is never assigned anywhere in this
# notebook, so the call raised NameError.
new_file = suaveint.save_csv_file(df, absolutePath, csv_file)

In [None]:
#Input survey name

import ipywidgets as widgets
from IPython.display import display

# Text box the user types the survey name into, and a second box that
# mirrors the submitted value (read back in the next cell).
input_text = widgets.Text(placeholder='Enter Survey Name...')
output_text = widgets.Text()

def bind_input_to_output(sender):
    """Submit callback: copy the typed survey name into ``output_text``."""
    output_text.value = input_text.value

# Tell the text input widget to call bind_input_to_output() on submit
# NOTE(review): on_submit is reported as deprecated in ipywidgets 8 - confirm
# against the installed version.
input_text.on_submit(bind_input_to_output)

printmd("<b><span style='color:red'>Input survey name here, press Enter, and then run the next cell:</span></b>")
# Display input text box widget for input
display(input_text)

# Display the mirrored value so the user can see what was submitted
display(output_text)

In [None]:
#Print survey name
# Read the submitted name from the mirror widget; this is an empty string
# if Enter was never pressed in the input box.
survey_name = output_text.value
printmd("<b><span style='color:red'>Survey Name is: </span></b>" + survey_name)

In [None]:
# Hand the saved CSV, the survey name and the parameters gathered in steps 1
# and 6 to the SuAVE integration helper
# (presumably creates the survey and links to it; verify in suave_integration).
suaveint.create_survey(survey_url,new_file, survey_name, dzc_file, user, csv_file, view, views, data_select.value)