<h1><span style="color:red">Converting a series of binary variables to a single #multi variable</span></h1>

Using this notebook, you can define groups of binary variables and combine each group into a single multiple-response (#multi) variable, optionally dropping the original binary variables. You can either process the survey file received from the current SuAVE application or import a local CSV file. The notebook then lets you create a new SuAVE survey from the updated survey file.



## 1. Retrieve survey parameters from the URL

In [1]:
%%javascript
/**
 * Return the decoded value of the query-string parameter `key` from the page URL.
 *
 * The lookup is case-insensitive. If the parameter occurs more than once, the last
 * occurrence wins. Returns "" when the parameter is absent or has no value.
 */
function getQueryStringValue (key)
{
    // Encode the key the same way the query string is encoded, then backslash-escape
    // '.', '+' and '*' so they are matched literally inside the regular expression.
    var keyPattern = escape(key).replace(/[\.\+\*]/g, "\\$&");
    // The key must be followed by "=value", by "&", or by the end of the string.
    // Without that boundary, a request for "view" could latch onto "views=..." and
    // return "" even though "view=..." is present elsewhere in the query string.
    var matcher = new RegExp("^(?:.*[&\\?]" + keyPattern + "(?:\\=([^&]*)|(?=&|$)))?.*$", "i");
    return unescape(window.location.search.replace(matcher, "$1"));
}
// Copy the launch parameters from the notebook URL into Python string variables in the kernel.
// The values come from the URL and are therefore untrusted: each one is serialized with
// JSON.stringify, which produces a double-quoted literal with quotes, backslashes and control
// characters escaped, so a parameter cannot terminate the Python string and inject code.
function setKernelString (name, value)
{
    IPython.notebook.kernel.execute(name + "=" + JSON.stringify(String(value)));
}
setKernelString("survey_url", getQueryStringValue("surveyurl"));
setKernelString("views", getQueryStringValue("views"));
setKernelString("view", getQueryStringValue("view"));
setKernelString("user", getQueryStringValue("user"));
setKernelString("csv_file", getQueryStringValue("csv"));
setKernelString("dzc_file", getQueryStringValue("dzc"));
setKernelString("params", getQueryStringValue("params"));
setKernelString("active_object", getQueryStringValue("activeobject"));
// window.location is an object; String() yields the full URL text.
setKernelString("full_notebook_url", window.location);

<IPython.core.display.Javascript object>

## 2. Import libraries

In [None]:
# common imports
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import Markdown, display

import pandas as pd
pd.set_option('display.max_colwidth', 0)
    
import numpy as np
import panel as pn

pn.extension()
def printmd(string):
    """Render ``string`` as Markdown in the output area of the current cell."""
    rendered = Markdown(string)
    display(rendered)

# Directory prefix for the survey CSV files handled by this notebook. It is joined to file
# names by plain string concatenation, so the trailing slash is required.
absolutePath = "../../temp_csvs/"

# local imports
import sys
sys.path.insert(1, '../../helpers')
import panel_libs as panellibs
import suave_integration as suaveint

# Split the notebook URL at the first '/operations'; the part before it is the base URL.
url_partitioned = full_notebook_url.partition('/operations')
base_url = url_partitioned[0]

In [None]:
# this function organizes all values in a row into a #multi variable
# the options are: 
# a) the binary variables have specific non-empty conditions specified in condition_values
# b) the binary variables have a non-empty value that needs to be included; the other value (such as "not selected") is empty (NaN)

def summarize_row(row, condition_values=[True], any_not_nan=False, separator='|'):
    """
    Collapse one row of binary variables into a single #multi string.

    parameters:
        * row: pandas Series whose index holds the column names and whose values are the
          cell values of one record
        * condition_values: values that mark a column as "selected" (used when
          any_not_nan is False)
        * any_not_nan: if True, ignore condition_values and treat every non-null cell
          as "selected"
        * separator: string placed between the selected column names
    returns the names of the selected columns joined with [separator]; an empty string
    if nothing is selected
    """
    # Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
    # Column labels are passed through str() so that non-string labels can be joined.
    if any_not_nan:
        matching_columns = [str(col_name) for col_name, val in row.items() if not pd.isnull(val)]
    else:
        matching_columns = [str(col_name) for col_name, val in row.items() if val in condition_values]
    return separator.join(matching_columns)


# this function calls summarize_row for a selected set of binary variables

def unbinarize(df, columns, condition_values=[True], any_not_nan=False, separator='|'):
    """
    Build one #multi value per row from a group of binary columns.
    parameters:
        * df: dataframe
        * columns: the binary columns to combine
        * condition_values: the values these binary columns take when they are selected,
          e.g. True or 1
        * any_not_nan: if True, any non-null value counts as selected
        * separator: the separator to use in the resulting list column
    returns the result of applying summarize_row to every row of df[columns]; df itself
    is not modified
    """
    def _collapse(row):
        return summarize_row(row, condition_values, any_not_nan, separator)

    selected = df[columns]
    return selected.apply(_collapse, axis=1)

# this function calls unbinarize for all defined mappings

def unbinarize_mapping(df, map_columns, condition_values=[True], any_not_nan=False, separator='|'):
    """
    Add one #multi column per entry of map_columns.

    parameters:
        * df: dataframe holding the binary columns
        * map_columns: dict {new #multi column name: list of binary column names}
        * condition_values, any_not_nan, separator: passed unchanged to unbinarize;
          the same settings are applied to every mapping
    returns a new dataframe with the #multi columns added; the dataframe passed in is
    left untouched, so the call can be repeated on the same input
    """
    # Work on a copy: assigning into the caller's frame would silently change `df`
    # for every other cell that uses it.
    result = df.copy()
    for new_col, dummy_cols in map_columns.items():
        # Read from `result` so a mapping may refer to a column created by an earlier one.
        result[new_col] = unbinarize(
            result, dummy_cols, condition_values=condition_values, any_not_nan=any_not_nan, separator=separator
        )
    return result

# this function deletes variables that have been integrated into #multi

def delete_dummies(df, map_columns):
    """
    Drop the binary columns that have been folded into #multi columns.

    parameters:
        * df: dataframe
        * map_columns: dict {new #multi column name: list of binary column names}
    returns a new dataframe without the listed binary columns
    raises KeyError if a listed column is not present in df
    """
    # Collect every column once, keeping first-seen order. Dropping group by group
    # raised KeyError when the same binary column was used in two mappings.
    columns_to_drop = list(dict.fromkeys(
        col for dummy_cols in map_columns.values() for col in dummy_cols
    ))
    return df.drop(columns=columns_to_drop)

## 3. Select a survey file from SuAVE or import a local CSV file

In [None]:
# Radio buttons for choosing the data source; the widget is displayed as the cell output.
data_select = pn.widgets.RadioBoxGroup(
    name='Select notebook',
    options=[
        'Load survey file from SuAVE',
        'Import a local CSV file',
    ],
    inline=False,
)
data_select

In [None]:
data_input = pn.widgets.FileInput()


def check_selection():
    """
    React to the data source chosen in `data_select`.

    For the SuAVE option, set the global `fname` to the survey CSV path, print a notice
    and return None. Otherwise return a panel column holding upload instructions and the
    file-input widget.
    """
    global fname

    if data_select.value != 'Load survey file from SuAVE':
        message = pn.pane.HTML(
            "<b><span style='color:red; font-size: 200%;'>Upload data and continue to step 4.</span><br>"
            "<span style='font-size: 150%;'>IMPORTANT: The local CSV file should not have SuAVE-specific variable names!</span></b>",
            width=700,
        )
        return pn.Column(message, data_input)

    fname = absolutePath + csv_file
    printmd("<b><span style='color:red; font-size: 200%;'>Current SuAVE survey will be loaded. Continue to step 4.</span></b>")


check_selection()

## 4. Visualize the data and select binary variables

In [None]:
# If a file was uploaded through the file-input widget, store it next to the other
# survey files and load that one; otherwise keep the `fname` that is already set.
if pd.notnull(data_input.filename):
    fname = absolutePath + data_input.filename
    data_input.save(fname)

df = panellibs.extract_data(fname)

panellibs.slider(df)

In [None]:
# Left panel: cross-selector listing every column of df, for picking the binary variables
left_text = pn.Row("####Select Binary Variables", margin=(0,0,-15,270))
binary_selector = pn.widgets.CrossSelector(options=list(df.columns), width=630)
left_panel = pn.Column(left_text, binary_selector, css_classes=['widget-box'], margin=(0,30,0,0))

# Right panel: drop-down for the "selected" condition; its options are filled in by remap()
condition_text = pn.Row("####Select Condition", margin=(0,0,-20, 40))
condition = pn.widgets.Select(width=200, margin=(11,10,10,10))
condition_select = pn.Column(condition_text, condition, margin=(5,0,0,0))

# Right panel: text box for the name of the new variable ('#multi' is appended by remap())
multi_text = pn.Row("####Specify #multi Name", margin=(0,0,-20, 25))
multi_name = pn.widgets.TextInput(placeholder='Enter Name', width=200, margin=(11,10,10,10))
name_input = pn.Column(multi_text, multi_name, margin=(-5,0,0,0))
right_panel = pn.Column(condition_select, name_input, css_classes=['widget-box'], width=225, height=150)

# Toggle used as a push button: remap() resets it to False on every call
apply_button = pn.widgets.Toggle(name='Create Mapping', button_type='primary',  margin=(30,0,10,10), width=200)

# Remappings display: Markdown pane that remap() extends with one bullet per mapping
remap_text = pn.pane.Markdown('#### Remappings ', width=950)

# State filled in by remap(): {new '#multi' column name: list of binary columns}
remappings = {}
# One entry per created mapping: the condition selected when that mapping was created
condition_values = []
@pn.depends(binary_selector.param.value, apply_button.param.value)
def remap(b_var, apply):
    """
    Callback for the variable selector and the 'Create Mapping' toggle.

    Refreshes the condition drop-down from the selected columns and, when the toggle was
    pressed and a name is entered, records the mapping in `remappings`, stores the chosen
    condition in `condition_values`, clears the inputs and extends the summary pane.
    Always returns the summary pane.
    """
    # The toggle acts as a push button, so release it straight away.
    apply_button.value = False

    if b_var == []:
        return remap_text

    # Offer 'Not Empty' plus every distinct non-null value found in the selected columns.
    distinct_values = pd.unique(df[b_var].values.ravel('K'))
    condition.options = ['Not Empty'] + [v for v in distinct_values if not pd.isnull(v)]

    if apply and multi_name.value != '':
        # Register the mapping and remember the condition chosen for it.
        new_multi = multi_name.value + '#multi'
        remappings[new_multi] = b_var
        condition_values.append(condition.value)

        # Clear the inputs so the next mapping starts from scratch.
        binary_selector.value = []
        multi_name.value = ''

        # Add a bullet describing the mapping to the summary pane.
        remap_text.object = remap_text.object + '\n' + '- **' + str(new_multi) + '**' + ' &rarr; ' + str(b_var)

    return remap_text

# Display widgets: selector on the left, condition/name inputs and button on the right,
# and the live summary produced by remap() underneath.
# The row is not called `widgets` because that name is the ipywidgets module alias
# imported at the top of the notebook.
selector_row = pn.Row(left_panel, pn.Column(right_panel, apply_button))
full_display = pn.Column(selector_row, remap)
full_display

## 5a. Generate #multi from the defined groups of binary variables, and drop the binary variables (if specific values are selected)

In [None]:
# Case a): a binary variable counts as selected when its value is one of condition_values.
# Inputs: the `remappings` dict and the `condition_values` list built in step 4.
df_new = (
    df
    .pipe(unbinarize_mapping, remappings, condition_values=condition_values, separator='|')
    .pipe(delete_dummies, remappings)
)

df_new.head(10)

In [None]:
# Show the 5a result with the project's slider helper (presumably an interactive preview).
panellibs.slider(df_new)

## 5b. Generate #multi from the defined groups of binary variables, and drop the binary variables (using any non-NaN values)

In [None]:
# Case b): a binary variable counts as selected whenever its value is not NaN.
# Inputs: the `remappings` dict built in step 4 and any_not_nan=True.
df_new2 = (
    df
    .pipe(unbinarize_mapping, remappings, any_not_nan=True, separator='|')
    .pipe(delete_dummies, remappings)
)

df_new2.head(10)

In [None]:
# Show the 5b result with the project's slider helper (presumably an interactive preview).
panellibs.slider(df_new2)

In [None]:
# Final dataframe to write back / upload to SuAVE: the 5b result (df_new2) with missing
# values replaced by empty strings. Use df_new here instead to keep the 5a result.
# It is stored as `updated_df`, the name the save step in section 6 reads, and the loaded
# `df` is left untouched so that steps 5a/5b can be re-run.
updated_df = df_new2.fillna('')


## 6. Generate a new survey and open it in SuAVE

In [None]:
# For a locally imported CSV, name the output after the uploaded file and clear dzc_file.
if data_select.value == 'Import a local CSV file':
    csv_file = data_input.filename
    dzc_file = ''
    
# NOTE(review): `updated_df` must already exist in the kernel when this cell runs.
# save_csv_file is a project helper; presumably it writes the frame under absolutePath
# and returns the new file's name/path - confirm in suave_integration.
new_file = suaveint.save_csv_file(updated_df, absolutePath, csv_file)

In [None]:
#Input survey name

import ipywidgets as widgets
from IPython.display import display

# input_text receives the typed name; output_text holds the submitted value, which the
# next cell reads as the survey name.
input_text = widgets.Text(placeholder='Enter Survey Name...')
output_text = widgets.Text()

def bind_input_to_output(sender):
    """Copy the current text of input_text into output_text; `sender` is not used."""
    output_text.value = input_text.value

# Tell the text input widget to call bind_input_to_output() on submit
input_text.on_submit(bind_input_to_output)

printmd("<b><span style='color:red'>Input survey name here, press Enter, and then run the next cell:</span></b>")
# Display input text box widget for input
display(input_text)

# Display the box that shows the submitted name
display(output_text)

In [None]:
# Read the submitted survey name from output_text and echo it.
# It is an empty string if Enter was not pressed in the input box above.
survey_name = output_text.value
printmd("<b><span style='color:red'>Survey Name is: </span></b>" + survey_name)

In [None]:
# Hand the saved file, the survey name and the launch parameters to the project's
# create_survey helper (presumably creates the survey in SuAVE - see suave_integration).
suaveint.create_survey(survey_url,new_file, survey_name, dzc_file, user, csv_file, view, views, data_select.value)