# Converting a #multi variable into a series of binary variables

Each binary variable will be named after one of the values found in the original #multi variable.
The values of the generated binary variables will be NaN (for not selected) and "Selected" or similar.


In [None]:
import numpy as np
import pandas as pd
import panel as pn

# initialise Panel's notebook extension (presumably required for the widgets below to render inline)
pn.extension()

In [None]:
# Read the survey data into the DataFrame `df`.
# 'test_2multi.csv' is a placeholder path: replace it with the survey file retrieved from SuAVE.

df = pd.read_csv('test_2multi.csv')

In [None]:
# preview the first rows of the loaded data
df.head()

In [None]:
# this function will create a set of binary variables given one #multi variable

def binarize(df, column, true_value=1, false_value=0, separator='|'):
    """
    return a new dataframe with additional columns that binarize the values of this column
    parameters:
        * df: original dataframe (left unmodified)
        * column: name of column that has multiple values per cell
        * true_value: what value dummy variables will take if present, e.g. 1 or 'Yes'
        * false_value: what value dummy variables will take if absent, e.g. 0 or 'No' or np.nan
        * separator: how the values are separated in the original column, e.g. '|' or ','
    returns merged dataframe: the columns of df followed by one new column per
    distinct value found in `column`, with one row per row of df and df's index.
    Each new column is named after the value it represents; if such a name
    collides with an existing column, pd.merge disambiguates the pair with its
    default '_x' / '_y' suffixes.
    """
    # One 0/1 indicator column per distinct value; a missing cell yields zeros everywhere.
    dummies = df[column].str.get_dummies(sep=separator)
    for col in dummies.columns:
        dummies[col] = dummies[col].map({1: true_value, 0: false_value})
        print(f"Creating binary variable : '{col}' from multiple-response variable : '{column}'")

    # `dummies` is row-for-row parallel to `df`, so pair the rows by position.
    # Joining on the index labels themselves would multiply rows whenever `df`
    # carries duplicate labels (e.g. after filtering or concatenating frames).
    merged = pd.merge(
        df.reset_index(drop=True),
        dummies.reset_index(drop=True),
        left_index=True,
        right_index=True,
        how='inner',
    )
    # Hand the caller back the original row labels.
    merged.index = df.index
    return merged

# this function calls binarize for all defined #multi variables

def binarize_list(df, multi_columns, true_value=1, false_value=0, separator='|'):
    """
    Apply binarize to every column named in multi_columns, in order.

    Each call works on the result of the previous one, so the returned
    dataframe accumulates the binary columns of all listed variables.
    When multi_columns is empty, df itself is returned.
    """
    options = dict(true_value=true_value, false_value=false_value, separator=separator)
    result = df
    for name in multi_columns:
        result = binarize(result, name, **options)
    return result

# this function deletes the #multi variables after processing

def delete_multies(df, list_columns):
    """
    Return df without the columns named in list_columns.

    Columns are dropped one at a time, in the given order; df itself is
    not modified. An empty list_columns returns df unchanged.
    """
    remaining = df
    for name in list_columns:
        remaining = remaining.drop(columns=name)
    return remaining


In [None]:
# Input: the #multi variables to process, listed by hand.
# Note: running the selector cell further down resets multi_cols to an empty
# list and fills it from the widget instead.

multi_cols = [
    'Role#multi',
    'Participated in apps#multi',
    'Resources_created#multi',
]


In [None]:
# Selector offering every column of df whose name contains '#multi'
left_text = pn.Row("####Select #multi Variables to Process", margin=(0,0,-15,210))
multi_options = [name for name in df.columns if '#multi' in name]
multi_select = pn.widgets.CrossSelector(height=130, options=multi_options)

# Nothing is selected until the button below is clicked
multi_cols = []


def save_multi(click):
    """Copy the selector's current value into the module-level multi_cols."""
    global multi_cols
    multi_cols = multi_select.value


# Button that confirms the selection; each change of its click count calls save_multi
process_button = pn.widgets.Button(name='End Selection', width=200, margin=(10,0,10,240))
process_button.param.watch(save_multi, ['clicks'])

# Stack heading, selector and button, then show them as the cell output
full_display = pn.Column(left_text, multi_select, process_button, css_classes=['widget-box'])
full_display

In [None]:
# Build df_new: df plus one binary column per value found in the selected
# #multi columns ('Selected' where chosen, NaN otherwise).

df_new = binarize_list(
    df,
    multi_cols,
    true_value='Selected',
    false_value=np.nan,
    separator='|',
)


In [None]:
# optional: drop the original #multi columns now that they have been expanded
df_new = delete_multies(df_new, list_columns=multi_cols)

In [None]:
# preview the first rows of the processed data
df_new.head()

In [None]:
def slider(df):
    """
    slider creates an interactive display of a
    data frame.

    Two integer sliders choose the first row and the first column of the
    window that is shown; the window spans up to 5 rows and 10 columns.

    :param df: data frame
    :returns: panel Column holding the two sliders and the data frame view
    """

    # Row Selector widget; its last position is the index of the last row
    row_selection = pn.widgets.IntSlider(name='Navigate Rows', width=350,
                                         margin=(0,50,-15,0), end=len(df)-1)

    # Column Selector widget; ends at the last column index (as the row slider
    # does for rows) so the final position still shows a column rather than
    # an empty frame
    col_selection = pn.widgets.IntSlider(name='Navigate Columns', width=350,
                                         margin=(0,0,5,0), end=len(df.columns)-1)

    @pn.depends(row_selection.param.value, col_selection.param.value)
    def navigate_data(row=0, col=0):
        # positional slices past the frame's edge are simply cut short
        return df.iloc[row:row+5, col:col+10]

    sliders = pn.Row(row_selection, col_selection, margin=(0,0,0,10))
    full_widget = pn.Column(sliders, navigate_data)
    return full_widget

# browse the processed data interactively
slider(df_new)

In [None]:
# save as new file (eventually, upload to SuAVE)
# index=False keeps the row index out of the written file
df_new.to_csv('test_2binary.csv', index=False)