# SuAVE Simple Variable Calculations
### This sample notebook reads the numeric variables from a survey dataset, lets users compute a derived numeric variable, and adds it to a new version of the survey

In [182]:
from IPython.display import HTML

# Inject a script and a button into the notebook page. The script defines
# code_toggle(), which hides or shows every 'div.input' element and then flips
# the code_show flag. It is registered to run once when the document is ready,
# so the inputs start hidden; submitting the form calls code_toggle() again.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

### 1. Retrieve survey parameters from the URL

In [1]:
%%javascript
function getQueryStringValue (key)
{  
    return unescape(window.location.search.replace(new RegExp("^(?:.*[&\\?]" + escape(key).replace(/[\.\+\*]/g, "\\$&") + "(?:\\=([^&]*))?)?.*$", "i"), "$1"));
}
IPython.notebook.kernel.execute("survey_url='".concat(getQueryStringValue("surveyurl")).concat("'"));
IPython.notebook.kernel.execute("views='".concat(getQueryStringValue("views")).concat("'"));
IPython.notebook.kernel.execute("view='".concat(getQueryStringValue("view")).concat("'"));
IPython.notebook.kernel.execute("user='".concat(getQueryStringValue("user")).concat("'"));
IPython.notebook.kernel.execute("csv_file='".concat(getQueryStringValue("csv")).concat("'")); 
IPython.notebook.kernel.execute("dzc_file='".concat(getQueryStringValue("dzc")).concat("'")); 
IPython.notebook.kernel.execute("params='".concat(getQueryStringValue("params")).concat("'")); 
IPython.notebook.kernel.execute("active_object='".concat(getQueryStringValue("activeobject")).concat("'")); 
IPython.notebook.kernel.execute("full_notebook_url='" + window.location + "'"); 

<IPython.core.display.Javascript object>

### 2. Read the survey file and extract numeric variables

In [2]:
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import pandas as pd


absolutePath = "../../temp_csvs/"

# Read the survey CSV named by csv_file (set in the kernel by the JavaScript
# cell). The with-block closes the file handle as soon as pandas has parsed it,
# instead of leaving it open for the lifetime of the kernel.
csv_path = absolutePath + csv_file
print(csv_path)
with open(csv_path, encoding="latin-1") as csv_handle:
    df = pd.read_csv(csv_handle)

# create a list of variable names
variables_df = pd.DataFrame({'varname': df.columns})
print(variables_df.varname.values)

# create a dictionary of #number variables: the key is the column name up to
# its first '#', the value is the full column name
var_list = {n[:n.index('#')]: n for n in variables_df.varname.values if '#number' in n}
print(var_list)


../../temp_csvs/zaslavsk_Facebook_Ads_from_IRA__Russia.csv
['Ad Text#long' 'Ad ID#long' '#name' 'Ad Creation Date#date'
 'Ad End Date#date#hidden' 'Ad Impressions#number'
 'Impressions: Categories' 'Ad Clicks#number' 'Clicks: Categories'
 'Ad Landing Page#hidden' 'Posted by' 'Ad Spend, RUB#number'
 'Spend Rub, Categories' 'Ad Spend, USD#number' 'Target Interests#multi'
 'Target people who like#multi' 'Target Language#multi'
 'Target Location#multi' 'Target Age' 'Target Gender'
 'Target Multicultural Afinity#multi' 'Target Ad Platform#multi'
 'Target Custom Audience#multi' 'Ad Custom Includes#hidden'
 'Exclude: Multicultural Affinity#multi' 'Excluded Connections'
 'Exclude Location#multi' 'Ad Placement#multi' 'Reactions to Ad#number'
 'Reactions: Categories' 'Comments about Ad#number' 'Comments: Categories'
 'Shares of Ad#number' 'Shares: Categories' 'File name with Ad#hidden'
 'Year and month' '#img' '#href' 'Ad Image Status']
{'Reactions to Ad': 'Reactions to Ad#number', 'Ad Clicks': 

### 3. Define a new variable using the form ##

In [3]:
# Define a new Variable
# a and c choose the two #number variables (by abbreviated name); b chooses the
# operator. All four operators implemented by the compute cell are offered;
# '/' stays first, so the default selection is unchanged.
a = widgets.Dropdown(options=list(var_list))
b = widgets.Dropdown(options=['/', '*', '+', '-'])
c = widgets.Dropdown(options=list(var_list))
ui = widgets.VBox([a, b, c])

def f(a, b, c):
    """Return the current selection as a (left operand, operator, right operand) tuple."""
    return (a, b, c)

# Re-evaluate f and show its result whenever one of the dropdowns changes.
formula = widgets.interactive_output(f, {'a': a, 'b': b, 'c': c})

display(ui, formula)

print("Select variables and the operation, then run the next cell")

VBox(children=(Dropdown(options=('Reactions to Ad', 'Ad Clicks', 'Ad Spend, USD', 'Comments about Ad', 'Shares…

Output()

Select variables and the operation, then run the next cell


### 4. Edit variable name as needed

In [4]:
# Name the new variable: the text box starts out as
# "<left operand><operator><right operand>#number" and can be edited.
def f(Var_Name):
    """Echo the typed name; the compute cell reads it back via newvar.widget.result."""
    return Var_Name

newvar = interact(f, Var_Name=''.join([a.value, b.value, c.value, '#number']));
print("After defining variable name hit Enter, then run the next cell")

interactive(children=(Text(value='Ad Clicks/Ad Impressions#number', description='Var_Name'), Output()), _dom_c…

After defining variable name hit Enter, then run the next cell


### 5. Compute the new variable and format it for SuAVE

In [5]:
# Compute the new variable
import operator

# Operator symbol chosen in dropdown b -> function applied to the two columns.
operations = {
    '/': operator.truediv,
    '*': operator.mul,
    '+': operator.add,
    '-': operator.sub,
}
if b.value not in operations:
    # Fail here with a clear message rather than with a KeyError further down.
    raise ValueError("Unsupported operation: {!r}".format(b.value))

new_column = newvar.widget.result
computed = operations[b.value](df[var_list[a.value]], df[var_list[c.value]])

# Make sure there are no illegal values in this #number variable:
# anything non-numeric becomes NaN, and so does +/-inf (e.g. from division by
# zero), which would otherwise be written out as the text 'inf'. The values are
# kept at full float precision until they are formatted.
computed = pd.to_numeric(computed, errors='coerce')
computed = computed.replace([float('inf'), float('-inf')], float('nan'))

# Format real numbers with six decimals; na_action='ignore' leaves missing
# values as NaN instead of turning them into the string 'nan'.
df[new_column] = computed.map('{:.6f}'.format, na_action='ignore')

# Turn literal 'None'/'nan' strings anywhere in the table into missing values.
# float('nan') replaces pd.np.nan, which no longer exists in pandas 2.
df = df.replace(['None', 'nan'], float('nan'))

### 6. Save the new version of the CSV file and give a name to the new survey

In [6]:
# new filename
import os

# Drop whatever extension the source file has (not a fixed four characters)
# and mark the copy as version 1.
new_file = absolutePath + os.path.splitext(csv_file)[0] + '_v1.csv'
print(new_file)
# index=False keeps the DataFrame's row index out of the written file.
df.to_csv(new_file, index=False)

../../temp_csvs/zaslavsk_Facebook_Ads_from_IRA__Russia_v1.csv


In [7]:
# Ask for the name of the new survey

from IPython.display import display

input_text = widgets.Text()    # box the user types the name into
output_text = widgets.Text()   # shows the name once it has been submitted


def bind_input_to_output(sender):
    """Copy the text typed in input_text into output_text."""
    output_text.value = input_text.value


# Run bind_input_to_output() when the input box is submitted
input_text.on_submit(bind_input_to_output)

print("Input survey name here, then press Enter:")
# Show the input box first, then the box that echoes the submitted name
display(input_text, output_text)
print('After setting new survey name, run the next cell')

Input survey name here, then press Enter:


Text(value='')

Text(value='')

After setting new survey name, run the next cell


In [8]:
# Capture the submitted survey name and show it for confirmation
survey_name = output_text.value
print("Survey name is: {}".format(survey_name))

Survey name is: fb777


### 7. Generate the survey and create survey URL

In [12]:
#Parse url
# Everything before the first "/main" in the survey URL is used as the base of
# the upload endpoint.
upload_url = survey_url.split("/main")[0]

# For an https base, the upload goes to port 3001 over plain http. Only the
# leading scheme is rewritten: testing for "https" anywhere in the string and
# deleting the first "s" would corrupt the host of an http URL that merely
# contains "https" somewhere else.
if upload_url.startswith("https://"):
    upload_url = "http://" + upload_url[len("https://"):] + ":3001/uploadCSV"
else:
    upload_url = upload_url + "/uploadCSV"

# Part of the survey URL that precedes the first occurrence of the user name;
# the new survey URL is built on top of it.
new_survey_url_base = survey_url.split(user)[0]

In [15]:
import re
import requests

# Name for the new survey, exactly as typed in the input box.
submitted_name = input_text.value

# Every character outside [0-9a-zA-Z_] becomes "_" for use in the survey URL.
# NOTE(review): presumably this mirrors how the server names the uploaded
# survey file - confirm against the upload service.
# The result gets its own variable: writing it back to survey_url would break
# re-running the URL-parsing cell, which still needs the original survey URL.
safe_survey_name = re.sub('[^0-9a-zA-Z_]', '_', submitted_name)

upload_data = {'name': submitted_name, 'dzc': dzc_file, 'user': user}
# The with-block closes the CSV once the request completes or fails; the
# timeout stops an unreachable server from hanging the cell indefinitely.
with open(new_file, "rb") as csv_handle:
    files = {"file": csv_handle}
    r = requests.post(upload_url, files=files, data=upload_data, timeout=60)
print(r.status_code, r.reason)

url = new_survey_url_base + user + "_" + safe_survey_name + ".csv" + "&views=" + views + "&view=" + view
print(url)
print ("Click the URL to open the new survey")


ConnectionError: HTTPConnectionPool(host='suave-dev.sdsc.edu', port=3001): Max retries exceeded with url: /uploadCSV (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7ff239b3f278>: Failed to establish a new connection: [Errno 111] Connection refused',))

In [227]:
# Debugging aid: display the upload endpoint built in the URL-parsing cell.
upload_url

'http://suave-dev.sdsc.edu:3001/uploadCSV'