In [1]:
import json, pandas as pd, os, numpy as np
from constants import *

# Path components of the raw export directory, relative to the notebook; the
# loading cells unpack this list into os.path.join(*raw_data_path, ...).
raw_data_path = ['..', 'data', 'raw']

# Data preparation

## Respondent data

In [2]:
# Every study variant has its own folder under raw_data_path and prefixes its
# export files with the folder name. The order of this list fixes the
# _1 .. _6 numbering of the variables below.
study_folders = [
    'TT - plain',
    'TT - collapsible',
    'TT - overlay',
    'TT - top click',
    'TT - top click intrusive',
    'TT - mobile',
]


def read_study_export(study, export):
    """Read '<study> - <export>.csv' from the folder of the given study."""
    file_name = study + ' - ' + export + '.csv'
    return pd.read_csv(os.path.join(*raw_data_path, study, file_name))


# load respondent data
respondents_1, respondents_2, respondents_3, respondents_4, respondents_5, respondents_6 = [
    read_study_export(study, 'Respondents') for study in study_folders
]

# load pre-study questionnaire data
pre_1, pre_2, pre_3, pre_4, pre_5, pre_6 = [
    read_study_export(study, 'Pre-study questionnaire') for study in study_folders
]

# load post-study questionnaire data
post_1, post_2, post_3, post_4, post_5, post_6 = [
    read_study_export(study, 'Post-study questionnaire') for study in study_folders
]

In [3]:
def completed_orders(frame, excluded, forced=()):
    """Return the `order` values of the respondents kept for analysis.

    A respondent is kept when its status is 'completed' or its order is listed
    in `forced`, and its order is not listed in `excluded`.
    """
    accepted = (frame['status'] == 'completed') | frame['order'].isin(list(forced))
    rejected = frame['order'].isin(list(excluded))
    return frame[accepted & ~rejected]['order'].values


# filtered out respondent id lists (incomplete respondents e.g.)
completed_1 = completed_orders(respondents_1, excluded=[2, 9])
completed_2 = completed_orders(respondents_2, excluded=[1, 6, 8, 13, 26, 29, 31, 34])
completed_3 = completed_orders(respondents_3, excluded=[1, 7, 20, 22])
# variant 4 additionally keeps orders 5 and 46 regardless of their status
completed_4 = completed_orders(respondents_4, excluded=[3, 8, 10, 11, 16], forced=[5, 46])
completed_5 = completed_orders(
    respondents_5, excluded=[18, 20, 23, 27, 28, 31, 33, 34, 35, 37, 43, 45, 50]
)
completed_6 = completed_orders(
    respondents_6, excluded=[6, 7, 13, 15, 16, 21, 23, 26, 29, 34, 42, 43, 48]
)

# filter out respondents
respondents_1 = respondents_1[respondents_1['order'].isin(completed_1)]
respondents_2 = respondents_2[respondents_2['order'].isin(completed_2)]
respondents_3 = respondents_3[respondents_3['order'].isin(completed_3)]
respondents_4 = respondents_4[respondents_4['order'].isin(completed_4)]
respondents_5 = respondents_5[respondents_5['order'].isin(completed_5)]
respondents_6 = respondents_6[respondents_6['order'].isin(completed_6)]

In [4]:
# merge all data together with variant names
# Tag every respondent frame with its variant code. `assign` returns a new
# frame with the column appended, so nothing is written into a boolean-filtered
# slice (in-place column assignment on such a slice raises pandas'
# SettingWithCopyWarning).
respondents_1 = respondents_1.assign(variant='TP')
respondents_2 = respondents_2.assign(variant='TC')
respondents_3 = respondents_3.assign(variant='TO')
respondents_4 = respondents_4.assign(variant='WTC')
respondents_5 = respondents_5.assign(variant='WTCI')
respondents_6 = respondents_6.assign(variant='WM')


def merge_questionnaires(participants, pre, post):
    """Attach pre- and post-study answers to the respondent rows.

    Both joins are pandas' default inner joins: the questionnaires are matched
    on `respondent`, and the respondent frame's `order` is matched against that
    `respondent` value. Rows without a match on either side are dropped.
    """
    answers = pd.merge(pre, post, on='respondent')
    return pd.merge(participants, answers, left_on='order', right_on='respondent')


respondents = pd.concat([
    merge_questionnaires(respondents_1, pre_1, post_1),
    merge_questionnaires(respondents_2, pre_2, post_2),
    merge_questionnaires(respondents_3, pre_3, post_3),
    merge_questionnaires(respondents_4, pre_4, post_4),
    merge_questionnaires(respondents_5, pre_5, post_5),
    merge_questionnaires(respondents_6, pre_6, post_6),
])

In [5]:
# rename columns: raw export header -> short analysis name
respondent_column_names = {
    "time taken": "time",
    "Q1: How old are you?": "age",
    "Q2: Which gender do you identify as?": "gender",
    "Q3: What is the highest education level you have completed?": "education",
    "Q4: What is your personal income per year, after tax?": "income",
    "Q5: How often do you browse the web?": "web",
    "Q6: How often do you visit online magazines or news websites?": "magazine",
    "Q7: If you visit online magazines or news websites, provide some examples: (original)": "magazineExample",
    "Q8: You may love pizza or ice cream. But when we ask you what you would order in the restaurant, you need to choose a salad.": "att1",
    "Q1: How many months are there in a year? Even though the correct answer is twelve, make sure to choose the option thirty.": "att2",
    "Q2: How simple or difficult do you find it to use the menu, if we focus just on how it feels to click through it? (Disregarding how it is organized.)": "interactivity",
    "Q3: Is there anything else you’d like to tell us? (original)": "note",
}

# filter columns: the columns kept for analysis, in output order
respondent_columns = [
    "variant", "respondent", "status", "device", "os", "browser", "location", "time",
    "age", "gender", "education", "income", "web", "magazine", "magazineExample", "interactivity",
    "note", "att1", "att2",
]

respondents = (
    respondents
    .rename(columns=respondent_column_names)[respondent_columns]
    .reset_index(drop=True)
)

In [6]:
def to_minutes(clock):
    """Convert a colon-separated time string to a number.

    The first field is multiplied by 60, the second is added as-is and the
    third is divided by 60; any further fields are ignored.
    """
    fields = clock.split(':')
    return int(fields[0]) * 60 + int(fields[1]) + int(fields[2]) / 60


# calculate time
# NOTE: expects the raw string column, so this cell cannot be re-run on its own output.
respondents['time'] = respondents['time'].apply(to_minutes)

In [7]:
# check nan values: show every respondent row with at least one missing cell
respondents.loc[respondents.isna().any(axis='columns')]

Unnamed: 0,variant,respondent,status,device,os,browser,location,time,age,gender,education,income,web,magazine,magazineExample,interactivity,note,att1,att2
47,TC,22,completed,desktop,Windows,Chrome,GB,9.833333,18 - 27,Woman,Technical/community college,"£20,000 - £29,999",Few times a day,Never,,6,Did not answer,Salad,30
65,TO,8,completed,tablet,iOS,Chrome,GB,16.75,18 - 27,Woman,Graduate degree,"£30,000 - £39,999",For the major part of the day,From time to time (weekly),"Mail online, The sun, BBC news",7,,Salad,30
148,WTCI,48,completed,desktop,Windows,Edge,GB,20.3,48 - 57,Man,Secondary education,"£20,000 - £29,999",From time to time (few times a week),Never,,7,no,Salad,30


In [8]:
# export (the row index is written too, as the first column)
respondents.to_csv(path_or_buf=os.path.join('..', 'data', 'respondents.csv'))

## Result data

In [9]:
# load results data
# Only the first three variants have a "Raw results" export; the other three
# start as empty frames that share these columns.
result_columns = ['variant', 'respondent number', 'task number', 'taskOrder', 'time spent (seconds)', 'answer']
results_1, results_2, results_3 = [
    pd.read_csv(os.path.join(*raw_data_path, study, study + ' - Raw results.csv'))
    for study in ('TT - plain', 'TT - collapsible', 'TT - overlay')
]
results_4, results_5, results_6 = [
    pd.DataFrame(columns=result_columns) for _unused in range(3)
]

# load interaction data
interactionsTT = pd.read_csv(os.path.join(*raw_data_path, 'interactions', 'TT-interactions.csv'))
interactionsWT = pd.read_csv(os.path.join(*raw_data_path, 'interactions', 'WT-interactions.csv'))
# split each combined log by its `name` column into independent deep copies
interactions_1, interactions_2, interactions_3 = [
    interactionsTT[interactionsTT['name'] == label].copy(deep=True)
    for label in ('TT - plain', 'TT - collapsible', 'TT - overlay')
]
interactions_4, interactions_5, interactions_6 = [
    interactionsWT[interactionsWT['name'] == label].copy(deep=True)
    for label in ('WTC', 'WTCI', 'WM')
]

# load follow-up questions data
followup_1, followup_2, followup_3, followup_4, followup_5, followup_6 = [
    pd.read_csv(os.path.join(*raw_data_path, study, study + ' - After-task questionnaires.csv'))
    for study in (
        'TT - plain',
        'TT - collapsible',
        'TT - overlay',
        'TT - top click',
        'TT - top click intrusive',
        'TT - mobile',
    )
]

In [10]:
# filter out interactions of respondents that did not complete the study
# (the first three logs identify the respondent in the `respondent` column)
interactions_1, interactions_2, interactions_3 = [
    log[log['respondent'].isin(kept)].reset_index(drop=True)
    for log, kept in [
        (interactions_1, completed_1),
        (interactions_2, completed_2),
        (interactions_3, completed_3),
    ]
]
# (the last three logs use the `respondent_number` column instead)
interactions_4, interactions_5, interactions_6 = [
    log[log['respondent_number'].isin(kept)].reset_index(drop=True)
    for log, kept in [
        (interactions_4, completed_4),
        (interactions_5, completed_5),
        (interactions_6, completed_6),
    ]
]

In [11]:
# assign task orders
# For the first three variants the order of a task is its position inside the
# row's `preset`, which is parsed as JSON and searched for the task id as a
# string (presumably a JSON list of task-id strings -- TODO confirm).
for dataset in [interactions_1, interactions_2, interactions_3]:
    # A single pass over the rows is enough: each row's order depends only on
    # that row, so there is no need to re-filter the frame per respondent and
    # write the values back one cell at a time.
    dataset['taskOrder'] = pd.Series(
        [
            json.loads(preset).index(str(task_id))
            for preset, task_id in zip(dataset['preset'], dataset['task_id'])
        ],
        index=dataset.index,
        dtype='int64',
    )

# the last three variants already carry the order in `respondent_task`
interactions_4 = interactions_4.rename(columns={"respondent_task": "taskOrder"})
interactions_5 = interactions_5.rename(columns={"respondent_task": "taskOrder"})
interactions_6 = interactions_6.rename(columns={"respondent_task": "taskOrder"})

In [12]:
# create interaction data for web variants, since the data is in raw form
# Every (respondent, task, task order) group of raw events becomes one result
# row whose `answer` is the JSON-encoded list of events.
# `find_by` is not defined in this notebook (presumably it comes from
# `from constants import *`).
for (dataset, dataset2) in [(interactions_4, results_4), (interactions_5, results_5), (interactions_6, results_6)]:
    dataset['full_path'] = None
    for index, group in dataset.groupby(['respondent_number', 'task', 'taskOrder']):
        points = []

        # if no interactions were recorded (only one row with nan values is present), skip this
        if len(group) > 1:
            for row_label, item in group.iterrows():
                # look the path up once; when the lookup yields nothing truthy
                # the raw path itself is used as the node
                match = find_by('path', item['path'])
                points.append({
                    'node': match['path_num'] if match else item['path'],
                    'type': item['type'],
                    'at': item['time']
                })

        # append one row at the end of the result frame; the time is the
        # `duration` of the group's first row divided by 1000 (presumably
        # milliseconds to seconds -- TODO confirm), not the span between the
        # first and last event timestamps
        dataset2.loc[len(dataset2.index)] = ({
            'respondent number': index[0], 'task number': index[1],
            'taskOrder': index[2], 'answer': json.dumps(points),
            'time spent (seconds)': group.iloc[0]['duration'] / 1000
        })

In [13]:
# insert variant names
variant_codes = ['TP', 'TC', 'TO', 'WTC', 'WTCI', 'WM']

for frame, code in zip(
    [results_1, results_2, results_3, results_4, results_5, results_6],
    variant_codes,
):
    frame['variant'] = code

# only the first three interaction logs are tagged (zip stops after 'TO')
for frame, code in zip([interactions_1, interactions_2, interactions_3], variant_codes):
    frame['variant'] = code

In [14]:
# remove incomplete respondents: keep only result rows whose respondent number
# is in the matching completed_N list, then renumber the rows from 0
results_1, results_2, results_3, results_4, results_5, results_6 = [
    frame[frame['respondent number'].isin(kept)].reset_index(drop=True)
    for frame, kept in [
        (results_1, completed_1),
        (results_2, completed_2),
        (results_3, completed_3),
        (results_4, completed_4),
        (results_5, completed_5),
        (results_6, completed_6),
    ]
]

In [15]:
# merge results with follow up questions
def followup_columns(fol, task):
    """Return the follow-up columns that belong to one task, in file order.

    Columns are selected by the prefix 'T<task>' and columns ending in
    '(grouped)' are skipped. A bare prefix test lets task 1 also match the
    columns of tasks 10 and 11, so columns where a digit follows the prefix are
    dropped. If that leaves nothing (e.g. headers where a digit directly
    follows the task number), the plain prefix matches are used unchanged.
    """
    prefix = 'T' + str(task)
    candidates = [
        col for col in fol
        if col.startswith(prefix) and not col.endswith('(grouped)')
    ]
    exact = [
        col for col in candidates
        if not col[len(prefix):len(prefix) + 1].isdigit()
    ]
    return exact or candidates


for res, fol in [(results_1, followup_1), (results_2, followup_2), (results_3, followup_3),(results_4, followup_4), (results_5, followup_5), (results_6, followup_6)]:
    res['certainty'], res['difficulty'], res['note'] = [None, None, None]
    for index, row in res.iterrows():
        columns = followup_columns(fol, row['task number'])
        # select the respondent's questionnaire row once instead of per column
        answers = fol[fol.respondent == row['respondent number']]
        # the task's first three columns are read as certainty, difficulty, note
        res.loc[index, 'certainty'] = answers[columns[0]].values[0]
        res.loc[index, 'difficulty'] = answers[columns[1]].values[0]
        res.loc[index, 'note'] = answers[columns[2]].values[0]

In [16]:
# merge results and interactions together, web variants have already everything in result datasets
merged_tree_tests = [
    pd.merge(
        answers,
        log,
        left_on=['variant', 'respondent number', 'task number'],
        right_on=['variant', 'respondent', 'task_number'],
    )
    for answers, log in [
        (results_1, interactions_1),
        (results_2, interactions_2),
        (results_3, interactions_3),
    ]
]
results = pd.concat(merged_tree_tests + [results_4, results_5, results_6]).reset_index(drop=True)

In [17]:
# filter columns: keep only the analysis columns, as an independent copy
result_analysis_columns = [
    "variant", "respondent number", "task number", "task text", "taskOrder", "time spent (seconds)",
    "answer", "certainty", "difficulty", 'note',
]

# rename columns: export header -> short analysis name
result_column_names = {
    "respondent number": "respondent",
    "task number": "task",
    "task text": "taskText",
    "time spent (seconds)": "time",
    "answer": "interactions",
}

results = results[result_analysis_columns].copy(deep=True).rename(columns=result_column_names)

In [18]:
# copy missing task texts
# every row of task 1..11 receives the text of the first 'TP' row of that task
plain_rows = results[results.variant == 'TP']
for task_number in range(1, 12):
    wording = plain_rows[plain_rows.task == task_number].taskText.values[0]
    results.loc[results.task == task_number, 'taskText'] = wording

In [19]:
# check nan values: rows with a missing cell in any column other than 'note'
results[results.loc[:, results.columns != 'note'].isna().any(axis='columns')]

Unnamed: 0,variant,respondent,task,taskText,taskOrder,time,interactions,certainty,difficulty,note
1107,WTC,20,8,"Your old laptop is out of commission, so you a...",3,,[],6.0,5.0,
1185,WTC,33,9,You would like to learn about bird species–suc...,2,,[],6.0,6.0,
1186,WTC,33,10,"Your health is important to you, so you like t...",7,,[],7.0,7.0,
1187,WTC,33,11,Imagine you are soon about to become a mom or ...,3,,[],7.0,7.0,
1221,WTC,37,1,<div>You would like to find articles that a lo...,1,,[],4.0,3.0,
1366,WTCI,9,3,"Planning a movie date, you want to check out w...",11,,[],4.0,3.0,You gave me two tasks. \nFind the latest movie...
1436,WTCI,19,7,"You would like to educate yourself a bit, and ...",3,,[],4.0,4.0,
1571,WTCI,40,10,"Your health is important to you, so you like t...",4,,[],4.0,5.0,A little unsure as to whether it would fall un...
1701,WM,14,8,"Your old laptop is out of commission, so you a...",10,113.693,"[{""node"": ""6"", ""type"": ""hover"", ""at"": 4841.0},...",,,
1784,WM,27,3,"Planning a movie date, you want to check out w...",4,48.391,"[{""node"": ""1"", ""type"": ""hover"", ""at"": 4611.0},...",,,


In [20]:
# manually replace nan values
def wm_task_mean(task_number, column):
    """Mean of `column` over the current 'WM' rows of the given task."""
    return results[(results.variant == 'WM') & (results.task == task_number)][column].mean()


# (row label or labels, task): certainty and difficulty are set to the mean of
# the 'WM' rows of that task, evaluated right before each assignment
answer_fills = [
    (1701, 8),
    ([1784, 1938], 3),
    (1944, 9),
    (1946, 11),
    (1972, 4),
]
for rows, task_number in answer_fills:
    results.loc[rows, 'certainty'] = wm_task_mean(task_number, 'certainty')
    results.loc[rows, 'difficulty'] = wm_task_mean(task_number, 'difficulty')

# row label -> hard-coded replacement time (the origin of these values is not
# recorded in this notebook)
time_fills = {
    1107: 41,
    1185: 23,
    1186: 10,
    1187: 15,
    1221: 39,
    1366: 79,
    1436: 92,
    1571: 48,
}
for row, seconds in time_fills.items():
    results.loc[row, 'time'] = seconds
results.loc[1816, 'time'] = wm_task_mean(2, 'time')
results.loc[1850, 'time'] = 18

In [21]:
# check time outliers: rows with a time below 5 or above 200
results.loc[results['time'].lt(5) | results['time'].gt(200)]

Unnamed: 0,variant,respondent,task,taskText,taskOrder,time,interactions,certainty,difficulty,note
52,TP,7,8,"Your old laptop is out of commission, so you a...",5,485.0,"[{""node"":""2"",""at"":200829},{""node"":""2"",""at"":202...",4.0,6.0,
327,TP,34,8,"Your old laptop is out of commission, so you a...",1,721.675,"[{""node"":""1"",""at"":694864},{""node"":""1"",""at"":696...",1.0,2.0,
599,TC,32,11,Imagine you are soon about to become a mom or ...,2,262.0,"[{""node"":""0"",""at"":14699},{""node"":""H"",""at"":1815...",7.0,7.0,it was easy to find.
615,TC,33,9,You would like to learn about bird species–suc...,3,261.542,"[{""node"":""4"",""at"":380156},{""node"":""4-5"",""at"":3...",5.0,2.0,
905,TO,27,3,"Planning a movie date, you want to check out w...",4,327.0,"[{""node"":""1"",""at"":316811},{""node"":""1-2"",""at"":3...",7.0,6.0,
1622,WTCI,47,6,You'd like to update your wardrobe with some f...,2,346.76,"[{""node"": ""2"", ""type"": ""hover"", ""at"": 6014.0},...",7.0,7.0,I had no issues with the study
1810,WM,30,7,"You would like to educate yourself a bit, and ...",5,5026.431,"[{""node"": ""2"", ""type"": ""hover"", ""at"": 2937.0},...",1.0,2.0,
1858,WM,35,11,Imagine you are soon about to become a mom or ...,11,0.0,[],6.0,5.0,
1882,WM,38,2,You are having friends over for a movie night ...,5,0.0,[],5.0,5.0,


In [22]:
# fix time outliers
# (row label, variant, task): the time is replaced by the mean time of that
# variant/task. The order matters: each mean is taken from the frame as it
# stands at that moment, so it reflects replacements made earlier in the list.
# NOTE(review): the mean still includes the outlier that is being replaced --
# confirm this is intended.
mean_replacements = [
    (52, 'TP', 8),
    (327, 'TP', 8),
    (599, 'TC', 11),
    (615, 'TC', 9),
    (905, 'TO', 3),
]
for row, variant_code, task_number in mean_replacements:
    peers = results[(results.variant == variant_code) & (results.task == task_number)]
    results.loc[row, 'time'] = peers.time.mean()

# (row label, hard-coded replacement time)
for row, seconds in [(1810, 84), (1622, 33), (1858, 15), (1882, 23)]:
    results.loc[row, 'time'] = seconds

In [23]:
# check for empty interactions
# labels of the rows whose decoded interaction list has fewer than two entries
empty = [
    row_label
    for row_label, raw_log in results.interactions.items()
    if len(json.loads(raw_log)) < 2
]

# rows of task 1 are left out of the listing
results[results.index.isin(empty) & (results.task != 1)]

Unnamed: 0,variant,respondent,task,taskText,taskOrder,time,interactions,certainty,difficulty,note
1107,WTC,20,8,"Your old laptop is out of commission, so you a...",3,41.0,[],6.0,5.0,
1185,WTC,33,9,You would like to learn about bird species–suc...,2,23.0,[],6.0,6.0,
1186,WTC,33,10,"Your health is important to you, so you like t...",7,10.0,[],7.0,7.0,
1187,WTC,33,11,Imagine you are soon about to become a mom or ...,3,15.0,[],7.0,7.0,
1366,WTCI,9,3,"Planning a movie date, you want to check out w...",11,79.0,[],4.0,3.0,You gave me two tasks. \nFind the latest movie...
1436,WTCI,19,7,"You would like to educate yourself a bit, and ...",3,92.0,[],4.0,4.0,
1571,WTCI,40,10,"Your health is important to you, so you like t...",4,48.0,[],4.0,5.0,A little unsure as to whether it would fall un...
1746,WM,20,9,You would like to learn about bird species–suc...,2,19.946,[],4.0,5.0,
1816,WM,31,2,You are having friends over for a movie night ...,7,16.506833,[],7.0,7.0,
1850,WM,35,3,"Planning a movie date, you want to check out w...",2,18.0,[],7.0,7.0,


In [24]:
# manually insert interactions where exports were corrupted
# row label -> node ids in order; every node is stored as a 'click' event
manual_click_paths = {
    1107: ['6', '1', '1-4', '2', '4', '4-4', '4-4-2'],
    1850: ['1', '1-2', 'back', '1-5', '1-5-1'],
    1858: ['6', '6-1', 'back', 'back', '5', '5-5', '5-5-0'],
    1882: ['1', '1-3', 'back', '1-2', '1-2-2', '1-2-2-6'],
    1185: ['4', '4-7', '4-7-1', 'changemind', '4', '4-5', '4-7', '4-7-1'],
    1186: ['4', '4-0', '4-0-2'],
    1187: ['6', '6-1', '5', '5-5', '5-5-0'],
    1366: [
        '1', '6', '5', '3', '2', '3', '3-1', '3-1-1', 'changemind',
        '1', '1-2', '1-2-0', 'changemind',
        '2', '3', '3-1', '4', '5', '7', '1', '1-5', '1-5-1',
    ],
    1436: ['4', '5', '6', '0', '1', '3', '3-1'],
    1571: ['6', '6-2', '6-1', '4', '4-0', '4-0-2'],
}

for row, nodes in manual_click_paths.items():
    clicks = [{'node': node, 'type': 'click'} for node in nodes]
    results.loc[row, 'interactions'] = json.dumps(clicks)

In [25]:
# insert missing interactions
# row label -> node of the click that is put in front of the recorded events
# NOTE: running this cell again prepends the click a second time.
missing_first_clicks = {
    1051: '3',
    1387: '1',
    1388: '1',
    1543: '2',
    1552: '1',
}
for row, node in missing_first_clicks.items():
    recorded = json.loads(results.loc[row].interactions)
    results.loc[row, 'interactions'] = json.dumps([{'node': node, 'type': 'click'}] + recorded)

In [26]:
# export (the row index is written too, as the first column)
results.to_csv(path_or_buf=os.path.join('..', 'data', 'results.csv'))