# analysis

## ToDo

- ~~Importing more than one json file~~
- How to group by subjects models and movements
- Model specific confusion rate
- How to manage or represent all this data
- ...

## data structure

Grouping
- subjects
- models
    - type overall
    - individual configuration
- movement
    - type overall
    - with(out) contact

## loading data
json file needs to be normalized

In [502]:
import numpy as np
import pandas as pd
import json, os

In [503]:
path_to_json = '/users/hieu/git/turing_test_walking/data'

json_files = [pos_json for pos_json in os.listdir(path_to_json) if pos_json.endswith('.json')]
first_file = json_files.pop()
with open(first_file,'r') as f:
    one = json.loads(f.read())
data = pd.json_normalize(one)

#workaround for concatenating unnormalized data
for file in json_files:
    with open(file,'r') as f:
        one = json.loads(f.read())
    load = pd.json_normalize(one)
    data = pd.concat([data,load])

In [504]:
data

Unnamed: 0,rt,stimulus,button_pressed,trial_type,trial_index,time_elapsed,internal_node_id,subject,view_history,key_press,test_part,test_stimulus.position,test_stimulus.left,test_stimulus.right,test_stimulus.att_check,sona
0,973.000,<p>Welcome!<br> Please choose your language to...,1,html-button-response,0,980,0.0-0.0,bt4quvsv7wdsku4,,,,,,,,
1,2206.000,,,instructions,1,3188,0.0-1.0-0.0,bt4quvsv7wdsku4,"[{""page_index"":0,""viewing_time"":515},{""page_in...",,,,,,,
2,785.000,<p>Bereit?</p>,0,html-button-response,2,3976,0.0-3.0,bt4quvsv7wdsku4,,,,,,,,
3,,"<div style=""font-size:60px;"">+</div>",,html-keyboard-response,3,5981,0.0-4.0,bt4quvsv7wdsku4,,,,,,,,
4,,,,training,4,9769,0.0-5.0,bt4quvsv7wdsku4,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42,1274.935,<p>Welche Sequenz haben Sie als natürlicher em...,1,html-button-response,42,245523,0.0-7.0-3.8,onzokuqujwnax3t,,,response,0.0,bvh/return-bottle-training2-lines.txt,bvh/vr_prediction_models/model(tmp)-dataset(re...,False,
43,981.395,<p>Bereit?</p>,0,html-button-response,43,246508,0.0-7.0-0.9,onzokuqujwnax3t,,,,,,,,
44,,"<div style=""font-size:60px;"">+</div>",,html-keyboard-response,44,248512,0.0-7.0-1.9,onzokuqujwnax3t,,,,,,,,
45,,,,turing-test,45,254185,0.0-7.0-2.9,onzokuqujwnax3t,,,,,,,,


  ## parsing

In [505]:
# filtering test parts and creating new index

df = data[data['test_part'] == 'response'].reset_index()

# this line below for filtering attention checks overall out -> uncomment for using it
# df = data[(data['test_part'] == 'response') & (data['.att_check'] == False)].reset_index()

# must be integers for comparing
df['button_pressed'] = df['button_pressed'].astype(int)
df['test_stimulus.position'] = df['test_stimulus.position'].astype(int)

# renaming columns
df = df.rename(columns={"test_stimulus.left": "left",
                        "test_stimulus.right": "right",
                        "test_stimulus.att_check": "att_check",
                        "test_stimulus.position": "position",
                        "button_pressed":"pressed"})

In [506]:
df = df[['sona','subject','rt','att_check','pressed','position','left','right']]

In [507]:
df

Unnamed: 0,sona,subject,rt,att_check,pressed,position,left,right
0,,bt4quvsv7wdsku4,5540.000,False,0,0,bvh/pass-bottle-hold-training0-lines.txt,bvh/vr_prediction_models/model(dmp)-dataset(pa...
1,,bt4quvsv7wdsku4,792.000,False,0,0,bvh/pass-bottle-hold-training1-lines.txt,bvh/vr_prediction_models/model(tmp)-dataset(pa...
2,,bt4quvsv7wdsku4,2076.000,False,1,1,bvh/vr_prediction_models/model(vcgpdm)-dataset...,bvh/return-bottle-hold-training3-lines.txt
3,,bt4quvsv7wdsku4,1266.000,False,1,1,bvh/vr_prediction_models/model(tmp)-dataset(pa...,bvh/pass-bottle-training0-lines.txt
4,,bt4quvsv7wdsku4,2022.000,False,1,0,bvh/return-bottle-hold-training4-lines.txt,bvh/vr_prediction_models/model(vcgpdm)-dataset...
...,...,...,...,...,...,...,...,...
705,,onzokuqujwnax3t,1723.110,False,1,1,bvh/vr_prediction_models/model(tmp)-dataset(re...,bvh/return-bottle-training4-lines.txt
706,,onzokuqujwnax3t,1248.325,True,0,0,bvh/pass-bottle-training4-lines.txt,bvh/vr_prediction_models/model(dmp)-dataset(pa...
707,,onzokuqujwnax3t,1372.765,False,0,1,bvh/vr_prediction_models/model(vcgpdm)-dataset...,bvh/pass-bottle-training2-lines.txt
708,,onzokuqujwnax3t,1274.935,False,1,0,bvh/return-bottle-training2-lines.txt,bvh/vr_prediction_models/model(tmp)-dataset(re...


In [508]:
# without copy() there is an error when replacing strings
d = df.copy()

In [509]:
# removing unnecessary strings
d['left'] = d['left'].str.replace('bvh/vr_prediction_models/','')
d['left'] = d['left'].str.replace('bvh/','')
d['left'] = d['left'].str.replace('/final-lines.txt','')
d['left'] = d['left'].str.replace('-lines.txt','')
d['right'] = d['right'].str.replace('bvh/vr_prediction_models/','')
d['right'] = d['right'].str.replace('bvh/','')
d['right'] = d['right'].str.replace('/final-lines.txt','')
d['right'] = d['right'].str.replace('-lines.txt','')

In [510]:
# adding movement column for better grouping
num = 0
arr = []

for i in d['position']:
    if i == 1:
        arr.append(d['right'][num])
    else:
        arr.append(d['left'][num])
    num += 1

movement = []
for i in arr:
    x = i.split('-')
    movement.append('-'.join(x[:-1]))
d['movement'] = movement

In [511]:
# adding model column for easier grouping by
num = 0
arr = []
for i in d['position']:
    if i == 1:
        arr.append(d['left'][num])
    else:
        arr.append(d['right'][num])
    num += 1
d['model_set'] = arr

**old code for model column**

arr = []
for i in d['model_set']:
    string = i.split('-')
    string = string[0].replace('model(','')
    string = string.replace(')','')
    arr.append(string)
d['model'] = arr

In [512]:
# creating model column for better grouping by models
arr = []
for i in d['model_set']:
    if i.find('tmp')>0:
        arr.append('tmp')
    elif i.find('dmp')>0:
        arr.append('dmp')
    elif i.find('MAP')>0:
        arr.append('vcgpdm-map')
    elif i.find('ELBO')>0:
        arr.append('vcgpdm-elbo')
        
d['model'] = arr

In [513]:
# adding performance score
num = 0
arr = []
for i in d['pressed']:
    if i == d['position'][num]:
        arr.append(1)
    else:
        arr.append(0)
    num += 1
d['p_score'] = p_score

In [514]:
d

Unnamed: 0,sona,subject,rt,att_check,pressed,position,left,right,movement,model_set,model,p_score
0,,bt4quvsv7wdsku4,5540.000,False,0,0,pass-bottle-hold-training0,model(dmp)-dataset(pass-bottle-hold)-npsi(100)...,pass-bottle-hold,model(dmp)-dataset(pass-bottle-hold)-npsi(100)...,dmp,1
1,,bt4quvsv7wdsku4,792.000,False,0,0,pass-bottle-hold-training1,model(tmp)-dataset(pass-bottle-hold)-numprim(1...,pass-bottle-hold,model(tmp)-dataset(pass-bottle-hold)-numprim(1...,tmp,1
2,,bt4quvsv7wdsku4,2076.000,False,1,1,model(vcgpdm)-dataset(return-bottle-hold)-mode...,return-bottle-hold-training3,return-bottle-hold,model(vcgpdm)-dataset(return-bottle-hold)-mode...,vcgpdm-elbo,1
3,,bt4quvsv7wdsku4,1266.000,False,1,1,model(tmp)-dataset(pass-bottle)-numprim(4)-hol...,pass-bottle-training0,pass-bottle,model(tmp)-dataset(pass-bottle)-numprim(4)-hol...,tmp,1
4,,bt4quvsv7wdsku4,2022.000,False,1,0,return-bottle-hold-training4,model(vcgpdm)-dataset(return-bottle-hold)-mode...,return-bottle-hold,model(vcgpdm)-dataset(return-bottle-hold)-mode...,vcgpdm-elbo,0
...,...,...,...,...,...,...,...,...,...,...,...,...
705,,onzokuqujwnax3t,1723.110,False,1,1,model(tmp)-dataset(return-bottle)-numprim(14)-...,return-bottle-training4,return-bottle,model(tmp)-dataset(return-bottle)-numprim(14)-...,tmp,1
706,,onzokuqujwnax3t,1248.325,True,0,0,pass-bottle-training4,model(dmp)-dataset(pass-bottle)-npsi(70)-hold(4),pass-bottle,model(dmp)-dataset(pass-bottle)-npsi(70)-hold(4),dmp,1
707,,onzokuqujwnax3t,1372.765,False,0,1,model(vcgpdm)-dataset(pass-bottle)-mode(ELBO)-...,pass-bottle-training2,pass-bottle,model(vcgpdm)-dataset(pass-bottle)-mode(ELBO)-...,vcgpdm-elbo,0
708,,onzokuqujwnax3t,1274.935,False,1,0,return-bottle-training2,model(tmp)-dataset(return-bottle)-numprim(10)-...,return-bottle,model(tmp)-dataset(return-bottle)-numprim(10)-...,tmp,0


In [515]:
d

Unnamed: 0,sona,subject,rt,att_check,pressed,position,left,right,movement,model_set,model,p_score
0,,bt4quvsv7wdsku4,5540.000,False,0,0,pass-bottle-hold-training0,model(dmp)-dataset(pass-bottle-hold)-npsi(100)...,pass-bottle-hold,model(dmp)-dataset(pass-bottle-hold)-npsi(100)...,dmp,1
1,,bt4quvsv7wdsku4,792.000,False,0,0,pass-bottle-hold-training1,model(tmp)-dataset(pass-bottle-hold)-numprim(1...,pass-bottle-hold,model(tmp)-dataset(pass-bottle-hold)-numprim(1...,tmp,1
2,,bt4quvsv7wdsku4,2076.000,False,1,1,model(vcgpdm)-dataset(return-bottle-hold)-mode...,return-bottle-hold-training3,return-bottle-hold,model(vcgpdm)-dataset(return-bottle-hold)-mode...,vcgpdm-elbo,1
3,,bt4quvsv7wdsku4,1266.000,False,1,1,model(tmp)-dataset(pass-bottle)-numprim(4)-hol...,pass-bottle-training0,pass-bottle,model(tmp)-dataset(pass-bottle)-numprim(4)-hol...,tmp,1
4,,bt4quvsv7wdsku4,2022.000,False,1,0,return-bottle-hold-training4,model(vcgpdm)-dataset(return-bottle-hold)-mode...,return-bottle-hold,model(vcgpdm)-dataset(return-bottle-hold)-mode...,vcgpdm-elbo,0
...,...,...,...,...,...,...,...,...,...,...,...,...
705,,onzokuqujwnax3t,1723.110,False,1,1,model(tmp)-dataset(return-bottle)-numprim(14)-...,return-bottle-training4,return-bottle,model(tmp)-dataset(return-bottle)-numprim(14)-...,tmp,1
706,,onzokuqujwnax3t,1248.325,True,0,0,pass-bottle-training4,model(dmp)-dataset(pass-bottle)-npsi(70)-hold(4),pass-bottle,model(dmp)-dataset(pass-bottle)-npsi(70)-hold(4),dmp,1
707,,onzokuqujwnax3t,1372.765,False,0,1,model(vcgpdm)-dataset(pass-bottle)-mode(ELBO)-...,pass-bottle-training2,pass-bottle,model(vcgpdm)-dataset(pass-bottle)-mode(ELBO)-...,vcgpdm-elbo,0
708,,onzokuqujwnax3t,1274.935,False,1,0,return-bottle-training2,model(tmp)-dataset(return-bottle)-numprim(10)-...,return-bottle,model(tmp)-dataset(return-bottle)-numprim(10)-...,tmp,0


In [516]:
set(d['model_set'])

{'model(dmp)-dataset(pass-bottle)-npsi(10)-hold(0)',
 'model(dmp)-dataset(pass-bottle)-npsi(10)-hold(2)',
 'model(dmp)-dataset(pass-bottle)-npsi(10)-hold(3)',
 'model(dmp)-dataset(pass-bottle)-npsi(100)-hold(0)',
 'model(dmp)-dataset(pass-bottle)-npsi(100)-hold(2)',
 'model(dmp)-dataset(pass-bottle)-npsi(100)-hold(3)',
 'model(dmp)-dataset(pass-bottle)-npsi(20)-hold(2)',
 'model(dmp)-dataset(pass-bottle)-npsi(20)-hold(3)',
 'model(dmp)-dataset(pass-bottle)-npsi(30)-hold(0)',
 'model(dmp)-dataset(pass-bottle)-npsi(30)-hold(2)',
 'model(dmp)-dataset(pass-bottle)-npsi(30)-hold(4)',
 'model(dmp)-dataset(pass-bottle)-npsi(40)-hold(2)',
 'model(dmp)-dataset(pass-bottle)-npsi(40)-hold(3)',
 'model(dmp)-dataset(pass-bottle)-npsi(50)-hold(0)',
 'model(dmp)-dataset(pass-bottle)-npsi(50)-hold(1)',
 'model(dmp)-dataset(pass-bottle)-npsi(50)-hold(2)',
 'model(dmp)-dataset(pass-bottle)-npsi(50)-hold(3)',
 'model(dmp)-dataset(pass-bottle)-npsi(60)-hold(0)',
 'model(dmp)-dataset(pass-bottle)-npsi(60)-

## analysis
### overall confusion rate
#### with all attention checks included

In [517]:
correct = 0
for i in d['p_score']:
    right += i
confusion_rate = (len(d) - correct)/len(d)
print(confusion_rate)

1.0


#### without attention checks

In [518]:
no_check = d[d['att_check']==False]

In [519]:
correct = 0
for i in no_check['p_score']:
        correct += i
confusion_rate = (len(no_check) - correct)/len(no_check)
print(confusion_rate)

0.3042121684867395


In [520]:
no_check.shape

(641, 12)

### checking attention
#### if confusion rate is zero -> good

In [521]:
check_att = d[d['att_check']==True]

In [522]:
correct = 0
for i in check_att['p_score']:
        correct += i
confusion_rate = (len(check_att) - correct)/len(check_att)
print(confusion_rate)

0.10144927536231885


In [523]:
for i in d:
        print(d[i].shape)

(710,)
(710,)
(710,)
(710,)
(710,)
(710,)
(710,)
(710,)
(710,)
(710,)
(710,)
(710,)


In [535]:
len(set(d['model']))

4

In [525]:
454 * 30 / 100

136.2

In [533]:
no_check.groupby('model').mean()

Unnamed: 0_level_0,rt,pressed,position,p_score
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
dmp,1522.969135,0.503759,0.503759,0.879699
tmp,3062.806472,0.576687,0.472393,0.552147
vcgpdm-elbo,3342.669505,0.578947,0.464396,0.687307
vcgpdm-map,1324.9475,0.5,0.454545,0.772727


In [534]:
d.groupby('model').mean()

Unnamed: 0_level_0,rt,pressed,position,p_score
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
dmp,1377.67297,0.514851,0.5,0.886139
tmp,3062.806472,0.576687,0.472393,0.552147
vcgpdm-elbo,3342.669505,0.578947,0.464396,0.687307
vcgpdm-map,1324.9475,0.5,0.454545,0.772727
