# Analysis of Pilot Data from Chronic Worry Study 

## Basic study structure Round 1 Pilot:

1. Learn transition probabilities by pressing "1" or "0" and observing how often each action leads to each image (one image 60% of the time, the other image 40% of the time). Before the participant sees the image an action leads to, words of different valences are flashed on screen for 500 msec. Participants are instructed to ignore these words.


2. 30 second rest


3. Test period where two amounts of money are placed next to each image. Participant is asked to plan which action will get to the image with the most money.

3 conditions (2 rounds for each condition):

1. Rare Threat: Threatening words paired with rare transitions, and positive words paired with common transitions.
2. Common Threat: Threatening words paired with COMMON transitions, and positive words paired with RARE transitions.
3. Neutral: Neutral words paired with common and rare transitions. 

Test phase: 20 probes, each with varying amounts of money associated with each image.

### Get basic info on subjects


In [2]:
import pandas as pd
import numpy as np


# Load the round-1 questionnaire export, then count the rows whose
# 'Response' value is exactly 'Male'.
worry_data = pd.read_csv('questionnaire_revised.csv')
male_rows = worry_data['Response'] == 'Male'
male_count = np.sum(1 * male_rows)
print('# Males={}'.format(male_count))

# Males=10


## Get Worry Questionnaire data

In [4]:
# ---- PSWQ scoring state ---------------------------------------------------
PSWQ_Scores = []                              # one total per finished questionnaire
reverse_scored = [1, 3, 8, 10, 11]            # item numbers scored through rs_dict
rs_dict = {1: 5, 2: 4, 3: 3, 4: 2, 5: 1}      # raw response -> reversed value
current_score = 0
reverse_score_counter = 0
counter = 0

# row labels of every PSWQ item that contributed to a score
ilocs = []

# Raw (un-reversed) responses of the current subject, split by item type, so
# the two averages can be compared as a check on faithful responding.
rs_items = []
non_rs_items = []
sub = 1

for row, _ in worry_data.iterrows():
    raw_key = worry_data['Question Key'][row]
    key_text = str(raw_key)

    if key_text == 'END QUESTIONNAIRE':
        # Questionnaire finished: bank the total, report the check, reset.
        PSWQ_Scores.append(current_score)
        current_score = 0
        reverse_score_counter = 0
        counter = 0
        print('\n Reverse_Score_Check for Subject {}'.format(sub))
        print('RS avg: {}'.format(np.mean(rs_items)))
        print('NON RS avg: {}'.format(np.mean(non_rs_items)))
        print('\n')
        sub += 1
        rs_items = []
        non_rs_items = []
        continue

    # Only keys starting with 'ps' and not ending in 'quantised' are scored.
    if not key_text.startswith('ps') or key_text.endswith('quantised'):
        continue

    # Keys longer than five characters carry a two-character item number,
    # shorter keys a single trailing digit.
    if len(raw_key) > 5:
        item_number = int(raw_key[-2:])
    else:
        item_number = int(raw_key[-1])
    answer = int(worry_data['Response'][row])

    if item_number in reverse_scored:
        rs_items.append(answer)
        current_score += rs_dict[answer]
        reverse_score_counter += 1
    else:
        non_rs_items.append(answer)
        current_score += answer
    counter += 1
    ilocs.append(row)


print('Scores on PSWQ =\n {}'.format(PSWQ_Scores))
np.save('PSWQ_Scores_round1', PSWQ_Scores)



 Reverse_Score_Check for Subject 1
RS avg: 1.6
NON RS avg: 4.090909090909091



 Reverse_Score_Check for Subject 2
RS avg: 4.2
NON RS avg: 1.7272727272727273



 Reverse_Score_Check for Subject 3
RS avg: 1.2
NON RS avg: 2.5454545454545454



 Reverse_Score_Check for Subject 4
RS avg: 1.6
NON RS avg: 2.909090909090909



 Reverse_Score_Check for Subject 5
RS avg: 1.8
NON RS avg: 4.454545454545454



 Reverse_Score_Check for Subject 6
RS avg: 1.4
NON RS avg: 4.363636363636363



 Reverse_Score_Check for Subject 7
RS avg: 1.4
NON RS avg: 4.363636363636363



 Reverse_Score_Check for Subject 8
RS avg: 2.6
NON RS avg: 2.8181818181818183



 Reverse_Score_Check for Subject 9
RS avg: 3.6
NON RS avg: 3.1818181818181817



 Reverse_Score_Check for Subject 10
RS avg: 2.6
NON RS avg: 3.909090909090909



 Reverse_Score_Check for Subject 11
RS avg: 2.0
NON RS avg: 3.727272727272727



 Reverse_Score_Check for Subject 12
RS avg: 3.4
NON RS avg: 2.090909090909091



 Reverse_Score_Check for Subject

## Construct Dictionary to define optimal choice per condition and action

In [4]:
import pandas as pd
import numpy as np
# Load the task log and keep only the columns needed for the analysis.
df_task = pd.read_csv('task.csv')
df_task_r = df_task[['Participant Public ID', 'display', 'forced_choice', 'Response', 'image2',
                     'test_image1', 'test_image2', 'test_image1_value',
                     'test_image2_value', 'image_query2', 'image_query1']]
df_task_r = df_task_r.replace('response_text_entry', 'query_internal_probability')

# Rows with a forced choice are used to work out the best choice.
df_fc = df_task_r[['forced_choice', 'image2', 'display']][pd.notnull(df_task_r['forced_choice'])]
# strip away practice rounds that come before actual task and only get 1st subject
df_fc = df_fc.iloc[20:1327]

# image -> number of times it followed a forced choice starting with 'o'
# (one_dict) or any other forced choice (zero_dict).
zero_dict = {}
one_dict = {}
conditions = ['rare_threat_1',
              'rare_threat_2',
              'common_threat_1',
              'common_threat_2',
              'neutral_1',
              'neutral_2']

# For every condition, find the first forced-choice row whose display equals
# the condition name and tally the 160 consecutively-labelled rows from there.
# Columns are addressed by name: integer positions such as `row[2]` on a
# string-labelled Series are deprecated in pandas.
for condition in conditions:
    matching_rows = df_fc.index[df_fc['display'] == condition]
    if len(matching_rows) == 0:
        continue
    first_row = matching_rows[0]
    for offset in range(160):
        # Label-based lookup, as before: raises KeyError if the window runs
        # past the rows kept in df_fc.
        trial = df_fc.loc[first_row + offset]
        tally = one_dict if trial['forced_choice'].startswith('o') else zero_dict
        image = trial['image2']
        tally[image] = tally.get(image, 0) + 1

# The dicts keep insertion order, so entries 2k and 2k+1 are the two images
# first seen while processing the k-th condition
# (assumes exactly two images per action per condition — TODO confirm).
zero_images = list(zero_dict)
one_images = list(one_dict)

best_answer_key = {}
counter = 0
for condition in conditions:
    first, second = zero_images[counter], zero_images[counter + 1]
    zero_best = first if zero_dict[first] > zero_dict[second] else second

    first, second = one_images[counter], one_images[counter + 1]
    one_best = first if one_dict[first] > one_dict[second] else second

    # [action, image most often reached by that action]
    best_answer_key[condition] = [[0, zero_best], [1, one_best]]
    counter += 2

#print best answer key per condition
print('Best Answer Dictionary: {}'.format(best_answer_key))

  interactivity=interactivity, compiler=compiler, result=result)


Best Answer Dictionary: {'rare_threat_1': [[0, 'Pinecone 1.jpg'], [1, 'Pumpkin 1.jpg']], 'rare_threat_2': [[0, 'Keyboard 3.jpg'], [1, 'Office supplies 2.jpg']], 'common_threat_1': [[0, 'Fire hydrant 1.jpg'], [1, 'Fence 2.jpg']], 'common_threat_2': [[0, 'Bricks 1.jpg'], [1, 'Barrels 1.jpg']], 'neutral_1': [[0, 'Snow 3.jpg'], [1, 'Skyscraper 1.jpg']], 'neutral_2': [[0, 'Clean 1.jpg'], [1, 'Cotton swabs 3.jpg']]}


## Get summary statistics for optimal behavior per condition

In [7]:
from scipy import stats
# Per-condition lists of "best action" counts. One entry is appended each time
# a test block for that condition is finished (see the loop below).
rt1=[]
rt2=[]
ct1=[]
ct2=[]
n1=[]
n2=[]

# NOTE: value_difference, tally, invalid_scores, start_new_test_set and
# start_new_subject are initialised here but not referenced again in this cell.
value_difference=[]

#subjective_queries



tally=0
invalid_scores={}
counter=0                 # number of condition blocks seen so far (== len(conditions))
conditions=[]             # condition name of every test block, in order of appearance
start_new_test_set=0
start_new_subject=0
best_action_tally=0       # best-action responses counted in the current block

# Walk every row of the task log; only rows whose display starts with 'test'
# are processed.
for row,data in df_task_r.iterrows():
    if str(df_task_r['display'][row]).startswith('test'):
        # display[5:] drops the first five characters (presumably the 'test_'
        # prefix) so the remainder can be used as a key into best_answer_key.
        if counter==0:
            # very first test row: open the first block
            conditions.append(df_task_r['display'][row][5:])
            condition_info=best_answer_key[conditions[counter]]
            new_condition=0
            counter+=1

        elif df_task_r['display'][row][5:]!=conditions[counter-1]:
                    # display changed: a new block starts on this row
                    conditions.append(df_task_r['display'][row][5:])
                    condition_info=best_answer_key[conditions[counter]]
                    new_condition=1
                    counter+=1
        else:
            new_condition=0
                    
        #Get values and convert from strings to floating point
        # A value containing "p" is read as its first two characters times 0.01;
        # otherwise only the second character is parsed (the first is presumably
        # a currency symbol — TODO confirm against task.csv).
        value1=df_task_r['test_image1_value'][row]
        if "p" in value1:
            value1=float(value1[0:2])*0.01
        else:
            value1=float(value1[1])
        value2=df_task_r['test_image2_value'][row]
        if "p" in value2:
            value2=float(value2[0:2])*0.01
        else:
            value2=float(value2[1])
        
        
        
        # The better option is the image with the larger value; ties go to test_image2.
        if value1>value2:
            best_option=df_task_r['test_image1'][row]
        else:
            best_option=df_task_r['test_image2'][row]
        
            
        #get response and convert to integer
        # Any response that cannot be converted becomes the string 'n/a', which
        # never equals an integer best_action.
        try:
            current_response=int(df_task_r['Response'][row])
        except:  
              current_response='n/a'
                
            
        
        
        # determine if participant made best choice
        # Each info_total is [action, image]; best_action is the action whose
        # image equals best_option. If nothing matches, best_action keeps the
        # value from an earlier row.
        for info_total in condition_info:
            for info in info_total:
                if best_option == str(info):
                    best_action=info_total[0]
        if new_condition==1:          
           
                # conditions[counter-2] is the block that just ended: store its
                # tally in the matching list.
                if conditions[counter-2]=='rare_threat_1':
                    rt1.append(best_action_tally)

                elif conditions[counter-2]=='rare_threat_2':
                    rt2.append(best_action_tally)
  
                elif conditions[counter-2]=='common_threat_1':
                    ct1.append(best_action_tally)

                elif conditions[counter-2]=='common_threat_2':
                    ct2.append(best_action_tally)

                elif conditions[counter-2]=='neutral_1':
                    n1.append(best_action_tally)

                elif conditions[counter-2]=='neutral_2':
                    n2.append(best_action_tally)
                 
                # reset, then score the current row as the first trial of the new block
                best_action_tally=0
                if current_response==best_action:
                    best_action_tally+=1
                    
        # Hard-coded row label: the running tally is stored in ct2 here because
        # no later condition change would flush it. The current row itself is
        # not scored in this branch.
        # NOTE(review): presumably 17255 is the last test row of task.csv and the
        # final block is common_threat_2 — verify if the data file changes.
        elif row==17255:
            print(best_action_tally)
            ct2.append(best_action_tally)
            
                
            
        else:
            # ordinary trial inside the current block
            if current_response==best_action:
                best_action_tally+=1
                
        
        
                
# DELETE subs scoring below 50% right on neutral rounds 
# bad_subs_indices=[2,4,7,10] 
# bad_subs_indices.sort(reverse=True)
# for index in bad_subs_indices:
#     del rt1[index]
#     del rt2[index]
#     del ct1[index]
#     del ct2[index]
#     del n1[index]
#     del n2[index]

    
# Turn the per-condition tallies into arrays so they combine element-wise.
rt1, rt2, ct1, ct2, n1, n2 = [np.array(scores) for scores in (rt1, rt2, ct1, ct2, n1, n2)]

print('PSWQ Scores: {}'.format(PSWQ_Scores))
print('\n\n')

# Same four-line report for every condition: title, raw tallies, mean/SD, spacer.
for title, scores in (('Rare Threat 1', rt1),
                      ('Rare Threat 2', rt2),
                      ('Common Threat 1', ct1),
                      ('Common Threat 2', ct2),
                      ('Neutral 1', n1),
                      ('Neutral 2', n2)):
    print(title)
    print(scores)
    print('Mean = {}, SD={}'.format(np.mean(scores), np.std(scores)))
    print('\n\n')

print('Accuracy per subject (based on neutral conditions): {}'.format((n1 + n2) / 2.0))

# Group-level contrasts on the summed (two-round) tallies.
rare_mean = np.mean(rt1 + rt2)
common_mean = np.mean(ct1 + ct2)
print('Effect of positive stimuli on rare: {}'.format((rare_mean - common_mean)))
print('Effect of emotion: {}'.format(np.mean(n1 + n2) - ((common_mean + rare_mean) / 2.0)))

#compute T test
print('\nDependent Sample T-test Effect of emotion distraction on learning and/or planning:')
all_emotion = (ct1 + ct2 + rt2 + rt1) / 4.0
effect_of_valence = ((ct1 + ct2) - (rt1 + rt2)) / 2.0

# Per-subject valence effect against PSWQ score.
r, pval = stats.pearsonr(effect_of_valence, PSWQ_Scores)
print('correlation valence effect with PSWQ: {}, p-val: {}\n'.format(r, pval))

# Same correlation with the entry at index 3 left out of both vectors.
ef_valence_no_outlier = np.delete(effect_of_valence, 3)
PSWQ_nooutlier = np.delete(PSWQ_Scores, 3)
r, pval = stats.pearsonr(ef_valence_no_outlier, PSWQ_nooutlier)
print('\ncorrelation valence effect with PSWQ NO outlier: {}, p-val: {}\n'.format(r, pval))


for pswq_score, valence_effect in zip(PSWQ_Scores, effect_of_valence):
    print('PSWQ: {}, Valence_Effect: {}'.format(pswq_score, valence_effect))
all_neutral = (n1 + n2) / 2.0

#effect of worry on distraction
distraction_effect = effect_of_valence - all_neutral
r, pval = stats.pearsonr(distraction_effect, PSWQ_Scores)
print('r distraction w/ worry: {}, p-val: {}'.format(r, pval))

# Paired comparison of emotional vs. neutral blocks.
t_stat, pval = stats.ttest_rel(all_emotion, all_neutral)
print('T-statistic: {}, p-value: {}'.format(t_stat, pval))



20
PSWQ Scores: [67, 28, 52, 54, 70, 71, 71, 48, 47, 60, 61, 36, 62]



Rare Threat 1
[ 8 20  2  1  7 16 18  1 20 15  2 20 19]
Mean = 11.461538461538462, SD=7.752075499904075



Rare Threat 2
[14 20  2 20 12 20 19  1 20 18  0 20 20]
Mean = 14.307692307692308, SD=7.689999653742269



Common Threat 1
[ 1 20  0 20  4 10 20  1 20 19  0 20 20]
Mean = 11.923076923076923, SD=8.90515441460332



Common Threat 2
[ 6 20  0 20  9 20 19  0 20 20  2 20 20]
Mean = 13.538461538461538, SD=8.316946412784928



Neutral 1
[ 9 20  0 20 11 20 20  0 20 20  0 20 19]
Mean = 13.76923076923077, SD=8.312676567506783



Neutral 2
[13 20 17 20  6 20 20 20 20 20 19 20 20]
Mean = 18.076923076923077, SD=3.9896315322738274



Accuracy per subject (based on neutral conditions): [11.  20.   8.5 20.   8.5 20.  20.  10.  20.  20.   9.5 20.  19.5]
Effect of positive stimuli on rare: 0.30769230769231015
Effect of emotion: 6.2307692307692335

Dependent Sample T-test Effect of emotion distraction on learning and/or planning:


## Get estimates of self-reported distortions per condition

In [8]:
# Rows holding the self-reported probability answers.
df_subj_probs = df_task_r[['Response', 'image_query1', 'image_query2']][df_task_r['display'] == 'query_internal_probability']

QUERIES_PER_CONDITION = 4    # answers that make up one condition's set
CONDITIONS_PER_SUBJECT = 6   # sets that make up one subject

# Distinct image_query1 values, in order of first appearance.
image1s = list(dict.fromkeys(df_subj_probs['image_query1']))

#build nested list of all subjective probabilities per subject per condition
# all_subs_data[subject][condition] -> list of 4 raw responses.
# Trailing answers that do not fill a complete set / subject are dropped.
responses = list(df_subj_probs['Response'])
query_sets = [responses[start:start + QUERIES_PER_CONDITION]
              for start in range(0, len(responses) - QUERIES_PER_CONDITION + 1,
                                 QUERIES_PER_CONDITION)]
all_subs_data = [query_sets[start:start + CONDITIONS_PER_SUBJECT]
                 for start in range(0, len(query_sets) - CONDITIONS_PER_SUBJECT + 1,
                                    CONDITIONS_PER_SUBJECT)]


conditions = ['rare_threat_1',
              'rare_threat_2',
              'common_threat_1',
              'common_threat_2',
              'neutral_1',
              'neutral_2']

#subjective probability distortions
pd_dict = {condition: [] for condition in conditions}

# The two possible ground-truth orderings of the four queried probabilities.
pattern1 = [40, 60, 60, 40]
pattern2 = [60, 40, 40, 60]

#print Ground Truth probabilities and Subjectively Reported Probabilities for each subject for each condition
# The subject count comes from the data rather than a fixed 13, so the loop
# covers exactly the subjects that have a complete set of answers.
for sub, subject_sets in enumerate(all_subs_data):
    for cond, condition in enumerate(conditions):
        print('\n Subject {} Self Reported Probabilities in Condition {}:\n'.format(sub + 1, condition))

        # pattern2 applies when the best image for action 0 is one of the
        # image_query1 images, pattern1 otherwise.
        if best_answer_key[condition][0][1] not in image1s:
            ground_truth = pattern1
        else:
            ground_truth = pattern2
        print('Ground Truth Probabilities:{}'.format(ground_truth))

        reported = subject_sets[cond]
        total_distortion = 0
        for truth, sr_num in zip(ground_truth, reported):
            try:
                # NOTE(review): only the first two characters are parsed, so a
                # response such as '100' is read as 10 — confirm this is intended.
                if len(sr_num) == 1:
                    estimate = int(sr_num)
                else:
                    estimate = int(sr_num[0:2])
            except (TypeError, ValueError):
                # TypeError: response is not a string (e.g. a missing value);
                # ValueError: response is not numeric. Such answers add nothing.
                print('not a real number: {}'.format(sr_num))
                continue
            total_distortion += np.abs(truth - estimate)
        pd_dict[condition].append(total_distortion)

        print('Subjective Probabilities: {}'.format(reported))
        print('\n')

print('\nDistribution of self-reported probability distortions across subjects: \n')
for key in pd_dict:
    print('{}  : {}'.format(key, pd_dict[key]))
    print('Average distortion for {} = {}\n\n'.format(key, np.mean(pd_dict[key])))
            
            
                      


 Subject 1 Self Reported Probabilities in Condition rare_threat_1:

Ground Truth Probabilities:[40, 60, 60, 40]
Subjective Probabilities: ['60', '40', '60', '40']



 Subject 1 Self Reported Probabilities in Condition rare_threat_2:

Ground Truth Probabilities:[60, 40, 40, 60]
Subjective Probabilities: ['40', '60', '40', '60']



 Subject 1 Self Reported Probabilities in Condition common_threat_1:

Ground Truth Probabilities:[40, 60, 60, 40]
Subjective Probabilities: ['30', '70', '30', '70']



 Subject 1 Self Reported Probabilities in Condition common_threat_2:

Ground Truth Probabilities:[60, 40, 40, 60]
Subjective Probabilities: ['60', '40', '60', '40']



 Subject 1 Self Reported Probabilities in Condition neutral_1:

Ground Truth Probabilities:[60, 40, 40, 60]
Subjective Probabilities: ['40', '60', '40', '60']



 Subject 1 Self Reported Probabilities in Condition neutral_2:

Ground Truth Probabilities:[60, 40, 40, 60]
Subjective Probabilities: ['60', '40', '60', '40']



 Subjec

## Effect of negative distractors as a function of accuracy

### Lessons for Pilot #2

1. Have more extensive practice 
2. Ensure participants know how to answer self-reported probability queries
3. Ensure participants know that 75 p = 0.75
4. If a participant chooses correctly on fewer than 50% of neutral trials, remove that participant. 
5. Should I have a condition where it's negative + neutral?

## Pilot 2: how the data are created

In [47]:
import pandas as pd
import numpy as np
# Load the pilot-2 task log and keep only the columns needed for the analysis.
df_task = pd.read_csv('task2.csv')
df_task_r = df_task[['display', 'forced_choice', 'Response', 'image2', 'test_image1',
                     'test_image2', 'test_image1_value',
                     'test_image2_value', 'Zone_type']]

# Rows whose Zone_type is 'response_slider_endValue'.
df_sr_currentmood = df_task_r[['Zone_type', 'Response']][df_task_r['Zone_type'] == 'response_slider_endValue']

# Rows with a forced choice are used to work out the best choice.
df_fc = df_task_r[['forced_choice', 'image2', 'display']][pd.notnull(df_task_r['forced_choice'])]
# strip away practice rounds that come before actual task and only get 1st subject
df_fc = df_fc.iloc[117:1307]

# image -> number of times it followed a forced choice starting with 'o'
# (one_dict) or any other forced choice (zero_dict).
zero_dict = {}
one_dict = {}
conditions = ['rare_threat_1',
              'rare_positive_1',
              'common_threat_1',
              'common_positive_1',
              'neutral_1',
              'neutral_2']

# For every condition, find the first forced-choice row whose display equals
# the condition name and tally the 160 consecutively-labelled rows from there.
# Columns are addressed by name: integer positions such as `row[2]` on a
# string-labelled Series are deprecated in pandas, and the former bare
# `except:` would have hidden that failure behind a lone 'error' line.
cp = 1
for condition in conditions:
    print(cp)  # progress marker: 1-based index of the condition being processed
    cp += 1
    matching_rows = df_fc.index[df_fc['display'] == condition]
    if len(matching_rows) == 0:
        continue
    first_row = matching_rows[0]
    try:
        for offset in range(160):
            trial = df_fc.loc[first_row + offset]
            tally = one_dict if trial['forced_choice'].startswith('o') else zero_dict
            image = trial['image2']
            tally[image] = tally.get(image, 0) + 1
    except (KeyError, AttributeError):
        # KeyError: the 160-row window runs past the labels kept in df_fc.
        # AttributeError: a forced_choice value is not a string.
        # Counting for this condition stops at the failing row (best effort).
        print('error')

# The dicts keep insertion order, so entries 2k and 2k+1 are the two images
# first seen while processing the k-th condition
# (assumes exactly two images per action per condition — TODO confirm).
zero_images = list(zero_dict)
one_images = list(one_dict)

best_answer_key = {}
counter = 0
for condition in conditions:
    first, second = zero_images[counter], zero_images[counter + 1]
    zero_best = first if zero_dict[first] > zero_dict[second] else second

    first, second = one_images[counter], one_images[counter + 1]
    one_best = first if one_dict[first] > one_dict[second] else second

    # [action, image most often reached by that action]
    best_answer_key[condition] = [[0, zero_best], [1, one_best]]
    counter += 2


#print best answer key per condition
print('Best Answer Dictionary: {}'.format(best_answer_key))

1
2
3
4
5
6
error
Best Answer Dictionary: {'rare_threat_1': [[0, 'Keyboard 3.jpg'], [1, 'Office supplies 2.jpg']], 'rare_positive_1': [[0, 'Pinecone 1.jpg'], [1, 'Pumpkin 1.jpg']], 'common_threat_1': [[0, 'Fire hydrant 1.jpg'], [1, 'Fence 2.jpg']], 'common_positive_1': [[0, 'Bricks 1.jpg'], [1, 'Barrels 1.jpg']], 'neutral_1': [[0, 'Snow 3.jpg'], [1, 'Skyscraper 1.jpg']], 'neutral_2': [[0, 'Clean 1.jpg'], [1, 'Cotton swabs 3.jpg']]}


In [50]:
#save self-reported state mood taken prior to task
sr_mood=df_sr_currentmood['Response'].values
#np.save('selfreportmood_r2',sr_mood)

In [65]:
import pandas as pd
import numpy as np
from scipy import stats
# Reload the pilot-2 task log and keep just the columns the analysis needs.
df_task = pd.read_csv('task2.csv')
analysis_columns = ['display', 'forced_choice', 'Response', 'image2',
                    'test_image1', 'test_image2',
                    'test_image1_value', 'test_image2_value']
df_task_r = df_task[analysis_columns]

# Best-action dictionary for the present design (5 condition entries):
# condition -> [[0, image paired with action 0], [1, image paired with action 1]]
best_answer_key = {
    condition: [[0, image_for_zero], [1, image_for_one]]
    for condition, (image_for_zero, image_for_one) in (
        ('rare_threat_1', ('Keyboard 3.jpg', 'Office supplies 2.jpg')),
        ('rare_positive_1', ('Pinecone 1.jpg', 'Pumpkin 1.jpg')),
        ('common_positive_1', ('Bricks 1.jpg', 'Barrels 1.jpg')),
        ('neutral_1', ('Snow 3.jpg', 'Skyscraper 1.jpg')),
        ('neutral_2', ('Clean 1.jpg', 'Cotton swabs 3.jpg')),
    )
}

# Best-action counts per condition, one entry per finished block.
rt1, rp1, cp1, n1, n2 = [], [], [], [], []

kept_conds = ['neutral_2', 'common_positive_1', 'rare_threat_1', 'rare_positive_1']
value_differential = []
temp_row = []
bas = []
conditions = []
invalid_scores = {'NaN'}

# Counters and flags.
test_counter = 1
current_subject = 1
appended_c = tally = counter = 0
start_new_test_set = start_new_subject = 0
best_action_tally = condition_counter = sub_counter = 0

# Subject-specific trial-by-trial data for the block currently being built.
rt_sub, rp_sub, cp_sub = [], [], []
neut1_sub, neut2_sub = [], []
current_choice_data = []

# (subjects, stored conditions, trials) array, to be populated below.
choice_data_3d = np.zeros((14, 4, 20))

# Walk every row of the pilot-2 task log. Rows whose display starts with
# 'test' are scored against best_answer_key; the loop builds
#   * per-condition best-action counts (rt1, rp1, cp1, n1, n2),
#   * a per-subject 0/1 trial trace written into choice_data_3d, and
#   * for subject 1 only, |value1-value2| per trial (value_differential) and
#     the best action on neutral_1 rows (bas).
for row,data in df_task_r.iterrows():
    
    if str(df_task_r['display'][row]).startswith('test'):
        # display[5:] drops the first five characters (presumably the 'test_'
        # prefix) so the remainder can be used as a key into best_answer_key.
        if counter==0:
            # very first test row: open the first block
            conditions.append(df_task_r['display'][row][5:])
            condition_info=best_answer_key[conditions[counter]]
            new_condition=0
            counter+=1

        elif df_task_r['display'][row][5:]!=conditions[counter-1]:
                    # display changed: close the previous block (conditions[counter-1])
                    # and open the new one. The current row is still scored further
                    # down, as the first trial of the new block.
                    conditions.append(df_task_r['display'][row][5:])
                    condition_info=best_answer_key[conditions[counter]]
                    if conditions[counter-1]=='rare_threat_1':
                        rt1.append(best_action_tally)
                        rt_sub=current_choice_data
       

                    elif conditions[counter-1]=='rare_positive_1':
                        rp1.append(best_action_tally)
                        rp_sub=current_choice_data


                    elif conditions[counter-1]=='common_positive_1':
                        cp1.append(best_action_tally)
                        cp_sub=current_choice_data

                    elif conditions[counter-1]=='neutral_1':
                        n1.append(best_action_tally)
                        neut1_sub=current_choice_data

                    elif conditions[counter-1]=='neutral_2':
                        n2.append(best_action_tally)
                        neut2_sub=current_choice_data
                    counter+=1
                    best_action_tally=0
                    current_choice_data=[]
                    condition_counter+=1
                    # after 5 finished blocks (condition_counter>4), the subject is
                    # complete: store the traces and start a new subject
                                        
                    if condition_counter>4:
                        current_subject+=1
                        
                        # Traces shorter than 20 trials are right-padded with zeros
                        # so that every row written to choice_data_3d has length 20.
                        if len(neut1_sub)<20:
                            neut1_sub=[int(x) for x in neut1_sub+np.zeros(20-len(neut1_sub)).tolist()]
                            
                        
                        if len(neut2_sub)<20:
                            neut2_sub=[int(x) for x in neut2_sub+np.zeros(20-len(neut2_sub)).tolist()]
                        
                        
                        if len(rt_sub)<20:
                            rt_sub=[int(x) for x in rt_sub+np.zeros(20-len(rt_sub)).tolist()]
                      
                        if len(rp_sub)<20:
                            rp_sub=[int(x) for x in rp_sub+np.zeros(20-len(rp_sub)).tolist()]
                      
                    
                        if len(cp_sub)<20:
                            cp_sub=[int(x) for x in cp_sub+np.zeros(20-len(cp_sub)).tolist()]
                        
                        
                        #simulate two equal rounds
                        # Slot order: 0 neutral_1, 1 common_positive, 2 rare_positive,
                        # 3 rare_threat. neut2_sub is padded above but not stored.
                        choice_data_3d[sub_counter,0]=neut1_sub
                        choice_data_3d[sub_counter,1]=cp_sub
                        choice_data_3d[sub_counter,2]=rp_sub
                        choice_data_3d[sub_counter,3]=rt_sub
                        
                        sub_counter+=1                                               
                        condition_counter=0
                        
                        rt_sub=[]
                        rp_sub=[]
                        cp_sub=[]
                        neut1_sub=[]
                        neut2_sub=[]
        
        else:
            # same block as the previous test row (new_condition is not read
            # anywhere in this loop)
            new_condition=0
                    
        #Get values and convert from strings to floating point
        # Values containing "p": if the string starts with '2', the first two
        # characters are taken times .01; otherwise the first four characters are
        # parsed as-is. Values without "p": the second character if the string
        # starts with '£', else the first character.
        value1=df_task_r['test_image1_value'][row]
        if "p" in value1:
            if value1.startswith('2'):
                value1=float(value1[0:2])*.01
            else:   
                value1=float(value1[0:4])
            
        else:
            if value1.startswith('£'):
                value1=float(value1[1])
            else:
                value1=float(value1[0])
       
            
        value2=df_task_r['test_image2_value'][row]
        if "p" in value2:
            if value2.startswith('2'):
                value2=float(value2[0:2])*.01
            else:   
                value2=float(value2[0:4])
        else:
            if value2.startswith('£'):
                value2=float(value2[1])
            else:
                value2=float(value2[0])
       
        
        # The better option is the image with the larger value; ties go to test_image2.
        if value1>value2:
            
            best_option=df_task_r['test_image1'][row]
        else:
            best_option=df_task_r['test_image2'][row]
        
        
            
        #get response and convert to integer
        # Anything that cannot be converted is flagged with the string 'missing'.
        try:
            current_response=int(df_task_r['Response'][row])
        except:
            current_response='missing'
                
            
        
        
        # determine if participant made best choice
        # Each info_total is [action, image]; best_action is the action whose
        # image equals best_option. If nothing matches, best_action keeps the
        # value from an earlier row.
        for info_total in condition_info:
            for info in info_total:
                if best_option == str(info):
                    best_action=info_total[0]
        # For the first subject, record the best action on every neutral_1 test row.
        if current_subject==1:
            if conditions[counter-1]=='neutral_1':
                bas.append(best_action)
            
        #for last subject only that doesn't meet the condition above for indexing     
        # Hard-coded row label: flushes the running block into rp1 / rp_sub and
        # stores the subject, because no later condition change would do so.
        # The current row itself is not scored in this branch.
        # NOTE(review): presumably 18310 is the last test row of task2.csv and the
        # final block is rare_positive_1 — verify if the data file changes.
        if row==18310:
            rp1.append(best_action_tally)
            rp_sub=current_choice_data
           
            if len(neut1_sub)<20:
                neut1_sub=[int(x) for x in neut1_sub+np.zeros(20-len(neut1_sub)).tolist()]
            
            if len(neut2_sub)<20:
                neut2_sub=[int(x) for x in neut2_sub+np.zeros(20-len(neut2_sub)).tolist()]

            if len(rt_sub)<20:
                rt_sub=[int(x) for x in rt_sub+np.zeros(20-len(rt_sub)).tolist()]

            if len(rp_sub)<20:
                rp_sub=[int(x) for x in rp_sub+np.zeros(20-len(rp_sub)).tolist()]

            if len(cp_sub)<20:
                cp_sub=[int(x) for x in cp_sub+np.zeros(20-len(cp_sub)).tolist()]

        

            choice_data_3d[sub_counter,0]=neut1_sub
            choice_data_3d[sub_counter,1]=cp_sub
            choice_data_3d[sub_counter,2]=rp_sub
            choice_data_3d[sub_counter,3]=rt_sub
            
            
            
            sub_counter+=1                                               
            condition_counter=0

            rt_sub=[]
            rp_sub=[]
            cp_sub=[]
            neut1_sub=[]
            neut2_sub=[]

        else:
            
            # Score the trial: 1.0 for the best action, 0.0 for any other valid
            # response; 'missing' responses add nothing to the trace.
            if current_response==best_action:
                best_action_tally+=1
                current_choice_data.append(1.0)
                if current_subject==1:
                    # value_differential is filled in rows of 20: test_counter runs
                    # 1..20 within a row; on the 21st trial the finished row is
                    # stored and a new row is started with the current trial.
                    if test_counter<21:
                        test_counter+=1
                        temp_row.append(np.abs(value1-value2))
                    else:
                        value_differential.append(temp_row)
                        temp_row=[]
                        test_counter=2
                        temp_row.append(np.abs(value1-value2))
            elif current_response=='missing':
                # placeholder assignment; x is not used in this loop
                x='missing'
            else:
                current_choice_data.append(0.0)
                if current_subject==1:
                    # same row-of-20 bookkeeping as in the correct-response branch
                    if test_counter<21:
                        test_counter+=1
                        temp_row.append(np.abs(value1-value2))
                    else:
                        value_differential.append(temp_row)
                        temp_row=[]
                        test_counter=2
                        temp_row.append(np.abs(value1-value2))

# store the row that was still being filled when the loop ended
value_differential.append(temp_row)

# #convert to numpy arrays    
# rt1=np.array(rt1)
# print(rt1)
# rp1=np.array(rp1)
# print(rp1)
# cp1=np.array(cp1)
# print(cp1)
# n1=np.array(n1)
# print(n1)
# Show the first five condition blocks in the order they were encountered.
print(conditions[:5])

# Drop the fourth row of value differences (presumably the neutral_1 block,
# which kept_conds omits — TODO confirm), then show what remains.
value_differential.pop(3)
print(value_differential)

# Flatten the remaining rows and shift every value down by 0.8.
vd = []
for block_values in value_differential:
    for difference in block_values:
        vd.append(difference - 0.8)
print(vd)
print(np.mean(value_differential))

# choice_data_3d = choice_data_3d.astype(int)
# print(choice_data_3d)
#np.save('data_pilot2',choice_data_3d)
#np.save('value_diff_atchoice_r2_demeaned',vd)


['neutral_2', 'common_positive_1', 'rare_threat_1', 'neutral_1', 'rare_positive_1']
[[0.25, 0.5, 1.0, 1.0, 0.5, 1.0, 1.0, 0.5, 0.5, 1.0, 1.25, 1.0, 1.0, 1.0, 0.5, 1.25, 1.0, 1.0, 0.5, 0.25], [0.25, 0.25, 1.0, 1.0, 0.5, 1.0, 0.5, 0.5, 1.0, 0.5, 1.0, 1.0, 1.25, 0.5, 0.5, 1.0, 1.0, 1.25, 1.0, 1.0], [1.0, 1.0, 0.25, 0.25, 1.0, 1.25, 0.5, 0.5, 1.0, 0.5, 0.5, 1.0, 1.0, 0.5, 1.0, 1.0, 1.25, 1.0, 1.0, 0.5], [0.25, 1.0, 1.0, 0.5, 1.0, 1.25, 0.5, 0.5, 1.0, 1.0, 0.25, 1.0, 1.0, 0.5, 1.0, 1.25, 0.5, 0.5, 1.0, 1.0]]
[-0.55, -0.30000000000000004, 0.19999999999999996, 0.19999999999999996, -0.30000000000000004, 0.19999999999999996, 0.19999999999999996, -0.30000000000000004, -0.30000000000000004, 0.19999999999999996, 0.44999999999999996, 0.19999999999999996, 0.19999999999999996, 0.19999999999999996, -0.30000000000000004, 0.44999999999999996, 0.19999999999999996, 0.19999999999999996, -0.30000000000000004, -0.55, -0.55, -0.55, 0.19999999999999996, 0.19999999999999996, -0.30000000000000004, 0.199999999999

In [1]:
import pandas as pd
import numpy as np


# Load the round-2 questionnaire export, then count the rows whose
# 'Response' value is exactly 'Male'.
worry_data = pd.read_csv('questionnaires_r2.csv')
male_rows = worry_data['Response'] == 'Male'
print('# Males={}'.format(np.sum(1 * male_rows)))

# ---- PSWQ scoring state ---------------------------------------------------
PSWQ_Scores = []                              # one total per flushed questionnaire
reverse_scored = [1, 3, 8, 10, 11]            # item numbers scored through rs_dict
rs_dict = {1: 5, 2: 4, 3: 3, 4: 2, 5: 1}      # raw response -> reversed value
current_score = 0
reverse_score_counter = 0
counter = 0

# row labels of every PSWQ item that contributed to a score
ilocs = []

# Raw (un-reversed) responses of the current subject, split by item type.
# The per-subject report that used them is switched off in this round.
rs_items = []
non_rs_items = []
sub = 1

for row, _ in worry_data.iterrows():
    raw_key = worry_data['Question Key'][row]
    key_text = str(raw_key)

    # The running total is flushed at every 'BEGIN QUESTIONNAIRE' marker and at
    # the hard-coded row label 530.
    if key_text == 'BEGIN QUESTIONNAIRE' or row == 530:
        PSWQ_Scores.append(current_score)
        current_score = 0
        reverse_score_counter = 0
        counter = 0
        sub += 1
        rs_items = []
        non_rs_items = []
        continue

    # Only keys starting with 'ps' and not ending in 'quantised' are scored.
    if not key_text.startswith('ps') or key_text.endswith('quantised'):
        continue

    # Keys longer than five characters carry a two-character item number,
    # shorter keys a single trailing digit.
    if len(raw_key) > 5:
        item_number = int(raw_key[-2:])
    else:
        item_number = int(raw_key[-1])
    answer = int(worry_data['Response'][row])

    if item_number in reverse_scored:
        rs_items.append(answer)
        current_score += rs_dict[answer]
        reverse_score_counter += 1
    else:
        non_rs_items.append(answer)
        current_score += answer
    counter += 1
    ilocs.append(row)

# Remove the first zero total from the list.
PSWQ_Scores.remove(0)
#np.save('PSWQ_Scores',PSWQ_Scores)
print('Scores on PSWQ =\n {}'.format(PSWQ_Scores))

print('# of subs = {}'.format(len(PSWQ_Scores)))

# Males=8
Scores on PSWQ =
 [67, 57, 57, 61, 60, 65, 67, 44, 37, 52, 65, 58, 27, 59]
# of subs = 14
