In [89]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np

In [90]:
def get_MR_results(path_root, subject_file):
    """
    Parse one subject's mental rotation results file.

    Args:
        path_root: path prefix of the folder where the individual results files
            are; it is concatenated directly with ``subject_file``, so it must
            end with a path separator.
        subject_file: file name of an individual subject's results file.

    Returns:
        results (dict): mental rotation task results
            MR_reaction_times: list of reaction times (how long it took for participant to answer) for each trial
            MR_correct: list of subject responses: 1 if the subject answered correctly, 0 if not
            MR_mean_reaction_time: average reaction time for all trials
            MR_percent_correct: mean of MR_correct, i.e. the fraction (0-1) of trials correctly answered
        None when the file cannot be opened or read, when ``subject_file`` is
        not a string (e.g. NaN for a participant without task data), or when a
        non-blank trial line is malformed.
    """
    try:
        with open(path_root + subject_file) as fp:
            # skip practice trials (first 5 lines), grab all test trials
            test_lines = fp.readlines()[5:]
    except (OSError, TypeError, ValueError):
        # missing/unreadable/undecodable file, or a non-string file name
        return None

    rxn_times = []
    correct = []
    try:
        for line in test_lines:
            fields = line.split()
            if not fields:
                # a blank line carries no trial; skip it instead of
                # discarding the whole subject
                continue
            rxn_times.append(int(fields[3]))
            correct.append(1 if fields[4] == '1' else 0)
    except (IndexError, ValueError):
        # too few fields or a non-integer reaction time
        return None

    # np.mean of an empty list is nan (file with no test trials)
    return {'MR_reaction_times': rxn_times, 'MR_correct': correct,
            "MR_mean_reaction_time": np.mean(rxn_times), "MR_percent_correct": np.mean(correct)}

def get_gng_results(path_root, subject_file):
    """
    Parse one subject's go/no-go results file.

    Each non-blank line is split on whitespace: field 0 is the trial type
    ('go' / 'nogo'), field 1 the reaction time, field 2 a status flag.

    Args:
        path_root: path prefix of the folder where the individual results files
            are; it is concatenated directly with ``subject_file``, so it must
            end with a path separator.
        subject_file: file name of an individual subject's results file.

    Returns:
        results (dict): go/no-go task results
            gng_mean_hit_rxn_times: average rxn time for 'hit' trials (pressed go when shown go),
                i.e. 'go' lines whose status flag is '0'; nan when there are no hits
            gng_commission_errors: number of commission errors (pressed go when no-go was shown),
                i.e. 'nogo' lines whose status flag is '1'
        None when the file cannot be opened or read, when ``subject_file`` is
        not a string (e.g. NaN for a participant without task data), or when a
        non-blank trial line is malformed.
    """
    try:
        with open(path_root + subject_file) as fp:
            trial_lines = fp.readlines()
    except (OSError, TypeError, ValueError):
        # missing/unreadable/undecodable file, or a non-string file name
        return None

    hit_rxn_times = []
    commission_errors = 0
    try:
        for line in trial_lines:
            fields = line.split()
            if not fields:
                # a blank line carries no trial; skip it instead of
                # discarding the whole subject
                continue
            trial_type, status = fields[0], fields[2]
            if trial_type == 'go' and status == '0':
                hit_rxn_times.append(int(fields[1]))
            if trial_type == 'nogo' and status == '1':
                commission_errors += 1
    except (IndexError, ValueError):
        # too few fields or a non-integer reaction time
        return None

    return {'gng_mean_hit_rxn_times': np.mean(hit_rxn_times), "gng_commission_errors": commission_errors}
    
def add_data_cols(df, path_root):
    """
    Append the parsed task results of every participant to the survey frame.

    inputs:
        df: data frame with a 'mental_rotation:1' and a 'stop_signal:1' column
            holding each participant's results file names
        path_root: path to the individual data files (passed through to
            get_MR_results / get_gng_results)
    returns: a new data frame with one column added per result key; rows whose
        file could not be parsed (parser returned None) get NaN in those columns.
        The input frame is left unmodified.
    """
    # parse into stand-alone Series instead of assigning helper columns on df,
    # so the caller's frame is not mutated as a side effect
    mr_results = df['mental_rotation:1'].apply(lambda x: get_MR_results(path_root, x))
    gng_results = df['stop_signal:1'].apply(lambda x: get_gng_results(path_root, x))

    # expand each result dict into columns (one column per dict key)
    return pd.concat([df,
                      mr_results.apply(pd.Series),
                      gng_results.apply(pd.Series)], axis=1)


def add_timepoint(col, time_point):
    """
    Tag a column name with the time point it was collected at.

    Only the timing, task-result and time-of-activity columns listed below are
    tagged (as '<col>_<time_point>'); every other name is returned unchanged.
    """
    per_timepoint_cols = (
        'TIME_start', 'TIME_end', 'TIME_total',
        'MR_reaction_times', 'MR_correct',
        'MR_mean_reaction_time', 'MR_percent_correct',
        'gng_mean_hit_rxn_times', 'gng_commission_errors',
        'TOA_mon', 'TOA_tue', 'TOA_wed', 'TOA_thurs', 'TOA_fri',
    )
    return col + '_' + str(time_point) if col in per_timepoint_cols else col

def clean_cols(df, time_point):
    """
    clean name of columns to improve clarity
    add time point specific extensions

    inputs:
        df: data frame whose column names are strings
        time_point: time point the frame was collected at (0 for baseline);
            for time points > 0 the 'daily_time_of_activity:1..5' columns are
            renamed to 'TOA_mon' .. 'TOA_fri' first
    returns: a new data frame with renamed columns; the input is not modified

    A trailing ':1' is replaced by '_<time_point>'. Only the end of the name is
    rewritten, so multi-item columns such as 'pss:10' or 'pss:12' keep their
    item number instead of being mangled into 'pss_<t>0' / 'pss_<t>2'.
    """
    suffix = "_" + str(time_point)

    renamed = df.rename(columns={'email:1': 'email', 'score:1': 'pss_score:1'})
    if time_point > 0:
        renamed = renamed.rename(columns={
            'daily_time_of_activity:1': 'TOA_mon',
            'daily_time_of_activity:2': 'TOA_tue',
            'daily_time_of_activity:3': 'TOA_wed',
            'daily_time_of_activity:4': 'TOA_thurs',
            'daily_time_of_activity:5': 'TOA_fri'})

    def _tag(name):
        # swap a trailing ':1' for the time point suffix, then tag the
        # columns that never had a ':1' (TIME_*, MR_*, gng_*, TOA_*)
        if name.endswith(":1"):
            name = name[:-2] + suffix
        return add_timepoint(name, time_point)

    return renamed.rename(columns=_tag)

In [91]:
# test = get_gng_results(path_root, df['stop_signal:1'][0])
# print(test)

In [92]:
# test = get_gng_results(path_root, df['stop_signal_0'][0])
# print(test)


# with open(path_root + df['stop_signal:1'][0]) as fp:
#     test = (fp.readlines())

#     # empty lists for storing results
#     hit_rxn_times = []
#     commission_errors = 0

#     # loop through trials, save reaction times for hits, total commission errors
#     for i in range(len(test)):
#         r = test[i].split()

#         if r[0] == 'go' and r[2] == '0':
#             hit_rxn_times.append(int(r[1]))
#         if r[0] == 'nogo' and r[2] == '1':
#             commission_errors += 1

#     # save results as dictionary, calculate mean reaction times and accuracy                
#     results = {'gng_mean_hit_rxn_times': np.mean(hit_rxn_times), "gng_commission_errors": commission_errors}
# results

In [93]:
# read in baseline data (cleaned intake survey export)
df = pd.read_excel('data/data_intake/data_clean.xlsx')

In [94]:
# add task data: parse each participant's mental rotation and go/no-go files
# (file names come from the 'mental_rotation:1' and 'stop_signal:1' columns)
# path_root must end with '/' because it is concatenated directly with the file name
path_root = 'data/data_intake/'

df = add_data_cols(df, path_root)
df.head()

Unnamed: 0,participant,email:1,age:1,gender:1,meditated_before:1,pss:1,pss:2,pss:3,pss:4,pss:5,...,stop_signal:1,TIME_start,TIME_end,TIME_total,MR_reaction_times,MR_correct,MR_mean_reaction_time,MR_percent_correct,gng_mean_hit_rxn_times,gng_commission_errors
0,s.f6520e24-e77a-4bb8-afbe-a4ca5b2a2b0a.txt,kwhaley121@gmail.com,43,2,2,3.0,3.0,3.0,2.0,2.0,...,go_nogo.2019-11-11-0427.data.f6520e24-e77a-4bb...,2019-11-11-04-15,2019-11-11-04-27,12.0,"[20000, 10958, 10001, 5375, 12054, 7442, 20000...","[0, 1, 1, 1, 1, 1, 0, 1, 1, 1]",11309.7,0.8,692.473684,0.0
1,s.10b38f34-ce3c-49d8-9e5a-ffa45b9c13fb.txt,sufikaur@gmail.com,18,2,1,2.0,3.0,2.0,1.0,2.0,...,go_nogo.2019-11-11-0434.data.10b38f34-ce3c-49d...,2019-11-11-04-28,2019-11-11-04-34,6.0,"[2466, 2294, 1626, 3255, 2924, 2896, 4088, 380...","[1, 0, 1, 1, 1, 1, 1, 1, 1, 1]",3217.7,0.9,444.0,0.0
2,s.e7c113a6-876e-41d2-a67b-2c839610e188.txt,cpridester@gmail.com,33,2,1,2.0,0.0,2.0,1.0,2.0,...,go_nogo.2019-11-11-0442.data.e7c113a6-876e-41d...,2019-11-11-04-23,2019-11-11-04-42,19.0,"[2469, 2639, 9266, 4265, 4674, 4817, 5113, 380...","[1, 0, 1, 1, 1, 1, 1, 1, 1, 1]",5612.3,0.9,642.1,0.0
3,s.a0b704f7-3ea4-4574-955a-03be7adb77aa.txt,vkathuria@scu.edu,29,1,1,3.0,3.0,2.0,1.0,2.0,...,go_nogo.2019-11-11-0451.data.a0b704f7-3ea4-457...,2019-11-11-04-41,2019-11-11-04-51,10.0,"[18260, 9923, 9415, 15759, 20000, 11638, 10320...","[1, 0, 1, 1, 0, 1, 0, 0, 1, 1]",14779.1,0.6,690.6,0.0
4,s.ecaa243f-3d36-465a-b2e2-91a3157251fb.txt,karisa.tang@gmail.com,31,2,1,2.0,3.0,4.0,2.0,2.0,...,go_nogo.2019-11-11-0454.data.ecaa243f-3d36-465...,2019-11-11-04-48,2019-11-11-04-54,6.0,"[6399, 15750, 4592, 7523, 4233, 7788, 11067, 5...","[0, 0, 1, 1, 1, 1, 1, 1, 1, 1]",7623.8,0.8,390.75,0.0


In [95]:
# recode meditated_before (1 -> 1, anything else -> 0) and gender
# (2 -> female = 1, anything else -> 0) as 0/1 indicator columns,
# then drop the ':1' suffix from age
df = df.assign(
    meditated_before=df['meditated_before:1'].eq(1).mul(1),
    female=df['gender:1'].eq(2).mul(1),
).rename(columns={'age:1': 'age'})

In [97]:
# clean column names and add time point 0 extension
df = clean_cols(df, 0)

# baseline columns needed for analysis
baseline_cols = [
    'email', 'age', 'female', 'meditated_before',
    'depression_0', 'anxiety_0', 'stress_0', 'pss_score_0',
    'MR_reaction_times_0', 'MR_correct_0',
    'MR_mean_reaction_time_0', 'MR_percent_correct_0',
    'gng_commission_errors_0', 'gng_mean_hit_rxn_times_0',
    'TIME_start_0', 'TIME_end_0', 'TIME_total_0',
]
df = df[baseline_cols]

In [98]:
# get treatment assignments; rename the address column to 'email' so it
# matches the survey frames
df_treatment_assignments = (
    pd.read_excel('data/treatment_assignment.xlsx')
    .rename(columns={'Email Address': 'email'})
)
df_treatment_assignments.head()

Unnamed: 0,email,Group
0,vacherakash@gmail.com,0
1,amanmj95@gmail.com,0
2,reshmasingh@lbl.gov,0
3,b1gupta@ucsd.edu,0
4,raunaq1510sawhney@gmail.com,0


In [99]:
# merge treatment assignments with baseline data
# outer join keeps assigned participants without intake data and vice versa;
# the key is named explicitly so a future shared column name cannot silently
# become part of the join key
df_0 = pd.merge(df_treatment_assignments, df, how='outer', on='email')

In [100]:
# load end of week one data (cleaned week-1 survey export)
df_1 = pd.read_excel('data/data_w1/data_clean.xlsx')

In [101]:
# add end of week one task data, clean column names, add time point 1 extension
# note: path_root is reassigned here, so it now points at the week-1 folder
path_root = 'data/data_w1/'

df_1 = add_data_cols(df_1, path_root)
df_1 = clean_cols(df_1, 1)

In [105]:
# week-1 columns needed for analysis
week1_cols = [
    'email', 'activity_count_1', 'start_day_1',
    'TOA_mon_1', 'TOA_tue_1', 'TOA_wed_1', 'TOA_thurs_1', 'TOA_fri_1',
    'pss_score_1', 'MR_reaction_times_1', 'MR_correct_1',
    'MR_mean_reaction_time_1', 'MR_percent_correct_1',
    'gng_mean_hit_rxn_times_1', 'gng_commission_errors_1',
    'TIME_start_1', 'TIME_end_1', 'TIME_total_1',
]
df_1 = df_1[week1_cols]

In [106]:
# merge week one data with baseline
# outer join on the explicit 'email' key (previously inferred from the shared
# column names)
df3 = pd.merge(df_0, df_1, how='outer', on='email')

In [107]:
# load week 2 data (cleaned week-2 survey export) and preview the first rows
df_2 = pd.read_excel('data/data_w2/data_clean.xlsx')
df_2.head()

Unnamed: 0,participant,email:1,activity_count:1,daily_time_of_activity:1,daily_time_of_activity:2,daily_time_of_activity:3,daily_time_of_activity:4,daily_time_of_activity:5,pss:1,pss:2,...,pss:11,pss:12,pss:13,pss:14,score:1,mental_rotation:1,stop_signal:1,TIME_start,TIME_end,TIME_total
0,s.687663be-ef9e-4c67-97c7-bc9efc2a8cae.txt,magon.liu@gmail.com,5.0,2.0,2.0,2.0,4.0,2.0,3.0,3.0,...,3.0,2.0,2.0,3.0,35.0,mental_rotation.2019-11-24-0041.data.687663be-...,go_nogo.2019-11-24-0041.data.687663be-ef9e-4c6...,2019-11-24-00-36,2019-11-24-00-41,5.0
1,s.54019f20-c1e3-4082-8546-42625dcabe84.txt,jamkylam@gmail.com,5.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,...,3.0,3.0,1.0,1.0,32.0,mental_rotation.2019-11-25-0809.data.54019f20-...,go_nogo.2019-11-25-0810.data.54019f20-c1e3-408...,2019-11-25-08-06,2019-11-25-08-10,4.0
2,s.45403d36-e1e7-4272-be94-9554a8cae930.txt,swamiritika2009@gmail.com,3.0,5.0,1.0,1.0,5.0,1.0,2.0,2.0,...,1.0,2.0,1.0,1.0,27.0,mental_rotation.2019-11-23-1146.data.45403d36-...,go_nogo.2019-11-23-1147.data.45403d36-e1e7-427...,2019-11-23-11-41,2019-11-23-11-47,6.0
3,s.892af005-d5ad-4777-936a-f0e896168dab.txt,shayla.delain.harris@gmail.com,1.0,3.0,5.0,5.0,5.0,5.0,1.0,1.0,...,1.0,2.0,2.0,0.0,17.0,mental_rotation.2019-11-25-0433.data.892af005-...,go_nogo.2019-11-25-0434.data.892af005-d5ad-477...,2019-11-25-04-30,2019-11-25-04-34,4.0
4,s.c666cf60-c96a-43ff-9782-d99dd43db958.txt,luvmaps1938@gmail.com,5.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,...,1.0,3.0,1.0,1.0,17.0,mental_rotation.2019-11-23-1747.data.c666cf60-...,go_nogo.2019-11-23-1748.data.c666cf60-c96a-43f...,2019-11-23-17-42,2019-11-23-17-48,6.0


In [108]:
# add week two task data, clean column names, add time point 2 extension
# note: path_root is reassigned here, so it now points at the week-2 folder
path_root = 'data/data_w2/'

df_2 = add_data_cols(df_2, path_root)
df_2 = clean_cols(df_2, 2)

In [111]:
# week-2 columns needed for analysis
week2_cols = [
    'email', 'activity_count_2',
    'TOA_mon_2', 'TOA_tue_2', 'TOA_wed_2', 'TOA_thurs_2', 'TOA_fri_2',
    'pss_score_2',
    'MR_reaction_times_2', 'MR_correct_2',
    'MR_mean_reaction_time_2', 'MR_percent_correct_2',
    'gng_mean_hit_rxn_times_2', 'gng_commission_errors_2',
    'TIME_start_2', 'TIME_end_2', 'TIME_total_2',
]
df_2 = df_2[week2_cols]
            

In [112]:
# merge week 2 with baseline and week 1 data
# outer join on the explicit 'email' key (previously inferred from the shared
# column names)
# NOTE(review): an email that appears on several week-2 rows produces one
# output row per submission, repeating that participant's baseline values —
# decide whether to de-duplicate df_2 before merging
df_final = pd.merge(df3, df_2, how='outer', on='email')


In [113]:
# add 0/1 indicator vars for completing each survey
# (1 when that survey's end time is present, 0 when it is missing)
completion_flags = {
    'completed_intake': 'TIME_end_0',
    'completed_w1': 'TIME_end_1',
    'completed_w2': 'TIME_end_2',
}
for flag_col, end_col in completion_flags.items():
    df_final[flag_col] = np.where(df_final[end_col].isnull(), 0, 1)

# number of surveys (0-3) each row has an end time for
df_final['total_surveys_completed'] = (
    df_final['completed_intake'] + df_final['completed_w1'] + df_final['completed_w2']
)

In [114]:
# display the merged data set
# NOTE(review): the rendered output contains participant email addresses —
# consider clearing it before sharing the notebook
# NOTE(review): the output shows the same email on two rows (index 1 and 2)
# with different TIME_start_2 values, i.e. repeated week-2 submissions become
# duplicate participant rows
df_final

Unnamed: 0,email,Group,age,female,meditated_before,depression_0,anxiety_0,stress_0,pss_score_0,MR_reaction_times_0,...,MR_percent_correct_2,gng_mean_hit_rxn_times_2,gng_commission_errors_2,TIME_start_2,TIME_end_2,TIME_total_2,completed_intake,completed_w1,completed_w2,total_surveys_completed
0,vacherakash@gmail.com,0.0,26.0,0.0,1.0,6.0,10.0,0.0,13.0,"[5027, 5296, 7077, 6968, 3881, 6829, 4174, 411...",...,,,,,,,1,0,0,1
1,amanmj95@gmail.com,0.0,24.0,0.0,1.0,18.0,10.0,14.0,23.0,"[4192, 2729, 2629, 5218, 1811, 2281, 4937, 302...",...,0.7,364.150000,0.0,2019-11-23-03-55,2019-11-23-03-58,3.0,1,0,1,2
2,amanmj95@gmail.com,0.0,24.0,0.0,1.0,18.0,10.0,14.0,23.0,"[4192, 2729, 2629, 5218, 1811, 2281, 4937, 302...",...,0.5,372.500000,0.0,2019-11-25-04-15,2019-11-25-04-17,2.0,1,0,1,2
3,reshmasingh@lbl.gov,0.0,46.0,1.0,1.0,4.0,0.0,4.0,26.0,"[20000, 20000, 9991, 2850, 5622, 9623, 8031, 1...",...,,,,,,,1,1,0,2
4,b1gupta@ucsd.edu,0.0,25.0,1.0,1.0,6.0,6.0,6.0,29.0,"[3997, 4964, 2413, 2411, 2549, 4669, 4405, 329...",...,0.9,476.200000,0.0,2019-11-25-03-01,2019-11-25-03-10,9.0,1,1,1,3
5,raunaq1510sawhney@gmail.com,0.0,28.0,0.0,1.0,4.0,2.0,10.0,28.0,"[4495, 13445, 5900, 8471, 7084, 4195, 9569, 76...",...,0.9,521.800000,0.0,2019-11-23-20-34,2019-11-23-20-39,5.0,1,1,1,3
6,motasem@berkeley.edu,0.0,41.0,0.0,0.0,2.0,0.0,6.0,20.0,"[15044, 5406, 3433, 3408, 10782, 6790, 7433, 4...",...,,,,,,,1,0,0,1
7,lchutny@gmail.com,0.0,52.0,1.0,1.0,0.0,2.0,14.0,15.0,"[9281, 10199, 9022, 9103, 10011, 5452, 6970, 4...",...,0.6,426.200000,1.0,2019-11-23-06-10,2019-11-23-06-14,4.0,1,1,1,3
8,han.chen795@gmail.com,0.0,24.0,1.0,0.0,12.0,2.0,12.0,18.0,"[1381, 1970, 2130, 2489, 3999, 1959, 16131, 56...",...,0.8,391.850000,0.0,2019-11-23-01-34,2019-11-23-01-37,3.0,1,1,1,3
9,ellenpsmith21@gmail.com,0.0,27.0,1.0,0.0,0.0,0.0,0.0,14.0,"[3654, 3396, 3557, 2710, 7725, 5325, 4525, 718...",...,0.9,438.473684,0.0,2019-11-23-01-43,2019-11-23-01-53,10.0,1,1,1,3


In [115]:
# write to csv (to_csv writes the row index as the first column by default)
df_final.to_csv('data/data_final.csv')

In [116]:
# summary statistics (count, mean, std, quartiles) for the numeric columns
df_final.describe()

Unnamed: 0,Group,age,female,meditated_before,depression_0,anxiety_0,stress_0,pss_score_0,MR_mean_reaction_time_0,MR_percent_correct_0,...,pss_score_2,MR_mean_reaction_time_2,MR_percent_correct_2,gng_mean_hit_rxn_times_2,gng_commission_errors_2,TIME_total_2,completed_intake,completed_w1,completed_w2,total_surveys_completed
count,125.0,82.0,82.0,82.0,81.0,81.0,81.0,81.0,80.0,80.0,...,62.0,62.0,62.0,62.0,62.0,62.0,127.0,127.0,127.0,127.0
mean,0.496,30.756098,0.695122,0.719512,6.641975,4.716049,11.283951,23.938272,6110.89625,0.77875,...,21.387097,4983.13871,0.840323,422.176443,0.322581,37.225806,0.622047,0.464567,0.488189,1.574803
std,0.501996,10.684662,0.463189,0.452002,5.242396,4.198912,5.747248,7.431934,2672.455032,0.122932,...,8.209061,1977.567749,0.127343,82.966556,0.62132,255.041457,0.486796,0.500718,0.50184,1.336423
min,0.0,18.0,0.0,0.0,0.0,0.0,0.0,10.0,2041.7,0.5,...,5.0,1921.1,0.5,306.7,0.0,2.0,0.0,0.0,0.0,0.0
25%,0.0,25.0,0.0,0.0,2.0,2.0,8.0,18.0,3827.65,0.7,...,15.0,3478.3,0.8,364.9125,0.0,4.0,0.0,0.0,0.0,0.0
50%,0.0,28.0,1.0,1.0,6.0,4.0,10.0,23.0,5626.35,0.8,...,20.5,4741.1,0.9,399.675,0.0,5.0,1.0,0.0,0.0,2.0
75%,1.0,33.0,1.0,1.0,10.0,6.0,16.0,30.0,7443.475,0.9,...,28.0,6069.85,0.9,468.2375,0.75,5.0,1.0,1.0,1.0,3.0
max,1.0,81.0,1.0,1.0,24.0,20.0,26.0,39.0,14779.1,1.0,...,38.0,10705.7,1.0,706.85,3.0,2013.0,1.0,1.0,1.0,3.0


In [117]:
# list the column names of the final merged frame
df_final.columns

Index(['email', 'Group', 'age', 'female', 'meditated_before', 'depression_0',
       'anxiety_0', 'stress_0', 'pss_score_0', 'MR_reaction_times_0',
       'MR_correct_0', 'MR_mean_reaction_time_0', 'MR_percent_correct_0',
       'gng_commission_errors_0', 'gng_mean_hit_rxn_times_0', 'TIME_start_0',
       'TIME_end_0', 'TIME_total_0', 'activity_count_1', 'start_day_1',
       'TOA_mon_1', 'TOA_tue_1', 'TOA_wed_1', 'TOA_thurs_1', 'TOA_fri_1',
       'pss_score_1', 'MR_reaction_times_1', 'MR_correct_1',
       'MR_mean_reaction_time_1', 'MR_percent_correct_1',
       'gng_mean_hit_rxn_times_1', 'gng_commission_errors_1', 'TIME_start_1',
       'TIME_end_1', 'TIME_total_1', 'activity_count_2', 'TOA_mon_2',
       'TOA_tue_2', 'TOA_wed_2', 'TOA_thurs_2', 'TOA_fri_2', 'pss_score_2',
       'MR_reaction_times_2', 'MR_correct_2', 'MR_mean_reaction_time_2',
       'MR_percent_correct_2', 'gng_mean_hit_rxn_times_2',
       'gng_commission_errors_2', 'TIME_start_2', 'TIME_end_2', 'TIME_total_2',