In [1]:
import pandas as pd
import os
import pickle

In [2]:
# Lookup of the study design vocabulary: participant IDs, the four
# experimental group codes and the treatment (session phase) codes.
# NOTE(review): 62 IDs are listed here, while the Questionnaire Data frame
# shown later reports 63 rows -- confirm whether an ID is missing.
id_group_treatments_keys = dict(
    Participant_IDs=[
        'T003', 'T005', 'T009', 'T011', 'T016', 'T019', 'T021', 'T031',
        'T032', 'T035', 'T037', 'T046', 'T047', 'T051', 'T061', 'T063',
        'T064', 'T065', 'T066', 'T068', 'T077', 'T078', 'T079', 'T082',
        'T083', 'T084', 'T091', 'T092', 'T093', 'T094', 'T096', 'T098',
        'T099', 'T106', 'T112', 'T113', 'T114', 'T121', 'T122', 'T124',
        'T126', 'T128', 'T130', 'T132', 'T138', 'T139', 'T141', 'T144',
        'T145', 'T151', 'T152', 'T154', 'T156', 'T157', 'T162', 'T166',
        'T172', 'T173', 'T174', 'T175', 'T176', 'T178',
    ],
    # Group code -> email mode x anticipatory stressor level
    Group=dict(
        CH='Email Mode Continual x Anticipatory Stressor High',
        BH='Email Mode Batch x Anticipatory Stressor High',
        CL='Email Mode Continual x Anticipatory Stressor Low',
        BL='Email Mode Batch x Anticipatory Stressor Low',
    ),
    # Treatment code -> description of the session phase
    Treatments=dict(
        RB='Resting baseline',
        ST='Single task',
        DT='Dual task',
        PM='Priming, Stroop',
        RV='Relaxing video',
        PR='Presentation',
    ),
)

In [3]:
# Data dictionary for the questionnaire data.
# Multi-part psychometric scales are described by nested dictionaries (built
# first, below) whose 'Definition' entry explains the scale and whose other
# entries describe the individual sub-scale columns.
_bfi_keys = {
    'Definition': 'A trait psychometric related to the participant’s key personality factors. It has five sub-scales.',
    'BFI_Agreeableness': 'The level of participant’s friendliness with score range [9–45].',
    'BFI_Conscientiousness': 'The level of participant’s organized nature with score range [9–45]',
    'BFI_Extraversion': 'The level of participant’s outgoing nature with score range [8–40].',
    'BFI_Neuroticism': 'The level of participant’s nervousness with score range [8–40].',
    'BFI_Openness': 'The level of participant’s curiosity with score range [10–50].'
}

_erq_keys = {
    'Definition': 'A trait psychometric related to the participant’s ability to regulate emotions. It has two sub-scales.',
    'ERQ_Cognitive_Reappraisal': 'The degree to which a participant can change the way s/he thinks about emotion-eliciting events with score range [6–42].',
    'ERQ_Expressive_Suppression': 'The degree to which a participant can change the way s/he responds to emotion-eliciting events with score range [4–28].',
}

_nasa_tlx_keys = {
    'Definition': 'A state psychometric administered upon completion of DT to gauge the perceived loading this task induced to participants.',
    'Scales': 'It has six sub-scales with common rating [1 = Strongly disagree, 2 = Disagree, 3 = Somewhat disagree, 4 = Neither agree or disagree, 5 = Somewhat agree, 6 = Agree, 7 = Strongly agree].',
    'NASA_Mental_Demand': 'Perceived mental load induced by DT.',
    'NASA_Physical_Demand': 'Perceived physical activity induced by DT.',
    'NASA_Temporal_Demand': 'Perceived time pressure induced by DT.',
    'NASA_Performance': 'Perceived success in executing DT.',
    'NASA_Effort': 'Perceived amount of work expended to achieve the said level of DT performance.',
    'NASA_Frustration': 'Perceived level of irritation in performing DT.'
}

# NOTE(review): the scale-level keys ('Big Five Inventory (BFI)',
# 'Perceived Stress Scale (PSS)', ...) are display names, not column names of
# the questionnaire frame (whose column is 'Perceived_Stress_Scale').
qas_keys = {
    # demographics and self-reported habits
    'Age': 'Age of participants in years.',
    'Gender': 'Gender of participants [1 ≡ Male, 2 ≡ female].',
    'Nationality': 'Nationality of participants [1 ≡ United States, 2 ≡ Others].',
    'Other_Nationality': 'Nationality of non-U.S. participants.',
    'Native_Language': 'Mother tongue of participants [1 ≡ English, 2 ≡ Others].',
    'Other_Native_Language': 'Mother tongue of bilingual participants.',
    'Education': 'Educational level of participants [1 ≡ High School, 2 ≡ Undergraduate, 3 ≡ Master or equivalent, 4 ≡ PhD, JD, or equivalent].',
    'Writing_Proficiency': 'Self-reported writing proficiency of participants in a seven-point Likert scale, where 1 ≡ Not fluent at all and 7 ≡ Very fluent.',
    'Daily_Email_Frequency': 'Self-reported daily use of email in a seven-point Likert scale, where 1 ≡ Never and 7 ≡ Very often.',
    # psychometric scales
    'Big Five Inventory (BFI)': _bfi_keys,
    'Emotion Regulation Questionnaire (ERQ)': _erq_keys,
    'Perceived Stress Scale (PSS)': 'Level of non-specific perceived stress of participants with score range [0–40]. This is a trait psychometric that predicts health-related outcomes associated with appraised stress.',
    'NASA TLX': _nasa_tlx_keys,
}

# Data dictionary for the physiological data columns.
# NOTE(review): the PP_QC unit reads '°C2' -- presumably °C squared; confirm
# against the dataset documentation before changing the text.
# NOTE(review): the HRV frame's RR_QC column has no entry in any dictionary.
physio_keys = dict(
    Treatment='The treatment during which each set of modal signal values was recorded.',
    Time='The recorded date and time for each set of modal signal values.',
    Treatment_Time='The time elapsed in seconds since the start of the present treatment.',
    Task='Labeling of email vs. report writing activity during DT.',
    PP_QC='Values of the perinasal perspiration signal in °C2.',
    EDA_QC='Values of the EDA signal in μS, measured with E4 in the wrist of the participant’s non-dominant hand.',
    BR_QC='Values of the breathing rate signal in BPM, measured with the BioHarness in the participant’s chest.',
    Chest_HR_QC='Values of the heart rate signal in BPM, measured with the BioHarness in the participant’s chest.',
    Wrist_HR_QC='Values of the heart rate signal in BPM, measured with E4 in the wrist of the participant’s non-dominant hand.',
)

# Data dictionary for the keyboard-event columns.
keyboard_keys = dict(
    Is_Key_Up='0 stands for key depressed, while 1 stands for key released.',
    Key='Alphanumeric code of the key that is either released or depressed.',
)

# Data dictionary for the report-quality columns.
# Keys mirror the column headers of 'Report Data.csv' exactly as pandas loads
# them, so that report_keys[column] works for every measurement column. Note
# that the CSV spells two headers 'Mechanic_Errors/WC' and 'Delete_Keys/CC'
# (see the report.info() output); the keys follow the file, not the prose.
report_keys = {
    # raw counts and scores
    'Word_Count': 'The number of words in the report.',
    'Character_Count': 'The number of characters in the report.',
    'Criterion_Score': 'The overall report quality score given by the e-rater.',
    'Mechanics_Errors': 'Number of mechanics errors in the report, such as spelling errors; it is provided by the e-rater.',
    'Grammar_Errors': 'Number of grammar errors in the report, such as subject-verb agreement errors; it is provided by the e-rater.',
    'Usage_Errors': 'Number of usage errors in the report, such as article errors; it is provided by the e-rater.',
    'Style_Errors': 'Number of style errors in the report, such as repetition of words and very short or very long sentences; it is provided by the e-rater.',
    'Delete_Key_Count': 'The number of times the backwards and forward delete keys were depressed during the writing of the report. This information is extracted from the Keyboard Data file.',
    # length-normalized versions of the counts above
    'Mechanic_Errors/WC': 'The number of mechanics errors divided by the number of words in the report.',
    'Grammar_Errors/WC': 'The number of grammar errors divided by the number of words in the report.',
    'Usage_Errors/WC': 'The number of usage errors divided by the number of words in the report.',
    'Style_Errors/WC': 'The number of style errors divided by the number of words in the report.',
    'Delete_Keys/CC': 'The number of times the backwards and forward delete keys were depressed during the writing of the report, normalized per the report length in characters.'
}

In [4]:
# Flatten the data dictionaries into one lookup and persist it as a pickle.
# Only the participant/group/treatment, questionnaire and physiological
# dictionaries are merged; keyboard_keys and report_keys are not included.
study_keys = {}

for dict_ in [id_group_treatments_keys, qas_keys, physio_keys]:
    # top-level keys are copied as-is; a later dict would overwrite a clashing key
    study_keys.update(dict_)

# Write through a context manager so the file handle is flushed and closed
# deterministically instead of being left open until garbage collection.
with open('../data/study_keys.pkl', 'wb') as study_keys_file:
    pickle.dump(study_keys, study_keys_file)

In [5]:
# List the raw CSV files available in the quantitative data folder
os.listdir('../data/Quantitative_Data/')

['HRV.csv',
 'Keyboard Data.csv',
 'Physiological Data.csv',
 'Questionnaire Data.csv',
 'Report Data.csv']

# Keyboard Data

In [6]:
# Load the raw keyboard events (one row per key press or key release)
keyboard = pd.read_csv('../data/Quantitative_Data/Keyboard Data.csv')
# Missing Task values are not filled here; only the ST rows are filled, in a later cell.

keyboard.head()

Unnamed: 0,Participant_ID,Group,Treatment,Time,Task,Is_Key_Up,Key
0,T003,CH,ST,41:53.8,,0,LSHIFT
1,T003,CH,ST,41:53.9,,0,KEY_I
2,T003,CH,ST,41:54.0,,1,KEY_I
3,T003,CH,ST,41:54.0,,1,LSHIFT
4,T003,CH,ST,41:54.1,,0,SPACE


In [7]:
# Non-null counts per column for every (participant, key) pair; a Task count
# lower than the other columns marks events whose Task is missing.
keyboard.groupby(by=['Participant_ID', 'Key']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Group,Treatment,Time,Task,Is_Key_Up
Participant_ID,Key,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
T003,BACK,962,962,962,838,962
T003,CAPITAL,4,4,4,4,4
T003,DOWN,12,12,12,8,12
T003,KEY_0,14,14,14,14,14
T003,KEY_1,14,14,14,14,14
...,...,...,...,...,...,...
T178,OEM_PERIOD,170,170,170,158,170
T178,RETURN,96,96,96,96,96
T178,RIGHT,106,106,106,106,106
T178,SPACE,2724,2724,2724,2446,2724


In [8]:
# Label missing Task values on single-task (ST) rows as 'Not Applicable'.
# Rows of other treatments and rows that already carry a Task are untouched.
# A boolean mask with .loc writes straight into the frame, replacing the
# original construction of a temporary one-column frame passed to update().
st_task_missing = keyboard['Treatment'].eq('ST') & keyboard['Task'].isna()
keyboard.loc[st_task_missing, 'Task'] = 'Not Applicable'

In [9]:
# Inspect the ST rows to confirm their Task values now read 'Not Applicable'
keyboard[keyboard['Treatment'] == 'ST']

Unnamed: 0,Participant_ID,Group,Treatment,Time,Task,Is_Key_Up,Key
0,T003,CH,ST,41:53.8,Not Applicable,0,LSHIFT
1,T003,CH,ST,41:53.9,Not Applicable,0,KEY_I
2,T003,CH,ST,41:54.0,Not Applicable,1,KEY_I
3,T003,CH,ST,41:54.0,Not Applicable,1,LSHIFT
4,T003,CH,ST,41:54.1,Not Applicable,0,SPACE
...,...,...,...,...,...,...,...
918798,T178,BL,ST,14:16.5,Not Applicable,1,OEM_6
918799,T178,BL,ST,14:16.6,Not Applicable,0,OEM_PERIOD
918800,T178,BL,ST,14:16.7,Not Applicable,1,OEM_PERIOD
918801,T178,BL,ST,14:16.9,Not Applicable,0,SPACE


In [10]:
# Last rows of the keyboard frame (DT events, which carry a Task label)
keyboard.tail()

Unnamed: 0,Participant_ID,Group,Treatment,Time,Task,Is_Key_Up,Key
936385,T178,BL,DT,11:24.8,Report,0,KEY_R
936386,T178,BL,DT,11:24.9,Report,1,KEY_A
936387,T178,BL,DT,11:24.9,Report,1,KEY_R
936388,T178,BL,DT,11:24.9,Report,0,OEM_PERIOD
936389,T178,BL,DT,11:24.9,Report,1,OEM_PERIOD


In [11]:
# Column dtypes and non-null counts; in the recorded output Task is the only
# column that still has missing values after the ST fill.
keyboard.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 936390 entries, 0 to 936389
Data columns (total 7 columns):
 #   Column          Non-Null Count   Dtype 
---  ------          --------------   ----- 
 0   Participant_ID  936390 non-null  object
 1   Group           936390 non-null  object
 2   Treatment       936390 non-null  object
 3   Time            936390 non-null  object
 4   Task            921506 non-null  object
 5   Is_Key_Up       936390 non-null  int64 
 6   Key             936390 non-null  object
dtypes: int64(1), object(6)
memory usage: 50.0+ MB


# Report Data

In [12]:
# Load the report-quality data (one row per participant and treatment)
report = pd.read_csv('../data/Quantitative_Data/Report Data.csv')
report.head()

Unnamed: 0,Participant_ID,Group,Treatment,Word_Count,Character_Count,Criterion_Score,Mechanics_Errors,Grammar_Errors,Usage_Errors,Style_Errors,Delete_Key_Count,Mechanic_Errors/WC,Grammar_Errors/WC,Usage_Errors/WC,Style_Errors/WC,Delete_Keys/CC
0,T003,CH,DT,367,2029,4,5,0,2,10,419.0,0.013624,0.0,0.00545,0.027248,0.206506
1,T003,CH,ST,126,694,1,2,1,0,12,62.0,0.015873,0.007937,0.0,0.095238,0.089337
2,T005,CH,DT,175,1143,3,4,1,3,5,1235.0,0.022857,0.005714,0.017143,0.028571,1.08049
3,T005,CH,ST,146,871,2,8,0,1,8,191.0,0.054795,0.0,0.006849,0.054795,0.219288
4,T009,CH,DT,226,1485,4,3,0,1,8,893.0,0.013274,0.0,0.004425,0.035398,0.601347


In [13]:
# Distinct delete-key counts; NaN appears among them, so some reports have no count
report['Delete_Key_Count'].unique()

array([ 419.,   62., 1235.,  191.,  893.,   79.,  654.,   95.,  708.,
         54., 1051.,  112.,  370.,   72.,  962.,  122.,  891.,  152.,
        434.,   59.,  668.,  126., 1184.,  105.,  500.,   61.,  658.,
         47., 1138.,  321., 1102.,  161.,  670.,  127.,  552., 1165.,
        103.,  177.,  970.,  214.,  517.,   87., 1247.,  187., 1096.,
        166.,  275.,   nan,  829.,  114.,  686.,   65.,  414.,   60.,
        454.,   71.,  335.,   39.,  457., 1017.,  207.,  466.,   82.,
       1210.,  165.,  703.,   80., 1462.,  170.,  717.,  211., 1335.,
        243.,  934.,   91., 1062.,  192., 1046., 1500., 1684.,  213.,
        969.,  137.,  996.,  131., 1382.,  263.,  395.,   25.,  638.,
       1000.,   93., 1162.,  150.,  785.,  120.,  647.,  101.,  651.,
        231.,  515.,   74.])

In [14]:
# Column dtypes and non-null counts for the report data
report.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 124 entries, 0 to 123
Data columns (total 16 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Participant_ID      124 non-null    object 
 1   Group               124 non-null    object 
 2   Treatment           124 non-null    object 
 3   Word_Count          124 non-null    int64  
 4   Character_Count     124 non-null    int64  
 5   Criterion_Score     124 non-null    int64  
 6   Mechanics_Errors    124 non-null    int64  
 7   Grammar_Errors      124 non-null    int64  
 8   Usage_Errors        124 non-null    int64  
 9   Style_Errors        124 non-null    int64  
 10  Delete_Key_Count    110 non-null    float64
 11  Mechanic_Errors/WC  124 non-null    float64
 12  Grammar_Errors/WC   124 non-null    float64
 13  Usage_Errors/WC     124 non-null    float64
 14  Style_Errors/WC     124 non-null    float64
 15  Delete_Keys/CC      110 non-null    float64
dtypes: float

In [15]:
# Spot-check a single participant: one DT row and one ST row are expected
report[report['Participant_ID'] == 'T003']

Unnamed: 0,Participant_ID,Group,Treatment,Word_Count,Character_Count,Criterion_Score,Mechanics_Errors,Grammar_Errors,Usage_Errors,Style_Errors,Delete_Key_Count,Mechanic_Errors/WC,Grammar_Errors/WC,Usage_Errors/WC,Style_Errors/WC,Delete_Keys/CC
0,T003,CH,DT,367,2029,4,5,0,2,10,419.0,0.013624,0.0,0.00545,0.027248,0.206506
1,T003,CH,ST,126,694,1,2,1,0,12,62.0,0.015873,0.007937,0.0,0.095238,0.089337


In [16]:
# Print every report column name on its own line
for column_name in report.columns:
    print(column_name)

Participant_ID
Group
Treatment
Word_Count
Character_Count
Criterion_Score
Mechanics_Errors
Grammar_Errors
Usage_Errors
Style_Errors
Delete_Key_Count
Mechanic_Errors/WC
Grammar_Errors/WC
Usage_Errors/WC
Style_Errors/WC
Delete_Keys/CC


# Questionnaire Data

In [17]:
# Load the questionnaire answers (one row per participant)
qas = pd.read_csv('../data/Quantitative_Data/Questionnaire Data.csv')
# The CSV header misspells this column; use the corrected name from here on
qas = qas.rename(columns={'Percieved_Stress_Scale': 'Perceived_Stress_Scale'})
qas.head()

Unnamed: 0,Participant_ID,Group,Age,Gender,Nationality,Other_Nationality,Native_Language,Other_Native_Language,Education,Writing_Proficiency,...,BFI_Openness,ERQ_Cognitive_Reappraisal,ERQ_Expressive_Suppression,Perceived_Stress_Scale,NASA_Mental_Demand,NASA_Physical_Demand,NASA_Temporal_Demand,NASA_Performance,NASA_Effort,NASA_Frustration
0,T003,CH,23.0,2,1.0,,1.0,,2.0,7.0,...,41.0,37.0,12.0,18.0,6.0,5.0,6.0,5.0,6.0,5.0
1,T005,CH,19.0,2,1.0,,2.0,Spanish,2.0,6.0,...,42.0,31.0,8.0,17.0,6.0,2.0,7.0,5.0,5.0,4.0
2,T009,CH,20.0,2,1.0,,1.0,,2.0,7.0,...,42.0,32.0,14.0,20.0,5.0,3.0,5.0,3.0,6.0,7.0
3,T011,CH,24.0,2,1.0,,2.0,Spanish,2.0,7.0,...,39.0,40.0,14.0,3.0,5.0,2.0,5.0,5.0,5.0,2.0
4,T016,BH,20.0,1,1.0,,1.0,,2.0,5.0,...,29.0,33.0,21.0,10.0,3.0,6.0,5.0,5.0,6.0,6.0


In [18]:
# Column dtypes and non-null counts for the questionnaire data
qas.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63 entries, 0 to 62
Data columns (total 25 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Participant_ID              63 non-null     object 
 1   Group                       63 non-null     object 
 2   Age                         61 non-null     float64
 3   Gender                      63 non-null     int64  
 4   Nationality                 61 non-null     float64
 5   Other_Nationality           6 non-null      object 
 6   Native_Language             61 non-null     float64
 7   Other_Native_Language       6 non-null      object 
 8   Education                   61 non-null     float64
 9   Writing_Proficiency         61 non-null     float64
 10  Daily_Email_Frequency       61 non-null     float64
 11  BFI_Agreeableness           61 non-null     float64
 12  BFI_Conscientiousness       61 non-null     float64
 13  BFI_Extraversion            61 non-nu

In [19]:
# Free-text nationality answers; spelling and case are inconsistent (e.g. 'Indian' vs 'INDIA')
qas['Other_Nationality'].unique()

array([nan, 'Chinese', 'Philippines', 'Dominican American', 'Indian',
       'Nigeria', 'INDIA'], dtype=object)

In [20]:
# Free-text native-language answers given by the participants
qas['Other_Native_Language'].unique()

array([nan, 'Spanish', 'farsi', 'Chinese (Cantonese in specific)',
       'English and Spanish'], dtype=object)

In [21]:
# Drop the two sparse free-text columns (6 non-null values each in the
# recorded qas.info() output).
# Reassigning instead of inplace=True, together with errors='ignore', keeps the
# cell safe to re-run: a second execution is a no-op rather than a KeyError.
qas = qas.drop(columns=['Other_Nationality', 'Other_Native_Language'], errors='ignore')

In [22]:
# Print the remaining questionnaire column names, one per line
for column_name in qas.columns:
    print(column_name)

Participant_ID
Group
Age
Gender
Nationality
Native_Language
Education
Writing_Proficiency
Daily_Email_Frequency
BFI_Agreeableness
BFI_Conscientiousness
BFI_Extraversion
BFI_Neuroticism
BFI_Openness
ERQ_Cognitive_Reappraisal
ERQ_Expressive_Suppression
Perceived_Stress_Scale
NASA_Mental_Demand
NASA_Physical_Demand
NASA_Temporal_Demand
NASA_Performance
NASA_Effort
NASA_Frustration


In [23]:
# IDs of the participants whose Age is missing in the questionnaire
age_missing = qas['Age'].isna()
qas.loc[age_missing, 'Participant_ID'].values

array(['T082', 'T096'], dtype=object)

In [24]:
def _columns_with(tag):
    """Return the questionnaire column names that contain the substring `tag`."""
    return [name for name in qas.columns if tag in name]


# Column groups for the three multi-part psychometric scales
BFI_fts = _columns_with('BFI_')
NASA_fts = _columns_with('NASA_')
ERQ_fts = _columns_with('ERQ_')

# Everything else (identifiers, demographics, Perceived_Stress_Scale)
_scale_fts = {*BFI_fts, *NASA_fts, *ERQ_fts}
main_fts = [name for name in qas.columns if name not in _scale_fts]

# describe() summarises only the numeric columns of this group
qas[main_fts].describe()

Unnamed: 0,Age,Gender,Nationality,Native_Language,Education,Writing_Proficiency,Daily_Email_Frequency,Perceived_Stress_Scale
count,61.0,63.0,61.0,61.0,61.0,61.0,61.0,61.0
mean,23.754098,1.714286,1.098361,1.098361,1.967213,6.704918,6.131148,17.131148
std,8.761004,0.455383,0.300273,0.300273,0.576403,0.66694,1.175803,5.675901
min,18.0,1.0,1.0,1.0,1.0,4.0,2.0,3.0
25%,20.0,1.0,1.0,1.0,2.0,7.0,5.0,13.0
50%,21.0,2.0,1.0,1.0,2.0,7.0,7.0,17.0
75%,23.0,2.0,1.0,1.0,2.0,7.0,7.0,21.0
max,54.0,2.0,2.0,2.0,4.0,7.0,7.0,29.0


In [25]:
# Summary statistics of the Big Five Inventory sub-scales
qas[BFI_fts].describe()

Unnamed: 0,BFI_Agreeableness,BFI_Conscientiousness,BFI_Extraversion,BFI_Neuroticism,BFI_Openness
count,61.0,61.0,61.0,61.0,61.0
mean,35.393443,31.278689,26.081967,22.114754,37.409836
std,5.010917,5.413351,6.535276,4.712036,5.936826
min,19.0,22.0,13.0,12.0,25.0
25%,33.0,27.0,21.0,19.0,34.0
50%,35.0,31.0,26.0,21.0,38.0
75%,39.0,35.0,30.0,26.0,42.0
max,45.0,44.0,39.0,31.0,49.0


In [26]:
# Summary statistics of the NASA TLX sub-scales
qas[NASA_fts].describe()

Unnamed: 0,NASA_Mental_Demand,NASA_Physical_Demand,NASA_Temporal_Demand,NASA_Performance,NASA_Effort,NASA_Frustration
count,61.0,61.0,61.0,61.0,61.0,61.0
mean,4.901639,2.819672,4.786885,4.672131,4.983607,3.901639
std,1.434165,1.575946,1.713814,1.650468,1.190123,1.660371
min,1.0,1.0,1.0,1.0,2.0,1.0
25%,5.0,2.0,4.0,3.0,4.0,2.0
50%,5.0,2.0,5.0,5.0,5.0,4.0
75%,6.0,4.0,6.0,6.0,6.0,5.0
max,7.0,6.0,7.0,7.0,7.0,7.0


In [27]:
# Summary statistics of the Emotion Regulation Questionnaire sub-scales
qas[ERQ_fts].describe()

Unnamed: 0,ERQ_Cognitive_Reappraisal,ERQ_Expressive_Suppression
count,61.0,61.0
mean,32.0,14.344262
std,5.46199,5.075711
min,20.0,4.0
25%,30.0,11.0
50%,32.0,15.0
75%,36.0,18.0
max,42.0,28.0


In [28]:
# Summary statistics of the Perceived Stress Scale; the same figures appear in
# the Perceived_Stress_Scale column of the main_fts summary above.
qas['Perceived_Stress_Scale'].describe()

count    61.000000
mean     17.131148
std       5.675901
min       3.000000
25%      13.000000
50%      17.000000
75%      21.000000
max      29.000000
Name: Perceived_Stress_Scale, dtype: float64

# Physiological Data

In [29]:
# Load the physiological signals (the recorded rows are one second apart)
physio = pd.read_csv('../data/Quantitative_Data/Physiological Data.csv')
# Time is kept as the string read from the CSV; it is not parsed to datetime here.
# Per the data dictionary, Task labels email vs. report writing during DT, so
# missing Task values outside DT are marked 'Not Applicable'. DT rows and rows
# that already carry a Task are left untouched.
non_dt_task_missing = physio['Treatment'].ne('DT') & physio['Task'].isna()
physio.loc[non_dt_task_missing, 'Task'] = 'Not Applicable'
physio.head()

Unnamed: 0,Participant_ID,Group,Treatment,Time,Treatment_Time,Task,PP_QC,EDA_QC,BR_QC,Chest_HR_QC,Wrist_HR_QC
0,T003,CH,RB,2018-06-22 11:35:42,0,Not Applicable,0.003544,0.311345,12.2,83.0,85.78
1,T003,CH,RB,2018-06-22 11:35:43,1,Not Applicable,0.003543,0.310063,12.8,82.0,85.8
2,T003,CH,RB,2018-06-22 11:35:44,2,Not Applicable,0.003541,0.309743,12.8,81.0,85.83
3,T003,CH,RB,2018-06-22 11:35:45,3,Not Applicable,0.003539,0.309743,13.3,79.0,85.83
4,T003,CH,RB,2018-06-22 11:35:46,4,Not Applicable,0.003537,0.309743,13.3,82.0,85.83


In [30]:
# Treatment codes present in the physiological data
physio['Treatment'].unique()

array(['RB', 'ST', 'PM', 'DT', 'PR'], dtype=object)

In [31]:
# Print the physiological data column names, one per line
for column_name in physio.columns:
    print(column_name)

Participant_ID
Group
Treatment
Time
Treatment_Time
Task
PP_QC
EDA_QC
BR_QC
Chest_HR_QC
Wrist_HR_QC


# HRV (Heart Rate Variability) Data

In [32]:
# Load the heart-rate-variability data (RR_QC per recorded beat)
hrv = pd.read_csv('../data/Quantitative_Data/HRV.csv')
# Time is kept as the string read from the CSV; it is not parsed to datetime here.
# Same Task convention as for the physiological data: missing Task values
# outside DT are marked 'Not Applicable'; DT rows are left untouched, so any
# DT row without a Task stays NaN.
non_dt_task_missing = hrv['Treatment'].ne('DT') & hrv['Task'].isna()
hrv.loc[non_dt_task_missing, 'Task'] = 'Not Applicable'
hrv

Unnamed: 0,Participant_ID,Group,Treatment,Task,Time,Treatment_Time,RR_QC
0,T003,CH,RB,Not Applicable,2018-06-22 11:35:42.459,0,705.0
1,T003,CH,RB,Not Applicable,2018-06-22 11:35:43.190,1,731.0
2,T003,CH,RB,Not Applicable,2018-06-22 11:35:43.894,2,704.0
3,T003,CH,RB,Not Applicable,2018-06-22 11:35:44.613,3,719.0
4,T003,CH,RB,Not Applicable,2018-06-22 11:35:45.346,4,733.0
...,...,...,...,...,...,...,...
267636,T176,BL,PR,Not Applicable,2018-07-17 17:31:16.515,362,634.0
267637,T176,BL,PR,Not Applicable,2018-07-17 17:31:17.183,363,668.0
267638,T176,BL,PR,Not Applicable,2018-07-17 17:31:17.861,364,678.0
267639,T176,BL,PR,Not Applicable,2018-07-17 17:31:18.591,365,730.0


In [33]:
# HRV rows still lacking a Task after the fill; the recorded rows are all DT rows of T141
hrv[hrv['Task'].isnull()]

Unnamed: 0,Participant_ID,Group,Treatment,Task,Time,Treatment_Time,RR_QC
200350,T141,BH,DT,,2018-07-10 14:03:16.552,0,
200351,T141,BH,DT,,2018-07-10 14:03:17.153,1,
200352,T141,BH,DT,,2018-07-10 14:03:17.764,2,
200353,T141,BH,DT,,2018-07-10 14:03:18.398,3,634.0
200354,T141,BH,DT,,2018-07-10 14:03:19.054,4,656.0
...,...,...,...,...,...,...,...
204781,T141,BH,DT,,2018-07-10 14:53:12.643,4431,722.0
204782,T141,BH,DT,,2018-07-10 14:53:13.340,4432,697.0
204783,T141,BH,DT,,2018-07-10 14:53:14.042,4433,702.0
204784,T141,BH,DT,,2018-07-10 14:53:14.721,4434,679.0


In [34]:
# Print the HRV column names, one per line
for column_name in hrv.columns:
    print(column_name)

Participant_ID
Group
Treatment
Task
Time
Treatment_Time
RR_QC


In [35]:
# Missing values per HRV column (only Task and RR_QC have any in the recorded output)
hrv.isnull().sum()

Participant_ID        0
Group                 0
Treatment             0
Task               4436
Time                  0
Treatment_Time        0
RR_QC             13289
dtype: int64

In [36]:
# Preview of the combined table: questionnaire answers joined to the
# physiological samples per participant, then to the HRV rows per
# participant / treatment / task / Treatment_Time. Both joins use merge's
# default inner join, so samples without a matching HRV row are dropped.
# NOTE(review): in the HRV rows shown above Treatment_Time rises by 1 per row
# while Time advances ~0.7 s per row, whereas physio rows are 1 s apart. It
# looks like a beat counter rather than elapsed seconds, in which case this
# join pairs beat N with second N -- confirm before relying on RR_QC alignment.
physio_columns = ['Participant_ID', 'Group', 'Treatment', 'Treatment_Time', 'Task',
                  'PP_QC', 'EDA_QC', 'BR_QC', 'Chest_HR_QC', 'Wrist_HR_QC']
hrv_columns = ['Participant_ID', 'Group', 'Treatment', 'Task', 'Treatment_Time', 'RR_QC']
participant_keys = ['Participant_ID', 'Group']
sample_keys = ['Participant_ID', 'Group', 'Task', 'Treatment', 'Treatment_Time']

(
    qas
    .merge(physio[physio_columns], on=participant_keys)
    .merge(hrv[hrv_columns], on=sample_keys)
)

Unnamed: 0,Participant_ID,Group,Age,Gender,Nationality,Native_Language,Education,Writing_Proficiency,Daily_Email_Frequency,BFI_Agreeableness,...,NASA_Frustration,Treatment,Treatment_Time,Task,PP_QC,EDA_QC,BR_QC,Chest_HR_QC,Wrist_HR_QC,RR_QC
0,T003,CH,23.0,2,1.0,1.0,2.0,7.0,6.0,40.0,...,5.0,RB,0,Not Applicable,0.003544,0.311345,12.2,83.0,85.78,705.0
1,T003,CH,23.0,2,1.0,1.0,2.0,7.0,6.0,40.0,...,5.0,RB,1,Not Applicable,0.003543,0.310063,12.8,82.0,85.80,731.0
2,T003,CH,23.0,2,1.0,1.0,2.0,7.0,6.0,40.0,...,5.0,RB,2,Not Applicable,0.003541,0.309743,12.8,81.0,85.83,704.0
3,T003,CH,23.0,2,1.0,1.0,2.0,7.0,6.0,40.0,...,5.0,RB,3,Not Applicable,0.003539,0.309743,13.3,79.0,85.83,719.0
4,T003,CH,23.0,2,1.0,1.0,2.0,7.0,6.0,40.0,...,5.0,RB,4,Not Applicable,0.003537,0.309743,13.3,82.0,85.83,733.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
149090,T176,BL,18.0,2,1.0,1.0,2.0,7.0,7.0,39.0,...,2.0,PR,253,Not Applicable,0.007014,0.145388,17.0,91.0,111.60,653.0
149091,T176,BL,18.0,2,1.0,1.0,2.0,7.0,7.0,39.0,...,2.0,PR,254,Not Applicable,0.007089,0.147629,17.0,91.0,112.38,637.0
149092,T176,BL,18.0,2,1.0,1.0,2.0,7.0,7.0,39.0,...,2.0,PR,255,Not Applicable,0.007165,0.147309,17.0,90.0,113.22,639.0
149093,T176,BL,18.0,2,1.0,1.0,2.0,7.0,7.0,39.0,...,2.0,PR,256,Not Applicable,0.007251,0.147629,17.0,86.0,114.07,1233.0


In [38]:
# Build the full feature table: questionnaire answers + physiological samples
# (joined per participant) + HRV rows (joined per participant / treatment /
# task / Treatment_Time). Both joins are inner joins (pandas default).
# NOTE(review): HRV Treatment_Time appears to count beats rather than elapsed
# seconds (it rises by 1 per row while Time advances ~0.7 s), so matching it
# to the per-second physio Treatment_Time may misalign RR_QC -- confirm.
all_ft = pd.merge(
    pd.merge(
        qas,
        physio[['Participant_ID', 'Group', 'Treatment', 'Treatment_Time', 'Task',
                'PP_QC', 'EDA_QC', 'BR_QC', 'Chest_HR_QC', 'Wrist_HR_QC']],
        on=['Participant_ID', 'Group'],
    ),
    hrv[['Participant_ID', 'Group', 'Treatment', 'Task', 'Treatment_Time', 'RR_QC']],
    on=['Participant_ID', 'Group', 'Task', 'Treatment', 'Treatment_Time'],
)

In [39]:
# Binary label derived from the group code: 1 for the high anticipatory
# stressor groups (CH, BH), 0 for the low ones (CL, BL).
stress_by_group = {'CH': 1, 'BH': 1, 'CL': 0, 'BL': 0}

# An explicit .map gives an integer column directly; Series.replace from
# strings to ints relies on silent object->int downcasting, which newer pandas
# versions deprecate. A group code missing from the mapping becomes NaN instead
# of leaking its string into the label column.
# assign + reset_index return a new frame, so re-running the cell is harmless.
all_ft = (
    all_ft
    .assign(Is_Stressed=all_ft['Group'].map(stress_by_group))
    .reset_index(drop=True)
)

In [40]:
# Treatment codes that survive the merges
all_ft.Treatment.unique()

array(['RB', 'ST', 'PM', 'DT', 'PR'], dtype=object)

In [41]:
# Rows that exactly duplicate an earlier row; the recorded output is empty (header only)
all_ft[all_ft.duplicated()]

Unnamed: 0,Participant_ID,Group,Age,Gender,Nationality,Native_Language,Education,Writing_Proficiency,Daily_Email_Frequency,BFI_Agreeableness,...,Treatment,Treatment_Time,Task,PP_QC,EDA_QC,BR_QC,Chest_HR_QC,Wrist_HR_QC,RR_QC,Is_Stressed


In [42]:
# Quick look at the first rows, including the new Is_Stressed column
all_ft.head(3)

Unnamed: 0,Participant_ID,Group,Age,Gender,Nationality,Native_Language,Education,Writing_Proficiency,Daily_Email_Frequency,BFI_Agreeableness,...,Treatment,Treatment_Time,Task,PP_QC,EDA_QC,BR_QC,Chest_HR_QC,Wrist_HR_QC,RR_QC,Is_Stressed
0,T003,CH,23.0,2,1.0,1.0,2.0,7.0,6.0,40.0,...,RB,0,Not Applicable,0.003544,0.311345,12.2,83.0,85.78,705.0,1
1,T003,CH,23.0,2,1.0,1.0,2.0,7.0,6.0,40.0,...,RB,1,Not Applicable,0.003543,0.310063,12.8,82.0,85.8,731.0,1
2,T003,CH,23.0,2,1.0,1.0,2.0,7.0,6.0,40.0,...,RB,2,Not Applicable,0.003541,0.309743,12.8,81.0,85.83,704.0,1


In [43]:
# Persist the merged feature table. to_csv is called with its defaults, so the
# row index is written as an unnamed first column of the file.
all_ft.to_csv('../data/stress_productivity_df.csv')