# Load all behavioral CSV files and compile them into one workable file

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path
from tqdm.auto import tqdm
import pandas as pd
from IPython.display import display
import numpy as np

In [3]:
# Configure the active user. Each known user maps to the directory that
# BASE_PATH should point at on their machine.
user = "mitchel"
extracted_feats = False

_BASE_PATHS = {
    "jonas": Path("/users/jonvdrdo/jonas/data/aaa_contextaware/raw/uz_study/"),
    # Alternative location for "mitchel" (network share), kept for reference:
    #   Z:/shares/ghep_lab/2021_VanhollebekeKappen_EEGStudy2_MIST_Cyberball_Audio/
    "mitchel": Path("D:/Data/EEG_Study_2"),
}

try:
    BASE_PATH = _BASE_PATHS[user.lower()]
except KeyError:
    # Fail here with a clear message instead of a NameError further down
    # when BASE_PATH turns out to be undefined.
    raise ValueError(
        f"Unknown user {user!r}; expected one of {sorted(_BASE_PATHS)}"
    ) from None

# Directory holding the per-participant behavioral CSV files.
DATA_PATH = BASE_PATH.joinpath("Data/Raw/Behavioral")

In [43]:
# Some participant files are faulty or empty, so they are excluded from loading.
# Raw strings keep the Windows backslashes literal; without the r-prefix,
# sequences such as "\D" or "\p" are invalid escapes that only work by accident.
# NOTE: these are absolute paths under the "mitchel" Windows BASE_PATH.
excluded_files = [
    r"D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_50_cybb.csv",
    r"D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_51_mist.csv",
    r"D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_67_mist.csv",
    r"D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_69_mist.csv",
]

In [44]:
def _load_task_frames(pattern):
    """Read every file in DATA_PATH matching *pattern* into a single DataFrame.

    Each matching path is printed. Files whose name equals the file name of
    an entry in ``excluded_files`` are skipped. The remaining CSVs are read
    and stacked row-wise with a fresh 0..n-1 index.

    Returns an empty DataFrame when no file is loaded.
    """
    # Match on the bare file name so the exclusion list keeps working no
    # matter which path separator or base directory its entries were written with.
    excluded_names = {
        entry.replace("\\", "/").rsplit("/", 1)[-1] for entry in excluded_files
    }

    frames = []
    for file in tqdm(list(DATA_PATH.glob(pattern))):
        print(file)
        if file.name not in excluded_names:
            frames.append(pd.read_csv(file))

    # A single concat replaces the repeated DataFrame.append calls
    # (removed in pandas 2.0, and quadratic because each call copies the frame).
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


df_cybb = _load_task_frames("*cybb.csv")
df_mist = _load_task_frames("*mist.csv")

  0%|          | 0/66 [00:00<?, ?it/s]

D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_10_cybb.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_11_cybb.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_12_cybb.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_13_cybb.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_14_cybb.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_15_cybb.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_16_cybb.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_17_cybb.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_18_cybb.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_19_cybb.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_20_cybb.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_21_cybb.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_22_cybb.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_23_cybb.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_24_cybb.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_25_cybb.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_27_cybb.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_28_c

  0%|          | 0/61 [00:00<?, ?it/s]

D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_10_mist.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_11_mist.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_12_mist.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_13_mist.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_14_mist.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_15_mist.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_16_mist.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_17_mist.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_18_mist.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_19_mist.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_21_mist.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_22_mist.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_23_mist.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_24_mist.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_25_mist.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_27_mist.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_28_mist.csv
D:\Data\EEG_Study_2\Data\Raw\Behavioral\ppt_29_m

# Extract relevant data from each dataframe and combine into 1 comprehensive dataframe for both tasks

In [119]:
# Columns copied verbatim from every task file (participant details).
_PARTICIPANT_COLUMNS = [
    "participant_ID",
    "participant_age",
    "participant_occupation",
    "participant_sequence",
    "participant_sex",
]

# Column-name fragments selecting the questionnaire / description columns,
# in the order they should appear in the combined frame.
_SELECTED_COLUMN_FRAGMENTS = [
    # SCRS
    "SCRS_Baseline_slider_",
    "SCRS_Control_slider_",
    "SCRS_Stress_slider_",
    # VAS
    "VAS_Baseline_Post_Rest_slider_",
    "VAS_Control_Post_Rest_slider_",
    "VAS_Control_slider_",
    "VAS_Stress_Post_Rest_slider_",
    "VAS_Stress_slider_",
    # picture descriptions
    "description_picture_baseline",
    "description_picture_control",
    "description_picture_stress",
]


def _extract_task_data(task_df, task_type):
    """Return the relevant columns of *task_df* plus a ``taskType`` label column.

    The participant-detail columns come first, followed by every column whose
    name contains one of ``_SELECTED_COLUMN_FRAGMENTS``. A missing
    participant-detail column raises ``KeyError``.
    """
    pieces = [task_df[_PARTICIPANT_COLUMNS]]
    pieces.extend(
        task_df.filter(like=fragment) for fragment in _SELECTED_COLUMN_FRAGMENTS
    )
    selected = pd.concat(pieces, axis=1)
    selected["taskType"] = task_type
    return selected


# Cyberball
cybbdf = _extract_task_data(df_cybb, "cybb")

# MIST
mistdf = _extract_task_data(df_mist, "mist")

# Combine into one large dataframe. pd.concat replaces DataFrame.append
# (removed in pandas 2.0). The row index is deliberately not reset, matching
# the previous append behaviour, so each task keeps its own row labels.
behavioraldataComplete = pd.concat([cybbdf, mistdf])

In [120]:
# Add mean questionnaire values.
# The derived column names and their order are generated from the tables below
# instead of being typed out once per condition.

# SCRS: one mean over all sliders of each condition.
for scrs_condition in ("Baseline", "Control", "Stress"):
    behavioraldataComplete[f"Mean_SCRS_{scrs_condition}"] = (
        behavioraldataComplete.filter(like=f"SCRS_{scrs_condition}_slider_").mean(axis=1)
    )

# VAS: sliders 1-6, 7-12 and 13-18 are averaged into the NA, PAA and PSA
# columns respectively; slider 19 is copied unchanged into VAS_Stress_<label>.
_VAS_SUBSCALE_ITEMS = {
    "NA": range(1, 7),
    "PAA": range(7, 13),
    "PSA": range(13, 19),
}
_VAS_STRESS_ITEM = 19

# Output label -> prefix of the source slider columns. Note that the
# "Baseline" label reads from the "VAS_Baseline_Post_Rest_slider_" columns.
_VAS_CONDITION_PREFIXES = {
    "Baseline": "VAS_Baseline_Post_Rest_slider_",
    "Control_Post_Rest": "VAS_Control_Post_Rest_slider_",
    "Control": "VAS_Control_slider_",
    "Stress_Post_Rest": "VAS_Stress_Post_Rest_slider_",
    "Stress": "VAS_Stress_slider_",
}

for vas_label, slider_prefix in _VAS_CONDITION_PREFIXES.items():
    for subscale, items in _VAS_SUBSCALE_ITEMS.items():
        slider_columns = [f"{slider_prefix}{item}" for item in items]
        behavioraldataComplete[f"Mean_VAS_{subscale}_{vas_label}"] = (
            behavioraldataComplete[slider_columns].mean(axis=1)
        )
    behavioraldataComplete[f"VAS_Stress_{vas_label}"] = (
        behavioraldataComplete[f"{slider_prefix}{_VAS_STRESS_ITEM}"]
    )

behavioraldataComplete

Unnamed: 0,participant_ID,participant_age,participant_occupation,participant_sequence,participant_sex,SCRS_Baseline_slider_1,SCRS_Baseline_slider_10,SCRS_Baseline_slider_2,SCRS_Baseline_slider_3,SCRS_Baseline_slider_4,...,Mean_VAS_PSA_Control,VAS_Stress_Control,Mean_VAS_NA_Stress_Post_Rest,Mean_VAS_PAA_Stress_Post_Rest,Mean_VAS_PSA_Stress_Post_Rest,VAS_Stress_Stress_Post_Rest,Mean_VAS_NA_Stress,Mean_VAS_PAA_Stress,Mean_VAS_PSA_Stress,VAS_Stress_Stress
0,10,19,Student,[ 3 7 11 6 8 2 12 5 10 1 0 4 9],Vrouw,1,1,1,1,1,...,82.000000,0.0,0.000000,40.733333,72.433333,17.6,5.000000,21.183333,73.550000,47.2
1,11,20,Student,[ 6 3 8 5 4 11 2 12 10 7 1 0 9],Vrouw,1,1,1,1,1,...,51.633333,17.5,1.233333,16.033333,67.850000,13.5,9.800000,15.133333,32.100000,23.1
2,12,24,Werkend,[10 5 9 8 0 4 7 3 2 1 6 12 11],Vrouw,1,1,2,1,2,...,66.100000,14.3,0.000000,25.566667,58.100000,19.3,0.000000,36.583333,50.833333,21.8
3,13,18,Student,[ 5 9 3 7 8 1 4 12 6 11 10 0 2],Vrouw,1,2,2,1,2,...,29.683333,78.4,36.850000,27.916667,30.833333,58.7,42.250000,38.983333,34.766667,73.0
4,14,26,Student,[ 5 3 12 1 0 9 4 2 10 7 6 8 11],Vrouw,1,2,1,1,1,...,67.850000,5.9,2.916667,56.500000,85.633333,12.0,4.300000,54.916667,72.850000,1.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53,9,18,Student,[12 7 10 9 3 8 2 4 1 0 11 5 6],Vrouw,1,1,1,1,2,...,48.566667,19.8,9.966667,51.183333,27.033333,51.6,8.000000,54.550000,14.850000,63.9
54,66,18,Student,[10 3 0 12 7 2 11 8 4 1 6 9 5],Man,1,1,1,1,1,...,62.533333,35.5,11.783333,49.750000,58.066667,16.7,17.183333,51.716667,50.666667,44.6
55,70,23,Student,[ 1 0 9 3 6 5 11 2 12 8 4 7 10],Man,1,1,1,1,1,...,80.683333,4.5,0.000000,23.300000,88.233333,9.0,0.000000,30.666667,71.283333,13.1
56,71,19,Student,[ 3 8 5 10 7 4 12 2 1 9 11 6 0],Man,1,3,2,1,1,...,77.616667,0.0,0.000000,42.483333,78.183333,6.1,7.233333,42.300000,58.233333,52.7


In [121]:
# Persist the combined table into DATA_PATH, once as parquet and once as CSV
# (the CSV is written without the row index).
# NOTE(review): on Windows, where Path.glob matches case-insensitively, the
# CSV name below would presumably match the "*mist.csv" pattern used when
# loading the raw files from this same directory -- confirm that a re-run
# does not ingest it.
parquet_target = DATA_PATH / "behavioralData_CYBB_MIST.parquet"
csv_target = DATA_PATH / "behavioralData_CYBB_MIST.csv"

behavioraldataComplete.to_parquet(parquet_target, engine="fastparquet")
behavioraldataComplete.to_csv(csv_target, index=False)