In [10]:
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px
import neurokit2 as nk
import matplotlib.pyplot as plt

# Global settings
# pandas display: up to 200 rows, every column, floats rendered with two decimals
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', '{:.2f}'.format)
# matplotlib: wide default figure size, then the ggplot theme for nicer plots
plt.rcParams.update({"figure.figsize": (20, 6)})
plt.style.use('ggplot')

# Data loading
# Read the merged recordings and turn the 'datetime' strings into real timestamps
df = pd.read_csv('output/empatica_inquisit_merged.csv').assign(
    datetime=lambda frame: pd.to_datetime(frame['datetime'])
)

  df = pd.read_csv('output/empatica_inquisit_merged.csv')


In [3]:
# Preview the first rows of the freshly loaded frame to check the columns and the parsed 'datetime'
df.head()

Unnamed: 0,acc_x,acc_y,acc_z,temp,eda,bvp,hr,datetime,unix_time,source,trialcode,response,intrusion,intrusion_nothink,intrusion_tnt
0,-8.0,46.0,48.0,26.45,0.03,61.93,85.0,2023-03-22 12:05:18.000000,1679486718.0,pp3-d1-1,,0,0,,
1,-7.41,46.88,50.65,26.47,0.02,90.6,84.98,2023-03-22 12:05:18.015625,1679486718.02,pp3-d1-1,,0,0,,
2,-6.0,46.0,47.0,26.48,0.02,100.07,84.95,2023-03-22 12:05:18.031250,1679486718.03,pp3-d1-1,,0,0,,
3,-5.86,44.05,42.88,26.49,0.02,89.91,84.92,2023-03-22 12:05:18.046875,1679486718.05,pp3-d1-1,,0,0,,
4,-7.0,44.0,43.0,26.5,0.02,64.68,84.88,2023-03-22 12:05:18.062500,1679486718.06,pp3-d1-1,,0,0,,


To compute rolling-window features only over samples that are actually consecutive, combine `groupby` with `rolling`.

First, create a new column that identifies each separate session, derived from the `datetime` column: a new session starts whenever the time difference from the previous measurement exceeds a threshold (here, one sample period of 1/64 s).

Once you have a column that identifies the sessions, you can group by this column and then apply the rolling function to each group. This will ensure that the rolling windows are calculated separately for each session.

In [15]:
# Create a column 'session_id' that identifies each uninterrupted recording.
# A new session starts on a row when, compared with the previous row,
#   * the time gap exceeds one sample period (1/64 s),
#   * time runs backwards, or
#   * the 'source' value (the recording the row came from) changes.
# The last two checks matter because rows of several recordings are stacked in one frame:
# a jump back in time gives a negative diff, which a plain "> threshold" test treats as
# contiguous, so rolling windows would run across two different recordings.
sample_period = pd.Timedelta(seconds=1/64)
time_step = df['datetime'].diff()

# Integer code per distinct source (missing values all get -1), so that
# "did the source change?" is a simple diff; the first row has no predecessor -> False.
source_code = pd.Series(pd.factorize(df['source'])[0], index=df.index)
source_changed = source_code.diff().fillna(0).ne(0)

session_start = (
    (time_step > sample_period)
    | (time_step < pd.Timedelta(0))
    | source_changed
)

# Running count of session starts = 0-based session identifier
df['session_id'] = session_start.cumsum()

In [16]:
s = 10        # Window size in seconds
sr = 64       # Sampling rate in Hz
win = sr * s  # Window size in samples

# Fail fast, before the EDA step below overwrites df['eda']: otherwise a missing
# 'session_id' only surfaces at the groupby, leaving df half-updated.
if 'session_id' not in df.columns:
    raise KeyError("'session_id' column not found - run the session-detection cell first")

# EDA
# Clean the EDA signal and decompose it with NeuroKit.
# NOTE: df['eda'] is replaced by the cleaned signal, so re-running this cell without
# reloading df feeds the already-cleaned signal back into nk.eda_process.
# NOTE(review): the signal is processed as one continuous series across all sessions,
# unlike the per-session rolling windows below - confirm that this is intended.
signals, info = nk.eda_process(df['eda'], sampling_rate=sr)

# df column -> NeuroKit output column
eda_outputs = {
    'eda': 'EDA_Clean',
    'eda_tonic': 'EDA_Tonic',
    'eda_phasic': 'EDA_Phasic',
    'eda_scr_onsets': 'SCR_Onsets',
    'eda_scr_peaks': 'SCR_Peaks',
    'eda_scr_height': 'SCR_Height',
    'eda_scr_amplitude': 'SCR_Amplitude',
    'eda_scr_risetime': 'SCR_RiseTime',
    'eda_scr_recovery': 'SCR_Recovery',
}
for column, nk_column in eda_outputs.items():
    # .values: assign positionally instead of aligning on the index of `signals`
    df[column] = signals[nk_column].values

# Group by 'session_id' so that every rolling window is computed within one session
temp = df.groupby('session_id')

# Statistics computed for each raw signal
summary_stats = ['mean', 'std', 'min', 'max', 'skew', 'kurt']
# Domain-specific EDA features: only their rolling mean is computed
eda_components = [column for column in eda_outputs if column != 'eda']

# (column, statistic) pairs, listed in the order the feature columns are added to df.
# Each pair produces the column '<column>_<statistic>'.
rolling_features = (
    # EDA
    [('eda', stat) for stat in summary_stats]
    + [(column, 'mean') for column in eda_components]
    # ACCELEROMETER (all three axes per statistic)
    + [(axis, stat) for stat in summary_stats for axis in ('acc_x', 'acc_y', 'acc_z')]
    # TEMPERATURE
    + [('temp', stat) for stat in summary_stats]
    # HR
    + [('hr', stat) for stat in summary_stats]
)

for column, stat in rolling_features:
    rolled = getattr(temp[column].rolling(window=win), stat)()
    # Drop the 'session_id' index level so the result lines up with df's own index
    df[f'{column}_{stat}'] = rolled.reset_index(0, drop=True)


In [4]:
# Inspect rows 700-749: these lie beyond the first 640 samples (win = 64 * 10),
# so the rolling feature columns are populated here
df.iloc[700:750]

Unnamed: 0,acc_x,acc_y,acc_z,temp,eda,bvp,hr,datetime,unix_time,source,trialcode,response,intrusion,intrusion_nothink,intrusion_tnt,session_id,eda_tonic,eda_phasic,eda_scr_onsets,eda_scr_peaks,eda_scr_height,eda_scr_amplitude,eda_scr_risetime,eda_scr_recovery,eda_mean,eda_std,eda_min,eda_max,eda_skew,eda_kurt,eda_tonic_mean,eda_phasic_mean,eda_scr_onsets_mean,eda_scr_peaks_mean,eda_scr_height_mean,eda_scr_amplitude_mean,eda_scr_risetime_mean,eda_scr_recovery_mean,acc_x_mean,acc_y_mean,acc_z_mean,acc_x_std,acc_y_std,acc_z_std,acc_x_min,acc_y_min,acc_z_min,acc_x_max,acc_y_max,acc_z_max,acc_x_skew,acc_y_skew,acc_z_skew,acc_x_kurt,acc_y_kurt,acc_z_kurt,temp_mean,temp_std,temp_min,temp_max,temp_skew,temp_kurt,hr_mean,hr_std,hr_min,hr_max,hr_skew,hr_kurt
700,-2.0,48.0,38.0,25.52,0.22,-181.24,67.12,2023-03-22 12:05:28.937500,1679486728.94,pp3-d1-1,,0,0,,,0,0.12,0.1,0,0,0.0,0.0,0.0,0,0.05,0.07,-0.01,0.39,3.18,9.98,0.07,-0.01,0.0,0.0,0.0,0.0,0.0,0.0,-8.46,45.18,42.41,8.0,4.24,4.09,-25.92,29.56,31.89,13.0,59.73,51.08,1.06,0.36,-0.9,0.01,1.17,0.21,25.28,0.42,24.94,26.25,1.33,0.09,73.2,5.64,67.12,86.13,1.0,-0.25
701,-0.03,50.05,38.21,25.54,0.23,-187.85,67.09,2023-03-22 12:05:28.953125,1679486728.95,pp3-d1-1,,0,0,,,0,0.12,0.11,0,0,0.0,0.0,0.0,0,0.05,0.07,-0.01,0.39,3.14,9.74,0.07,-0.01,0.0,0.0,0.0,0.0,0.0,0.0,-8.44,45.19,42.4,8.01,4.24,4.09,-25.92,29.56,31.89,13.0,59.73,51.08,1.05,0.35,-0.89,-0.01,1.16,0.19,25.28,0.42,24.94,26.25,1.34,0.11,73.18,5.64,67.09,86.13,1.01,-0.23
702,0.0,50.0,36.0,25.55,0.24,-191.91,67.06,2023-03-22 12:05:28.968750,1679486728.97,pp3-d1-1,,0,0,,,0,0.12,0.12,0,0,0.0,0.0,0.0,0,0.05,0.07,-0.01,0.39,3.11,9.5,0.07,-0.01,0.0,0.0,0.0,0.0,0.0,0.0,-8.43,45.21,42.39,8.02,4.24,4.1,-25.92,29.56,31.89,13.0,59.73,51.08,1.05,0.35,-0.89,-0.02,1.15,0.17,25.28,0.42,24.94,26.25,1.35,0.13,73.16,5.64,67.06,86.13,1.01,-0.21
703,-0.33,49.68,33.98,25.56,0.25,-194.67,67.03,2023-03-22 12:05:28.984375,1679486728.98,pp3-d1-1,,0,0,,,0,0.12,0.13,0,0,0.0,0.0,0.0,0,0.05,0.07,-0.01,0.39,3.08,9.25,0.07,-0.01,0.0,0.0,0.0,0.0,0.0,0.0,-8.42,45.22,42.38,8.02,4.25,4.11,-25.92,29.56,31.89,13.0,59.73,51.08,1.04,0.34,-0.88,-0.03,1.13,0.15,25.28,0.42,24.94,26.25,1.35,0.15,73.14,5.64,67.03,86.13,1.02,-0.2
704,0.0,50.0,33.0,25.57,0.26,-194.45,67.0,2023-03-22 12:05:29.000000,1679486729.0,pp3-d1-1,,0,0,,,0,0.12,0.14,0,0,0.0,0.0,0.0,0,0.05,0.07,-0.01,0.39,3.04,8.99,0.07,-0.01,0.0,0.0,0.0,0.0,0.0,0.0,-8.41,45.23,42.37,8.03,4.25,4.13,-25.92,29.56,31.89,13.0,59.73,51.08,1.04,0.34,-0.88,-0.05,1.12,0.13,25.28,0.42,24.94,26.25,1.36,0.17,73.12,5.63,67.0,86.13,1.03,-0.18
705,-0.29,50.74,32.35,25.58,0.28,-188.49,66.97,2023-03-22 12:05:29.015625,1679486729.02,pp3-d1-1,,0,0,,,0,0.12,0.15,0,0,0.0,0.0,0.0,0,0.05,0.07,-0.01,0.39,3.0,8.73,0.07,-0.01,0.0,0.0,0.0,0.0,0.0,0.0,-8.4,45.24,42.35,8.04,4.25,4.14,-25.92,29.56,31.89,13.0,59.73,51.08,1.03,0.33,-0.88,-0.06,1.1,0.11,25.28,0.42,24.94,26.25,1.36,0.19,73.1,5.63,66.97,86.13,1.03,-0.16
706,-1.0,52.0,33.0,25.59,0.29,-174.61,66.94,2023-03-22 12:05:29.031250,1679486729.03,pp3-d1-1,,0,0,,,0,0.12,0.17,0,0,0.0,0.0,0.0,0,0.06,0.07,-0.01,0.39,2.97,8.46,0.07,-0.01,0.0,0.0,0.0,0.0,0.0,0.0,-8.39,45.25,42.33,8.04,4.26,4.15,-25.92,29.56,31.89,13.0,59.73,51.08,1.03,0.33,-0.88,-0.07,1.08,0.1,25.28,0.42,24.94,26.25,1.37,0.22,73.08,5.63,66.94,86.13,1.04,-0.15
707,0.59,53.5,36.48,25.59,0.31,-152.94,66.91,2023-03-22 12:05:29.046875,1679486729.05,pp3-d1-1,,0,0,,,0,0.12,0.19,0,0,0.0,0.0,0.0,0,0.06,0.08,-0.01,0.39,2.93,8.2,0.07,-0.01,0.0,0.0,0.0,0.0,0.0,0.0,-8.38,45.25,42.31,8.05,4.27,4.16,-25.92,29.56,31.89,13.0,59.73,51.08,1.02,0.33,-0.87,-0.08,1.06,0.08,25.28,0.42,24.94,26.25,1.37,0.24,73.06,5.63,66.91,86.13,1.05,-0.13
708,5.0,54.0,41.0,25.6,0.33,-125.95,66.88,2023-03-22 12:05:29.062500,1679486729.06,pp3-d1-1,,0,0,,,0,0.12,0.2,0,0,0.0,0.0,0.0,0,0.06,0.08,-0.01,0.39,2.9,7.95,0.07,-0.01,0.0,0.0,0.0,0.0,0.0,0.0,-8.36,45.27,42.31,8.07,4.29,4.16,-25.92,29.56,31.89,13.0,59.73,51.08,1.02,0.33,-0.87,-0.11,1.03,0.08,25.27,0.41,24.94,26.25,1.38,0.26,73.04,5.63,66.88,86.13,1.05,-0.11
709,8.5,53.29,42.52,25.6,0.34,-97.59,66.86,2023-03-22 12:05:29.078125,1679486729.08,pp3-d1-1,,0,0,,,0,0.12,0.22,0,0,0.0,0.0,0.0,0,0.06,0.08,-0.01,0.39,2.87,7.71,0.07,-0.01,0.0,0.0,0.0,0.0,0.0,0.0,-8.34,45.28,42.31,8.09,4.3,4.16,-25.92,29.56,31.89,13.0,59.73,51.08,1.01,0.33,-0.87,-0.13,1.0,0.08,25.27,0.41,24.94,26.25,1.38,0.28,73.02,5.63,66.86,86.13,1.06,-0.1


In [17]:
# (original filename, new filename) pairs; each new name carries a 'pp<N>_' participant prefix
renamed_sources = [
    ('d1 2', 'pp15_d1 2'),
    ('d1_1', 'pp13_d1_1'),
    ('1681713254_A03F6E', 'pp16_1681713254_A03F6E'),
    ('1681717717_A03F6E', 'pp17_1681717717_A03F6E'),
    ('d1_3', 'pp18_d1_3'),
    ('d2_1_1', 'pp17_d2_1_1'),
    ('d2_2', 'pp16_d2_2'),
    ('d1', 'pp19_d1'),
    ('d1_4', 'pp20_d1_4'),
    ('d2', 'pp18_d2'),
    ('d2_1', 'pp19_d2_1'),
    ('d2_4', 'pp20_d2_4'),
]

# Dictionary that maps the original filenames to the new filenames
filename_map = dict(renamed_sources)

# Rewrite the 'source' column; values that are not keys of the map stay as they are
df['source'] = df['source'].replace(to_replace=filename_map)

In [18]:
# Extract the participant number from the 'pp<N>' tag in the recording name.
# The pattern is a raw string so that \d reaches the regex engine as written instead of
# being an invalid Python string escape. expand=False yields a Series rather than a
# one-column DataFrame. The float dtype keeps NaN for names without a match.
df['participant'] = df['source'].str.extract(r'pp(\d{1,2})', expand=False).astype(float)

# Show which participants are present
df['participant'].unique()

array([ 3.,  2.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 13., 15., 16.,
       17., 18., 19., 20.])

In [19]:
# Preview the frame again to check the columns added by the feature-engineering and participant cells
df.head()

Unnamed: 0,acc_x,acc_y,acc_z,temp,eda,bvp,hr,datetime,unix_time,source,trialcode,response,intrusion,intrusion_nothink,intrusion_tnt,participant,eda_tonic,eda_phasic,eda_scr_onsets,eda_scr_peaks,eda_scr_height,eda_scr_amplitude,eda_scr_risetime,eda_scr_recovery,session_id,eda_mean,eda_std,eda_min,eda_max,eda_skew,eda_kurt,eda_tonic_mean,eda_phasic_mean,eda_scr_onsets_mean,eda_scr_peaks_mean,eda_scr_height_mean,eda_scr_amplitude_mean,eda_scr_risetime_mean,eda_scr_recovery_mean,acc_x_mean,acc_y_mean,acc_z_mean,acc_x_std,acc_y_std,acc_z_std,acc_x_min,acc_y_min,acc_z_min,acc_x_max,acc_y_max,acc_z_max,acc_x_skew,acc_y_skew,acc_z_skew,acc_x_kurt,acc_y_kurt,acc_z_kurt,temp_mean,temp_std,temp_min,temp_max,temp_skew,temp_kurt,hr_mean,hr_std,hr_min,hr_max,hr_skew,hr_kurt
0,-8.0,46.0,48.0,26.45,0.03,61.93,85.0,2023-03-22 12:05:18.000000,1679486718.0,pp3-d1-1,,0,0,,,3.0,0.03,-0.0,0,0,0.0,0.0,0.0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,-7.41,46.88,50.65,26.47,0.03,90.6,84.98,2023-03-22 12:05:18.015625,1679486718.02,pp3-d1-1,,0,0,,,3.0,0.03,-0.01,0,0,0.0,0.0,0.0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,-6.0,46.0,47.0,26.48,0.02,100.07,84.95,2023-03-22 12:05:18.031250,1679486718.03,pp3-d1-1,,0,0,,,3.0,0.03,-0.01,0,0,0.0,0.0,0.0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,-5.86,44.05,42.88,26.49,0.02,89.91,84.92,2023-03-22 12:05:18.046875,1679486718.05,pp3-d1-1,,0,0,,,3.0,0.03,-0.01,0,0,0.0,0.0,0.0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,-7.0,44.0,43.0,26.5,0.02,64.68,84.88,2023-03-22 12:05:18.062500,1679486718.06,pp3-d1-1,,0,0,,,3.0,0.03,-0.01,0,0,0.0,0.0,0.0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [26]:
# Save the dataframe
# df.to_parquet('output/empatica_inquisit_merged_features.parquet', index=False)

# Only keep rows that don't have NaN for intrusion_tnt
df = df.dropna(subset=['intrusion_tnt'])

# Save the filtered dataframe
df.to_csv('output/combined_feature_engineered_tnt_only.csv', index=False)

# Number of rows kept. Placed last so the notebook displays it: a bare expression
# in the middle of a cell is evaluated and then discarded.
len(df)