In [1]:
%matplotlib inline

In [2]:
import os 
import sys
import json
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
import neurokit as nk
import scipy as sc
import math 
import scipy.signal as ss
import warnings
import itertools

from scipy import signal
warnings.filterwarnings('ignore')

### Using the curated signals to compute features

In [3]:
curated_signals = joblib.load("/mnt/shared_drive/data/brain_therapy/BT_MVP_Content_Pilot/MVP_Working_Memory/pkls/WM_corrected_signals_amplitude.pkl")

In [4]:
p = curated_signals.keys()
curated_df = pd.DataFrame([])
for part in p:
    data = curated_signals[part]
    data['Participant'] = part.split("-")[3]
    curated_df = pd.concat([curated_df,data])
curated_df.set_index(["Participant","Datetime"], inplace = True)
curated_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,RSP,PPG,1-SKTA,ECG,EDA,participant,type,context
Participant,Datetime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
22,2019-08-22 13:21:30.000,7.417908,-0.343323,29.492676,-0.275116,24.90387,22,3D,StartT3
22,2019-08-22 13:21:30.004,7.423401,-0.348511,29.492676,-0.262604,24.902344,22,3D,StartT3
22,2019-08-22 13:21:30.008,7.425842,-0.353394,29.492676,-0.253296,24.90387,22,3D,StartT3
22,2019-08-22 13:21:30.012,7.427673,-0.352783,29.492676,-0.241852,24.905396,22,3D,StartT3
22,2019-08-22 13:21:30.016,7.433167,-0.357666,29.493896,-0.233307,24.902344,22,3D,StartT3


In [5]:
curated_df.drop("participant", axis = 1, inplace = True)
curated_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,RSP,PPG,1-SKTA,ECG,EDA,type,context
Participant,Datetime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
22,2019-08-22 13:21:30.000,7.417908,-0.343323,29.492676,-0.275116,24.90387,3D,StartT3
22,2019-08-22 13:21:30.004,7.423401,-0.348511,29.492676,-0.262604,24.902344,3D,StartT3
22,2019-08-22 13:21:30.008,7.425842,-0.353394,29.492676,-0.253296,24.90387,3D,StartT3
22,2019-08-22 13:21:30.012,7.427673,-0.352783,29.492676,-0.241852,24.905396,3D,StartT3
22,2019-08-22 13:21:30.016,7.433167,-0.357666,29.493896,-0.233307,24.902344,3D,StartT3


In [6]:
t = curated_df.copy()
t = t.reset_index().set_index(["Participant","type","context","Datetime"])
t.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,RSP,PPG,1-SKTA,ECG,EDA
Participant,type,context,Datetime,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
22,3D,StartT3,2019-08-22 13:21:30.000,7.417908,-0.343323,29.492676,-0.275116,24.90387
22,3D,StartT3,2019-08-22 13:21:30.004,7.423401,-0.348511,29.492676,-0.262604,24.902344
22,3D,StartT3,2019-08-22 13:21:30.008,7.425842,-0.353394,29.492676,-0.253296,24.90387
22,3D,StartT3,2019-08-22 13:21:30.012,7.427673,-0.352783,29.492676,-0.241852,24.905396
22,3D,StartT3,2019-08-22 13:21:30.016,7.433167,-0.357666,29.493896,-0.233307,24.902344


In [7]:
x_temp = t.copy()
x_temp.reset_index("Datetime", inplace = True)
x_new = pd.DataFrame(x_temp[["Datetime"]].groupby(["Participant","type","context"]).apply(lambda x: x['Datetime'].values[0]), columns = ["time_zero"])

In [8]:
x_new.head()
x_temp.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Datetime,RSP,PPG,1-SKTA,ECG,EDA
Participant,type,context,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
22,3D,StartT3,2019-08-22 13:21:30.000,7.417908,-0.343323,29.492676,-0.275116,24.90387
22,3D,StartT3,2019-08-22 13:21:30.004,7.423401,-0.348511,29.492676,-0.262604,24.902344
22,3D,StartT3,2019-08-22 13:21:30.008,7.425842,-0.353394,29.492676,-0.253296,24.90387
22,3D,StartT3,2019-08-22 13:21:30.012,7.427673,-0.352783,29.492676,-0.241852,24.905396
22,3D,StartT3,2019-08-22 13:21:30.016,7.433167,-0.357666,29.493896,-0.233307,24.902344


In [9]:
x_new2 = x_temp.merge(x_new, how = "left", right_index = True, left_index = True)
x_new2["Datetime_sub"] = x_new2["Datetime"] - x_new2["time_zero"]
x_new2.set_index(["Datetime_sub","Datetime"], append = True, inplace = True)
t = x_new2.copy()

In [10]:
t.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,RSP,PPG,1-SKTA,ECG,EDA,time_zero
Participant,type,context,Datetime_sub,Datetime,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
22,2D,StartT1,00:00:00,2019-08-22 13:37:44.000,6.141663,0.013428,30.354492,0.119629,22.538758,2019-08-22 13:37:44
22,2D,StartT1,00:00:00.004000,2019-08-22 13:37:44.004,6.141357,0.009766,30.355103,0.35965,22.538758,2019-08-22 13:37:44
22,2D,StartT1,00:00:00.008000,2019-08-22 13:37:44.008,6.140747,0.00824,30.353882,0.683899,22.535706,2019-08-22 13:37:44
22,2D,StartT1,00:00:00.012000,2019-08-22 13:37:44.012,6.141052,0.007935,30.354492,1.006622,22.528076,2019-08-22 13:37:44
22,2D,StartT1,00:00:00.016000,2019-08-22 13:37:44.016,6.139221,0.008545,30.354492,1.24054,22.528076,2019-08-22 13:37:44


# ECG

In [11]:
participants = t.index.get_level_values("Participant").unique()

In [12]:
good_peaks = joblib.load("/mnt/shared_drive/data/brain_therapy/BT_MVP_Content_Pilot/MVP_Working_Memory/pkls/WM_r_peaks_index.pkl")

In [13]:
good_peaks.keys()

dict_keys(['BT-MVP-WM-0022', 'BT-MVP-WM-0023', 'BT-MVP-WM-0024', 'BT-MVP-WM-0025', 'BT-MVP-WM-0026', 'BT-MVP-WM-0027', 'BT-MVP-WM-0028', 'BT-MVP-WM-0029', 'BT-MVP-WM-0030', 'BT-MVP-WM-0031', 'BT-MVP-WM-0032', 'BT-MVP-WM-0033', 'BT-MVP-WM-0034', 'BT-MVP-WM-0035', 'BT-MVP-WM-0036', 'BT-MVP-WM-0037', 'BT-MVP-WM-0038', 'BT-MVP-WM-0039', 'BT-MVP-WM-0040', 'BT-MVP-WM-0041'])

Computing ECG Heart Rate and Heart Rate Variability

In [14]:
ecg_hr_features = pd.DataFrame([])
ecg_hrv_features = pd.DataFrame([])
time_diff_df = pd.DataFrame([])
for participant in participants:
    data = t.loc(axis = 0)[participant,:,:,:,:]
    ecg_peaks = good_peaks["BT-MVP-WM-{}".format(participant)]
    time_stamps = data.index.get_level_values('Datetime')[ecg_peaks]
    d = data.loc(axis = 0)[participant,:,:,:,time_stamps]
    
    
    #Computing time differences
    try:
        d['time_differences'] = np.concatenate([np.diff(time_stamps)/np.timedelta64(1,'ms'), [0]])
    except:
        d['time_differences'] = np.concatenate([np.diff(time_stamps)/np.timedelta64(1,'ms'), [0,0]])

    d = d[['time_differences']]
    #Taking out time differences that is more than a minute
    df = d[d.time_differences<60000]
    diff_mean = np.mean(df.time_differences)
    upper_threshold = diff_mean + (4*np.std(df.time_differences))
    lower_threshold = diff_mean - (4*np.std(df.time_differences))
    df = df[(df.time_differences<upper_threshold) & (df.time_differences>lower_threshold)]
    
    # Drop NaN contexts
    temp = df.reset_index("context")
    temp = temp[pd.notnull(temp['context'])]
#     display(temp)
    df = temp.reset_index().set_index(["Participant","type","context","Datetime","Datetime_sub"])
    time_diff_df = pd.concat([time_diff_df, df])
    

#     break

In [15]:
y_temp = time_diff_df.reset_index("Datetime_sub")
y_new = pd.DataFrame(y_temp[["Datetime_sub"]].groupby(["Participant","type","context"]).apply(lambda x: x['Datetime_sub'].values[0]),
                     columns = ["time_zero"])
y_new2 = y_temp.merge(y_new, how = "left", right_index = True, left_index = True)
y_new2["Datetime_sub"] = y_new2["Datetime_sub"] - y_new2["time_zero"]
y_new2 = y_new2.reset_index("Datetime").set_index(["Datetime_sub","Datetime"], append = True)
time_diff_new = y_new2.copy()
time_diff_new.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,time_differences,time_zero
Participant,type,context,Datetime_sub,Datetime,Unnamed: 5_level_1,Unnamed: 6_level_1
22,2D,StartT1,00:00:00,2019-08-22 13:37:44.648,736.0,00:00:00.648000
22,2D,StartT1,00:00:00.736000,2019-08-22 13:37:45.384,744.0,00:00:00.648000
22,2D,StartT1,00:00:01.480000,2019-08-22 13:37:46.128,732.0,00:00:00.648000
22,2D,StartT1,00:00:02.212000,2019-08-22 13:37:46.860,728.0,00:00:00.648000
22,2D,StartT1,00:00:02.940000,2019-08-22 13:37:47.588,740.0,00:00:00.648000


### ECG_HR features

Creating a separate dataframe for datetimes for ECG heart rate

In [16]:
temp_time_diff = time_diff_new.copy().reset_index("Datetime")
temp_time_diff.drop(["time_differences","time_zero"], axis = 1, inplace = True)

In [17]:
ecg_temp = temp_time_diff.groupby(["Participant","type","context", pd.Grouper(freq = "5s", level = "Datetime_sub")]).first()
ecg_temp.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Datetime
Participant,type,context,Datetime_sub,Unnamed: 4_level_1
22,2D,StartT1,00:00:00,2019-08-22 13:37:44.648
22,2D,StartT1,00:00:05,2019-08-22 13:37:49.792
22,2D,StartT1,00:00:10,2019-08-22 13:37:54.856
22,2D,StartT1,00:00:15,2019-08-22 13:37:59.924
22,2D,StartT1,00:00:20,2019-08-22 13:38:05.016


In [18]:
ecg_hr_features = time_diff_new.groupby(["Participant","type","context",pd.Grouper(freq = "5s", level = "Datetime_sub")], 
                                        sort = True).mean()
ecg_hr_features['ecg_heart_rate'] = (60/ecg_hr_features['time_differences'])*1000
ecg_hr_features.drop("time_differences",axis = 1, inplace = True)
ecg_hr_features.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ecg_heart_rate
Participant,type,context,Datetime_sub,Unnamed: 4_level_1
22,2D,StartT1,00:00:00,81.648523
22,2D,StartT1,00:00:05,82.938389
22,2D,StartT1,00:00:10,82.872928
22,2D,StartT1,00:00:15,82.482325
22,2D,StartT1,00:00:20,86.065574


Merging the features with its original Datetime

In [19]:
ecg_hr_features = ecg_hr_features.merge(ecg_temp, how = "left", right_index = True, left_index = True).\
                    set_index("Datetime", append = True)

In [20]:
ecg_hr_features.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,ecg_heart_rate
Participant,type,context,Datetime_sub,Datetime,Unnamed: 5_level_1
22,2D,StartT1,00:00:00,2019-08-22 13:37:44.648,81.648523
22,2D,StartT1,00:00:05,2019-08-22 13:37:49.792,82.938389
22,2D,StartT1,00:00:10,2019-08-22 13:37:54.856,82.872928
22,2D,StartT1,00:00:15,2019-08-22 13:37:59.924,82.482325
22,2D,StartT1,00:00:20,2019-08-22 13:38:05.016,86.065574


### ECG_HRV features

Creating a separate dataframe for datetimes for ECG heart rate variability

In [21]:
ecg_hrv_temp = temp_time_diff.groupby(["Participant","type","context", pd.Grouper(freq = "20s", level = "Datetime_sub")]).first()
ecg_hrv_temp.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Datetime
Participant,type,context,Datetime_sub,Unnamed: 4_level_1
22,2D,StartT1,00:00:00,2019-08-22 13:37:44.648
22,2D,StartT1,00:00:20,2019-08-22 13:38:05.016
22,2D,StartT1,00:00:40,2019-08-22 13:38:24.736
22,2D,StartT1,00:01:00,2019-08-22 13:38:45.292
22,2D,StartT1,00:01:20,2019-08-22 13:39:05.180


In [22]:
ecg_hrv_features = time_diff_new.groupby(["Participant","type","context",
                                          pd.Grouper(freq = "20s", level = "Datetime_sub")], sort = True).std()
ecg_hrv_features.rename(columns = {"time_differences":"ecg_hrv_std"}, inplace = True)
ecg_hrv_features.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ecg_hrv_std
Participant,type,context,Datetime_sub,Unnamed: 4_level_1
22,2D,StartT1,00:00:00,10.253919
22,2D,StartT1,00:00:20,27.651886
22,2D,StartT1,00:00:40,17.838191
22,2D,StartT1,00:01:00,24.272902
22,2D,StartT1,00:01:20,21.608271


Merging the features with its original Datetime

In [23]:
ecg_hrv_features = ecg_hrv_features.merge(ecg_hrv_temp, how = "left", right_index = True, left_index = True).\
                    set_index("Datetime", append = True)

In [24]:
ecg_hrv_features.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,ecg_hrv_std
Participant,type,context,Datetime_sub,Datetime,Unnamed: 5_level_1
22,2D,StartT1,00:00:00,2019-08-22 13:37:44.648,10.253919
22,2D,StartT1,00:00:20,2019-08-22 13:38:05.016,27.651886
22,2D,StartT1,00:00:40,2019-08-22 13:38:24.736,17.838191
22,2D,StartT1,00:01:00,2019-08-22 13:38:45.292,24.272902
22,2D,StartT1,00:01:20,2019-08-22 13:39:05.180,21.608271


# Skin Temperature

Creating a separate dataframe for datetimes for Skin Temperature

In [25]:
skt_temp = temp_time_diff.groupby(["Participant","type","context", pd.Grouper(freq = "10s", level = "Datetime_sub")]).first()
skt_temp.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Datetime
Participant,type,context,Datetime_sub,Unnamed: 4_level_1
22,2D,StartT1,00:00:00,2019-08-22 13:37:44.648
22,2D,StartT1,00:00:10,2019-08-22 13:37:54.856
22,2D,StartT1,00:00:20,2019-08-22 13:38:05.016
22,2D,StartT1,00:00:30,2019-08-22 13:38:14.976
22,2D,StartT1,00:00:40,2019-08-22 13:38:24.736


In [26]:
skin_temperature = t[['1-SKTA']]
skin_temperature = skin_temperature.groupby(["Participant","type","context", pd.Grouper(freq = "10s", level = "Datetime_sub")]).mean()
skin_temperature.rename(columns = {"1-SKTA":"skin_temperature"}, inplace = True)
display(skin_temperature.head())

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,skin_temperature
Participant,type,context,Datetime_sub,Unnamed: 4_level_1
22,2D,StartT1,00:00:00,30.371319
22,2D,StartT1,00:00:10,30.383633
22,2D,StartT1,00:00:20,30.383632
22,2D,StartT1,00:00:30,30.401948
22,2D,StartT1,00:00:40,30.404511


Merging the features with its original Datetime

In [27]:
skin_temperature = skin_temperature.merge(skt_temp, how = "left", right_index = True, left_index = True).\
                    set_index("Datetime", append = True)

In [28]:
skin_temperature.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,skin_temperature
Participant,type,context,Datetime_sub,Datetime,Unnamed: 5_level_1
22,2D,StartT1,00:00:00,2019-08-22 13:37:44.648,30.371319
22,2D,StartT1,00:00:10,2019-08-22 13:37:54.856,30.383633
22,2D,StartT1,00:00:20,2019-08-22 13:38:05.016,30.383632
22,2D,StartT1,00:00:30,2019-08-22 13:38:14.976,30.401948
22,2D,StartT1,00:00:40,2019-08-22 13:38:24.736,30.404511


# EDA

### EDA Mean

Creating a separate dataframe for datetimes of EDA

In [29]:
eda_temp = temp_time_diff.groupby(["Participant","type","context", pd.Grouper(freq = "30s", level = "Datetime_sub")]).first()
eda_temp.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Datetime
Participant,type,context,Datetime_sub,Unnamed: 4_level_1
22,2D,StartT1,00:00:00,2019-08-22 13:37:44.648
22,2D,StartT1,00:00:30,2019-08-22 13:38:14.976
22,2D,StartT1,00:01:00,2019-08-22 13:38:45.292
22,2D,StartT1,00:01:30,2019-08-22 13:39:15.332
22,2D,StartT1,00:02:00,2019-08-22 13:39:44.804


In [30]:
#EDA MEAN
eda_features = t[['EDA']]
eda_features = eda_features.groupby(["Participant","type","context", pd.Grouper(freq = "30s", level = "Datetime_sub")]).mean()
eda_features.rename(columns = {"EDA":"eda_mean"}, inplace = True)
display(eda_features.head())

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,eda_mean
Participant,type,context,Datetime_sub,Unnamed: 4_level_1
22,2D,StartT1,00:00:00,22.6476
22,2D,StartT1,00:00:30,22.158941
22,2D,StartT1,00:01:00,23.412629
22,2D,StartT1,00:01:30,22.855152
22,2D,StartT1,00:02:00,25.843336


### EDA Number of Peaks

In [31]:
def get_eda_features(eda):
    if len(eda > 15):
        try:
            features = nk.eda_process(eda = eda, sampling_rate = 250)
        except:
#             print("No peaks here")
            return 0
    else:
        return 0
    peaks = len(features['EDA']['SCR_Peaks_Indexes'])
    return peaks

In [32]:
#Number of peaks 
eda_peaks_df = pd.DataFrame([])
for participant in participants:
    data = t.loc(axis = 0)[participant,:,:,:]
#     display(data.head())
    d = data['EDA'].groupby(["Participant","type","context", pd.Grouper(freq = '30s', level = "Datetime_sub")]).apply(lambda x: get_eda_features(x))
    d = pd.DataFrame(d)
    eda_peaks_df = pd.concat([eda_peaks_df, d])
# t['EDA'].groupby(["Participant","type","context", pd.Grouper(freq = '30s', level = "Datetime_sub")]).apply(lambda x: get_eda_features(x))

In [33]:
eda_peaks_df.rename(columns = {"EDA":"eda_no_of_peaks"}, inplace = True)
eda_features = eda_features.merge(eda_peaks_df, how = "left",right_index = True, left_index = True)

In [34]:
eda_features.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,eda_mean,eda_no_of_peaks
Participant,type,context,Datetime_sub,Unnamed: 4_level_1,Unnamed: 5_level_1
22,2D,StartT1,00:00:00,22.6476,4
22,2D,StartT1,00:00:30,22.158941,7
22,2D,StartT1,00:01:00,23.412629,6
22,2D,StartT1,00:01:30,22.855152,6
22,2D,StartT1,00:02:00,25.843336,2


### EDA Power Spectrum

In [35]:
def compute_power_spectra(signal, band):
    try:
        x = np.fft.fft(signal - np.nanmean(signal))
        x = np.abs(x)
        freq = np.fft.fftfreq(len(signal), d = 0.004)
    except:
        return 0
    low, high = np.array(band)
    vals = [i for i in x if high>i>low]
    idx = [list(x).index(i) for i in vals]

    frequencies = freq[idx]
    power = np.sum((frequencies/len(signal))**2)
    return power

In [36]:
def compute_frequency_band_power(signal, band, sampling_rate):
    freq, power = ss.periodogram(signal - np.nanmean(signal), sampling_rate)
    low_f1, low_f2, high_f1, high_f2 = np.array(band)
    lfp_idx  = np.where((freq>=low_f1) & (freq>=low_f2))[0]
    hfp_idx  = np.where((freq>=high_f1) & (freq>=high_f2))[0] 
    lfp = np.trapz(power[lfp_idx], x = freq[lfp_idx])
    hfp = np.trapz(power[hfp_idx], x = freq[hfp_idx])
    
    return [lfp, hfp, lfp/hfp]

In [37]:
#High Frequecy and Low Frequency
bands =  [0.045, 0.15, 0.15,0.25]
eda_powers_df = pd.DataFrame([])
for participant in participants:
    data = t.loc(axis = 0)[participant,:,:,:]
    eda_powers = data["EDA"].groupby(["Participant","type","context",pd.Grouper(freq = "30s", level = "Datetime_sub")]).\
                apply(lambda x:compute_frequency_band_power(x,bands,250) if len(x)>1 else pd.Series(np.nan))
    eda_powers = pd.DataFrame(eda_powers)
    eda_powers = pd.DataFrame(list(eda_powers["EDA"].values), index = eda_powers.index, 
                              columns = ["eda_lf","eda_hf","eda_lf_hf_ratio"])
    eda_powers_df = pd.concat([eda_powers_df, eda_powers])
   

In [38]:
eda_powers_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,eda_lf,eda_hf,eda_lf_hf_ratio
Participant,type,context,Datetime_sub,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
22,2D,StartT1,00:00:00,0.011341,0.002265,5.007546
22,2D,StartT1,00:00:30,0.004952,0.003437,1.440806
22,2D,StartT1,00:01:00,0.024106,0.00633,3.808441
22,2D,StartT1,00:01:30,0.185619,0.066398,2.795536
22,2D,StartT1,00:02:00,0.038494,0.023487,1.638928


In [39]:
eda_features = eda_features.merge(eda_powers_df, how = 'left', right_index = True, left_index = True)

In [40]:
eda_features.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,eda_mean,eda_no_of_peaks,eda_lf,eda_hf,eda_lf_hf_ratio
Participant,type,context,Datetime_sub,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
22,2D,StartT1,00:00:00,22.6476,4,0.011341,0.002265,5.007546
22,2D,StartT1,00:00:30,22.158941,7,0.004952,0.003437,1.440806
22,2D,StartT1,00:01:00,23.412629,6,0.024106,0.00633,3.808441
22,2D,StartT1,00:01:30,22.855152,6,0.185619,0.066398,2.795536
22,2D,StartT1,00:02:00,25.843336,2,0.038494,0.023487,1.638928


Merging the features with its original Datetime

In [41]:
eda_features = eda_features.merge(eda_temp, how = "left", right_index = True, left_index = True).\
                    set_index("Datetime", append = True)

In [42]:
eda_features.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,eda_mean,eda_no_of_peaks,eda_lf,eda_hf,eda_lf_hf_ratio
Participant,type,context,Datetime_sub,Datetime,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
22,2D,StartT1,00:00:00,2019-08-22 13:37:44.648,22.6476,4,0.011341,0.002265,5.007546
22,2D,StartT1,00:00:30,2019-08-22 13:38:14.976,22.158941,7,0.004952,0.003437,1.440806
22,2D,StartT1,00:01:00,2019-08-22 13:38:45.292,23.412629,6,0.024106,0.00633,3.808441
22,2D,StartT1,00:01:30,2019-08-22 13:39:15.332,22.855152,6,0.185619,0.066398,2.795536
22,2D,StartT1,00:02:00,2019-08-22 13:39:44.804,25.843336,2,0.038494,0.023487,1.638928


### Removing the "Start" and "Stop" from the events

In [43]:
def change_events(df, log_events, time_window):
    df = df.reset_index("context")
    s = [str(x).replace('Start','') for x in df['context'].values]
    s = [str(x).replace('Stop','') for x in s]
    s = [str(x).replace('nan','Garbage') for x in s]
    df['context'] = s
    df = df[df['context']!="FrontBuffer"]
    df = df[df['context']!="EndBuffer"]
    df = df.reset_index().set_index(["Participant","type","context","Datetime_sub"])
    return df

In [45]:
# features_dataframe = {"ecg_hr":ecg_hr_features, "ecg_std":ecg_hrv_features, "skt":skin_temperature, "eda":eda_features}
# time_windows = {"ecg_hr":"5s", "ecg_std":"20s", "skt":"10s", "eda": "30s"}
# all_features = pd.DataFrame([])
# flag = False
# for index, data in features_dataframe.items():
#     print(index)
#     t = time_windows[index]
#     f = change_events(data, log_events, t)
#     display(f.head())
    


In [46]:
all_features = ecg_hr_features.merge(ecg_hrv_features, how = "left", right_index = True, left_index = True)
all_features.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,ecg_heart_rate,ecg_hrv_std
Participant,type,context,Datetime_sub,Datetime,Unnamed: 5_level_1,Unnamed: 6_level_1
22,2D,StartT1,00:00:00,2019-08-22 13:37:44.648,81.648523,10.253919
22,2D,StartT1,00:00:05,2019-08-22 13:37:49.792,82.938389,
22,2D,StartT1,00:00:10,2019-08-22 13:37:54.856,82.872928,
22,2D,StartT1,00:00:15,2019-08-22 13:37:59.924,82.482325,
22,2D,StartT1,00:00:20,2019-08-22 13:38:05.016,86.065574,27.651886


In [47]:
all_features = all_features.merge(skin_temperature, how = "left", right_index = True, left_index = True)
all_features.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,ecg_heart_rate,ecg_hrv_std,skin_temperature
Participant,type,context,Datetime_sub,Datetime,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
22,2D,StartT1,00:00:00,2019-08-22 13:37:44.648,81.648523,10.253919,30.371319
22,2D,StartT1,00:00:05,2019-08-22 13:37:49.792,82.938389,,
22,2D,StartT1,00:00:10,2019-08-22 13:37:54.856,82.872928,,30.383633
22,2D,StartT1,00:00:15,2019-08-22 13:37:59.924,82.482325,,
22,2D,StartT1,00:00:20,2019-08-22 13:38:05.016,86.065574,27.651886,30.383632


In [48]:
all_features = all_features.merge(eda_features, how = "left", right_index = True, left_index = True)
all_features.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,ecg_heart_rate,ecg_hrv_std,skin_temperature,eda_mean,eda_no_of_peaks,eda_lf,eda_hf,eda_lf_hf_ratio
Participant,type,context,Datetime_sub,Datetime,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
22,2D,StartT1,00:00:00,2019-08-22 13:37:44.648,81.648523,10.253919,30.371319,22.6476,4.0,0.011341,0.002265,5.007546
22,2D,StartT1,00:00:05,2019-08-22 13:37:49.792,82.938389,,,,,,,
22,2D,StartT1,00:00:10,2019-08-22 13:37:54.856,82.872928,,30.383633,,,,,
22,2D,StartT1,00:00:15,2019-08-22 13:37:59.924,82.482325,,,,,,,
22,2D,StartT1,00:00:20,2019-08-22 13:38:05.016,86.065574,27.651886,30.383632,,,,,
22,2D,StartT1,00:00:25,2019-08-22 13:38:09.896,82.677165,,,,,,,
22,2D,StartT1,00:00:30,2019-08-22 13:38:14.976,80.152672,,30.401948,22.158941,7.0,0.004952,0.003437,1.440806
22,2D,StartT1,00:00:35,2019-08-22 13:38:20.216,79.646018,,,,,,,
22,2D,StartT1,00:00:40,2019-08-22 13:38:24.736,82.807571,17.838191,30.404511,,,,,
22,2D,StartT1,00:00:45,2019-08-22 13:38:29.808,80.769231,,,,,,,


In [49]:
# all_features.drop("context",axis = 1, inplace = True)
all_features.reset_index("context", inplace = True)
s = [str(x).replace('Start','') for x in all_features['context'].values]
s = [str(x).replace('Stop','') for x in s]
s = [str(x).replace('nan','Garbage') for x in s]
all_features['context'] = s
all_features = all_features[all_features["context"]!="EndBuffer"]
all_features = all_features[all_features['context']!="FrontBuffer"]
all_features = all_features.reset_index().set_index(["Participant","type","context","Datetime"])
all_features.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Datetime_sub,ecg_heart_rate,ecg_hrv_std,skin_temperature,eda_mean,eda_no_of_peaks,eda_lf,eda_hf,eda_lf_hf_ratio
Participant,type,context,Datetime,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
22,2D,T1,2019-08-22 13:37:44.648,00:00:00,81.648523,10.253919,30.371319,22.6476,4.0,0.011341,0.002265,5.007546
22,2D,T1,2019-08-22 13:37:49.792,00:00:05,82.938389,,,,,,,
22,2D,T1,2019-08-22 13:37:54.856,00:00:10,82.872928,,30.383633,,,,,
22,2D,T1,2019-08-22 13:37:59.924,00:00:15,82.482325,,,,,,,
22,2D,T1,2019-08-22 13:38:05.016,00:00:20,86.065574,27.651886,30.383632,,,,,


In [50]:
all_features = all_features.reset_index("Datetime").groupby(["Participant","type","context"], as_index = False).apply(lambda x: x.head(24))

In [51]:
all_features = all_features.reset_index().drop("level_0", axis = 1).\
        rename(columns = {"type":"Type"})
all_features = all_features[all_features['context']!="Garbage"]

all_features.set_index(["Participant","Type","context","Datetime"], inplace = True)
all_features.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Datetime_sub,ecg_heart_rate,ecg_hrv_std,skin_temperature,eda_mean,eda_no_of_peaks,eda_lf,eda_hf,eda_lf_hf_ratio
Participant,Type,context,Datetime,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
22,2D,T1,2019-08-22 13:37:44.648,00:00:00,81.648523,10.253919,30.371319,22.6476,4.0,0.011341,0.002265,5.007546
22,2D,T1,2019-08-22 13:37:49.792,00:00:05,82.938389,,,,,,,
22,2D,T1,2019-08-22 13:37:54.856,00:00:10,82.872928,,30.383633,,,,,
22,2D,T1,2019-08-22 13:37:59.924,00:00:15,82.482325,,,,,,,
22,2D,T1,2019-08-22 13:38:05.016,00:00:20,86.065574,27.651886,30.383632,,,,,


In [52]:
d = all_features.copy()
d.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Datetime_sub,ecg_heart_rate,ecg_hrv_std,skin_temperature,eda_mean,eda_no_of_peaks,eda_lf,eda_hf,eda_lf_hf_ratio
Participant,Type,context,Datetime,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
22,2D,T1,2019-08-22 13:37:44.648,00:00:00,81.648523,10.253919,30.371319,22.6476,4.0,0.011341,0.002265,5.007546
22,2D,T1,2019-08-22 13:37:49.792,00:00:05,82.938389,,,,,,,
22,2D,T1,2019-08-22 13:37:54.856,00:00:10,82.872928,,30.383633,,,,,
22,2D,T1,2019-08-22 13:37:59.924,00:00:15,82.482325,,,,,,,
22,2D,T1,2019-08-22 13:38:05.016,00:00:20,86.065574,27.651886,30.383632,,,,,


In [53]:
all_features = all_features.reset_index().set_index(["Participant","Datetime","Type","context"]).sort_index(sort_remaining = True)
all_features = all_features.reset_index().set_index(["Participant","Type","context","Datetime"])

In [54]:
all_features.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Datetime_sub,ecg_heart_rate,ecg_hrv_std,skin_temperature,eda_mean,eda_no_of_peaks,eda_lf,eda_hf,eda_lf_hf_ratio
Participant,Type,context,Datetime,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
22,3D,T3,2019-08-22 13:21:30.124,00:00:00,79.787234,37.083413,29.483117,25.693551,8.0,0.094892,0.029017,3.270236
22,3D,T3,2019-08-22 13:21:35.388,00:00:05,85.365854,,,,,,,
22,3D,T3,2019-08-22 13:21:40.308,00:00:10,79.125848,,29.459804,,,,,


In [55]:
all_features.index.get_level_values("context").unique()

Index(['T3', 'T2', 'T4', 'T1'], dtype='object', name='context')

In [56]:
t3 = pd.DataFrame([])
for participant in participants:
    t2 = pd.DataFrame([])
    counter = 1
    sessions = all_features.loc(axis = 0)[participant,:,:,:].index.get_level_values('Type').unique()
    for k, session in enumerate(sessions):
        t1 = pd.DataFrame([])
        events = all_features.loc(axis = 0)[participant,session,:,:].index.get_level_values('context').unique()
        for j,event in enumerate(events):
            d = all_features.loc(axis = 0)[participant,session,event,:]
            d['feature_time'] = np.arange(0,len(d['ecg_heart_rate'].values))
            d['event_order'] = j+1
            d['event_session_order'] = counter
            counter+=1
            t1 = pd.concat([t1,d])
#             break
#             display(t1)
        t1['session_order'] = k+1
#         print(k)
        t2 = pd.concat([t2,t1])
#         break
    t3 = pd.concat([t3,t2])
#     break

In [57]:
t3.drop("Datetime_sub", axis = 1, inplace = True)

In [58]:
t3.tail(30)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ecg_heart_rate,ecg_hrv_std,skin_temperature,eda_mean,eda_no_of_peaks,eda_lf,eda_hf,eda_lf_hf_ratio,feature_time,event_order,event_session_order,session_order
Participant,Type,context,Datetime,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
41,3D,T4,2019-09-13 14:36:44.028,82.417582,,33.326687,4.445218,2.0,0.004532,0.003427,1.322657,18,3,7,2
41,3D,T4,2019-09-13 14:36:48.396,82.482325,,,,,,,,19,3,7,2
41,3D,T4,2019-09-13 14:36:53.488,82.872928,21.081851,33.446908,,,,,,20,3,7,2
41,3D,T4,2019-09-13 14:36:58.556,84.0,,,,,,,,21,3,7,2
41,3D,T4,2019-09-13 14:37:03.556,82.159624,,33.578331,,,,,,22,3,7,2
41,3D,T4,2019-09-13 14:37:08.668,82.482325,,,,,,,,23,3,7,2
41,3D,T2,2019-09-13 14:37:23.276,82.807571,25.522684,33.713939,4.403788,6.0,0.007735,0.004414,1.752248,0,4,8,2
41,3D,T2,2019-09-13 14:37:28.348,82.742317,,,,,,,,1,4,8,2
41,3D,T2,2019-09-13 14:37:33.424,83.06962,,33.692893,,,,,,2,4,8,2
41,3D,T2,2019-09-13 14:37:38.480,82.417582,,,,,,,,3,4,8,2


In [59]:
feature_names = all_features.columns[1:len(all_features.columns)]
feature_names

Index(['ecg_heart_rate', 'ecg_hrv_std', 'skin_temperature', 'eda_mean',
       'eda_no_of_peaks', 'eda_lf', 'eda_hf', 'eda_lf_hf_ratio'],
      dtype='object')

In [60]:
t3.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ecg_heart_rate,ecg_hrv_std,skin_temperature,eda_mean,eda_no_of_peaks,eda_lf,eda_hf,eda_lf_hf_ratio,feature_time,event_order,event_session_order,session_order
Participant,Type,context,Datetime,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
22,3D,T3,2019-08-22 13:21:30.124,79.787234,37.083413,29.483117,25.693551,8.0,0.094892,0.029017,3.270236,0,1,1,1
22,3D,T3,2019-08-22 13:21:35.388,85.365854,,,,,,,,1,1,1,1
22,3D,T3,2019-08-22 13:21:40.308,79.125848,,29.459804,,,,,,2,1,1,1
22,3D,T3,2019-08-22 13:21:45.616,77.922078,,,,,,,,3,1,1,1
22,3D,T3,2019-08-22 13:21:50.236,76.642336,21.468546,29.45924,,,,,,4,1,1,1


In [61]:
# data_full = t3.copy()
# cleaned_features = pd.DataFrame([])
# for participant in participants:
#     data = data_full.loc(axis =  0)[participant,:,:,:]
#     for feature in feature_names:
#         values = data[feature].values
#         values_mean = np.nanmean(values)
#         upper_threshold = values_mean + (3*np.std(values))
#         lower_threshold = values_mean - (3*np.std(values))
#         v = [x if (x > lower_threshold and x < upper_threshold) else np.nan for x in values]
#         data[feature] = v
#     cleaned_features = pd.concat([cleaned_features,data])
    

In [64]:
t3.head(30)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ecg_heart_rate,ecg_hrv_std,skin_temperature,eda_mean,eda_no_of_peaks,eda_lf,eda_hf,eda_lf_hf_ratio,feature_time,event_order,event_session_order,session_order
Participant,Type,context,Datetime,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
22,3D,T3,2019-08-22 13:21:30.124,79.787234,37.083413,29.483117,25.693551,8.0,0.094892,0.029017,3.270236,0,1,1,1
22,3D,T3,2019-08-22 13:21:35.388,85.365854,,,,,,,,1,1,1,1
22,3D,T3,2019-08-22 13:21:40.308,79.125848,,29.459804,,,,,,2,1,1,1
22,3D,T3,2019-08-22 13:21:45.616,77.922078,,,,,,,,3,1,1,1
22,3D,T3,2019-08-22 13:21:50.236,76.642336,21.468546,29.45924,,,,,,4,1,1,1
22,3D,T3,2019-08-22 13:21:55.716,77.653149,,,,,,,,5,1,1,1
22,3D,T3,2019-08-22 13:22:00.352,75.268817,,29.458863,24.869057,8.0,0.071251,0.018265,3.900921,6,1,1,1
22,3D,T3,2019-08-22 13:22:05.932,77.854671,,,,,,,,7,1,1,1
22,3D,T3,2019-08-22 13:22:10.556,75.885329,26.610813,29.485471,,,,,,8,1,1,1
22,3D,T3,2019-08-22 13:22:15.300,76.866764,,,,,,,,9,1,1,1


In [65]:
def filter_features(data):
    features_names = ['ecg_heart_rate', 'ecg_hrv_std', 'skin_temperature', 'eda_mean',
                       'eda_no_of_peaks', 'eda_lf', 'eda_hf', 'eda_lf_hf_ratio']
    for feature in feature_names:
        values = data[[feature]].values
        values_mean = np.nanmean(values)
        upper_threshold = values_mean + (3*np.nanstd(values))
        lower_threshold = values_mean - (3*np.nanstd(values))
        v = [x[0] if (x > lower_threshold and x < upper_threshold) else np.nan for x in values]
        data[feature] = v
    return data

In [66]:
data_full = t3.groupby("Participant").apply(lambda x: filter_features(x))

In [67]:
data_full.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ecg_heart_rate,ecg_hrv_std,skin_temperature,eda_mean,eda_no_of_peaks,eda_lf,eda_hf,eda_lf_hf_ratio,feature_time,event_order,event_session_order,session_order
Participant,Type,context,Datetime,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
22,3D,T3,2019-08-22 13:21:30.124,79.787234,37.083413,29.483117,25.693551,8.0,0.094892,0.029017,3.270236,0,1,1,1
22,3D,T3,2019-08-22 13:21:35.388,85.365854,,,,,,,,1,1,1,1
22,3D,T3,2019-08-22 13:21:40.308,79.125848,,29.459804,,,,,,2,1,1,1
22,3D,T3,2019-08-22 13:21:45.616,77.922078,,,,,,,,3,1,1,1
22,3D,T3,2019-08-22 13:21:50.236,76.642336,21.468546,29.45924,,,,,,4,1,1,1


In [68]:
joblib.dump(data_full, "/mnt/shared_drive/data/brain_therapy/BT_MVP_Content_Pilot/MVP_Working_Memory/pkls/features.pkl")

['/mnt/shared_drive/data/brain_therapy/BT_MVP_Content_Pilot/MVP_Working_Memory/pkls/features.pkl']

In [None]:
feature_anove = pd.read_csv("/mnt/bkt_prd_dsv_bbrain_therapy_raw")

In [None]:
t3.loc(axis = 0)['0023',:,:,:]