In [1]:
%matplotlib inline

In [2]:
import os 
import sys
import json
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
import neurokit as nk
import scipy as sc
import math 
import scipy.signal as ss
import warnings
import itertools

from scipy import signal
warnings.filterwarnings('ignore')

In [None]:
path = "/mnt/shared_drive/data/brain_therapy/Brain_Therapy_MVP_Content_Pilot/FA_corrected_signals"
files = os.listdir(path)

In [None]:
sensor_data = pd.DataFrame([])
for file in files:
    data = pd.read_csv(os.path.join(path,file))
    data['Participant'] = file.split("-")[3].split(".")[0]
    data['Datetime'] = pd.to_datetime(data['Datetime'])
    data.set_index(['Participant','Datetime'], inplace = True)
    sensor_data = pd.concat([sensor_data,data])

In [None]:
participants = sensor_data.index.get_level_values('Participant').unique().values
participants = sorted(participants)
sensor_data.sort_index(sort_remaining = True, inplace = True)
sensor_data.head()

In [3]:
log_events = joblib.load("/mnt/shared_drive/data/brain_therapy/log_events.pkl")

In [4]:
log_events = log_events.reset_index().rename(columns = {'participant':"Participant", "time":"Datetime"}).set_index(["Participant","Datetime"])
log_events.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,type,context
Participant,Datetime,Unnamed: 2_level_1,Unnamed: 3_level_1
1,2019-07-10 15:25:32,2D,StartFrontBuffer
1,2019-07-10 15:27:32,2D,StopFrontBuffer
1,2019-07-10 15:27:32,2D,StartLL
1,2019-07-10 15:29:42,2D,StopLL
1,2019-07-10 15:29:42,2D,StartHH


### Using the curated signals to compute features

In [5]:
curated_signals = joblib.load("/mnt/shared_drive/data/brain_therapy/Brain_Therapy_MVP_Content_Pilot/FA_corrected_signals_amplitude.pkl")

In [6]:
p = curated_signals.keys()
curated_df = pd.DataFrame([])
for part in p:
    data = curated_signals[part]
    data['Participant'] = part.split("-")[3]
    curated_df = pd.concat([curated_df,data])
curated_df.set_index(["Participant","Datetime"], inplace = True)
curated_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,RSP,PPG,1-SKTA,ECG,EDA
Participant,Datetime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,2019-07-10 15:22:31.000,-1.263123,0.003052,12.941162,-0.234833,0.041124
1,2019-07-10 15:22:31.004,-1.261902,0.003662,12.945435,-0.226593,0.042649
1,2019-07-10 15:22:31.008,-1.264648,0.002136,12.946045,-0.215607,0.044175
1,2019-07-10 15:22:31.012,-1.261902,0.003662,12.945435,-0.206604,0.039598
1,2019-07-10 15:22:31.016,-1.262207,0.003357,12.946045,-0.198364,0.042649


In [7]:
temp_df = curated_df.merge(log_events, how = "left", right_index = True, left_index = True)

In [222]:
t = temp_df.copy()

In [223]:
t['context'] = t['context'].ffill()
t['type'] = t['type'].ffill()
t['context'] = t['context'].fillna("Garbage")
t['type'] = t['type'].fillna("Garbage")

In [224]:
# t = t[pd.notnull(t['context'])]
t = t.reset_index().set_index(["Participant","type","context","Datetime"])
t.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,RSP,PPG,1-SKTA,ECG,EDA
Participant,type,context,Datetime,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,Garbage,Garbage,2019-07-10 15:22:31.000,-1.263123,0.003052,12.941162,-0.234833,0.041124
1,Garbage,Garbage,2019-07-10 15:22:31.004,-1.261902,0.003662,12.945435,-0.226593,0.042649
1,Garbage,Garbage,2019-07-10 15:22:31.008,-1.264648,0.002136,12.946045,-0.215607,0.044175
1,Garbage,Garbage,2019-07-10 15:22:31.012,-1.261902,0.003662,12.945435,-0.206604,0.039598
1,Garbage,Garbage,2019-07-10 15:22:31.016,-1.262207,0.003357,12.946045,-0.198364,0.042649


In [73]:
x_temp = t.copy()
x_temp.reset_index("Datetime", inplace = True)
x_new = pd.DataFrame(x_temp[["Datetime"]].groupby(["Participant","type","context"]).apply(lambda x: x['Datetime'].values[0]), columns = ["time_zero"])

In [74]:
x_new.head()
x_temp.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Datetime,RSP,PPG,1-SKTA,ECG,EDA
Participant,type,context,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,Garbage,Garbage,2019-07-10 15:22:31.000,-1.263123,0.003052,12.941162,-0.234833,0.041124
1,Garbage,Garbage,2019-07-10 15:22:31.004,-1.261902,0.003662,12.945435,-0.226593,0.042649
1,Garbage,Garbage,2019-07-10 15:22:31.008,-1.264648,0.002136,12.946045,-0.215607,0.044175
1,Garbage,Garbage,2019-07-10 15:22:31.012,-1.261902,0.003662,12.945435,-0.206604,0.039598
1,Garbage,Garbage,2019-07-10 15:22:31.016,-1.262207,0.003357,12.946045,-0.198364,0.042649


In [94]:
x_new2 = x_temp.merge(x_new, how = "left", right_index = True, left_index = True)
x_new2["Datetime_sub"] = x_new2["Datetime"] - x_new2["time_zero"]
x_new2.set_index(["Datetime_sub","Datetime"], append = True, inplace = True)
t = x_new2.copy()

In [227]:
t.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,RSP,PPG,1-SKTA,ECG,EDA,time_zero
Participant,type,context,Datetime_sub,Datetime,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,2D,StartEndBuffer,00:00:00,2019-07-10 15:36:12.000,-0.8078,0.002441,12.976562,-0.327148,3.355333,2019-07-10 15:36:12
1,2D,StartEndBuffer,00:00:00.004000,2019-07-10 15:36:12.004,-0.794067,0.003052,12.979004,-0.327454,3.355333,2019-07-10 15:36:12
1,2D,StartEndBuffer,00:00:00.008000,2019-07-10 15:36:12.008,-0.787659,0.003357,12.977173,-0.327148,3.356858,2019-07-10 15:36:12
1,2D,StartEndBuffer,00:00:00.012000,2019-07-10 15:36:12.012,-0.779114,0.003052,12.979004,-0.327454,3.35991,2019-07-10 15:36:12
1,2D,StartEndBuffer,00:00:00.016000,2019-07-10 15:36:12.016,-0.772705,0.002747,12.978394,-0.323334,3.358384,2019-07-10 15:36:12


# ECG

In [81]:
participants = t.index.get_level_values("Participant").unique()

In [82]:
good_peaks = joblib.load("/mnt/shared_drive/data/brain_therapy/MVP_Focused_Attention/FA_r_peaks_index.pkl")

Computing ECG Heart Rate and Heart Rate Variability

In [233]:
ecg_hr_features = pd.DataFrame([])
ecg_hrv_features = pd.DataFrame([])
time_diff_df = pd.DataFrame([])
for participant in participants:
    data = t.loc(axis = 0)[participant,:,:,:,:]
    ecg_peaks = good_peaks["BT-MVP-FA-{}".format(participant)]
    time_stamps = data.index.get_level_values('Datetime')[ecg_peaks]
    d = data.loc(axis = 0)[participant,:,:,:,time_stamps]
    
    
    #Computing time differences
    try:
        d['time_differences'] = np.concatenate([np.diff(time_stamps)/np.timedelta64(1,'ms'), [0]])
    except:
        d['time_differences'] = np.concatenate([np.diff(time_stamps)/np.timedelta64(1,'ms'), [0,0]])

    d = d[['time_differences']]
    #Taking out time differences that is more than a minute
    df = d[d.time_differences<60000]
    diff_mean = np.mean(df.time_differences)
    upper_threshold = diff_mean + (3*np.std(df.time_differences))
    lower_threshold = diff_mean - (3*np.std(df.time_differences))
    df = df[(df.time_differences<upper_threshold) & (df.time_differences>lower_threshold)]
    
    # Drop NaN contexts
    temp = df.reset_index("context")
    temp = temp[pd.notnull(temp['context'])]
#     display(temp)
    df = temp.reset_index().set_index(["Participant","type","context","Datetime","Datetime_sub"])
    time_diff_df = pd.concat([time_diff_df, df])
    

#     break

In [243]:
time_diff_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Datetime,time_differences
Participant,type,context,Datetime_sub,Unnamed: 4_level_1,Unnamed: 5_level_1
1,2D,StartEndBuffer,00:00:00.700000,2019-07-10 15:36:12.700,1064.0
1,2D,StartEndBuffer,00:00:01.764000,2019-07-10 15:36:13.764,1024.0
1,2D,StartEndBuffer,00:00:02.788000,2019-07-10 15:36:14.788,1016.0
1,2D,StartEndBuffer,00:00:03.804000,2019-07-10 15:36:15.804,1036.0
1,2D,StartEndBuffer,00:00:04.840000,2019-07-10 15:36:16.840,940.0


In [261]:
y_temp = time_diff_df.reset_index("Datetime_sub")
y_new = pd.DataFrame(y_temp[["Datetime_sub"]].groupby(["Participant","type","context"]).apply(lambda x: x['Datetime_sub'].values[0]),
                     columns = ["time_zero"])
y_new2 = y_temp.merge(y_new, how = "left", right_index = True, left_index = True)
y_new2["Datetime_sub"] = y_new2["Datetime_sub"] - y_new2["time_zero"]
y_new2.set_index(["Datetime_sub","Datetime"], append = True, inplace = True)
time_diff_new = y_new2.copy()

In [262]:
time_diff_new.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,time_differences,time_zero
Participant,type,context,Datetime_sub,Datetime,Unnamed: 5_level_1,Unnamed: 6_level_1
1,2D,StartEndBuffer,00:00:00,2019-07-10 15:36:12.700,1064.0,00:00:00.700000
1,2D,StartEndBuffer,00:00:01.064000,2019-07-10 15:36:13.764,1024.0,00:00:00.700000
1,2D,StartEndBuffer,00:00:02.088000,2019-07-10 15:36:14.788,1016.0,00:00:00.700000
1,2D,StartEndBuffer,00:00:03.104000,2019-07-10 15:36:15.804,1036.0,00:00:00.700000
1,2D,StartEndBuffer,00:00:04.140000,2019-07-10 15:36:16.840,940.0,00:00:00.700000


In [270]:
ecg_hr_features = time_diff_new.groupby(["Participant","type","context",pd.Grouper(freq = "5s", level = "Datetime_sub")], sort = True).mean()
ecg_hr_features['ecg_heart_rate'] = (60/ecg_hr_features['time_differences'])*1000
ecg_hr_features.drop("time_differences",axis = 1, inplace = True)
ecg_hr_features.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ecg_heart_rate
Participant,type,context,Datetime_sub,Unnamed: 4_level_1
1,2D,StartEndBuffer,00:00:00,59.055118
1,2D,StartEndBuffer,00:00:05,61.813187
1,2D,StartEndBuffer,00:00:10,62.866723
1,2D,StartEndBuffer,00:00:15,64.655172
1,2D,StartEndBuffer,00:00:20,74.196208


In [267]:
ecg_hrv_features = time_diff_new.groupby(["Participant","type","context",pd.Grouper(freq = "20s", level = "Datetime_sub")], sort = True).std()
ecg_hrv_features.rename(columns = {"time_differences":"ecg_hrv_std"}, inplace = True)
ecg_hrv_features.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ecg_hrv_std
Participant,type,context,Datetime_sub,Unnamed: 4_level_1
1,2D,StartEndBuffer,00:00:00,70.691281
1,2D,StartEndBuffer,00:00:20,64.967976
1,2D,StartEndBuffer,00:00:40,55.276714
1,2D,StartEndBuffer,00:01:00,48.497066
1,2D,StartEndBuffer,00:01:20,91.838782


# Skin Temperature

In [119]:
skin_temperature = t[['1-SKTA']]
skin_temperature = skin_temperature.groupby(["Participant","type","context", pd.Grouper(freq = "10s", level = "Datetime_sub")]).mean()
skin_temperature.rename(columns = {"1-SKTA":"skin_temperature"}, inplace = True)
display(skin_temperature.head())

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,skin_temperature
Participant,type,context,Datetime_sub,Unnamed: 4_level_1
1,2D,StartEndBuffer,00:00:00,12.96783
1,2D,StartEndBuffer,00:00:10,12.959119
1,2D,StartEndBuffer,00:00:20,12.959495
1,2D,StartEndBuffer,00:00:30,12.967094
1,2D,StartEndBuffer,00:00:40,12.966464


In [120]:
len(skin_temperature.loc(axis = 0)["0021","2D","StartLL",:])

13

# EDA

In [121]:
#EDA MEAN
eda_features = t[['EDA']]
eda_features = eda_features.groupby(["Participant","type","context", pd.Grouper(freq = "30s", level = "Datetime_sub")]).mean()
eda_features.rename(columns = {"EDA":"eda_mean"}, inplace = True)
display(eda_features.head())

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,eda_mean
Participant,type,context,Datetime_sub,Unnamed: 4_level_1
1,2D,StartEndBuffer,00:00:00,3.677715
1,2D,StartEndBuffer,00:00:30,5.122383
1,2D,StartEndBuffer,00:01:00,5.102888
1,2D,StartEndBuffer,00:01:30,5.220675
1,2D,StartFrontBuffer,00:00:00,5.467291


In [122]:
len(skin_temperature.loc(axis = 0)["0001","3D","StartLL",:])

13

In [125]:
def get_eda_features(eda):
    if len(eda > 15):
        try:
            features = nk.eda_process(eda = eda, sampling_rate = 250)
        except:
#             print("No peaks here")
            return 0
    else:
        return 0
    peaks = len(features['EDA']['SCR_Peaks_Indexes'])
    return peaks

In [126]:
#Number of peaks 
eda_peaks_df = pd.DataFrame([])
for participant in participants:
    data = t.loc(axis = 0)[participant,:,:,:]
#     display(data.head())
    d = data['EDA'].groupby(["Participant","type","context", pd.Grouper(freq = '30s', level = "Datetime_sub")]).apply(lambda x: get_eda_features(x))
    d = pd.DataFrame(d)
    eda_peaks_df = pd.concat([eda_peaks_df, d])
# t['EDA'].groupby(["Participant","type","context", pd.Grouper(freq = '30s', level = "Datetime_sub")]).apply(lambda x: get_eda_features(x))

In [127]:
eda_peaks_df.rename(columns = {"EDA":"eda_no_of_peaks"}, inplace = True)
eda_features = eda_features.merge(eda_peaks_df, how = "left",right_index = True, left_index = True)

In [128]:
eda_features.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,eda_mean,eda_no_of_peaks
Participant,type,context,Datetime_sub,Unnamed: 4_level_1,Unnamed: 5_level_1
1,2D,StartEndBuffer,00:00:00,3.677715,6
1,2D,StartEndBuffer,00:00:30,5.122383,4
1,2D,StartEndBuffer,00:01:00,5.102888,3
1,2D,StartEndBuffer,00:01:30,5.220675,7
1,2D,StartFrontBuffer,00:00:00,5.467291,3


In [None]:
def compute_power_spectra(signal, band):
    try:
        x = np.fft.fft(signal - np.nanmean(signal))
        x = np.abs(x)
        freq = np.fft.fftfreq(len(signal), d = 0.004)
    except:
        return 0
    low, high = np.array(band)
    vals = [i for i in x if high>i>low]
    idx = [list(x).index(i) for i in vals]

    frequencies = freq[idx]
    power = np.sum((frequencies/len(signal))**2)
    return power

In [129]:
def compute_frequency_band_power(signal, band, sampling_rate):
    freq, power = ss.periodogram(signal - np.nanmean(signal), sampling_rate)
    low_f1, low_f2, high_f1, high_f2 = np.array(band)
    lfp_idx  = np.where((freq>=low_f1) & (freq>=low_f2))[0]
    hfp_idx  = np.where((freq>=high_f1) & (freq>=high_f2))[0] 
    lfp = np.trapz(power[lfp_idx], x = freq[lfp_idx])
    hfp = np.trapz(power[hfp_idx], x = freq[hfp_idx])
    
    return [lfp, hfp, lfp/hfp]

In [214]:
#High Frequecy and Low Frequency
bands =  [0.045, 0.15, 0.15,0.25]
eda_powers_df = pd.DataFrame([])
for participant in participants:
    data = t.loc(axis = 0)[participant,:,:,:]
    eda_powers = data["EDA"].groupby(["Participant","type","context",pd.Grouper(freq = "30s", level = "Datetime_sub")]).\
                apply(lambda x:compute_frequency_band_power(x,bands,250) if len(x)>1 else pd.Series(np.nan))
    eda_powers = pd.DataFrame(eda_powers)
    eda_powers = pd.DataFrame(list(eda_powers["EDA"].values), index = eda_powers.index, 
                              columns = ["eda_lf","eda_hf","eda_lf_hf_ratio"])
    eda_powers_df = pd.concat([eda_powers_df, eda_powers])
   

In [215]:
eda_powers_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,eda_lf,eda_hf,eda_lf_hf_ratio
Participant,type,context,Datetime_sub,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,2D,StartEndBuffer,00:00:00,0.037592,0.023888,1.57369
1,2D,StartEndBuffer,00:00:30,0.001944,0.000539,3.608991
1,2D,StartEndBuffer,00:01:00,0.007135,0.002519,2.832421
1,2D,StartEndBuffer,00:01:30,0.006044,0.003774,1.601694
1,2D,StartFrontBuffer,00:00:00,0.025428,0.019284,1.318618


In [216]:
eda_features.drop(["eda_lf","eda_hf","eda_lf_hf_ratio"], axis = 1, inplace = True)

In [217]:
eda_features = eda_features.merge(eda_powers_df, how = 'left', right_index = True, left_index = True)

In [218]:
eda_features.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,eda_mean,eda_no_of_peaks,eda_lf,eda_hf,eda_lf_hf_ratio
Participant,type,context,Datetime_sub,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,2D,StartEndBuffer,00:00:00,3.677715,6,0.037592,0.023888,1.57369
1,2D,StartEndBuffer,00:00:30,5.122383,4,0.001944,0.000539,3.608991
1,2D,StartEndBuffer,00:01:00,5.102888,3,0.007135,0.002519,2.832421
1,2D,StartEndBuffer,00:01:30,5.220675,7,0.006044,0.003774,1.601694
1,2D,StartFrontBuffer,00:00:00,5.467291,3,0.025428,0.019284,1.318618


In [201]:
ecg_hr_features_v2 = ecg_hr_features.copy()
ecg_hr_features_v2.reset_index("Datetime_sub", inplace = True)
ecg_hr_features_v2["Datetime_sub"] = ecg_hr_features_v2["Datetime_sub"] / np.timedelta64(1, 's')
ecg_hr_features_v2['Datetime_sub'] = np.round(ecg_hr_features_v2['Datetime_sub'])
ecg_hr_features_v2['Datetime_sub'] = pd.to_timedelta(ecg_hr_features_v2['Datetime_sub'], unit = 's')
# ecg_hr_features_v2.head()
ecg_hr_features_v2.set_index("Datetime_sub", append = True, inplace = True)
ecg_hr_features_v2.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ecg_heart_rate
Participant,type,context,Datetime_sub,Unnamed: 4_level_1
1,2D,StartEndBuffer,00:00:00,59.055118
1,2D,StartEndBuffer,00:00:05,61.374795
1,2D,StartEndBuffer,00:00:10,63.184499
1,2D,StartEndBuffer,00:00:15,62.761506
1,2D,StartEndBuffer,00:00:20,73.529412


In [203]:
ecg_hrv_features_v2 = ecg_hrv_features.copy()
ecg_hrv_features_v2.reset_index("Datetime_sub", inplace = True)
ecg_hrv_features_v2["Datetime_sub"] = ecg_hrv_features_v2["Datetime_sub"] / np.timedelta64(1, 's')
ecg_hrv_features_v2['Datetime_sub'] = np.round(ecg_hrv_features_v2['Datetime_sub'])
ecg_hrv_features_v2['Datetime_sub'] = pd.to_timedelta(ecg_hrv_features_v2['Datetime_sub'], unit = 's')
# ecg_hr_features_v2.head()
ecg_hrv_features_v2.set_index("Datetime_sub", append = True, inplace = True)
ecg_hrv_features_v2.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ecg_hrv_std
Participant,type,context,Datetime_sub,Unnamed: 4_level_1
1,2D,StartEndBuffer,00:00:00,63.71119
1,2D,StartEndBuffer,00:00:20,62.69671
1,2D,StartEndBuffer,00:00:40,55.093781
1,2D,StartEndBuffer,00:01:00,45.139898
1,2D,StartEndBuffer,00:01:20,90.796952


In [206]:
def change_events(df, log_events, time_window):
    df = df.reset_index("context")
    s = [str(x).replace('Start','') for x in df['context'].values]
    s = [str(x).replace('Stop','') for x in s]
    s = [str(x).replace('nan','Garbage') for x in s]
    df['context'] = s
    df = df[df['context']!="FrontBuffer"]
    df = df[df['context']!="EndBuffer"]
    df = df.reset_index().set_index(["Participant","type","context","Datetime_sub"])
    return df

In [276]:
features_dataframe = {"ecg_hr":ecg_hr_features, "ecg_std":ecg_hrv_features, "skt":skin_temperature, "eda":eda_features}
time_windows = {"ecg_hr":"5s", "ecg_std":"20s", "skt":"10s", "eda": "30s"}
all_features = pd.DataFrame([])
flag = False
for index, data in features_dataframe.items():
    print(index)
    t = time_windows[index]
    f = change_events(data, log_events, t)
    display(f.head())
    


ecg_hr


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ecg_heart_rate
Participant,type,context,Datetime_sub,Unnamed: 4_level_1
1,2D,HH,00:00:00,66.568047
1,2D,HH,00:00:05,68.545316
1,2D,HH,00:00:10,65.104167
1,2D,HH,00:00:15,61.425061
1,2D,HH,00:00:20,65.502183


ecg_std


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ecg_hrv_std
Participant,type,context,Datetime_sub,Unnamed: 4_level_1
1,2D,HH,00:00:00,60.411289
1,2D,HH,00:00:20,82.040176
1,2D,HH,00:00:40,83.371593
1,2D,HH,00:01:00,111.385606
1,2D,HH,00:01:20,68.275632


skt


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,skin_temperature
Participant,type,context,Datetime_sub,Unnamed: 4_level_1
1,2D,HH,00:00:00,12.959742
1,2D,HH,00:00:10,12.962748
1,2D,HH,00:00:20,12.959219
1,2D,HH,00:00:30,12.969762
1,2D,HH,00:00:40,12.963512


eda


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,eda_mean,eda_no_of_peaks,eda_lf,eda_hf,eda_lf_hf_ratio
Participant,type,context,Datetime_sub,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,2D,HH,00:00:00,3.248435,6,0.000444,0.000203,2.186737
1,2D,HH,00:00:30,2.994607,9,0.001145,0.00066,1.736156
1,2D,HH,00:01:00,2.745352,4,0.001074,0.000334,3.21584
1,2D,HH,00:01:30,2.567717,11,0.001928,0.001121,1.71934
1,2D,HH,00:02:00,2.327562,2,7.5e-05,6.2e-05,1.212903


In [323]:
all_features = ecg_hr_features.merge(ecg_hrv_features, how = "left", right_index = True, left_index = True)
all_features.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ecg_heart_rate,ecg_hrv_std
Participant,type,context,Datetime_sub,Unnamed: 4_level_1,Unnamed: 5_level_1
1,2D,StartEndBuffer,00:00:00,59.055118,70.691281
1,2D,StartEndBuffer,00:00:05,61.813187,
1,2D,StartEndBuffer,00:00:10,62.866723,
1,2D,StartEndBuffer,00:00:15,64.655172,
1,2D,StartEndBuffer,00:00:20,74.196208,64.967976


In [324]:
all_features = all_features.merge(skin_temperature, how = "left", right_index = True, left_index = True)
all_features.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ecg_heart_rate,ecg_hrv_std,skin_temperature
Participant,type,context,Datetime_sub,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,2D,StartEndBuffer,00:00:00,59.055118,70.691281,12.96783
1,2D,StartEndBuffer,00:00:05,61.813187,,
1,2D,StartEndBuffer,00:00:10,62.866723,,12.959119
1,2D,StartEndBuffer,00:00:15,64.655172,,
1,2D,StartEndBuffer,00:00:20,74.196208,64.967976,12.959495


In [325]:
all_features = all_features.merge(eda_features, how = "left", right_index = True, left_index = True)
all_features.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ecg_heart_rate,ecg_hrv_std,skin_temperature,eda_mean,eda_no_of_peaks,eda_lf,eda_hf,eda_lf_hf_ratio
Participant,type,context,Datetime_sub,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,2D,StartEndBuffer,00:00:00,59.055118,70.691281,12.96783,3.677715,6.0,0.037592,0.023888,1.57369
1,2D,StartEndBuffer,00:00:05,61.813187,,,,,,,
1,2D,StartEndBuffer,00:00:10,62.866723,,12.959119,,,,,
1,2D,StartEndBuffer,00:00:15,64.655172,,,,,,,
1,2D,StartEndBuffer,00:00:20,74.196208,64.967976,12.959495,,,,,
1,2D,StartEndBuffer,00:00:25,69.821567,,,,,,,
1,2D,StartEndBuffer,00:00:30,66.617321,,12.967094,5.122383,4.0,0.001944,0.000539,3.608991
1,2D,StartEndBuffer,00:00:35,65.16073,,,,,,,
1,2D,StartEndBuffer,00:00:40,66.371681,55.276714,12.966464,,,,,
1,2D,StartEndBuffer,00:00:45,66.19594,,,,,,,


In [326]:
# all_features.drop("context",axis = 1, inplace = True)
all_features.reset_index("context", inplace = True)
s = [str(x).replace('Start','') for x in all_features['context'].values]
s = [str(x).replace('Stop','') for x in s]
s = [str(x).replace('nan','Garbage') for x in s]
all_features['context'] = s
all_features = all_features[all_features["context"]!="EndBuffer"]
all_features = all_features[all_features['context']!="FrontBuffer"]
all_features = all_features.reset_index().set_index(["Participant","type","context","Datetime_sub"])
all_features.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ecg_heart_rate,ecg_hrv_std,skin_temperature,eda_mean,eda_no_of_peaks,eda_lf,eda_hf,eda_lf_hf_ratio
Participant,type,context,Datetime_sub,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,2D,HH,00:00:00,66.568047,60.411289,12.959742,3.248435,6.0,0.000444,0.000203,2.186737
1,2D,HH,00:00:05,68.545316,,,,,,,
1,2D,HH,00:00:10,65.104167,,12.962748,,,,,
1,2D,HH,00:00:15,61.425061,,,,,,,
1,2D,HH,00:00:20,65.502183,82.040176,12.959219,,,,,


In [327]:
joblib.dump(all_features,"/mnt/shared_drive/data/brain_therapy/new_features.pkl")

['/mnt/shared_drive/data/brain_therapy/new_features.pkl']

In [330]:
d = all_features.copy()
d.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ecg_heart_rate,ecg_hrv_std,skin_temperature,eda_mean,eda_no_of_peaks,eda_lf,eda_hf,eda_lf_hf_ratio
Participant,type,context,Datetime_sub,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,2D,HH,00:00:00,66.568047,60.411289,12.959742,3.248435,6.0,0.000444,0.000203,2.186737
1,2D,HH,00:00:05,68.545316,,,,,,,
1,2D,HH,00:00:10,65.104167,,12.962748,,,,,
1,2D,HH,00:00:15,61.425061,,,,,,,
1,2D,HH,00:00:20,65.502183,82.040176,12.959219,,,,,


In [None]:
all_features.rese

In [339]:
all_features = all_features.reset_index("Datetime_sub").groupby(["Participant","type","context"], as_index = False).apply(lambda x: x.head())

In [348]:
all_features = all_features.reset_index().drop("level_0", axis = 1).\
        rename(columns = {"Datetime_sub":"Datetime","type":"Type"}).set_index(["Participant","Type","context","Datetime"])

In [332]:
x = d.groupby(["Participant","type","context",]).apply(lambda x: x.head(24))

In [333]:
x.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,ecg_heart_rate,ecg_hrv_std,skin_temperature,eda_mean,eda_no_of_peaks,eda_lf,eda_hf,eda_lf_hf_ratio
Participant,type,context,Participant,type,context,Datetime_sub,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,2D,HH,1,2D,HH,00:00:00,66.568047,60.411289,12.959742,3.248435,6.0,0.000444,0.000203,2.186737
1,2D,HH,1,2D,HH,00:00:05,68.545316,,,,,,,
1,2D,HH,1,2D,HH,00:00:10,65.104167,,12.962748,,,,,
1,2D,HH,1,2D,HH,00:00:15,61.425061,,,,,,,
1,2D,HH,1,2D,HH,00:00:20,65.502183,82.040176,12.959219,,,,,
