In [1]:
%matplotlib inline

In [2]:
import os 
import sys
import json
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
import neurokit as nk
import scipy as sc
import math 
import scipy.signal as ss
import warnings

from scipy import signal
warnings.filterwarnings('ignore')

In [3]:
path = "/mnt/shared_drive/data/brain_therapy/Brain_Therapy_MVP_Content_Pilot/FA_corrected_signals"
files = os.listdir(path)

Taking all participant signal dictionary and putting them in one dataframe

In [4]:
sensor_data = pd.DataFrame([])
for file in files:
    data = pd.read_csv(os.path.join(path,file))
    data['Participant'] = file.split("-")[3].split(".")[0]
    data['Datetime'] = pd.to_datetime(data['Datetime'])
    data.set_index(['Participant','Datetime'], inplace = True)
    sensor_data = pd.concat([sensor_data,data])

In [5]:
participants = sensor_data.index.get_level_values('Participant').unique().values
participants = sorted(participants)
sensor_data.sort_index(sort_remaining = True, inplace = True)
sensor_data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,RSP,PPG,1-SKTA,ECG,EDA
Participant,Datetime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,2019-07-10 15:22:31.000,-1.263123,0.003052,12.941162,-0.234833,0.041124
1,2019-07-10 15:22:31.004,-1.261902,0.003662,12.945435,-0.226593,0.042649
1,2019-07-10 15:22:31.008,-1.264648,0.002136,12.946045,-0.215607,0.044175
1,2019-07-10 15:22:31.012,-1.261902,0.003662,12.945435,-0.206604,0.039598
1,2019-07-10 15:22:31.016,-1.262207,0.003357,12.946045,-0.198364,0.042649


Now, let's load the log events file which contains the timestamps for the events.

In [6]:
log_events = joblib.load("/mnt/shared_drive/data/brain_therapy/log_events.pkl")
log_events.set_index("time", append = True, inplace = True)
log_events = log_events.reset_index().rename(columns = {"participant":"Participant",'type':'Type','time':'Datetime'}).\
                        set_index(['Participant','Type','Datetime'])
log_events = log_events.reset_index('Type')
log_events.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Type,context
Participant,Datetime,Unnamed: 2_level_1,Unnamed: 3_level_1
1,2019-07-10 15:25:32,2D,StartFrontBuffer
1,2019-07-10 15:27:32,2D,StopFrontBuffer
1,2019-07-10 15:27:32,2D,StartLL
1,2019-07-10 15:29:42,2D,StopLL
1,2019-07-10 15:29:42,2D,StartHH


### ECG

Loading the curated ECG peaks

In [7]:
good_peaks = joblib.load("/mnt/shared_drive/data/brain_therapy/MVP_Focused_Attention/FA_r_peaks_index.pkl")

Computing the ECG Heart Rate

In [8]:
def get_rmssd(x):
    ans = np.sqrt(np.mean([i**2 for i in x]))
    return ans

In [9]:
feature_df = pd.DataFrame([])
time_differences_df = pd.DataFrame([])
for participant in participants :
    df = pd.DataFrame([])
    data = sensor_data.loc(axis = 0)[participant,:,:,:]
    
    #Taking the clean ECG peaks of the participant
    ecg_peaks = good_peaks["BT-MVP-FA-{}".format(participant)]
    time_stamps =  data.index.get_level_values('Datetime')[ecg_peaks]
    
    #Computing for the time differences of every peak
#     time_differences1 = [(t - s) for s, t in zip(time_stamps,time_stamps[1:])]
    time_differences  = np.diff(time_stamps)/np.timedelta64(1,'ms')
#     time_differences1 = [(x.seconds*1000) + (x.microseconds/1000) for x in time_differences1]
    
#     display(time_differences1, time_differences)
    #Creating a dataframe of time_differences for easy cleaning and other future use
    df = pd.DataFrame(time_stamps[1:].ravel(), columns = ['Datetime'])
    df['time_differences'] = time_differences
    
    #Taking out time differences that is more than a minute
    df = df[df.time_differences < 60000]
    diff_mean = np.mean(df.time_differences)
    upper_threshold = diff_mean + (3*np.std(df.time_differences))   
    lower_threshold = diff_mean - (3*np.std(df.time_differences))
    df = df[(df.time_differences<upper_threshold) & (df.time_differences>lower_threshold)]
    
    #Interpolating the time differences and getting the values every two seconds
    df.set_index('Datetime', inplace = True)
    df = df.time_differences.resample('4ms').interpolate(method = 'linear')
    features = pd.DataFrame(df.resample('2s').first())
    
    features['Participant'] = participant
    
    #Solving for Heart Rate
    features['ecg_heart_rate'] = (60/features['time_differences'])*1000
    
    #Solving for ECG HRV SDNN
    features['ecg_hrv_std'] = features[['ecg_heart_rate']].rolling('30s').std()
    features['ecg_hrv_std'] = features['ecg_hrv_std'].resample('2s').first()
    
    #Solving for ECG HRV RMSSD
    features['ecg_hrv_rmssd'] = features[['time_differences']].rolling('30s', min_periods = 2).apply(lambda x: get_rmssd(x))
    features['ecg_hrv_rmssd'] = features['ecg_hrv_rmssd'].resample('2s').first()
    
    #Solving for ECG LnRMSSD
    features['ecg_hrv_lnrmssd'] = features[['ecg_hrv_rmssd']].apply(lambda x: np.log(x), axis = 1)
    
    features = features.reset_index().set_index(['Participant','Datetime'])
    time_differences_df = pd.concat([time_differences_df,df])
    feature_df = pd.concat([feature_df,features])
#     break
feature_df.drop('time_differences',axis = 1, inplace = True)

In [10]:
feature_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,ecg_heart_rate,ecg_hrv_std,ecg_hrv_rmssd,ecg_hrv_lnrmssd
Participant,Datetime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,2019-07-10 15:22:32,56.390977,,,
1,2019-07-10 15:22:34,58.689424,1.625247,1043.373394,6.950214
1,2019-07-10 15:22:36,58.843168,1.373544,1035.529128,6.942668
1,2019-07-10 15:22:38,61.939107,2.277549,1019.231144,6.926804
1,2019-07-10 15:22:40,61.907033,2.370811,1009.422396,6.917134


### PPG

In [11]:
def low_pass_filter(x, cut_off, order = 6, sampling_rate = 250):
    """
    This is a butterworth low pass filter.
    input:
        x - 1d array signal to be filtered.
        f_c_low - low pass filter cut off frequency in fraction of nyquist rate.
    return:
        filtered_x - filtered signal
    """
    nyquist = (cut_off/sampling_rate/2)
    # nth order Butterworth low pass filter at corner frequency
    b, a = ss.butter(order, nyquist, btype='low')
    filtered_x = ss.filtfilt(b, a, x)
    return filtered_x

In [12]:
def window_index_pairs(fast, slow):
    """
    computes the index of window span based on the intersection
    of 2 input signals.
    input:
        fast - 1d array high frequency signal
        slow - 1d array low frequenct signal
    return:
        window - list of span indices
    """
    positive_crossing = []
    negative_crossing = []
    for i in range(len(fast)-1):
        if fast[i] <= slow[i] and fast[i+1] > slow[i+1]:
            positive_crossing.append(i)
        if fast[i] >= slow[i] and fast[i+1] < slow[i+1] and len(positive_crossing)>0:
            negative_crossing.append(i)

    window = list(zip(positive_crossing,negative_crossing))
    return window


In [13]:
def get_peak(signal, fast_cor_f = 0.2, slow_cor_f = 0):
    """
    computes the indices of glucose peaks.
    input:
        egv - 1d array filtered egv signal.
        fast_cor_f - low pass filter higher cut-off frequency in fraction of nyquist rate.
        slow_cor_f - low pass filter lower cut-off frequency in fraction of nyquist rate.
    return:
        peak_index - 1d array of indices of glucose peaks.
        window_span - List of span indices. Inside of which a glucose peak is located.
        lpf_fast - higher cut-off frequency low pass filtered signal.
        lpf_slow - lower cut-off frequency low pass filtered signal.
    """
    signal_dc = np.mean(signal)
    signal_zero = signal - signal_dc
    lpf_fast = low_pass_filter(signal_zero, fast_cor_f)
    if slow_cor_f == 0:
        lpf_slow = np.zeros(len(signal))
    else:
        lpf_slow = low_pass_filter(signal_zero, slow_cor_f)
    window_span = window_index_pairs(lpf_fast, lpf_slow)
    peak_index =  np.array([start+np.argmax(signal_zero[start:end]) for start, end in window_span])

    return peak_index, window_span, lpf_fast + signal_dc, lpf_slow + signal_dc

Generating Heart Rate from PPG

In [17]:
heart_rate_df = pd.DataFrame([])
peaks_dict = {}
for participant in participants:
    data = sensor_data.loc(axis = 0)[participant]
    ppg_peaks,_,_,_ = get_peak(data['PPG'].values, 6, 4)
    timestamps =  data.index.get_level_values('Datetime')[ppg_peaks]
    
    #Saving the peaks fpr future use
    peaks_dict[participant] = [timestamps, ppg_peaks]
    
    #Getting the time differences every peak to peak
    time_differences  = np.diff(timestamps)/np.timedelta64(1,'ms')
    

    df = pd.DataFrame(timestamps[1:].ravel(), columns = ['time'])
    df['time_differences'] = time_differences
    
    #Removing peak distances that exceeds the threshold
    df = df[df.time_differences < 60000]
    diff_mean = np.mean(df.time_differences)
    upper_threshold = diff_mean + (3*np.std(df.time_differences))   
    lower_threshold = diff_mean - (3*np.std(df.time_differences))
    df = df[(df.time_differences<upper_threshold) & (df.time_differences>lower_threshold)]
    
    #Interpolating and taking the value every 2 seconds
    df.set_index('time', inplace = True)
    df_resampled =df['time_differences'].resample('4ms')
    interpolated = df_resampled.interpolate(method = 'linear')
    values = interpolated.resample('2s').first()
    
    df = pd.DataFrame(values)
    df['Participant'] = participant
    heart_rate_df = pd.concat([heart_rate_df,df])
heart_rate_df = heart_rate_df.reset_index().set_index(['Participant','time'])

In [18]:
# feature_df['ppg_heart_rate'] = (60/heart_rate_df['time_differences'][:-1])*1000
# feature_df.head()

Solving for PPG Amplitude

In [19]:
amplitudes_df =pd.DataFrame([])
for participant in participants:
    data = sensor_data.loc(axis = 0)[participant,:]
    ppg = data[['PPG']]
    values = data[['PPG']].values
    peak_df = pd.DataFrame(values[peaks_dict[participant][1]],columns = ['ppg_amplitude'], index = peaks_dict[participant][0])
    peak_df = peak_df.groupby(pd.Grouper(freq = '2s')).mean()
    
    #RECENTLY ADDED
    mean = np.mean(peak_df.ppg_amplitude)
    upper_threshold = mean + (3*np.std(peak_df.ppg_amplitude))   
    lower_threshold = mean - (3*np.std(peak_df.ppg_amplitude))
    peak_df = peak_df[(peak_df.ppg_amplitude<upper_threshold) & (peak_df.ppg_amplitude>lower_threshold)]
    
    
    peak_df['Participant'] = participant    
    amplitudes_df = pd.concat([amplitudes_df,peak_df])
amplitudes_df = amplitudes_df.reset_index().set_index(['Participant','Datetime'])

In [20]:
feature_df = feature_df.merge(amplitudes_df,how = 'left',left_index = True, right_index = True)
feature_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,ecg_heart_rate,ecg_hrv_std,ecg_hrv_rmssd,ecg_hrv_lnrmssd,ppg_amplitude
Participant,Datetime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,2019-07-10 15:22:32,56.390977,,,,
1,2019-07-10 15:22:34,58.689424,1.625247,1043.373394,6.950214,
1,2019-07-10 15:22:36,58.843168,1.373544,1035.529128,6.942668,0.990295
1,2019-07-10 15:22:38,61.939107,2.277549,1019.231144,6.926804,0.976715
1,2019-07-10 15:22:40,61.907033,2.370811,1009.422396,6.917134,0.99823


### Skin Temperature

In [33]:
curated_signals = joblib.load("/mnt/shared_drive/data/brain_therapy/Brain_Therapy_MVP_Content_Pilot/FA_corrected_signals_amplitude.pkl")

In [34]:
curated_signals.keys()

dict_keys(['BT-MVP-FA-0001', 'BT-MVP-FA-0002', 'BT-MVP-FA-0003', 'BT-MVP-FA-0004', 'BT-MVP-FA-0005', 'BT-MVP-FA-0006', 'BT-MVP-FA-0007', 'BT-MVP-FA-0008', 'BT-MVP-FA-0010', 'BT-MVP-FA-0011', 'BT-MVP-FA-0012', 'BT-MVP-FA-0013', 'BT-MVP-FA-0014', 'BT-MVP-FA-0015', 'BT-MVP-FA-0016', 'BT-MVP-FA-0017', 'BT-MVP-FA-0018', 'BT-MVP-FA-0019', 'BT-MVP-FA-0020', 'BT-MVP-FA-0021'])

In [35]:
p = curated_signals.keys()
curated_df = pd.DataFrame([])
for part in p:
    data = curated_signals[part]
    data['Participant'] = part.split("-")[3]
    curated_df = pd.concat([curated_df,data])
curated_df.set_index(["Participant","Datetime"], inplace = True)
curated_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,RSP,PPG,1-SKTA,ECG,EDA
Participant,Datetime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,2019-07-10 15:22:31.000,-1.263123,0.003052,12.941162,-0.234833,0.041124
1,2019-07-10 15:22:31.004,-1.261902,0.003662,12.945435,-0.226593,0.042649
1,2019-07-10 15:22:31.008,-1.264648,0.002136,12.946045,-0.215607,0.044175
1,2019-07-10 15:22:31.012,-1.261902,0.003662,12.945435,-0.206604,0.039598
1,2019-07-10 15:22:31.016,-1.262207,0.003357,12.946045,-0.198364,0.042649


In [36]:
skin_temp_df = pd.DataFrame([])
for participant in participants:
    data = curated_df.loc(axis = 0)[participant,:]
    skin_temp = data[['1-SKTA']]
    skin_x = skin_temp.index.get_level_values('Datetime')
    skin_y = skin_temp['1-SKTA'].values
    
    skt = skin_temp.reset_index(['Participant'], drop = True).groupby(pd.Grouper(freq = '2s')).mean()
    
    mean =  (skt["1-SKTA"].mean())
    upper_threshold = mean + (3*np.std(skt["1-SKTA"]))   
    lower_threshold = mean - (3*np.std(skt["1-SKTA"]))
    skt = skt[(skt["1-SKTA"]<upper_threshold) & (skt["1-SKTA"]>lower_threshold)]
    
    
    skt['Participant'] = participant
    skin_temp_df = pd.concat([skin_temp_df, skt])
skin_temp_df = skin_temp_df.reset_index().set_index(['Participant','Datetime']) 
skin_temp_df.rename(columns = {"1-SKTA":"skt_skin_temp"}, inplace = True)

In [37]:
#Removing Skin Temperature outliers
skin_temp_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,skt_skin_temp
Participant,Datetime,Unnamed: 2_level_1
1,2019-07-10 15:22:30,12.951174
1,2019-07-10 15:22:32,12.956869
1,2019-07-10 15:22:34,12.956752
1,2019-07-10 15:22:36,12.956626
1,2019-07-10 15:22:38,12.956655


In [38]:
feature_df= feature_df.merge(skin_temp_df, how = 'left', left_index = True, right_index = True)
feature_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,ecg_heart_rate,ecg_hrv_std,ecg_hrv_rmssd,ecg_hrv_lnrmssd,ppg_amplitude,skt_skin_temp
Participant,Datetime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,2019-07-10 15:22:32,56.390977,,,,,12.956869
1,2019-07-10 15:22:34,58.689424,1.625247,1043.373394,6.950214,,12.956752
1,2019-07-10 15:22:36,58.843168,1.373544,1035.529128,6.942668,0.990295,12.956626
1,2019-07-10 15:22:38,61.939107,2.277549,1019.231144,6.926804,0.976715,12.956655
1,2019-07-10 15:22:40,61.907033,2.370811,1009.422396,6.917134,0.99823,12.956718


### RSP

In [39]:
def rsp_low_pass_filter(sensor_data, sampling_frequency_hz = 250, corner_frequency_hz = 1.0):
    """
    Filters sensor signals using the second order butterworth filter
    
    Inputs:
        sensor_data (series) : sensor signals
        sampling_frequency_hz (int) : sampling frequency
        corner_frequency_hc (int) : corner frequency
    Output:
        filtered (list) : filtered sensor signal
    """
    order = 5
    nyquist = sampling_frequency_hz/ 2.0
    f_c = np.array([corner_frequency_hz, ], dtype=np.float64)
    f_c /= nyquist
    b, a = signal.butter(order, f_c, btype='low')
    filtered = signal.filtfilt(b, a, sensor_data)
    return filtered

In [40]:
rsp_time_difference = pd.DataFrame([])
for participant in participants:
    data = sensor_data.loc(axis = 0)[participant,:]
    rsp = data[['RSP']]
    rsp['RSP'] = rsp_low_pass_filter(rsp['RSP'])
    rsp_x = rsp.index.get_level_values('Datetime')
    rsp_y = rsp['RSP'].values
    features = nk.rsp_process(data['RSP'],sampling_rate = 250)
    onsets = features['RSP']['Cycles_Onsets']
    onset_times = rsp_x[onsets]
#      [((t - s).microseconds)/1000 for s, t in zip(time_stamps,time_stamps[1:])]
    rsp_peaks = [list(rsp_y).index(np.max(rsp_y[s:t])) for s, t in zip(onsets,onsets[1:])]
    
    time_stamps =  data.index.get_level_values('Datetime')[rsp_peaks]
    
    #Computing the differences between each peak
    time_differences  = np.diff(time_stamps)/np.timedelta64(1,'ms')
    
    
    df = pd.DataFrame(time_stamps[1:].ravel(), columns = ['Datetime'])
    df.set_index('Datetime', inplace = True)
    df['time_differences'] = time_differences
    
    #Removing peak distances that exceeds the threshold
    df = df[df.time_differences < 60000]
    diff_mean = np.mean(df.time_differences)
    upper_threshold = diff_mean + (3*np.std(df.time_differences))   
    lower_threshold = diff_mean - (3*np.std(df.time_differences))
    df = df[(df.time_differences<upper_threshold) & (df.time_differences>lower_threshold)]
    
    rsp_rate = pd.DataFrame(df.resample('4ms').interpolate(method = 'linear'))
    rsp_rate = rsp_rate.resample('2s').first()
    rsp_rate['rsp_rate'] = (60/rsp_rate['time_differences'][:-1])*1000
    rsp_rate['Participant'] = participant
    rsp_rate = rsp_rate.reset_index().set_index(['Participant','Datetime'])
    rsp_time_difference = pd.concat([rsp_time_difference,rsp_rate])
rsp_time_difference.drop('time_differences',axis = 1, inplace = True)

In [41]:
feature_df = feature_df.merge(rsp_time_difference, how = 'left', right_index = True, left_index = True)

In [42]:
feature_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,ecg_heart_rate,ecg_hrv_std,ecg_hrv_rmssd,ecg_hrv_lnrmssd,ppg_amplitude,skt_skin_temp,rsp_rate
Participant,Datetime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,2019-07-10 15:22:32,56.390977,,,,,12.956869,
1,2019-07-10 15:22:34,58.689424,1.625247,1043.373394,6.950214,,12.956752,
1,2019-07-10 15:22:36,58.843168,1.373544,1035.529128,6.942668,0.990295,12.956626,22.865854
1,2019-07-10 15:22:38,61.939107,2.277549,1019.231144,6.926804,0.976715,12.956655,27.712759
1,2019-07-10 15:22:40,61.907033,2.370811,1009.422396,6.917134,0.99823,12.956718,21.75583


### EDA

In [43]:
def get_eda_features(eda):
    if len(eda > 15):
        try:
            features = nk.eda_process(eda = eda, sampling_rate = 250)
        except:
            print("No peaks here")
            return 0
    else:
        return 0
    peaks = len(features['EDA']['SCR_Peaks_Indexes'])
    return peaks

In [44]:
eda_peaks_df = pd.DataFrame([])
for participant in participants:
    data = sensor_data.loc(axis = 0)[participant]
    d = data['EDA'].groupby(pd.Grouper(freq = '60s')).apply(lambda x: get_eda_features(x))
    d = pd.DataFrame(d)
    d = d.resample('4ms').interpolate(method = 'linear').resample('2s').first()
    d['Participant'] = participant
    eda_peaks_df = pd.concat([eda_peaks_df,d])
eda_peaks_df = eda_peaks_df.reset_index().set_index(['Participant','Datetime'])
eda_peaks_df.rename(columns = {"EDA":'eda_no_of_peaks'}, inplace = True)   

No peaks here
No peaks here


In [45]:
feature_df = feature_df.merge(eda_peaks_df, how = 'left', right_index = True, left_index = True)

In [46]:
feature_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,ecg_heart_rate,ecg_hrv_std,ecg_hrv_rmssd,ecg_hrv_lnrmssd,ppg_amplitude,skt_skin_temp,rsp_rate,eda_no_of_peaks
Participant,Datetime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,2019-07-10 15:22:32,56.390977,,,,,12.956869,,2.0
1,2019-07-10 15:22:34,58.689424,1.625247,1043.373394,6.950214,,12.956752,,2.0
1,2019-07-10 15:22:36,58.843168,1.373544,1035.529128,6.942668,0.990295,12.956626,22.865854,2.0
1,2019-07-10 15:22:38,61.939107,2.277549,1019.231144,6.926804,0.976715,12.956655,27.712759,2.0
1,2019-07-10 15:22:40,61.907033,2.370811,1009.422396,6.917134,0.99823,12.956718,21.75583,2.0


In [47]:
def compute_power_spectra(signal, band):
    try:
        x = np.fft.fft(signal - np.nanmean(signal))
        x = np.abs(x)
        freq = np.fft.fftfreq(len(signal), d = 0.004)
    except:
        return 0
    low, high = np.array(band)
    vals = [i for i in x if high>i>low]
    idx = [list(x).index(i) for i in vals]

    frequencies = freq[idx]
    power = np.sum((frequencies/len(signal))**2)
    return power


In [48]:
bands = {'low_power_spectrum':[0.045, 0.15],'high_power_spectrum':[0.15,0.25]}
eda_powers_df = pd.DataFrame([])
for participant in participants:
    data = curated_df.loc(axis = 0)[participant]
    eda_powers = pd.DataFrame([])
    for index, band in bands.items():
        eda_powers[index] = data['EDA'].groupby(pd.Grouper(freq = '60s')).apply(lambda x: compute_power_spectra(x, band))
    
    eda_powers = eda_powers.resample('4ms').interpolate()
    eda_powers = eda_powers.resample('2s').first()    
    eda_powers['Participant'] = participant
    eda_powers_df = pd.concat([eda_powers_df,eda_powers])


In [49]:
eda_powers_df = eda_powers_df.reset_index().set_index(["Participant","Datetime"])

In [50]:
eda_powers_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,low_power_spectrum,high_power_spectrum
Participant,Datetime,Unnamed: 2_level_1,Unnamed: 3_level_1
1,2019-07-10 15:22:00,0.0,0.0
1,2019-07-10 15:22:02,0.000535,0.001179
1,2019-07-10 15:22:04,0.001071,0.002359
1,2019-07-10 15:22:06,0.001606,0.003538
1,2019-07-10 15:22:08,0.002141,0.004718


In [51]:
feature_df = feature_df.merge(eda_powers_df, how = "left", right_index = True, left_index = True)

In [52]:
feature_df['eda_high_low_ratio'] = feature_df['low_power_spectrum']/feature_df['high_power_spectrum']

In [53]:
eda_mean_df = pd.DataFrame([])
for participant in participants:
    data = sensor_data.loc(axis = 0)[participant]
    mean_df = data[['EDA']].groupby(pd.Grouper(freq = '2s')).mean()
    mean_df['Participant'] = participant
    eda_mean_df = pd.concat([eda_mean_df, mean_df])
eda_mean_df = eda_mean_df.reset_index().set_index(['Participant','Datetime'])

In [54]:
feature_df = feature_df.merge(eda_mean_df, how = "left", right_index = True, left_index = True)

In [55]:
feature_df.rename(columns = {"low_power_spectrum":"eda_low_power_spectrum","high_power_spectrum":"eda_high_power_spectrum",
                            "EDA":"eda_mean"}, inplace = True)

In [56]:
temp1 = feature_df.copy()

## Merging with 2D or 3D labels

In [57]:
log_events.set_index('Type',append = True, inplace = True)

In [58]:
type_times_df = pd.DataFrame([])
for participant in sorted(participants):
    data = log_events.loc(axis = 0)[participant,:,:]
    features = feature_df.loc(axis = 0)[participant]
    two_d = data.loc(axis = 0)[:,:,'2D']
    two_start = two_d[two_d['context'] == 'StartFrontBuffer'].index.get_level_values('Datetime').values[0]
    two_end = two_d[two_d['context'] == 'StopEndBuffer'].index.get_level_values('Datetime').values[0]
    
    three_d = data.loc(axis = 0)[:,:,'3D']
    three_start = three_d[three_d['context'] == 'StartFrontBuffer'].index.get_level_values('Datetime').values[0]
    three_end = three_d[three_d['context'] == 'StopEndBuffer'].index.get_level_values('Datetime').values[0]
    
    time_stamps = feature_df.index.get_level_values('Datetime')
    
    f_temp = features.copy().reset_index()
    times_two = f_temp[(f_temp['Datetime']>=two_start) & (f_temp['Datetime']<=two_end)][['Datetime']]
    times_two['Type'] = '2D'
    times_three = f_temp[(f_temp['Datetime']>=three_start) & (f_temp['Datetime']<=three_end)][['Datetime']]
    times_three['Type'] = '3D'
    times_df = pd.concat([times_two,times_three])
    times_df['Participant'] = participant

    type_times_df = pd.concat([type_times_df,times_df])
type_times_df = type_times_df.set_index(['Participant','Datetime'])  

In [59]:
type_times_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Type
Participant,Datetime,Unnamed: 2_level_1
1,2019-07-10 15:25:32,2D
1,2019-07-10 15:25:34,2D
1,2019-07-10 15:25:36,2D
1,2019-07-10 15:25:38,2D
1,2019-07-10 15:25:40,2D


In [60]:
feature_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,ecg_heart_rate,ecg_hrv_std,ecg_hrv_rmssd,ecg_hrv_lnrmssd,ppg_amplitude,skt_skin_temp,rsp_rate,eda_no_of_peaks,eda_low_power_spectrum,eda_high_power_spectrum,eda_high_low_ratio,eda_mean
Participant,Datetime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,2019-07-10 15:22:32,56.390977,,,,,12.956869,,2.0,0.008564,0.018872,0.453819,0.001451
1,2019-07-10 15:22:34,58.689424,1.625247,1043.373394,6.950214,,12.956752,,2.0,0.0091,0.020051,0.453819,-0.014861
1,2019-07-10 15:22:36,58.843168,1.373544,1035.529128,6.942668,0.990295,12.956626,22.865854,2.0,0.009635,0.021231,0.453819,-0.00808
1,2019-07-10 15:22:38,61.939107,2.277549,1019.231144,6.926804,0.976715,12.956655,27.712759,2.0,0.01017,0.02241,0.453819,0.002443
1,2019-07-10 15:22:40,61.907033,2.370811,1009.422396,6.917134,0.99823,12.956718,21.75583,2.0,0.010706,0.02359,0.453819,-0.00887


In [61]:
features_complete = feature_df.merge(type_times_df, how = 'left', left_index = True, right_index = True)

In [62]:
features_complete.set_index('Type',append = True, inplace = True)

In [63]:
features_complete.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ecg_heart_rate,ecg_hrv_std,ecg_hrv_rmssd,ecg_hrv_lnrmssd,ppg_amplitude,skt_skin_temp,rsp_rate,eda_no_of_peaks,eda_low_power_spectrum,eda_high_power_spectrum,eda_high_low_ratio,eda_mean
Participant,Datetime,Type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,2019-07-10 15:22:32,,56.390977,,,,,12.956869,,2.0,0.008564,0.018872,0.453819,0.001451
1,2019-07-10 15:22:34,,58.689424,1.625247,1043.373394,6.950214,,12.956752,,2.0,0.0091,0.020051,0.453819,-0.014861
1,2019-07-10 15:22:36,,58.843168,1.373544,1035.529128,6.942668,0.990295,12.956626,22.865854,2.0,0.009635,0.021231,0.453819,-0.00808
1,2019-07-10 15:22:38,,61.939107,2.277549,1019.231144,6.926804,0.976715,12.956655,27.712759,2.0,0.01017,0.02241,0.453819,0.002443
1,2019-07-10 15:22:40,,61.907033,2.370811,1009.422396,6.917134,0.99823,12.956718,21.75583,2.0,0.010706,0.02359,0.453819,-0.00887


In [64]:
features_complete.loc(axis = 0)['0002',:,'3D'].head(60)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ecg_heart_rate,ecg_hrv_std,ecg_hrv_rmssd,ecg_hrv_lnrmssd,ppg_amplitude,skt_skin_temp,rsp_rate,eda_no_of_peaks,eda_low_power_spectrum,eda_high_power_spectrum,eda_high_low_ratio,eda_mean
Participant,Datetime,Type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2,2019-07-11 15:51:36,3D,78.879274,5.376291,794.855465,6.67816,1.700287,34.120664,14.396021,9.4,0.003058,0.008402,0.363991,17.442026
2,2019-07-11 15:51:38,3D,72.747558,5.13269,801.110792,6.685999,1.817424,34.124889,17.482326,9.366667,0.003228,0.008864,0.36415,17.498682
2,2019-07-11 15:51:40,3D,70.585918,4.526805,810.355762,6.697473,1.712494,34.124855,20.994794,9.333333,0.003398,0.009327,0.364293,17.513516
2,2019-07-11 15:51:42,3D,74.406253,4.408948,807.477381,6.693915,1.711833,34.124833,22.799075,9.3,0.003568,0.00979,0.364422,17.370066
2,2019-07-11 15:51:44,3D,74.477546,3.70116,812.855085,6.700553,1.673889,34.124858,23.069212,9.266667,0.003738,0.010253,0.364539,17.348059
2,2019-07-11 15:51:46,3D,73.298811,3.238235,807.913321,6.694455,1.590271,34.124802,21.001548,9.233333,0.003908,0.010716,0.364647,17.289112
2,2019-07-11 15:51:48,3D,78.210757,3.097391,801.684116,6.686715,1.531982,34.124849,20.271748,9.2,0.004077,0.011179,0.364745,17.195966
2,2019-07-11 15:51:50,3D,74.829932,2.473164,796.269854,6.679938,1.713765,34.124812,23.097033,9.166667,0.004247,0.011642,0.364836,17.125318
2,2019-07-11 15:51:52,3D,71.770335,2.649906,799.14125,6.683538,1.514893,34.124902,21.316135,9.133333,0.004417,0.012105,0.36492,17.122818
2,2019-07-11 15:51:54,3D,71.656577,2.741515,800.082132,6.684714,1.658783,34.124827,19.218491,9.1,0.004587,0.012568,0.364997,17.093064


In [65]:
log_events.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,context
Participant,Datetime,Type,Unnamed: 3_level_1
1,2019-07-10 15:25:32,2D,StartFrontBuffer
1,2019-07-10 15:27:32,2D,StopFrontBuffer
1,2019-07-10 15:27:32,2D,StartLL
1,2019-07-10 15:29:42,2D,StopLL
1,2019-07-10 15:29:42,2D,StartHH


In [66]:
log_events.reset_index('Type', inplace = True)

In [67]:
temp_log = log_events.drop(["Type",'context'], axis = 1)
temp_log['temp'] = np.arange(0,len(log_events['context'].values))
temp_log.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,temp
Participant,Datetime,Unnamed: 2_level_1
1,2019-07-10 15:25:32,0
1,2019-07-10 15:27:32,1
1,2019-07-10 15:27:32,2
1,2019-07-10 15:29:42,3
1,2019-07-10 15:29:42,4


In [68]:
p_df = pd.DataFrame([])
for participant in participants:
    data = temp_log.loc(axis = 0)[participant]
    df = pd.DataFrame(data.resample('1s').sum())
    
    df['Participant'] = participant
    p_df = pd.concat([p_df,df])
p_df = p_df.reset_index().set_index(['Participant','Datetime'])

In [69]:
p_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,temp
Participant,Datetime,Unnamed: 2_level_1
1,2019-07-10 15:25:32,0
1,2019-07-10 15:25:33,0
1,2019-07-10 15:25:34,0
1,2019-07-10 15:25:35,0
1,2019-07-10 15:25:36,0


In [70]:
p_df = p_df.merge(log_events, how = 'left', right_index = True, left_index = True)

In [71]:
p_df['context'] = p_df['context'].fillna(method = 'ffill')
p_df['Type'] = p_df['Type'].fillna(method = 'ffill')
p_df.drop('temp', axis = 1, inplace = True)
# p_df.set_index("Type", append = True, inplace = True)

In [72]:
f = feature_df.merge(right = p_df, how = 'left', left_index= True, right_index = True)

In [73]:
f['context'] = f['context'].fillna(method = 'ffill')

In [74]:
f.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,ecg_heart_rate,ecg_hrv_std,ecg_hrv_rmssd,ecg_hrv_lnrmssd,ppg_amplitude,skt_skin_temp,rsp_rate,eda_no_of_peaks,eda_low_power_spectrum,eda_high_power_spectrum,eda_high_low_ratio,eda_mean,Type,context
Participant,Datetime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,2019-07-10 15:22:32,56.390977,,,,,12.956869,,2.0,0.008564,0.018872,0.453819,0.001451,,
1,2019-07-10 15:22:34,58.689424,1.625247,1043.373394,6.950214,,12.956752,,2.0,0.0091,0.020051,0.453819,-0.014861,,
1,2019-07-10 15:22:36,58.843168,1.373544,1035.529128,6.942668,0.990295,12.956626,22.865854,2.0,0.009635,0.021231,0.453819,-0.00808,,
1,2019-07-10 15:22:38,61.939107,2.277549,1019.231144,6.926804,0.976715,12.956655,27.712759,2.0,0.01017,0.02241,0.453819,0.002443,,
1,2019-07-10 15:22:40,61.907033,2.370811,1009.422396,6.917134,0.99823,12.956718,21.75583,2.0,0.010706,0.02359,0.453819,-0.00887,,


In [75]:
# f.reset_index('context', inplace = True)
s = [str(x).replace('Start','') for x in f['context'].values]
s = [str(x).replace('Stop','') for x in s]
s = [str(x).replace('nan','Garbage') for x in s]
f['context'] = s

In [76]:
f= f[f['context']!='Garbage']
f = f[f['context']!='FrontBuffer']
f = f[f['context']!='EndBuffer']


In [77]:
f.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,ecg_heart_rate,ecg_hrv_std,ecg_hrv_rmssd,ecg_hrv_lnrmssd,ppg_amplitude,skt_skin_temp,rsp_rate,eda_no_of_peaks,eda_low_power_spectrum,eda_high_power_spectrum,eda_high_low_ratio,eda_mean,Type,context
Participant,Datetime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,2019-07-10 15:27:32,61.365192,4.851276,945.688088,6.851913,0.998688,12.957098,15.614582,9.133333,0.073592,0.102356,0.718984,4.610121,2D,LL
1,2019-07-10 15:27:34,76.46901,5.652729,941.43284,6.847403,0.501251,12.95718,16.358931,9.266667,0.072756,0.101977,0.71345,4.53827,2D,LL
1,2019-07-10 15:27:36,80.674995,7.013103,928.667268,6.833751,0.99884,12.957137,16.644474,9.4,0.071919,0.101598,0.707875,4.479658,2D,LL
1,2019-07-10 15:27:38,74.489475,7.375541,923.343304,6.828001,0.993958,12.957051,17.281312,9.533333,0.071083,0.10122,0.702259,4.43329,2D,LL
1,2019-07-10 15:27:40,60.190684,6.974201,916.864589,6.82096,0.998077,12.961814,18.949808,9.666667,0.070246,0.100841,0.6966,4.371519,2D,LL


In [78]:
sensor = joblib.load('/mnt/shared_drive/data/brain_therapy/MVP_Focused_Attention/signal_with_events.pkl')

In [79]:
sensor.loc(axis = 0)['0002',:,:]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ecg_heart_rate,ecg_hrv_std,ecg_hrv_rmssd,ecg_hrv_lnrmssd,ppg_heart_rate,ppg_amplitude,skt_skin_temp,rsp_rate,eda_no_of_peaks,eda_low_power_spectrum,high_power_spectrum,eda_high_low_ratio,eda_mean,context
Participant,Datetime,Type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2,2019-07-11 15:53:35,3D,81.383972,4.047955,749.851061,6.619875,80.675640,1.417389,34.284050,22.282901,10.566667,0.040455,0.063035,0.641789,15.934644,StartLH
2,2019-07-11 15:53:36,3D,82.225693,4.078326,748.810704,6.618486,82.325623,1.337992,34.311979,17.811997,10.600000,0.038074,0.060208,0.632380,15.889694,StartLH
2,2019-07-11 15:53:38,3D,79.013030,3.858629,752.444492,6.623327,79.413477,1.393127,34.320702,13.957037,10.633333,0.035693,0.057380,0.622043,15.851868,StartLH
2,2019-07-11 15:53:40,3D,79.365079,3.793248,751.094218,6.621531,79.607658,1.431732,34.320713,13.037683,10.666667,0.033312,0.054553,0.610635,15.877506,StartLH
2,2019-07-11 15:53:42,3D,84.478486,3.633409,744.854805,6.613189,83.271306,1.204224,34.325649,13.623101,10.700000,0.030931,0.051726,0.597980,16.175189,StartLH
2,2019-07-11 15:53:44,3D,87.080537,3.977382,741.238315,6.608322,87.068035,1.219279,34.330568,15.665974,10.733333,0.028550,0.048898,0.583861,16.265302,StartLH
2,2019-07-11 15:53:46,3D,80.213904,3.707774,744.735381,6.613029,81.265292,1.358744,34.340847,21.689618,10.766667,0.026169,0.046071,0.568010,16.110776,StartLH
2,2019-07-11 15:53:48,3D,85.290964,3.721202,744.652665,6.612918,84.552475,1.112569,34.358229,19.548710,10.800000,0.023788,0.043244,0.550086,16.120523,StartLH
2,2019-07-11 15:53:50,3D,87.021382,3.624776,736.968764,6.602546,90.004091,1.209513,34.369812,17.136357,10.833333,0.021407,0.040417,0.529654,16.747101,StartLH
2,2019-07-11 15:53:52,3D,78.777654,3.042960,733.333823,6.597601,80.200571,1.493073,34.371587,14.565935,10.866667,0.019026,0.037589,0.506148,16.550702,StartLH


#### Replace Time with numbers

In [80]:
features = f.copy()

In [81]:
features = features.reset_index().set_index(["Participant","Type",'context',"Datetime"])

In [82]:
features.head(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ecg_heart_rate,ecg_hrv_std,ecg_hrv_rmssd,ecg_hrv_lnrmssd,ppg_amplitude,skt_skin_temp,rsp_rate,eda_no_of_peaks,eda_low_power_spectrum,eda_high_power_spectrum,eda_high_low_ratio,eda_mean
Participant,Type,context,Datetime,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,2D,LL,2019-07-10 15:27:32,61.365192,4.851276,945.688088,6.851913,0.998688,12.957098,15.614582,9.133333,0.073592,0.102356,0.718984,4.610121
1,2D,LL,2019-07-10 15:27:34,76.46901,5.652729,941.43284,6.847403,0.501251,12.95718,16.358931,9.266667,0.072756,0.101977,0.71345,4.53827


In [83]:
sessions = ["2D","3D"]
events = features.index.get_level_values('context').unique()

In [84]:
features.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ecg_heart_rate,ecg_hrv_std,ecg_hrv_rmssd,ecg_hrv_lnrmssd,ppg_amplitude,skt_skin_temp,rsp_rate,eda_no_of_peaks,eda_low_power_spectrum,eda_high_power_spectrum,eda_high_low_ratio,eda_mean
Participant,Type,context,Datetime,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,2D,LL,2019-07-10 15:27:32,61.365192,4.851276,945.688088,6.851913,0.998688,12.957098,15.614582,9.133333,0.073592,0.102356,0.718984,4.610121
1,2D,LL,2019-07-10 15:27:34,76.46901,5.652729,941.43284,6.847403,0.501251,12.95718,16.358931,9.266667,0.072756,0.101977,0.71345,4.53827
1,2D,LL,2019-07-10 15:27:36,80.674995,7.013103,928.667268,6.833751,0.99884,12.957137,16.644474,9.4,0.071919,0.101598,0.707875,4.479658
1,2D,LL,2019-07-10 15:27:38,74.489475,7.375541,923.343304,6.828001,0.993958,12.957051,17.281312,9.533333,0.071083,0.10122,0.702259,4.43329
1,2D,LL,2019-07-10 15:27:40,60.190684,6.974201,916.864589,6.82096,0.998077,12.961814,18.949808,9.666667,0.070246,0.100841,0.6966,4.371519


In [85]:
t3 = pd.DataFrame([])
for participant in participants:
    t2 = pd.DataFrame([])
    counter = 1
    sessions = features.loc(axis = 0)[participant,:,:,:].index.get_level_values('Type').unique()
    for k, session in enumerate(sessions):
        t1 = pd.DataFrame([])
        events = features.loc(axis = 0)[participant,session,:,:].index.get_level_values('context').unique()
        for j,event in enumerate(events):
            d = features.loc(axis = 0)[participant,session,event,:]
            d['feature_time'] = np.arange(0,len(d['ecg_heart_rate'].values))
            d['event_order'] = j+1
            d['event_session_order'] = counter
            counter+=1
            t1 = pd.concat([t1,d])
        t1['session_order'] = k+1
#         print(k)
        t2 = pd.concat([t2,t1])
#         break
    t3 = pd.concat([t3,t2])
#     break

In [87]:
t3 = t3[t3['feature_time']<65]

In [88]:
t3.groupby(level = [0,1,2]).count()['ecg_heart_rate'].unique()

array([65])

In [89]:
joblib.dump(t3,'/mnt/shared_drive/data/brain_therapy/features_v2.pkl')

['/mnt/shared_drive/data/brain_therapy/features_v2.pkl']

In [None]:
t33 = t3.reset_index()
t33.to_csv( "/mnt/bkt_prd_dsv_brain_therapy_raw/Features/Focused_Attention/features_v2.csv", index = False)

In [None]:
features_complete = joblib.load('/mnt/shared_drive/data/brain_therapy/features.pkl')