In [None]:
import numpy as np
import numpy.fft as fft
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import random
from collections import defaultdict
from numpy import hamming

In [None]:
PATH = '../WESAD/'
SUBJECTS = ['2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '13', '14', '15', '16', '17']
CHEST_SIGNALS = ['ECG', 'EMG', 'EDA', 'Resp', 'Temp', 'ACC']
CHEST_SAMPLING_RATE = 700

In [None]:
def read_subject_data(subject) :
    path = PATH + 'S' + subject + '/S' + subject + '.pkl'
    subject = pd.read_pickle(path)
    
    return subject

In [None]:
def visualize_raw(signal, duration, sampling_rate, title) :
    n = duration * sampling_rate
    l = len(signal)
    
    s = random.randint(0, l - n)
    df = pd.DataFrame(columns=['x', 'y'])
    df['x'] = np.arange(n)
    df['y'] = signal[s:s+n]
    
    plt.figure(figsize=(20, 5))
    sns.scatterplot(x = 'x', y = 'y', data = df, s = 3, color='0').set(title=title)

In [None]:
# Visualize the different chest signals
SAMPLING_RATE = 700
DURATION = 30

subject = random.choice(SUBJECTS)
subject_data = read_subject_data(subject)

chest_signals = subject_data['signal']['chest']

for signal_type in chest_signals :
    if signal_type == 'ACC' :
        continue
    
    signal = chest_signals[signal_type]
    visualize_raw(signal, DURATION, SAMPLING_RATE, signal_type)

In [434]:
# https://imotions.com/blog/eda/

# https://www.scitepress.org/Papers/2021/102446/102446.pdf
# https://sci-hub.ee/10.1016/j.cmpb.2020.105482
WINDOW_LEN = 20
OVERLAP = 0.75
NUM_FEATURES = 10

In [435]:
# Identify the continuous intervals for each label
def find_intervals(labels) :
    intervals = []

    l = len(labels)
    i = 0
    label = labels[0]

    for j in range(l):
        if label != labels[j]:
            intervals.append({
                'label' : label, 
                'beg' : i,
                'end' : j
                })
            i = j
            label = labels[j]

    intervals.append({
        'label' : label, 
        'beg' : i,
        'end' : l
    })

    return intervals
    

In [436]:
def extract_fft_features(signal, num_features) :
    window = hamming(len(signal))
    signal *= window
    coeffs = fft.fft(signal)
    l = len(coeffs)
    freqs = fft.fftfreq(l)
    
    # Discard the negative elems
    l //= 2
    amps = np.abs(coeffs[0:l])
    freqs = np.abs(freqs[0:l])
    
    # Sort descending w.r.t amp   
    p = amps.argsort()[::-1]
    freqs = freqs[p]
    amps = amps[p]
    
    features = [[amps[i], freqs[i]] for i in range(num_features)]    
    return np.array(features)

In [437]:
def normalize_fft_features(train_features, test_features) :
#     feature_mean = np.mean(train_features, axis=0)
    feature_std = np.std(train_features, axis=0)
#     train_features -= feature_mean
#     test_features -= feature_mean
    train_features = np.divide(train_features, feature_std, out=np.zeros_like(train_features), where=feature_std!=0)
    test_features = np.divide(test_features, feature_std, out=np.zeros_like(test_features), where=feature_std!=0)
    
    return train_features, test_features

In [438]:
TRANSIENT = 0
BASELINE = 1
STRESS = 2
AMUSEMENT = 3
MEDITATION = 4
IGNORE = 5

def extract_signal_features(signal, intervals, sampling_rate, window_len = WINDOW_LEN, overlap = OVERLAP, num_features = NUM_FEATURES) :
    segment_size = sampling_rate * window_len
    signal_features = {
        BASELINE : [],
        STRESS : [],
        AMUSEMENT : [],
        MEDITATION : []
    }
    
    baseline_av = 0
    
    for interval in intervals :
        label = interval['label']
        beg = interval['beg']
        end = interval['end']
        
        signal_of_interest = signal[beg:end]  
        
        if label >= IGNORE or label == TRANSIENT:
            baseline_av = (np.mean(signal_of_interest) + baseline_av)/2
            continue      
            
        if label == BASELINE :
            baseline_av = (np.mean(signal_of_interest) + baseline_av)/2
        
        signal_of_interest -= baseline_av
        
        l = end - beg        
        while l >= segment_size:
            segment = signal_of_interest[int(l - segment_size) : l]
            l -= int((1 - overlap) * segment_size)
            
            segment_features = extract_fft_features(segment, num_features)            
            signal_features[label].append(segment_features)
        
    return signal_features

In [439]:
def get_chest_signal_dataset(subjects, signal_type) :
    dataset = defaultdict(list)
    for subject in subjects :
        subject_data = read_subject_data(subject)
        intervals = find_intervals(subject_data['label'])
        
        signal = np.array(subject_data['signal']['chest'][signal_type]).flatten()
        
        signal_features = extract_signal_features(signal, intervals, CHEST_SAMPLING_RATE)
        
        for label, feature_vecs in signal_features.items() :
            dataset[label] += list(feature_vecs)
            
    features_dataset = [] 
    label_dataset = []
    
    for label, feature_vecs in dataset.items() :
        for features in feature_vecs :
            features_dataset.append(features)
            label_dataset.append(label)

    return np.array(features_dataset), np.array(label_dataset)

In [440]:
# Get Joint test-train  Datasets combining all signals

def get_agg_chest_signal_dataset(subjects) :
    agg_feature_dataset = []
    agg_labels_dataset = []
    
    for subject in subjects :
        subject_data = read_subject_data(subject)
        intervals = find_intervals(subject_data['label'])
        
        subject_agg = {
            BASELINE : [],
            STRESS : [],
            AMUSEMENT : [],
            MEDITATION : []
        }
        
        for signal_type in CHEST_SIGNALS :
            if signal_type == 'ACC' :
                continue
                
            signal = np.array(subject_data['signal']['chest'][signal_type]).flatten()
            signal_features = extract_signal_features(signal, intervals, CHEST_SAMPLING_RATE)
            
            for label, features in signal_features.items() :
                subject_agg[label].append(features)
                
        for label in subject_agg :
            subject_agg[label] = np.stack(subject_agg[label], axis = 1)
            
        for label, feature_aggs in subject_agg.items() :
            for feature_agg in feature_aggs :
                agg_feature_dataset.append(feature_agg)
                agg_labels_dataset.append(label)

    return agg_feature_dataset, agg_labels_dataset

In [441]:
# Visualize Datasets
def visualize_dataset(features, labels) :
    features = np.array(features)
    amps = np.log10(features[:, :, 0].flatten())
    freqs  = features[:, :, 1].flatten()
    labels = np.repeat(np.array(labels), NUM_FEATURES)
    
    df = pd.DataFrame()
    df['Amp'] = amps
    df['Freq'] = freqs
    df['Label'] = labels
    
    for label, group in df.groupby('Label') :
        plt.figure(figsize=(20, 20))
        sns.scatterplot(x = 'Freq', y = 'Amp', data = group, palette='bright').set(title=label)
    
    plt.figure(figsize=(20, 20))
    sns.scatterplot(x = 'Freq', y = 'Amp', hue = 'Label', style='Label', data = df, palette='bright')

In [None]:
# Get ECG Test-Train Datasets

TRAIN_TEST_RATIO = 0.1

np.random.shuffle(SUBJECTS)
num_subjects = len(SUBJECTS)
k = int ((1 - TRAIN_TEST_RATIO) * num_subjects)

train_features, train_labels = get_chest_signal_dataset(SUBJECTS[0 : k], 'EDA')
test_features, test_labels = get_chest_signal_dataset(SUBJECTS[k : num_subjects], 'EDA')

train_features, test_features = normalize_fft_features(train_features, test_features)

In [493]:
# Get ECG Test-Train Datasets

TRAIN_TEST_RATIO = 0.1

np.random.shuffle(SUBJECTS)
num_subjects = len(SUBJECTS)
k = int ((1 - TRAIN_TEST_RATIO) * num_subjects)

train_features, train_labels = get_agg_chest_signal_dataset(SUBJECTS[0 : k])
test_features, test_labels = get_agg_chest_signal_dataset(SUBJECTS[k : num_subjects])
train_features, test_features = normalize_fft_features(train_features, test_features)

In [None]:
visualize_dataset(train_features, train_labels)

In [None]:
visualize_dataset(test_features, test_labels)

In [387]:
def encode_labels(labels) :
    encoder = {
        1 : [1, 0, 0, 0],
        2 : [0, 1, 0, 0],
        3 : [0, 0, 1, 0],
        4 : [0, 0, 0, 1]
    }
    
    return np.array([np.array(encoder[l]) for l in labels])

In [494]:
train_labels = encode_labels(train_labels)
test_labels = encode_labels(test_labels)

In [538]:
model = keras.Sequential([
    keras.layers.InputLayer(input_shape=(5, 10, 2)),
    keras.layers.Dropout(0.1),
    keras.layers.Conv2D(3, kernel_size=(2, 2), activation='swish'),
    keras.layers.AveragePooling2D(pool_size=(2, 2)),
    keras.layers.Dense(units=15,  activation='swish'),
    keras.layers.Dense(units=10,  activation='swish'),
    keras.layers.Conv2D(2, kernel_size=(1, 4), activation='swish'),
    keras.layers.Flatten(),
    keras.layers.Dense(units=15,  activation='swish'),
    keras.layers.Dense(units=8,  activation='swish'),
    keras.layers.Dense(units=6,  activation='swish'),
    keras.layers.Dense(units=6,  activation='swish'),
    keras.layers.Dense(units=4)
])

In [539]:
model.summary()

Model: "sequential_96"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dropout_122 (Dropout)        (None, 5, 10, 2)          0         
_________________________________________________________________
conv2d_139 (Conv2D)          (None, 4, 9, 3)           27        
_________________________________________________________________
average_pooling2d_45 (Averag (None, 2, 4, 3)           0         
_________________________________________________________________
dense_471 (Dense)            (None, 2, 4, 15)          60        
_________________________________________________________________
dense_472 (Dense)            (None, 2, 4, 10)          160       
_________________________________________________________________
conv2d_140 (Conv2D)          (None, 2, 1, 2)           82        
_________________________________________________________________
flatten_96 (Flatten)         (None, 4)               

In [540]:
model.compile(optimizer='adam', loss=tf.losses.CategoricalCrossentropy(from_logits=True), metrics=['accuracy'])

In [464]:
import datetime
! rm -rf ./logs/
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

In [423]:
%reload_ext tensorboard

In [424]:
%tensorboard --logdir logs/fit

In [447]:
print(np.shape(train_features), np.shape(train_labels), np.shape(test_features), np.shape(test_labels))

(7570, 5, 10, 2) (7570, 4) (1172, 5, 10, 2) (1172, 4)


In [552]:
# i = 0
# while True :
model.fit(train_features, train_labels, epochs=20, shuffle = True, verbose=1,
          validation_data=(test_features,  test_labels), 
          callbacks=[tensorboard_callback])
#     i += 1

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fe57c267220>

In [553]:
model.evaluate(test_features,  test_labels, verbose=2)

37/37 - 0s - loss: 0.4320 - accuracy: 0.8794


[0.43198782205581665, 0.8794143199920654]

In [None]:
# import pickle

train_features_file = open('train_features_file', 'wb')
test_features_file = open('test_features_file', 'wb')
train_labels_file = open('train_labels_file', 'wb')
test_labels_file = open('test_labels_file', 'wb')

pickle.dump(train_features, train_features_file)
pickle.dump(test_features, test_features_file)
pickle.dump(train_labels, train_labels_file)
pickle.dump(test_labels, test_labels_file)

train_features_file.close()
test_features_file.close()
train_labels_file.close()
test_labels_file.close()

In [None]:
print(len(train_features), len(test_features))

In [None]:
t1 = np.array([[[1, 9], [3, 4], [10, 12]], [[5, 6], [7, 8], [13, 11]]])
t2 = np.array([[2, 3], [3, 4]])

t3 = np.max(t1, axis=0)
print(t3)

In [None]:
np.stack([t1, t1], axis=1)

In [None]:
np.mean(t1, axis = 0)

In [None]:
np.std(t1, axis = 0)

In [None]:
d[1]

In [None]:
t1[:, :, 0].flatten()

In [None]:
a, b = zip(*train_dataset.items())

In [None]:
len(train_features)

In [None]:
SUBJECTS

In [None]:
train_features[10]

In [None]:
num_subjects