# QRS Detector

This notebook implements the actual QRS Detector. In this notebook there are two networks present, a fully conected feed forward network and a recurrent network, with the aim of comparing the two models and verifying which is more indicated for the type of data we have

In [0]:
# importing required libraries
import numpy as np
import tensorflow as tf
from tensorflow.python.keras import layers
import matplotlib.pyplot as plt
import pickle as pkl
import random
import math

In [0]:
from google.colab import drive
#drive.mount('/content/drive/')
from glob import glob

In [16]:
!ls drive/'My Drive'/'Colab Notebooks'/processed_data
main_path = 'drive/My Drive/Colab Notebooks/processed_data/'
weights_path = 'drive/My Drive/Colab Notebooks/network_weights/'

Test  Training


## Sampling Function

The objective is to retrieve a random cropping of both the signal and the target from all files specified for the training/validation set. This helps us have a more well-versed dataset for training and validating our models.

In [0]:
# auxiliary function
# extracts from a long np-array (2-rows) a (2-rows)-random segment with a fixed length (seqL*ninputs)
def selectFrom1ecg(ecgBdata, seqL, ninputs, feed_forward = True, training = True, testing = False):
    """
    x: An array with vairous files, channels and examples
    seqL:  number of timesteps to be used in recurrent nn
    ninput : is number of inputs in each timestep
    file_indexes: A list of the file indexes for training or validation set
    """
    segmentL  = seqL * ninputs
    numChan = 3
    
    if(training):
        random_file_idx = random.randint(0, 57)
    else:
        random_file_idx = random.randint(0, 9)
    
    if(testing):
        random_file_idx = random.randint(0, 48)
        
    inpOutSegment = tf.random_crop(ecgBdata[random_file_idx],[numChan, segmentL])
    
    if(feed_forward):
        channelII = inpOutSegment[0,:]
        channelV1 = inpOutSegment[1,:]
        target = inpOutSegment[2,:]
        inputs = tf.concat((channelII, channelV1), axis = -1)
        return inputs,target
    else:
        transposed = tf.transpose(inpOutSegment)
        
        inputs = transposed[:, :-1]
        target = transposed[:, -1]
        inputs = tf.reshape(inputs, (seqL, -1))
        
        # We need to re-transpose the target to turn int back into a one-row vector
        target = tf.transpose(target)
        
        return inputs, target

## Dataset array creation

In this section we create the main dataset array containing all the training files. Each file has two input signals (channelII and channelV1) and a target signal.

In [8]:
dataset_array = []

files_not_to_read = [4,17,35,44,57,72,74]
index_counter = 0
for i in range(1, 76):
    
    if i not in files_not_to_read:
        file_path = f"Training/I{i:02}"
        file_path = main_path + file_path
        print(file_path)
        file_data = pkl.load(open(file_path, "rb"))        
        index_counter = index_counter + 1
        
        info = [file_data["channelII"], file_data["channelV1"], file_data["label"]]
        info = np.array(info)
        info = info.astype(np.float32)
        dataset_array.append(info)

ecgs_array = np.array(dataset_array)

drive/My Drive/Colab Notebooks/processed_data/Training/I01
drive/My Drive/Colab Notebooks/processed_data/Training/I02
drive/My Drive/Colab Notebooks/processed_data/Training/I03
drive/My Drive/Colab Notebooks/processed_data/Training/I05
drive/My Drive/Colab Notebooks/processed_data/Training/I06
drive/My Drive/Colab Notebooks/processed_data/Training/I07
drive/My Drive/Colab Notebooks/processed_data/Training/I08
drive/My Drive/Colab Notebooks/processed_data/Training/I09
drive/My Drive/Colab Notebooks/processed_data/Training/I10
drive/My Drive/Colab Notebooks/processed_data/Training/I11
drive/My Drive/Colab Notebooks/processed_data/Training/I12
drive/My Drive/Colab Notebooks/processed_data/Training/I13
drive/My Drive/Colab Notebooks/processed_data/Training/I14
drive/My Drive/Colab Notebooks/processed_data/Training/I15
drive/My Drive/Colab Notebooks/processed_data/Training/I16
drive/My Drive/Colab Notebooks/processed_data/Training/I18
drive/My Drive/Colab Notebooks/processed_data/Training/I

In [9]:
#Just for testing
print(ecgs_array.shape)
print(ecgs_array[0].shape)
np.transpose(ecgs_array[0])[:, :-1].reshape(5400, -1).reshape(5400*240, )
print(np.transpose(ecgs_array[0])[:, :-1].reshape(5400, -1).reshape(5400*240, ).shape)

(68, 3, 648000)
(3, 648000)
(1296000,)


## Test dataset array creation

In this section we create the test dataset array containing all the test files. Each file has two input signals ("MLII","V1", which correspond to signals "II" and "V1" in the training dataset) and a target signal.

In [11]:
test_dataset_array = []

files_not_to_read = [110, 120, 204, 206, 211, 216, 218, 229]
files_not_to_read = files_not_to_read + list(range(125,200)) + list(range(224,228))

index_counter = 0
for i in range(100, 235):
    
    if i not in files_not_to_read:
        file_path = f"Test/{i}"
        file_path = main_path + file_path
        print(file_path)
        file_data = pkl.load(open(file_path, "rb"))        
        index_counter = index_counter + 1
        
        info = [file_data["channelII"], file_data["channelV1"], file_data["label"]]
        info = np.array(info)
        info = info.astype(np.float32)
        test_dataset_array.append(info)

test_ecgs_array = np.array(test_dataset_array)

# Test dataset length is not a multiple of 2*ninputs*seqL(rnn) which causes problems when we want to
# transpose the data as before, so we discard the last 2000 points 
# The test dataset will then have the same length as the training dataset
# Is this ok?
lenRecords = test_ecgs_array.shape[2]
print(lenRecords)
new_length = int(math.floor(lenRecords/5400))*5400
print(new_length)
test_ecgs_array = test_ecgs_array[:,:,:new_length]

drive/My Drive/Colab Notebooks/processed_data/Test/100
drive/My Drive/Colab Notebooks/processed_data/Test/101
drive/My Drive/Colab Notebooks/processed_data/Test/102
drive/My Drive/Colab Notebooks/processed_data/Test/103
drive/My Drive/Colab Notebooks/processed_data/Test/104
drive/My Drive/Colab Notebooks/processed_data/Test/105
drive/My Drive/Colab Notebooks/processed_data/Test/106
drive/My Drive/Colab Notebooks/processed_data/Test/107
drive/My Drive/Colab Notebooks/processed_data/Test/108
drive/My Drive/Colab Notebooks/processed_data/Test/109
drive/My Drive/Colab Notebooks/processed_data/Test/111
drive/My Drive/Colab Notebooks/processed_data/Test/112
drive/My Drive/Colab Notebooks/processed_data/Test/113
drive/My Drive/Colab Notebooks/processed_data/Test/114
drive/My Drive/Colab Notebooks/processed_data/Test/115
drive/My Drive/Colab Notebooks/processed_data/Test/116
drive/My Drive/Colab Notebooks/processed_data/Test/117
drive/My Drive/Colab Notebooks/processed_data/Test/118
drive/My D

In [12]:
#Just for testing 
print(test_ecgs_array.shape)
print(test_ecgs_array[0].shape)
# Reshaping would not work if the length of the records was not a multiple of 2*ninputs*seqL(rnn)
np.transpose(test_ecgs_array[0])[:, :-1].reshape(5400, -1).reshape(5400*240, )

(48, 3, 648000)
(3, 648000)


array([-0.01313889,  0.00847222, -0.012125  , ..., -0.03984722,
       -0.07179166, -0.05447222], dtype=float32)

In [13]:
# number of examples
N = ecgs_array.shape[2]

# Sampling frequency
fs = 360

# For each timestep we give ninputs
ninputs = int(0.2*fs)

# Sequence length (number of timesteps)
seqL = int((5 * 360)/ninputs) # Using a 5 second window sequence

print('ninputs = ',ninputs)
print('seqL = ',seqL)
print('ninputs*seqL = ',ninputs*seqL)

# training data for feed forward network
# Create efficient training sequences
trainData = tf.data.Dataset.from_tensors(ecgs_array[:len(ecgs_array) - 10, :, :])
trainData = trainData.map(lambda x:  selectFrom1ecg(x, seqL, ninputs, training = True))
trainData = trainData.repeat()  # Repeat the input indefinitely.
batchSize = 8
trainData = trainData.batch(batchSize)

valData = tf.data.Dataset.from_tensors(ecgs_array[len(ecgs_array) - 10:, :, :])
valData = valData.map(lambda x:  selectFrom1ecg(x, seqL, ninputs, training = False))
valData = valData.repeat()  # Repeat the input indefinitely.
batchSize = 8
valData = valData.batch(batchSize)

# test data for feed forward network (here we don't need to leave out the 10 files)
testData = tf.data.Dataset.from_tensors(test_ecgs_array)
testData = testData.map(lambda x:  selectFrom1ecg(x, seqL, ninputs, training = False, testing = True))
testData = testData.repeat()  # Repeat the input indefinitely.
batchSize = 8
testData = testData.batch(batchSize)


# Creating Training and Validation datasets with the correct shape for a Recurrent neural network
# The sequence length for the recurrent neural network can be about 3 times greater than for the feed
# forward neural net
seql_rnn = 3 * seqL

print('ninputs*seqL(rnn) = ',ninputs*seql_rnn)

trainData_rnn = tf.data.Dataset.from_tensors(ecgs_array[:len(ecgs_array) - 10, :, :])
trainData_rnn = trainData_rnn.map(lambda x:  selectFrom1ecg(x, seql_rnn, ninputs, training = True, feed_forward = False))
trainData_rnn = trainData_rnn.repeat()  # Repeat the input indefinitely.
batchSize_rnn = 8
trainData_rnn = trainData_rnn.batch(batchSize_rnn)

valData_rnn = tf.data.Dataset.from_tensors(ecgs_array[len(ecgs_array) - 10:, :, :])
valData_rnn = valData_rnn.map(lambda x:  selectFrom1ecg(x, seql_rnn, ninputs, training = False, feed_forward = False))
valData_rnn = valData_rnn.repeat()  # Repeat the input indefinitely.
batchSize_rnn = 8
valData_rnn = valData_rnn.batch(batchSize_rnn)


# test data for feed forward network (here we don't need to leave out the 10 files)
testData_rnn = tf.data.Dataset.from_tensors(test_ecgs_array)
testData_rnn = testData_rnn.map(lambda x:  selectFrom1ecg(x, seql_rnn, ninputs, training = False, testing = True))
testData_rnn = testData_rnn.repeat()  # Repeat the input indefinitely.
batchSize = 8
testData_rnn = testData_rnn.batch(batchSize)

ninputs =  72
seqL =  25
ninputs*seqL =  1800
Instructions for updating:
Colocations handled automatically by placer.
ninputs*seqL(rnn) =  5400


In [14]:
test_ecgs_array.shape

(48, 3, 648000)

## Recurrent neural network

In [15]:
numLstmUnits = 320

rnnModel = tf.keras.Sequential()
rnnModel.add(layers.CuDNNLSTM(units=numLstmUnits, return_sequences=True, input_shape = (seql_rnn, 2 * ninputs)))         
rnnModel.add(layers.CuDNNLSTM(units=numLstmUnits, return_sequences=True))
rnnModel.add(layers.CuDNNLSTM(units=numLstmUnits, return_sequences=True))         
rnnModel.add(layers.CuDNNLSTM(units=numLstmUnits, return_sequences=True))
rnnModel.add(layers.TimeDistributed(layers.Dense(ninputs)))
rnnModel.add(layers.Reshape((seql_rnn * ninputs, )))

rnnModel.compile(optimizer=tf.train.RMSPropOptimizer(0.001), loss='MSE',metrics=['mae'])
rnnModel.fit(trainData_rnn,  epochs=10, steps_per_epoch=1000, validation_data=valData_rnn, validation_steps=100)

Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.cast instead.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f721a075240>

In [0]:
# Saving the network weights
weights_file_path = weights_path + 'RNN_weights.h5'
rnnModel.save_weights(weights_file_path)

In [18]:
out = rnnModel.evaluate(testData_rnn, steps=100)
print('test mean square error (loss): ', out[0], '  test absolute error: ', out[1])
iterator = trainData.make_initializable_iterator()
next_element = iterator.get_next()
#with tf.Session() as sess:
#    sess.run(iterator.initializer)
#    inp, targ = sess.run(next_element)

#output = rnnModel.predict(inp)

#t = range(seql_rnn*ninputs)
#plt.plot(t,inp[0,:],'k',t,targ[0]-2,'r',t,output[0]-2,'b')
#plt.show()

ValueError: ignored

In [18]:
testData_rnn

<DatasetV1Adapter shapes: ((?, 10800), (?, 5400)), types: (tf.float32, tf.float32)>

## Feedforward network

In [0]:
ffwdModel = tf.keras.Sequential()
ffwdModel.add(layers.Dense(64, activation='relu',input_shape=(2*seqL*ninputs,)))
ffwdModel.add(layers.Dense(64, activation='relu'))
ffwdModel.add(layers.Dense(seqL*ninputs))

ffwdModel.compile(optimizer=tf.train.RMSPropOptimizer(0.001), loss='MSE',metrics=['mae'])
ffwdModel.fit(trainData,  epochs=10, steps_per_epoch=1000, validation_data=valData, validation_steps=100)