<a href="https://colab.research.google.com/github/supertime1/BP_PPG/blob/master/BP_PPG_CNN%2BLSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Introduction

This notebook trains a simple PPG DNN using labeled PPG data from the Afib_Data_Clean notebook.
The loaded data are 30 s segmented PPG signals with a 125 Hz sampling rate.

# 2. Setup Environment



In [1]:
from IPython.display import display
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
%load_ext tensorboard
import numpy as np
import os
import shutil
import glob
import wfdb
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import load_model 
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
import tensorflow_datasets as tfds
import multiprocessing
from datetime import datetime
import sklearn.metrics
import itertools
import io
import pickle
print(tf.__version__)

2.1.0


In [0]:
#optional: run this cell to make TensorFlow log which device each op is placed on
tf.debugging.set_log_device_placement(enabled=True)

In [0]:
#list the devices TensorFlow reports on this machine
from tensorflow.python.client import device_lib
local_devices = device_lib.list_local_devices()
print(local_devices)

In [0]:
#check whether this TensorFlow build was compiled with CUDA support
tf.test.is_built_with_cuda()

True

In [0]:
#check whether eager execution is enabled
tf.executing_eagerly()

True

# 3. Data Pipeline

In [0]:
#load the data filenames
#glob.glob returns paths in arbitrary (filesystem-dependent) order, but data and
#label files are later paired purely by their position in these two lists, so
#both lists are sorted to keep the pairing deterministic.
#NOTE(review): this assumes data*/label* files share the same suffix scheme, so
#that sorting puts matching files at the same index - confirm against the files on disk
train_data_dir = "D:/WFDB/matched/BP/Cleaned Data/train1/data*"
train_data_fn = sorted(glob.glob(train_data_dir))
train_label_dir = "D:/WFDB/matched/BP/Cleaned Data/train1/label*"
train_label_fn = sorted(glob.glob(train_label_dir))
#a count mismatch means samples and labels cannot be paired one-to-one
if len(train_data_fn) != len(train_label_fn):
  raise ValueError("train data/label file count mismatch: %d vs %d"
                   % (len(train_data_fn), len(train_label_fn)))

In [0]:
#validation filenames; sorted because glob.glob returns paths in arbitrary order
#and data/label files are later paired purely by list position
#NOTE(review): assumes data*/label* files share the same suffix scheme - confirm
val_data_dir = "D:/WFDB/matched/BP/Cleaned Data/validation1/data*"
val_data_fn = sorted(glob.glob(val_data_dir))
val_label_dir = "D:/WFDB/matched/BP/Cleaned Data/validation1/label*"
val_label_fn = sorted(glob.glob(val_label_dir))
#a count mismatch means samples and labels cannot be paired one-to-one
if len(val_data_fn) != len(val_label_fn):
  raise ValueError("validation data/label file count mismatch: %d vs %d"
                   % (len(val_data_fn), len(val_label_fn)))

In [0]:
#use generator to input data, since the data size(>160GB) is larger than memory size (64GB)
def train_data_generator():
  """Yield the unpickled contents of each training data file, one file per step.

  Files are read in the order of the module-level list `train_data_fn`.
  Each file is opened in a `with` block so its handle is closed as soon as
  the pickle has been read, instead of being leaked.
  """
  for filename in train_data_fn:
    #pickle can execute arbitrary code on load; only read trusted files
    with open(filename,'rb') as f:
      data = pickle.load(f)
    yield data

In [0]:
def train_label_generator():
  """Yield the unpickled contents of each training label file, one file per step.

  Files are read in the order of the module-level list `train_label_fn`.
  Each file is opened in a `with` block so its handle is closed as soon as
  the pickle has been read, instead of being leaked.
  """
  for filename in train_label_fn:
    #pickle can execute arbitrary code on load; only read trusted files
    with open(filename,'rb') as f:
      label = pickle.load(f)
    yield label

In [0]:
def val_data_generator():
  """Yield the unpickled contents of each validation data file, one file per step.

  Files are read in the order of the module-level list `val_data_fn`.
  Each file is opened in a `with` block so its handle is closed as soon as
  the pickle has been read, instead of being leaked.
  """
  for filename in val_data_fn:
    #pickle can execute arbitrary code on load; only read trusted files
    with open(filename,'rb') as f:
      data = pickle.load(f)
    yield data

In [0]:
def val_label_generator():
  """Yield the unpickled contents of each validation label file, one file per step.

  Files are read in the order of the module-level list `val_label_fn`.
  Each file is opened in a `with` block so its handle is closed as soon as
  the pickle has been read, instead of being leaked.
  """
  for filename in val_label_fn:
    #pickle can execute arbitrary code on load; only read trusted files
    with open(filename,'rb') as f:
      label = pickle.load(f)
    yield label

In [9]:
#calculate number of elements in training for later use in shuffle and model.fit
number_of_element = 0
for label_path in train_label_fn:
  #open inside a with-block so each file handle is closed right after reading
  with open(label_path,'rb') as f:
    label = pickle.load(f)
  #len(label) is the number of samples stored in this file
  number_of_element += len(label)
print("There are in total", number_of_element, "in training dataset")

There are in total 2494543 in training dataset


In [10]:
#calculate number of elements in validation
number_of_val_element = 0
for label_path in val_label_fn:
  #open inside a with-block so each file handle is closed right after reading
  with open(label_path,'rb') as f:
    label = pickle.load(f)
  #len(label) is the number of samples stored in this file
  number_of_val_element += len(label)
print("There are in total", number_of_val_element, "in validation dataset")

There are in total 277340 in validation dataset


In [0]:
#stream the per-file arrays through the generators, then use flat_map to strip
#the leading per-file dimension so that each dataset element is one sample
train_data = tf.data.Dataset.from_generator(
    train_data_generator,
    output_types=tf.float32,
    output_shapes=[None,10,750,1])
train_label = tf.data.Dataset.from_generator(
    train_label_generator,
    output_types=tf.float32,
    output_shapes=[None,2])
train_ds = train_data.flat_map(tf.data.Dataset.from_tensor_slices)
train_lb = train_label.flat_map(tf.data.Dataset.from_tensor_slices)
#pair each sample with its label by position
train = tf.data.Dataset.zip((train_ds,train_lb))

In [0]:
#validation: same generator -> flatten -> zip pipeline as for training
val_data = tf.data.Dataset.from_generator(
    val_data_generator,
    output_types=tf.float32,
    output_shapes=[None,10,750,1])
val_label = tf.data.Dataset.from_generator(
    val_label_generator,
    output_types=tf.float32,
    output_shapes=[None,2])
val_ds = val_data.flat_map(tf.data.Dataset.from_tensor_slices)
val_lb = val_label.flat_map(tf.data.Dataset.from_tensor_slices)
#pair each sample with its label by position
validation = tf.data.Dataset.zip((val_ds,val_lb))

In [0]:
batch_size = 256
#training: cache samples to a file on disk, shuffle with a buffer of 1/100 of
#the training set, repeat indefinitely, batch, then prefetch
shuffle_buffer = number_of_element//100
train_dataset = (
    train.cache(filename='D:/WFDB/matched/BP/Cleaned Data/cache')
    .shuffle(shuffle_buffer)
    .repeat()
    .batch(batch_size,drop_remainder=True)
    .prefetch(buffer_size = tf.data.experimental.AUTOTUNE)
)
#validation: repeat indefinitely and batch (no cache, shuffle or prefetch)
val_repeated = validation.repeat()
val_dataset = val_repeated.batch(batch_size, drop_remainder=True)

# 4. Train Model

## 4.1 CNN + LSTM

In [14]:
from tensorflow.keras.layers import BatchNormalization
#set the private class flag before any BatchNormalization layer is created
#NOTE(review): presumably a workaround for BatchNormalization inside TimeDistributed - confirm
BatchNormalization._USE_V2_BEHAVIOR = False

def _conv_stage(filters, kernel_size, pooled=True):
  """Return the layers of one CNN stage in the order they are applied.

  Every stage is Conv1D(relu) -> BatchNormalization; when `pooled` is True it
  is followed by MaxPooling1D(2, 2) -> Dropout(0.2).
  """
  stage = [
      tf.keras.layers.Conv1D(filters, kernel_size, strides=1, activation='relu'),
      tf.keras.layers.BatchNormalization(),
  ]
  if pooled:
    stage.append(tf.keras.layers.MaxPooling1D(pool_size=2,strides=2))
    stage.append(tf.keras.layers.Dropout(0.2))
  return stage

#create CNN layers: four pooled stages, given as (filters, kernel_size) ...
cnn_layers = []
for stage_filters, stage_kernel in [(8, 1), (16, 3), (32, 3), (64, 3)]:
  cnn_layers.extend(_conv_stage(stage_filters, stage_kernel))
#... then a 5th Conv1D stage without pooling/dropout, flattened to one vector
cnn_layers.extend(_conv_stage(16, 1, pooled=False))
cnn_layers.append(tf.keras.layers.Flatten())
cnn = tf.keras.Sequential(cnn_layers)

#combine with LSTM: the CNN is applied to each of the 10 steps of a (10,750,1)
#input, followed by two bidirectional LSTMs and a 2-unit linear output
sequence_layers = [
    tf.keras.layers.TimeDistributed(cnn,input_shape=(10,750,1)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32,return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(16)),
    tf.keras.layers.Dense(2),
]
model = tf.keras.Sequential(sequence_layers)

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
time_distributed (TimeDistri (None, 10, 720)           9776      
_________________________________________________________________
bidirectional (Bidirectional (None, 10, 64)            192768    
_________________________________________________________________
bidirectional_1 (Bidirection (None, 32)                10368     
_________________________________________________________________
dense (Dense)                (None, 2)                 66        
Total params: 212,978
Trainable params: 212,706
Non-trainable params: 272
_________________________________________________________________


## 4.2 Define callbacks

### 4.2.1 Learning rate scheduler

In [0]:
def decay(epoch):
  """Piecewise-constant learning-rate schedule.

  Returns 1e-3 for epochs below 30, 1e-4 for epochs 30 to 99, and 1e-5 from
  epoch 100 onwards.
  """
  if epoch >= 100:
    return 1e-5
  if epoch >= 30:
    return 1e-4
  return 1e-3

In [0]:
#callback: set the learning rate for each epoch from the step schedule in decay()
lr_schedule = tf.keras.callbacks.LearningRateScheduler(schedule=decay)

### 4.2.2 TensorBoard

In [0]:
#callback: tensorboard
#each run logs to its own directory, named by the start timestamp plus a run tag
run_stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
#the raw string ends in a doubled backslash because a raw string literal cannot end in a single one
log_dir=r"C:\Users\57lzhang.US04WW4008\Desktop\Blood pressure\BP data\logs\fit\\" + run_stamp +"CNN+LSTM+all"
tensorboard_callback = TensorBoard(histogram_freq=1, log_dir=log_dir)

### 4.2.4 Checkpoint

In [0]:
#callback: checkpoint
#the epoch number and the loss value are formatted into each saved file name
filepath = r"C:\Users\57lzhang.US04WW4008\Desktop\Blood pressure\BP data\models\CNN+LSTM+all-{epoch:02d}-{loss:.4f}.hdf5"
#NOTE(review): 'loss' is the training loss, so "best" is judged on training data
#rather than on the validation set - confirm this is intended
checkpoint = ModelCheckpoint(
    filepath=filepath,
    monitor='loss',
    mode='auto',
    save_best_only=True,
    verbose=1)

## 4.3 Train the model

### 4.3.1 Start Training

In [20]:
#Do not call tf.keras.backend.clear_session() at this point: `model` has already
#been built in an earlier cell, and clearing the session afterwards resets the
#global Keras state that the model was created under. If a clean slate is
#needed, call clear_session() before building the model and rebuild it.
#strategy = tf.distribute.MirroredStrategy(cross_device_ops=tf.distribute.HierarchicalCopyAllReduce()) ##to overwrite NCCL cross device communication as this is running in Windows
#with strategy.scope():

#train with MSE loss and report MAE alongside it
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss='mse',
              metrics=['mae'])

callbacks_list = [tensorboard_callback, checkpoint, lr_schedule]

#start training
#both datasets are built with .repeat(), so the number of batches per epoch
#must be given explicitly for training and for validation
model.fit(train_dataset,
          epochs=300,
          steps_per_epoch = number_of_element//batch_size,
          verbose=1,
          validation_data=val_dataset,
          validation_steps=number_of_val_element//batch_size,
          callbacks=callbacks_list
          )

Train for 9744 steps, validate for 1083 steps
Epoch 1/300
   1/9744 [..............................] - ETA: 13:07:44

KeyError: ignored

# 5. Model Evaluation

In [0]:
#os.chdir(r"C:\Users\57lzhang.US04WW4008\Desktop\Blood pressure\BP data\models")
#model = tf.keras.models.load_model('CNN+LSTM-285-10.2110.hdf5')

## 5.2 MAE

In [0]:
#test filenames; sorted because glob.glob returns paths in arbitrary order and
#predictions are compared against labels purely by position
#NOTE(review): assumes data*/label* files share the same suffix scheme - confirm
test_data_dir = "D:/WFDB/matched/BP/Cleaned Data/test/data*"
test_data_fn = sorted(glob.glob(test_data_dir))
test_label_dir = "D:/WFDB/matched/BP/Cleaned Data/test/label*"
test_label_fn = sorted(glob.glob(test_label_dir))
#a count mismatch means samples and labels cannot be paired one-to-one
if len(test_data_fn) != len(test_label_fn):
  raise ValueError("test data/label file count mismatch: %d vs %d"
                   % (len(test_data_fn), len(test_label_fn)))

In [0]:
def test_data_generator():
  """Yield the unpickled contents of each test data file, one file per step.

  Files are read in the order of the module-level list `test_data_fn`.
  Each file is opened in a `with` block so its handle is closed as soon as
  the pickle has been read, instead of being leaked.
  """
  for filename in test_data_fn:
    #pickle can execute arbitrary code on load; only read trusted files
    with open(filename,'rb') as f:
      data = pickle.load(f)
    yield data

In [0]:
#The previous call passed the generator *function* (not an iterator) to the
#deprecated predict_generator, with a fractional steps value computed from the
#number of files rather than the number of batches.
#Instead, stream the test files through the same flatten-then-batch pipeline
#used for training and validation: every sample is predicted exactly once, in
#file order, and no steps argument is needed because the dataset is finite.
test_data = tf.data.Dataset.from_generator(test_data_generator,(tf.float32),output_shapes=[None,10,750,1])
#no drop_remainder here, so the final partial batch is predicted as well
test_dataset = test_data.flat_map(lambda x: tf.data.Dataset.from_tensor_slices(x)).batch(batch_size)
bp_estimate=model.predict(test_dataset)

In [0]:
from sklearn.metrics import mean_absolute_error
#test_labels was never defined: load the ground-truth labels from the test
#label files, in the order of test_label_fn, and stack them into one array
#NOTE(review): row i of test_labels matches row i of bp_estimate only if
#test_label_fn and test_data_fn list matching files in the same order - confirm
test_label_parts = []
for label_path in test_label_fn:
  with open(label_path,'rb') as f:
    test_label_parts.append(np.asarray(pickle.load(f)))
test_labels = np.concatenate(test_label_parts, axis=0)
#fail loudly instead of comparing misaligned arrays
if len(test_labels) != len(bp_estimate):
  raise ValueError("label/prediction count mismatch: %d vs %d"
                   % (len(test_labels), len(bp_estimate)))
#column 0 is scored as systolic and column 1 as diastolic
sys_mae = mean_absolute_error(test_labels[:,0], bp_estimate[:,0])
dia_mae = mean_absolute_error(test_labels[:,1], bp_estimate[:,1])
print("Systolic MAE in test dataset:", round(sys_mae,1))
print("Diastolic MAE in test dataset:", round(dia_mae,1))