<a href="https://colab.research.google.com/github/supertime1/BP_PPG/blob/master/BP_PPG_CNN%2BLSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Introduction

This notebook trains a simple PPG DNN using labeled PPG data from the Afib_Data_Clean notebook.
The loaded data consists of 30 s segmented PPG signals sampled at 125 Hz.

# 2. Setup Environment



In [1]:
from IPython.display import display
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
%load_ext tensorboard
import numpy as np
import os
import shutil
import glob
import wfdb
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import load_model 
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
import tensorflow_datasets as tfds
import multiprocessing
from datetime import datetime
import sklearn.metrics
import itertools
import io
import pickle
print(tf.__version__)

2.1.0


In [0]:
# Optional: run this cell to log device placement info. When enabled,
# TensorFlow reports which device each operation is placed on; skip it for
# normal runs to keep the cell output readable.
tf.debugging.set_log_device_placement(True)

In [0]:
# Print the devices TensorFlow can see on this machine.
# NOTE(review): device_lib is imported from the tensorflow.python namespace,
# which is not part of the public API — confirm it still exists after any
# TensorFlow upgrade.
from tensorflow.python.client import device_lib 
print(device_lib.list_local_devices())

In [4]:
# Check whether the installed TensorFlow build was compiled with CUDA support.
# This says nothing about whether a GPU is actually present at runtime.
tf.test.is_built_with_cuda()

True

# 3. Data Pipeline

## 3.1 Input Data

In [0]:
# Glob patterns for the training signal files and their label files.
# NOTE(review): these are absolute, machine-specific paths; consider moving
# them to a single configurable data directory.
train_data_dir = "D:/WFDB/matched/BP/Cleaned Data/train/train_BP_data*"
train_label_dir = "D:/WFDB/matched/BP/Cleaned Data/train/train_BP_label*"

# glob.glob returns matches in arbitrary, filesystem-dependent order, but the
# data and label lists are later paired purely by position
# (tf.data.Dataset.zip). Sort both so that the i-th data file always lines up
# with the i-th label file.
train_data_fn = sorted(glob.glob(train_data_dir))
train_label_fn = sorted(glob.glob(train_label_dir))

# A count mismatch would otherwise be silently truncated by the positional zip.
assert len(train_data_fn) == len(train_label_fn), (
    "train data/label file counts differ: %d vs %d"
    % (len(train_data_fn), len(train_label_fn)))

In [0]:
# Glob patterns for the validation signal files and their label files.
# NOTE(review): these are absolute, machine-specific paths; consider moving
# them to a single configurable data directory.
val_data_dir = "D:/WFDB/matched/BP/Cleaned Data/validation/val_BP_data*"
val_label_dir = "D:/WFDB/matched/BP/Cleaned Data/validation/val_BP_label*"

# glob.glob returns matches in arbitrary, filesystem-dependent order, but the
# data and label lists are later paired purely by position
# (tf.data.Dataset.zip). Sort both so that the i-th data file always lines up
# with the i-th label file.
val_data_fn = sorted(glob.glob(val_data_dir))
val_label_fn = sorted(glob.glob(val_label_dir))

# A count mismatch would otherwise be silently truncated by the positional zip.
assert len(val_data_fn) == len(val_label_fn), (
    "validation data/label file counts differ: %d vs %d"
    % (len(val_data_fn), len(val_label_fn)))

## 3.2 Extract, Transform and Load data

### 3.2.1 Parallelize Extraction

In [0]:
# Dataset whose elements are the training signal file paths (one string per file).
train_data = tf.data.Dataset.from_tensor_slices(train_data_fn)
# Disabled alternative left by the author: read the files as TFRecords with a
# parallel interleave instead of reading them whole later on.
#train_data = train_data.interleave(lambda x: tf.data.TFRecordDataset(x), cycle_length=4, num_parallel_calls=tf.data.experimental.AUTOTUNE)

In [0]:
# Dataset whose elements are the training label file paths (one string per file).
train_label = tf.data.Dataset.from_tensor_slices(train_label_fn)
# Disabled alternative left by the author: read the files as TFRecords with a
# parallel interleave instead of reading them whole later on.
#train_label = train_label.interleave(lambda x: tf.data.TFRecordDataset(x), cycle_length=4, num_parallel_calls=tf.data.experimental.AUTOTUNE)

In [0]:
# Pair each training signal path with the label path at the same position.
# The pairing is purely positional, so both source lists must be in matching order.
train = tf.data.Dataset.zip((train_data,train_label))

In [0]:
# Dataset whose elements are the validation signal file paths (one string per file).
val_data = tf.data.Dataset.from_tensor_slices(val_data_fn)
# Disabled alternative left by the author: read the files as TFRecords with a
# parallel interleave instead of reading them whole later on.
#val_data = val_data.interleave(lambda x: tf.data.TFRecordDataset(x), cycle_length=4, num_parallel_calls=tf.data.experimental.AUTOTUNE)

In [0]:
# Dataset whose elements are the validation label file paths (one string per file).
val_label = tf.data.Dataset.from_tensor_slices(val_label_fn)
# Disabled alternative left by the author: read the files as TFRecords with a
# parallel interleave instead of reading them whole later on.
#val_label = val_label.interleave(lambda x: tf.data.TFRecordDataset(x), cycle_length=4, num_parallel_calls=tf.data.experimental.AUTOTUNE)

In [0]:
# Pair each validation signal path with the label path at the same position.
# The pairing is purely positional, so both source lists must be in matching order.
validation = tf.data.Dataset.zip((val_data,val_label))

### 3.2.2 Parallelize Transformation


In [0]:
def read_file(filenames,labels):
  """Read the raw contents of one signal file and its matching label file.

  Intended to be used with ``Dataset.map`` on (signal path, label path) pairs.

  Args:
    filenames: path of the signal file to read.
    labels: path of the label file to read.

  Returns:
    A ``(raw_signals, label)`` tuple holding whatever ``tf.io.read_file``
    returns for each path.

  NOTE(review): the contents are returned as read, without any decoding or
  reshaping into numeric tensors — confirm where that parsing is meant to
  happen before the data reaches the model.
  """
  return tf.io.read_file(filenames), tf.io.read_file(labels)

In [0]:
#don't need to run this cell, just as a placeholder
# Replace each (signal path, label path) pair with the files' contents, using
# one parallel call per CPU core.
# NOTE(review): `train` and `validation` are reassigned in place, so running
# this cell a second time would apply read_file to already-read contents.
# Restart the kernel (or re-run the zip cells) before re-running it.
cores = multiprocessing.cpu_count()
train = train.map(read_file, num_parallel_calls = cores)
validation = validation.map(read_file, num_parallel_calls = cores)

### 3.2.3 Parallelize Loading

In [0]:
batch_size = 32

# Cache the elements in a file named 'cache' (relative to the current working
# directory) so later epochs do not have to re-read the source files.
train_dataset = train.cache(filename='cache')

# No .repeat() here: model.fit is called without steps_per_epoch, so Keras
# needs a finite dataset to know where an epoch ends. With an endlessly
# repeating dataset the first epoch can never complete (or fit rejects the
# dataset outright). shuffle() reshuffles on every pass by default, so each
# epoch still sees a different order.
train_dataset = train_dataset.shuffle(100000).batch(batch_size,drop_remainder=True)
train_dataset = train_dataset.prefetch(buffer_size = tf.data.experimental.AUTOTUNE)

# Same reasoning for validation: fit is called without validation_steps, so
# the dataset must be finite. Prefetch overlaps input loading with evaluation.
val_dataset = validation.batch(batch_size, drop_remainder=True)
val_dataset = val_dataset.prefetch(buffer_size = tf.data.experimental.AUTOTUNE)

# 4. Train Model

## 4.1 CNN + LSTM

In [24]:
from tensorflow.keras.layers import BatchNormalization
# Private TensorFlow switch on the BatchNormalization class.
# NOTE(review): presumably selects the pre-V2 batch-norm behaviour — confirm it
# is still needed/available for the TensorFlow version in use.
BatchNormalization._USE_V2_BEHAVIOR = False

# Per-segment CNN feature extractor.
# Stages 1-4: Conv1D -> BatchNorm -> MaxPool(2) -> Dropout(0.2), given below as
# (filters, kernel size).
cnn = tf.keras.Sequential()
for conv_filters, conv_kernel in ((8, 1), (16, 3), (32, 3), (64, 3)):
    cnn.add(tf.keras.layers.Conv1D(conv_filters, conv_kernel, strides=1,
                                   activation='relu'))
    cnn.add(tf.keras.layers.BatchNormalization())
    cnn.add(tf.keras.layers.MaxPooling1D(pool_size=2, strides=2))
    cnn.add(tf.keras.layers.Dropout(0.2))
# Stage 5: kernel-size-1 convolution down to 16 channels, no pooling/dropout.
cnn.add(tf.keras.layers.Conv1D(16, 1, strides=1, activation='relu'))
cnn.add(tf.keras.layers.BatchNormalization())
# Flatten each segment's feature map into one vector for the recurrent layers.
cnn.add(tf.keras.layers.Flatten())

# Full model: the CNN is applied to each of the 10 steps of a (10, 750, 1)
# input, then two bidirectional LSTMs summarise the sequence and a 2-unit
# dense layer produces the outputs.
model = tf.keras.Sequential()
model.add(tf.keras.layers.TimeDistributed(cnn, input_shape=(10,750,1)))
model.add(tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(32, return_sequences=True)))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(16)))
model.add(tf.keras.layers.Dense(2))

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
time_distributed (TimeDistri (None, 10, 720)           9776      
_________________________________________________________________
bidirectional (Bidirectional (None, 10, 64)            192768    
_________________________________________________________________
bidirectional_1 (Bidirection (None, 32)                10368     
_________________________________________________________________
dense (Dense)                (None, 2)                 66        
Total params: 212,978
Trainable params: 212,706
Non-trainable params: 272
_________________________________________________________________


## 4.2 Define callbacks

### 4.2.1 Learning rate scheduler

In [0]:
def decay(epoch):
  """Step-wise learning-rate schedule.

  Args:
    epoch: index of the current epoch.

  Returns:
    1e-3 while epoch < 50, 1e-4 while epoch < 200, and 1e-5 afterwards.
  """
  # (exclusive upper epoch bound, learning rate), checked in ascending order.
  for upper_bound, rate in ((50, 1e-3), (200, 1e-4)):
    if epoch < upper_bound:
      return rate
  return 1e-5

In [0]:
#callback: set the learning rate for each epoch from decay(), i.e. a step-wise decrease
lr_schedule = tf.keras.callbacks.LearningRateScheduler(decay)

### 4.2.2 Tensorboard

In [0]:
#callback: tensorboard
# Each run logs to its own directory: the timestamp at cell execution time plus
# the "CNN+LSTM" suffix. histogram_freq=1 asks for weight histograms every epoch.
# NOTE(review): the log directory is an absolute, user-specific Windows path —
# it has to be edited before running on another machine.
log_dir=r"C:\Users\57lzhang.US04WW4008\Desktop\Blood pressure\BP data\logs\fit\\" + datetime.now().strftime("%Y%m%d-%H%M%S") +"CNN+LSTM"
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

### 4.2.3 Checkpoint

In [0]:
#callback: checkpoint
# File name pattern embeds the epoch number and the training loss of that epoch.
# NOTE(review): absolute, user-specific Windows path — edit before running elsewhere.
filepath = r"C:\Users\57lzhang.US04WW4008\Desktop\Blood pressure\BP data\models\CNN+LSTM-{epoch:02d}-{loss:.4f}.hdf5"
# save_best_only=True writes a file only when the monitored value improves.
# NOTE(review): the monitored value is 'loss' (training loss), not 'val_loss',
# so "best" here does not reflect validation performance — confirm this is intended.
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='auto')

## 4.3 Train the model

### 4.3.1 Start Training

In [30]:
#clear history if necessary
# NOTE(review): this runs after `model` has already been built in an earlier
# cell — confirm that clearing the Keras session at this point is intended.
tf.keras.backend.clear_session()
# Disabled multi-GPU setup left by the author (kept for reference).
#strategy = tf.distribute.MirroredStrategy(cross_device_ops=tf.distribute.HierarchicalCopyAllReduce()) ##to overwrite NCCL cross device communication as this is running in Windows
#with strategy.scope():

# No-op self-assignment; it looks like a leftover from the disabled
# strategy.scope() block above.
model = model

# Regression setup: mean squared error as the loss, mean absolute error as the
# reported metric, Adam with its default settings (the learning rate is then
# driven per epoch by the lr_schedule callback).
model.compile(optimizer=tf.keras.optimizers.Adam(), 
              loss='mse', 
              metrics=['mae'])

callbacks_list = [tensorboard_callback, checkpoint, lr_schedule]

#start training
# NOTE(review): neither steps_per_epoch nor validation_steps is passed, so
# both datasets must be finite for Keras to detect the end of an epoch —
# verify against the cell that builds train_dataset / val_dataset.
model.fit(train_dataset,
          epochs=300,
          verbose=1,
          validation_data=val_dataset,
          callbacks=callbacks_list
          )

ValueError: ignored

## 4.4 Save Model for future evaluation

In [0]:
# Save the in-memory model as an HDF5 file in the directory below.
# NOTE(review): os.chdir changes the working directory for the rest of the
# kernel session, which also affects every relative path used afterwards.
# NOTE(review): both the target folder ("Afib") and the file name
# ("Deep_PPG_CNN") look carried over from another notebook — confirm they are
# the intended destination for this CNN+LSTM blood-pressure model.
os.chdir(r"C:\Users\57lzhang.US04WW4008\Desktop\Afib\Afib_ECG data")
model.save('Deep_PPG_CNN_041120.h5')
print("Save model to disk")

Save model to disk


# 5. Model Evaluation

## 5.1 Load saved model

In [0]:
# Load a specific checkpoint written by the ModelCheckpoint callback; the file
# name encodes epoch 285 and a training loss of 10.2110.
# NOTE(review): os.chdir changes the working directory for the rest of the
# kernel session, and this rebinds `model`, replacing the model trained above.
os.chdir(r"C:\Users\57lzhang.US04WW4008\Desktop\Blood pressure\BP data\models")
model = tf.keras.models.load_model('CNN+LSTM-285-10.2110.hdf5')

## 5.2 MAE

In [0]:
# Run the loaded model over the test set to get the blood-pressure estimates.
# NOTE(review): `test_dataset` is not defined anywhere in the visible notebook;
# this cell fails on a fresh kernel unless it is created elsewhere.
bp_estimate=model.predict(test_dataset)

In [0]:
from sklearn.metrics import mean_absolute_error
# Column 0 is reported as systolic and column 1 as diastolic pressure.
# NOTE(review): `test_labels` is not defined in the visible notebook — confirm
# where it is created and that its rows line up with `bp_estimate`.
sys_mae, dia_mae = [
    mean_absolute_error(test_labels[:, channel], bp_estimate[:, channel])
    for channel in (0, 1)
]
print("Systolic MAE in test dataset:", round(sys_mae,1))
print("Diastolic MAE in test dataset:", round(dia_mae,1))

Systolic MAE in test dataset: 11.2
Diastolic MAE in test dataset: 6.2
