## Deep Learning Pipeline for Random cross validation

### Before you run the next block, please make sure you have downloaded the Waveform Data folder from Rice Box. To save space, you only need to download the patient folders that have labelled events associated with them, which are currently patients 1, 2, 3, 4, 7, 8, 13, 14, 15, 16, 17, 18, 19, 20 and 22. Make sure to keep each patient in its own folder, and keep all patient folders together inside a single Waveform Data folder.

### Also, make sure to download the Labelled_Events.xlsx file from the GitHub repo. Save both of these to a place where the local version of this notebook has access, and make sure you know the local paths.

### Please also download ECG_feature_extraction.py, ECG_preprocessing.py, PPG_preprocessing.py, data_generator.py and CNN_models.py. Detailed information about these .py files can be found in the readme file.

### Make sure you have all the required packages installed and up to date by using the requirements.txt file (pip install -r requirements.txt).



In [1]:



import h5py
import pywt
import numpy as np
import os
import random
from glob import glob
# from sklearn.model_selection import train_test_split
from ECG_feature_extraction import *
from ECG_preprocessing import *
from PPG_preprocessing import *
from os import listdir
import pandas as pd

## loading cwt images for later training deep learning model


Three parameters need to be provided for this section:

patient_folder_path: the local path of the folder containing all Waveform Data (folder containing folders for each patient)

excel_file_path: the local path of the Labelled_Events.xlsx file

save_path: the path of folder where you want to save the "cwt images" (these will be used for modelling).
    

In [2]:


def load_event_cwt_images(save_path,patient_folder_path,excel_file_path,excel_sheet_name='PJ',fs=240):
    '''
    Build per-beat CWT "images" for every labelled event on one excel sheet and save them to disk.

    For each row of the sheet (patient id, event start time, event end time) every
    block file in that patient's folder is scanned. When a block's time range fully
    contains the event and the block holds both the ECG and the PPG waveform, the
    event segment is cut out, denoised, and passed to compute_cwt_features. Each
    ECG/PPG pair returned is stacked along a new last axis (index 0 = ECG,
    index 1 = PPG) and written with np.save as "<count>_<excel_sheet_name>.npy".

    input:
        save_path: folder in which the np.array files are saved (created if it does not exist)
        patient_folder_path: folder containing one sub-folder per patient, named by patient id
        excel_file_path: the path of the labelled event excel file
        excel_sheet_name: the sheet (event label) to process; it is also used as the suffix of
            every saved file name. Each row of the sheet must hold exactly three values:
            patient id, event start time, event end time
        fs: sampling frequency, used to turn time offsets into sample indices
            (assumes the event times and the block 'time' dataset are in seconds -- TODO confirm)

    output:
        no return value
        the generated files can be checked in save_path

    raises:
        ValueError: if the ECG and PPG feature lists for an event differ in length
    '''
    labelevent = pd.read_excel(excel_file_path,sheet_name=excel_sheet_name)
    # np.save does not create missing directories, so make sure the target exists up front.
    os.makedirs(save_path,exist_ok=True)
    scales = np.arange(1,129)  # same scales for both signals; hoisted out of the loops
    count = 1  # running index used in the saved file names (restarts for every sheet)

    for _,record in labelevent.iterrows():
        patient_id,event_start_time,event_end_time = record.tolist()
        patient_file_path = os.path.join(patient_folder_path,str(int(patient_id)))

        for block_file in listdir(patient_file_path):
            block_path = os.path.join(patient_file_path,block_file)

            # Open the block in a context manager so the HDF5 handle is always released;
            # a patient folder can contain many blocks and each event rescans all of them.
            with h5py.File(block_path,'r') as all_signals:
                block_start_time,block_end_time = all_signals['time'][0],all_signals['time'][-1]
                # Only use blocks that contain the whole labelled event.
                if not (block_start_time <= event_start_time <= event_end_time <= block_end_time):
                    continue
                # Both waveforms are required to build the two-channel image.
                if 'GE_WAVE_ECG_2_ID' not in all_signals or 'GE_WAVE_SPO2_WAVE_ID' not in all_signals:
                    continue

                start_index = int((event_start_time-block_start_time)*fs)
                end_index = int((event_end_time-block_start_time)*fs)
                # Slicing an h5py dataset copies the samples into memory, so they stay
                # valid after the file is closed.
                ecg = all_signals['GE_WAVE_ECG_2_ID'][start_index:end_index+1]
                ppg = all_signals['GE_WAVE_SPO2_WAVE_ID'][start_index:end_index+1]

            # ECG signal preprocessing for denoising and R-peak detection
            R_peak_index,ecg_denoise = ecg_preprocessing_final(ecg)  # the location of R_peak during the label event
            ppg_denoise = PPG_denoising(ppg)
            # Extract cwt features for both signals around the ECG R peaks.
            ecg_cwt = compute_cwt_features(ecg_denoise,R_peak_index,scales = scales,windowL=-240,windowR=240,wavelet = 'morl')
            ppg_cwt = compute_cwt_features(ppg_denoise,R_peak_index,scales = scales,windowL=-240,windowR=240,wavelet = 'coif')

            if len(ecg_cwt)!=len(ppg_cwt):
                raise ValueError("The beat length is not correct!!! Please check!")

            # An event that produced no features simply yields no iterations here.
            for ecg_image,ppg_image in zip(ecg_cwt,ppg_cwt):
                combined = np.stack((ecg_image,ppg_image),axis=-1)
                # os.path.join works whether or not save_path ends with a separator.
                np.save(os.path.join(save_path,str(count)+'_'+excel_sheet_name),combined)
                count+=1

    return





def load_cwt_files(patient_folder_path,excel_file_path,save_path,label_type=('PJ','PJRP','PO','PP','PS','PVC')):

    '''
    Run load_event_cwt_images once per label to generate cwt features and save them into a folder.

    Arguments:
        patient_folder_path: the path of the folder which holds the patients' waveforms
        excel_file_path: the path of the excel file which contains the labelled events
        save_path: the folder path in which the cwt features are saved
        label_type: the labels to process; each one is passed to load_event_cwt_images
            as excel_sheet_name. A single label may also be given as a plain string.

    Returns:
    no return

    '''

    # A bare string would otherwise be iterated character by character.
    if isinstance(label_type,str):
        label_type = (label_type,)

    for label in label_type:
        load_event_cwt_images(save_path,patient_folder_path,excel_file_path,excel_sheet_name=label)


# ---- Edit the three paths below to match your local setup (see the instructions above) ----
load_cwt_files(
    patient_folder_path='I:/COMP549/data',
    excel_file_path='I:/COMP549/events/Labelled_Events.xlsx',
    save_path='I:/COMP549/cwt_features_images_ecg/',
)


After detrend before wavelet:  [146.67326 167.97446 173.65193 ... 172.30809 172.17563 170.23831]


KeyboardInterrupt: 

## set up the library for deep learning 
If any error is generated at this step, please update the required libraries.


In [2]:
import time
import os

#from data_generator import get_train_valid_generator
#from losses import make_loss, dice_coef_clipped, binary_crossentropy, dice_coef, ceneterline_loss
import tensorflow as tf
import time
#import matplotlib.pyplot as plt
# -------------------------- set gpu using tf ---------------------------
# import tensorflow as tf
# import time
# config = tf.ConfigProto()
# config.gpu_options.allow_growth = True
# session = tf.Session(config=config)
# -------------------  start importing keras module ---------------------
from keras.callbacks import (ModelCheckpoint, CSVLogger, TensorBoard, EarlyStopping)
# import tensorflow.keras.backend.tensorflow_backend as K
from keras.optimizers import Adam
from CNN_models import *

import matplotlib.pyplot as plt

ModuleNotFoundError: No module named 'keras'

##  Train the deep learning model

### Please provide the following information:

EPOCHS: epoch number for training 

BATCH_SIZE: batch size for training

DATA_DIR: the path of folder containing "cwt images"

LOG_DIR: the path of folder you would like to save the training log

VAL_SIZE: the fraction of the dataset held out for validation (e.g. 0.15 means 15%)

In [4]:

##############################Please modify this part      ###############################################
# Number of training epochs; passed to model.fit as `epochs` in train().
EPOCHS = 20
# Batch size given to the data generator; train() also uses it to derive
# steps_per_epoch and validation_steps.
BATCH_SIZE = 16#8
# Folder that holds the saved "cwt images"; passed to get_train_valid_generator as data_dir.
DATA_DIR = 'I:/COMP549/cwt_features_images_ecg' #I:/COMP549/cwt_features_images'
# Folder handed to the TensorBoard callback as log_dir.
LOG_DIR = "./log"
# Passed to get_train_valid_generator as val_size
# (presumably the fraction of samples held out for validation -- confirm in data_generator.py).
VAL_SIZE = 0.15


#########################################################################################################



def summarize_diagnostics(history):
    """Draw the loss and accuracy curves of a training run side by side.

    `history` is the object returned by model.fit; its `history` dict must
    contain the keys 'loss', 'val_loss', 'accuracy' and 'val_accuracy'.
    The left panel shows the loss, the right panel the accuracy; in both the
    training curve is labelled 'train' and the validation curve 'test'.
    """
    _, axes = plt.subplots(1,2, figsize=(20, 10))
    # One entry per panel: (panel title, key in history.history, y-axis label)
    panels = (
        ('Loss Curves', 'loss', 'Loss'),
        ('Classification Accuracy', 'accuracy', 'Accuracy'),
    )
    for axis, (title, metric, y_label) in zip(axes, panels):
        axis.set_title(title, fontsize=20)
        axis.plot(history.history[metric], label='train')
        axis.plot(history.history['val_' + metric], label='test')
        axis.set_xlabel('Epochs', fontsize=15)
        axis.set_ylabel(y_label, fontsize=15)
        axis.legend(fontsize=15)

def train():
    """Build, compile and fit the two-layer CNN on the saved cwt images.

    Reads the module-level settings EPOCHS, BATCH_SIZE, DATA_DIR, LOG_DIR and
    VAL_SIZE. Side effects:
      * creates ./results and ./weights if they do not exist
      * writes a CSV training log to ./results/<model_name>_train.log
      * writes TensorBoard logs to LOG_DIR
      * saves the weights to ./weights/ whenever val_loss improves
      * prints the elapsed training time and plots the loss/accuracy curves

    Every Keras object is taken from tf.keras so that the optimizer, loss and
    callbacks all come from one Keras implementation instead of a mix of the
    standalone `keras` package and `tf.keras`.
    """
    model = twoLayerCNN(input_size=(32,120,2))
    # Alternative architecture: model = VGG(input_shape=(128,480,2))
    model.summary()
    # `learning_rate` replaces the deprecated `lr` argument of the optimizer.
    model.compile(loss=tf.keras.losses.categorical_crossentropy,
                  optimizer=tf.keras.optimizers.Adam(learning_rate=3e-5),
                  metrics=['accuracy'])
    print("got twolayerCNN")
    # Timestamp suffix keeps the log file of every run separate.
    model_name = 'twolayerCNN_ecg-{}'.format(int(time.time()))

    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
    os.makedirs('./results', exist_ok=True)
    os.makedirs('./weights', exist_ok=True)
    # The placeholders are filled in by ModelCheckpoint at the end of each epoch.
    save_model_weights = "./weights/ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.hdf5"
    print('Fitting model...')
    start_time = time.time()
    tensorboard = tf.keras.callbacks.TensorBoard(log_dir=LOG_DIR, write_images=True)
    # Stop once val_loss has not improved for 3 consecutive epochs.
    earlystop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, verbose=1, mode='min')
    checkpoint = tf.keras.callbacks.ModelCheckpoint(save_model_weights,
                                                    monitor="val_loss",
                                                    mode="min",
                                                    verbose=1,
                                                    save_best_only=True,
                                                    save_weights_only=True)
    csv_logger = tf.keras.callbacks.CSVLogger('./results/{}_train.log'.format(model_name))

    # NOTE(review): get_train_valid_generator is not imported in the visible cells
    # (the data_generator import is commented out) -- confirm it is in scope,
    # e.g. through the star import from CNN_models.
    train_gen, valid_gen, num_train, num_valid = get_train_valid_generator(data_dir=DATA_DIR,
                                                                           batch_size=BATCH_SIZE,
                                                                           val_size=VAL_SIZE)
    # Ceiling division so a final partial batch still counts as one step.
    history = model.fit(x=train_gen,
                        validation_data=valid_gen,
                        epochs=EPOCHS,
                        steps_per_epoch=(num_train+BATCH_SIZE-1)//BATCH_SIZE,
                        validation_steps=(num_valid+BATCH_SIZE-1)//BATCH_SIZE,
                        callbacks=[earlystop, checkpoint, tensorboard, csv_logger])

    end_time = time.time()
    print("Training time(h):", (end_time - start_time) / 3600)
    summarize_diagnostics(history)

# Start training only when this code is executed directly (script or notebook cell),
# not when it is imported as a module.
if __name__ == "__main__":
    train()


conv1 shape : (None, 32, 120, 32)
conv2 shape:  (None, 16, 60, 64)
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 32, 120, 2)]      0         
_________________________________________________________________
conv2d (Conv2D)              (None, 32, 120, 32)       608       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 16, 60, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 16, 60, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 16, 60, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 8, 30, 64)         0         
_____________________________________________________

KeyboardInterrupt: 