<a href="https://colab.research.google.com/github/supertime1/Afib_PPG/blob/master/Afib_ECG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Introduction

This notebook trains an ECG DNN on labeled ECG data from "The PhysioNet Computing in Cardiology Challenge 2017" (https://physionet.org/content/challenge-2017/1.0.0/). The trained ECG DNN model will then be used to label the ECG data from the MIMIC-III waveform dataset, and those labels will be carried over to the concurrently recorded PPG data.

The ECG data used in training and validation has the following important attributes:
*   sampling frequency: 300Hz
*   4 labels: Normal (N), AF (A), Other rhythm (O), Noisy (~)
*   length: 9 - 60s with 30s mean.
*   preprocessed: data has been band-pass filtered by the AliveCor device


Only recordings that are at least 30s long are used in training, since PPG data usually requires 30s for Afib detection.

# Import Data and Label

In [0]:
!pip install tensorflow-gpu==2.0.0

In [0]:
!pip install wfdb

In [0]:
from IPython.display import display
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import os
import shutil
import glob
import wfdb
import tensorflow as tf

In [0]:
# Collect the path of every record header, with the ".hea" extension removed so
# the names comply with the wfdb.rdrecord / wfdb.rdheader format.
# Colab alternative pattern: '/content/drive/My Drive/training2017/*.hea'
hd_names = []
# glob returns files in arbitrary order; sorting makes the record order reproducible.
for name in sorted(glob.glob("C:/Users/57lzhang.US04WW4008/Desktop/Afib/Afib_ECG data/training2017/*.hea")):
  # splitext strips only the trailing extension; searching for the first '.hea'
  # would truncate the path early if a directory name happened to contain it.
  hd_names.append(os.path.splitext(name)[0])
len(hd_names)

In [0]:
# Names of the records long enough to train on: the header must report at least
# 9000 samples, i.e. 30 s of ECG lead I at the 300 Hz sampling frequency.
qualified_names = [name for name in hd_names if wfdb.rdheader(name).sig_len >= 9000]
print('There are total', len(qualified_names), 'qualified records')

In [0]:
# Load the reference labels. The CSV has no header row: column 0 holds the
# record name and column 1 holds its label.
# Colab alternative path: '/content/drive/My Drive/training2017/REFERENCE.csv'
df = pd.read_csv(r'C:\Users\57lzhang.US04WW4008\Desktop\Afib\Afib_ECG data\training2017\REFERENCE.csv', sep=',', header=None)
# Reduce each qualified path to its bare record name so it can be matched
# against column 0 (basename works for any record-name length).
new_name = [os.path.basename(name) for name in qualified_names]

# Look each label up by record name, in the order of qualified_names. Filtering
# with isin() would return the labels in CSV row order, which only lines up with
# the signals when the file listing happens to be sorted the same way.
init_labels = df.set_index(0)[1].loc[new_name].to_numpy()
# Exactly one label per qualified record, otherwise signals and labels are misaligned.
assert len(init_labels) == len(qualified_names)

In [0]:
# Read every qualified record and keep its p_signal array, one entry per record,
# in the same order as qualified_names.
ECG_signals = [wfdb.rdrecord(name).p_signal for name in qualified_names]

print('ECG signals len:', len(ECG_signals))

In [0]:
# Sanity check: shape of the first record's signal array.
np.shape(ECG_signals[0])

In [0]:
def generate_segment_data(source,init_labels,seg_len):
  """Split each raw signal into fixed-length segments and label every segment.

  Each signal is cut into consecutive, non-overlapping segments of `seg_len`
  samples; trailing samples that do not fill a whole segment are dropped.
  Every segment inherits the label of the signal it was cut from.

  Args:
    source: iterable of raw signals (e.g. ECG_signals), one per record.
    init_labels: indexable collection of labels, one per signal in `source`,
      in the same order.
    seg_len: segment length in samples (e.g. 30s * 300Hz = 9000).

  Returns:
    A tuple (signals, labels) of two parallel lists: the segments and the
    label of the record each segment came from.

  Raises:
    IndexError: if `init_labels` has fewer entries than `source` has signals
      that yield at least one segment.
  """
  signals = []
  labels = []
  # enumerate advances the record index once per signal, so each segment is
  # paired with the label of its own record.
  for n, signal in enumerate(source):
    for i in range(len(signal) // seg_len):
      signals.append(signal[seg_len*i:seg_len*(i+1)])
      labels.append(init_labels[n])
  return signals,labels

In [0]:
# Run generate_segment_data() on the loaded records with 9000-sample segments.
signals, labels = generate_segment_data(source=ECG_signals, init_labels=init_labels, seg_len=9000)

In [0]:
# Flatten every segment to a 1-D array of 9000 samples and wrap every label in
# a 1-element array, then report the stacked dimensions.
signals = [np.reshape(segment, 9000) for segment in signals]
labels = [np.reshape(label, 1) for label in labels]
print('signals dim:', np.asarray(signals).shape)
print('labels dim:', np.asarray(labels).shape)

In [0]:
# Pair each segment with its label as one element of a tf.data pipeline.
dataset = tf.data.Dataset.from_tensor_slices(tensors=(signals, labels))

In [0]:
# TODO: create training, validation and test datasets

# Transfer Learning

In [0]:
import tensorflow as tf
from tensorflow.keras import layers
import tensorflow_datasets as tfds
# Percentages of the 'train' split used for training, validation and test.
split = (80, 10, 10)
# The legacy `tfds.Split.TRAIN.subsplit(weighted=...)` API has been removed from
# TensorFlow Datasets; percent-slice strings are the supported way to carve
# several subsets out of one split.
train_end = split[0]
valid_end = split[0] + split[1]
splits = [
  'train[:{}%]'.format(train_end),
  'train[{}%:{}%]'.format(train_end, valid_end),
  'train[{}%:]'.format(valid_end),
]
(cat_train, cat_valid, cat_test), info = tfds.load('cats_vs_dogs', split=splits, with_info=True, as_supervised=True)

In [0]:
IMAGE_SIZE = 100  # height and width, in pixels, that every image is resized to

def pre_process_image(image, label):
  """Cast an image to float32, divide it by 255 and resize it to a square.

  Args:
    image: image tensor (presumably 8-bit pixel values, so the division maps
      them into [0, 1] -- TODO confirm against the dataset).
    label: label belonging to the image; passed through unchanged.

  Returns:
    A tuple (image, label) where image has spatial size IMAGE_SIZE x IMAGE_SIZE.
  """
  as_float = tf.cast(image, tf.float32)
  resized = tf.image.resize(as_float / 255.0, (IMAGE_SIZE, IMAGE_SIZE))
  return resized, label

In [0]:
TRAIN_BATCH_SIZE = 64

# Training stream: preprocess, shuffle with a 1000-element buffer, repeat
# indefinitely and batch. Note that this cell rebinds cat_train / cat_valid, so
# running it twice stacks the transformations.
cat_train = (
  cat_train
  .map(pre_process_image)
  .shuffle(1000)
  .repeat()
  .batch(TRAIN_BATCH_SIZE)
)
# Validation stream: same preprocessing, no shuffling, batches of 1000.
cat_valid = (
  cat_valid
  .map(pre_process_image)
  .repeat()
  .batch(1000)
)

In [0]:
# ResNet50 with ImageNet weights and without its top classification layers,
# sized for the IMAGE_SIZE x IMAGE_SIZE, 3-channel inputs.
IMG_SHAPE = (IMAGE_SIZE, IMAGE_SIZE, 3)
res_net = tf.keras.applications.ResNet50(
  include_top=False,
  weights='imagenet',
  input_shape=IMG_SHAPE,
)

In [0]:
# Mark the ResNet50 base as non-trainable so that fitting does not update its weights.
res_net.trainable = False

In [0]:
# Classification head stacked on the ResNet50 base: global average pooling
# followed by a single sigmoid unit (binary output).
global_average_layer = layers.GlobalAveragePooling2D()
output_layer = layers.Dense(1, activation='sigmoid')
# The Sequential(...) call must be closed after the layer list; leaving out the
# closing parenthesis is a SyntaxError.
tl_model = tf.keras.Sequential([
  res_net,
  global_average_layer,
  output_layer
])

In [0]:
# Compile with Adam and binary cross-entropy, log every batch to TensorBoard,
# then train for 7 epochs.
# NOTE(review): 23262 looks like the size of the whole dataset rather than of
# the training split -- confirm the intended steps_per_epoch.
tl_model.compile(
  optimizer=tf.keras.optimizers.Adam(),
  loss='binary_crossentropy',
  metrics=['accuracy'],
)
callbacks = [
  tf.keras.callbacks.TensorBoard(log_dir='./log/transer_learning_model', update_freq='batch'),
]
tl_model.fit(
  cat_train,
  steps_per_epoch=23262 // TRAIN_BATCH_SIZE,
  epochs=7,
  validation_data=cat_valid,
  validation_steps=10,
  callbacks=callbacks,
)

# Create Model