In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import scipy.io
import gc
import itertools
from sklearn.metrics import confusion_matrix
import sys
sys.path.insert(0, './preparation')
import os

# Keras imports
import keras
from keras.models import Model
from keras.layers import Input, Conv1D, Dense, Flatten, Dropout,MaxPooling1D, Activation, BatchNormalization
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.utils import plot_model
from keras import backend as K
from keras.callbacks import Callback,warnings, ReduceLROnPlateau

Using TensorFlow backend.


In [2]:
# Project root. Set the ECG_PROJECT_DIR environment variable to run the notebook
# on another machine; the original location is kept as the default.
# Joining with '' guarantees a trailing separator, which the rest of the
# notebook relies on when it builds file names with `data_path + name`.
path = os.path.join(os.environ.get('ECG_PROJECT_DIR', '/home/ubuntu/projects/ecg/'), '')
data_path = os.path.join(path, 'data', 'train', 'training', '')

In [3]:
# Read the TrainData1000.mat file into `matfile` so its contents can be inspected.
print("Loading data training set")
matfile = scipy.io.loadmat(os.path.join(data_path, 'TrainData1000.mat'))

Loading data training set


In [None]:
# Summarise the loaded .mat dictionary: array entries are shown by shape only,
# non-array entries (header/version metadata) are shown as-is. Echoing the raw
# dictionary would print every sample and flood the notebook output.
{name: getattr(value, 'shape', value) for name, value in matfile.items()}

In [4]:
def loaddata(WINDOW_SIZE, mat_path=None):
    '''
        Load the training recordings and their labels from a MATLAB file.

        Parameters:
            - WINDOW_SIZE: number of leading samples kept from every
              recording (columns 0 .. WINDOW_SIZE-1 of the data matrix).
              Recordings stored with fewer columns are returned at their
              stored width; no padding is done here.
            - mat_path: optional path of the .mat file to read. When omitted,
              "TrainData13000.mat" inside the notebook-level `data_path`
              directory is used.

        The file must contain the following variables:
            - trainData: 2-D matrix with one recording per row
            - trainLabels: label matrix, returned unchanged
              (presumably one row per recording -- confirm against the file)

        Returns:
            Tuple (X, y): the truncated data matrix and the labels.

        Raises:
            KeyError: if either variable is missing from the file.
    '''
    print("Loading data training set")
    if mat_path is None:
        # `data_path` is the notebook-level directory (ends with a separator).
        mat_path = data_path + 'TrainData13000.mat'
    matfile = scipy.io.loadmat(mat_path)

    # Fail with a readable message instead of a bare KeyError on the lookup.
    missing = [name for name in ('trainData', 'trainLabels') if name not in matfile]
    if missing:
        raise KeyError("{} does not contain variable(s): {}".format(
            mat_path, ', '.join(missing)))

    X = matfile['trainData']
    y = matfile['trainLabels']

    # Keep only the first WINDOW_SIZE samples of every recording.
    X = X[:, 0:WINDOW_SIZE]
    return (X, y)

In [5]:
def ResNet_model(WINDOW_SIZE, input_feat=1, output_class=2, drop=0.9):
    '''
        Build and compile a 1-D residual CNN classifier.

        Layout, in layer-creation order:
            1. Stem: Conv1D -> BatchNormalization -> ReLU.
            2. First residual block. Main path:
               Conv1D -> BN -> ReLU -> Dropout -> Conv1D -> MaxPooling1D.
               Shortcut: MaxPooling1D of the stem output.
            3. 15 repeated residual blocks, each
               (BN -> ReLU -> Dropout -> Conv1D) twice on the main path.
               Every second block (2nd, 4th, ...) applies MaxPooling1D to
               both paths. On the 5th, 9th and 13th block the filter count
               grows by another 64 and the shortcut goes through a
               kernel-size-1 Conv1D so both paths have the same channels.
            4. Head: BN -> ReLU -> Flatten -> Dense softmax.

        Parameters:
            - WINDOW_SIZE: number of samples per input recording.
            - input_feat: number of input channels per sample (default 1).
            - output_class: number of softmax outputs (default 2,
              normal/abnormal).
            - drop: rate passed to every Dropout layer (default 0.9).
              NOTE(review): Keras `Dropout` takes the fraction of units to
              DROP, so 0.9 discards 90% of the activations. Confirm this is
              intended and not meant as a keep-probability.

        Returns:
            A keras `Model` compiled with the adam optimizer,
            categorical cross-entropy loss and the accuracy metric.
    '''
    # Hyper-parameters, based on Rajpurkar et al.
    k = 1            # filter multiplier; incremented inside the loop below
    convfilt = 64
    convstr = 1
    kern_size = 16
    poolsize = 2
    poolstr = 2

    input1 = Input(shape=(WINDOW_SIZE, input_feat))

    # initial convblock
    x = Conv1D(filters=convfilt,
               kernel_size=kern_size,
               padding='same',
               strides=convstr,
               kernel_initializer='he_normal')(input1)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # convblock 2 and branches:
    # main sequence
    x1 = Conv1D(filters=convfilt,
                kernel_size=kern_size,
                padding='same',
                strides=convstr,
                kernel_initializer='he_normal')(x)
    x1 = BatchNormalization()(x1)
    x1 = Activation('relu')(x1)
    x1 = Dropout(drop)(x1)
    x1 = Conv1D(filters=convfilt,
                kernel_size=kern_size,
                padding='same',
                strides=convstr,
                kernel_initializer='he_normal')(x1)
    x1 = MaxPooling1D(pool_size=poolsize,
                      strides=poolstr)(x1)
    # shortcut
    x2 = MaxPooling1D(pool_size=poolsize,
                      strides=poolstr)(x)
    # combine both
    x = keras.layers.add([x1, x2])

    # repeated convblock creation
    # The block above pooled, so the first repeated block does not; pooling
    # then alternates from block to block.
    p = False
    for block_idx in range(15):

        if (block_idx % 4 == 0) and (block_idx > 0):
            # Widen the network; the 1x1 convolution brings the shortcut to
            # the new channel count so it can be added to the main path.
            k += 1
            xshort = Conv1D(filters=convfilt*k, kernel_size=1)(x)
        else:
            xshort = x
        # main section
        x1 = BatchNormalization()(x)
        x1 = Activation('relu')(x1)
        x1 = Dropout(drop)(x1)
        x1 = Conv1D(filters=convfilt*k,
                    kernel_size=kern_size,
                    padding='same',
                    strides=convstr,
                    kernel_initializer='he_normal')(x1)
        x1 = BatchNormalization()(x1)
        x1 = Activation('relu')(x1)
        x1 = Dropout(drop)(x1)
        x1 = Conv1D(filters=convfilt*k,
                    kernel_size=kern_size,
                    padding='same',
                    strides=convstr,
                    kernel_initializer='he_normal')(x1)
        if p:
            x1 = MaxPooling1D(pool_size=poolsize, strides=poolstr)(x1)

        # shortcut for ResNet: pooled exactly when the main path is pooled
        if p:
            x2 = MaxPooling1D(pool_size=poolsize, strides=poolstr)(xshort)
        else:
            x2 = xshort
        # combine sides
        x = keras.layers.add([x1, x2])
        # to pool or not to pool
        p = not p

    # classification head
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Flatten()(x)
    out = Dense(output_class, activation='softmax')(x)
    model = Model(inputs=input1, outputs=out)
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [6]:
# Optional TensorFlow session set-up, currently disabled:
#   config = tf.ConfigProto(allow_soft_placement=True)
#   config.gpu_options.allow_growth = True
#   sess = tf.Session(config=config)

# Signal parameters
FS = 300
WINDOW_SIZE = 30 * FS     # padding window for CNN (30 * FS samples)

# Seed NumPy's global random generator (used later for the validation split)
seed = 7
np.random.seed(seed)

# Read the recordings and their labels
X, y = loaddata(WINDOW_SIZE)

Loading data training set


In [None]:
# Shape of the data matrix as returned by loaddata
X.shape

In [7]:
# Training callbacks, all driven by the validation loss
calls = [
    # Stop training after 3 epochs without val_loss improvement
    EarlyStopping(
        monitor='val_loss',
        patience=3,
        verbose=1
    ),
    # Multiply the learning rate by 0.1 after 1 epoch without improvement
    ReduceLROnPlateau(
        monitor='val_loss',
        mode='min',
        factor=0.1,
        patience=1,
        verbose=1
    ),
    # Keep only the best weights (lowest val_loss) on disk
    ModelCheckpoint(
        'weights-best_2.hdf5',
        monitor='val_loss',
        save_best_only=True,
        verbose=1
    ),
]

In [8]:
# Mini-batch size and maximum number of epochs passed to model.fit
batch = 64
epochs = 20

# Total number of recordings, and the fifth of them held out for validation
Ntrain = len(X)
num_valid = Ntrain // 5

In [9]:
# The network input is (samples, channels), so X needs a trailing axis.
# Guarded on ndim so that re-running this cell does not append a new axis
# every time.
if X.ndim == 2:
    X = np.expand_dims(X, axis=2)

# Class labels of the current two-class setup
# (the four-class setup used ['A', 'N', 'O', '~'])
classes = ['N','O']
Nclass = len(classes)

In [10]:
# Sanity check: shape of the full data array
X.shape

(13000, 9000, 1)

In [11]:
# Shape of a single recording (first entry of X)
X[0].shape

(9000, 1)

In [12]:
# Build and compile the network (nothing is loaded from disk here)
model = ResNet_model(WINDOW_SIZE)

In [14]:
# Split the recordings into training and validation sets.
# Draw num_valid distinct recording indices for validation ...
idxval = np.random.choice(Ntrain, num_valid, replace=False)
# ... and mark every remaining recording as training data (boolean mask).
idxtrain = np.ones(Ntrain, dtype=bool)
idxtrain[idxval] = False

Xtrain, ytrain = X[idxtrain, :, :], y[idxtrain, :]
Xval, yval = X[idxval, :, :], y[idxval, :]

In [15]:
# Shape of the validation inputs
Xval.shape

(2600, 9000, 1)

In [16]:
# Fit the network on the training split, evaluating on the validation split
# after every epoch; the callbacks in `calls` are applied during training.
model.fit(
    x=Xtrain,
    y=ytrain,
    batch_size=batch,
    epochs=epochs,
    validation_data=(Xval, yval),
    callbacks=calls
)

Train on 10400 samples, validate on 2600 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 00003: reducing learning rate to 0.00010000000475.
Epoch 00003: val_loss did not improve
Epoch 4/20
Epoch 00004: reducing learning rate to 1.0000000475e-05.
Epoch 00004: val_loss did not improve
Epoch 00004: early stopping


<keras.callbacks.History at 0x7f04ffac8ad0>