In [1]:
import wfdb
import glob
import os
import random
import matplotlib.pyplot as plt
import heartpy
import scipy.signal
import numpy as np
import itertools
import sklearn.model_selection
from keras.models import Sequential
from keras.layers import Dense, Activation, Conv1D, MaxPooling1D, Flatten

Using TensorFlow backend.


In [9]:
def create_segmented_signals(signal, annmap, sample_rate, sec):
    """Cut fixed-length windows out of `signal` around annotated beats.

    Parameters
    ----------
    signal : array indexed as ``signal[a:b, :]``
        The recording; rows are samples (``signal.shape[0]`` is its length).
    annmap : list of dict
        Beat annotations in time order, each with the keys ``'time'``
        (sample index) and ``'ann'`` (annotation symbol).
    sample_rate : int
        Samples per second.
    sec : int or float
        Window length in seconds; every window has ``int(sec * sample_rate)``
        rows.

    Returns
    -------
    list of dict
        One dict per extracted window:

        * normal windows: ``{'data': ..., 'ann': 'N'}``. They are taken from
          inside a run of consecutive ``'N'`` annotations that is longer than
          one window, starting at a random point between the first two beats
          of the run.
        * other windows: ``{'data': ..., 'ann': symbol, 'time': beat_time}``.
          The window starts between one third and two thirds of a window
          before the annotated beat, so the beat falls inside the window.
          Beats closer than two window lengths to either end of the signal
          are skipped.
    """
    seg_len = int(sec * sample_rate)  # slicing below needs an integer length
    segments = []

    # Start of the currently open run of normal beats. None means "no run
    # open"; a numeric sentinel would collide with the valid sample index 0.
    run_start = None

    for i, sample in enumerate(annmap):
        if sample['ann'] != 'N':
            # Any other annotation interrupts the current normal run.
            run_start = None

            if 2*seg_len < sample['time'] < signal.shape[0] - 2*seg_len:
                rand_start = sample['time'] - random.randint(seg_len//3, 2*seg_len//3)
                segments.append(
                    {
                        'data': signal[rand_start:rand_start+seg_len, :],
                        'ann': sample['ann'],
                        'time': sample['time']
                    }
                )
            continue

        if run_start is None:
            # A run can only be opened when the following beat is normal too.
            # `i + 1 < len(annmap)` is the exact bound for reading annmap[i+1].
            if i + 1 < len(annmap) and annmap[i+1]['ann'] == 'N':
                run_start = random.randint(sample['time'], annmap[i+1]['time'])
            else:
                continue

        run_end = sample['time']

        # Emit a window once the run is longer than one window and the window
        # still fits inside the signal.
        if run_end - run_start > seg_len and run_start + seg_len <= signal.shape[0]:
            segments.append(
                {
                    'data': signal[run_start:run_start+seg_len, :],
                    'ann': 'N'
                }
            )
            # The next normal window starts where this run was measured to.
            run_start = run_end

    return segments

In [2]:
# Record names are the .dat paths without their extension. os.path.splitext
# strips only the final extension, so dots elsewhere in the path are kept.
# Sorting makes the processing order reproducible (glob order is arbitrary).
filelist = sorted(os.path.splitext(filename)[0] for filename in glob.glob('files/*.dat'))
# Annotation symbols for which a separate "normal vs. symbol" dataset is built.
notes = ['A','F','Q','n','R','B','S','j','+','V']

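For each annotation symbol in `notes`, build the per-record `segments` list: a list of dictionaries with the fields 'data' and 'ann' (segments cut around a non-'N' annotation also carry a 'time' field). The segments are then split into train and test sets and saved under `mals/`.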

In [29]:
# Fraction of each record's segments held out for testing; passed to
# train_test_split as test_size.
train_test_ratio = 0.3
# Number of segments kept per record and per class (train and test combined).
threshold = 100

test_threshold = int(threshold*train_test_ratio)
train_threshold = threshold - test_threshold

# filter definition: 101-tap FIR high-pass, cutoff 5 in the units of sample_rate
sample_rate = 257
n_samp = 101
filt = scipy.signal.firwin(n_samp, cutoff=5, fs=sample_rate, pass_zero='highpass')
padding = (n_samp//2)


def _fit_to_size(items, size):
    """Return exactly `size` elements by cycling through the non-empty list `items`."""
    return list(itertools.islice(itertools.cycle(items), size))


# The output directory has to exist before the first archive is written.
os.makedirs('mals', exist_ok=True)

# populating the segments list: one "normal vs. note" dataset per symbol
for note in notes:
    patient_sane_train = []
    patient_sane_test = []
    patient_ill_train = []
    patient_ill_test = []

    for file in filelist:
        record = wfdb.rdrecord(file)
        annotations = wfdb.rdann(file, 'atr')
        # Keep only normal beats and beats carrying the current symbol.
        annmap = [
            {'time': samp, 'ann': symb}
            for samp, symb in zip(annotations.sample, annotations.symbol)
            if symb == note or symb == 'N'
        ]

        # signal transformation pipeline
        signal = record.p_signal
        for i in range(signal.shape[-1]):
            # The full convolution is n_samp-1 samples longer than the input;
            # dropping `padding` samples on each side restores the original
            # length and centres the filter on every sample.
            signal[:,i] = np.convolve(signal[:,i], filt)[padding:-padding]

        segments = create_segmented_signals(signal, annmap, sample_rate, 2)
        del signal

        sane_segments = [s['data'] for s in segments if s['ann'] == 'N']
        ill_segments = [s['data'] for s in segments if s['ann'] != 'N']
        del segments

        # A record contributes only if it has segments of both classes.
        if not sane_segments or not ill_segments:
            continue

        try:
            sane_train, sane_test = sklearn.model_selection.train_test_split(sane_segments, test_size=train_test_ratio)
            ill_train, ill_test = sklearn.model_selection.train_test_split(ill_segments, test_size=train_test_ratio)
        except ValueError:
            # train_test_split raises ValueError when a class has too few
            # segments to fill both splits; such a record is skipped.
            continue

        if not (sane_train and sane_test and ill_train and ill_test):
            continue

        # Repeat or truncate every split so that each record contributes the
        # same number of segments per class.
        patient_sane_train += _fit_to_size(sane_train, train_threshold)
        patient_sane_test += _fit_to_size(sane_test, test_threshold)
        patient_ill_train += _fit_to_size(ill_train, train_threshold)
        patient_ill_test += _fit_to_size(ill_test, test_threshold)

    # One-hot labels: [1, 0] for normal segments, [0, 1] for `note` segments.
    trainX = patient_sane_train + patient_ill_train
    trainY = [[1,0]]*len(patient_sane_train) + [[0,1]]*len(patient_ill_train)
    testX = patient_sane_test + patient_ill_test
    testY = [[1,0]]*len(patient_sane_test) + [[0,1]]*len(patient_ill_test)

    # Written through an open file object so that the archive keeps the exact
    # name 'mal_<note>' with no extension added.
    with open('mals/mal_'+note, 'wb') as out_file:
        np.savez(out_file,
                 trainX=np.array(trainX, dtype=np.float32),
                 trainY=np.array(trainY, dtype=np.uint8),
                 testX=np.array(testX, dtype=np.float32),
                 testY=np.array(testY, dtype=np.uint8)
                )
        

In [3]:
def _build_model(input_shape=(514, 12)):
    """Build and compile the 1-D CNN used for every "normal vs. note" task.

    The network stacks three Conv1D(kernel_size=5) -> MaxPooling1D -> ReLU
    stages with 32, 64 and 128 filters, followed by Dense(20) + ReLU and a
    two-way softmax output. The default `input_shape` is 514 rows
    (2 * 257 samples, the window length used when the segments were cut)
    by 12 columns.
    """
    model = Sequential([
        Conv1D(32, kernel_size=5, input_shape=input_shape),
        MaxPooling1D(),
        Activation('relu'),
        Conv1D(64, kernel_size=5),
        MaxPooling1D(),
        Activation('relu'),
        Conv1D(128, kernel_size=5),
        MaxPooling1D(),
        Activation('relu'),
        Flatten(),
        Dense(20),
        Activation('relu'),
        Dense(2),
        Activation('softmax')
    ])
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model


# model.save needs the target directory to exist.
os.makedirs('models', exist_ok=True)

# Train one independent binary classifier per annotation symbol.
for note in notes:
    model = _build_model()

    # np.load returns a lazily-read archive for .npz data; the `with` block
    # closes the underlying file once training is finished.
    with np.load(os.path.join('mals', 'mal_'+note)) as data:
        try:
            model.fit(data['trainX'],
                      data['trainY'],
                      epochs=10,
                      batch_size=32,
                      validation_data=(data['testX'], data['testY']))

            model.save(os.path.join('models', 'model_'+note+'.h5'))
        except Exception as err:
            # Keep going with the remaining symbols, but report why this one
            # failed instead of hiding the cause.
            print('ERROR: could not train on '+note+' ('+repr(err)+')')
            continue

Train on 3500 samples, validate on 1500 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 2240 samples, validate on 960 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 140 samples, validate on 60 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 280 samples, validate on 120 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 140 samples, validate on 60 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
ERROR: could not train on B
Train on 560 samples, validate on 240 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 280 samples, validat

In [None]:
def load_file(filename, path):
    """Load the NumPy file named `filename` from the directory `path`.

    The two arguments are combined with os.path.join and the resulting
    location is handed to np.load; whatever np.load returns is passed back
    unchanged.
    """
    full_path = os.path.join(path, filename)
    loaded = np.load(full_path)
    return loaded

def 