In [1]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
import wfdb
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix

def read_records(record_files):
    records = []
    labels = []
    for record_file in record_files:
        record = wfdb.rdrecord(record_file)
        if record.file_name[0].endswith('.dat'):
            # TODO work out how to deal with MIT-BIH with its different hea/atr/dat files
            # and very low samples. Split into many files?
            ann = wfdb.rdann(record_file,'atr')
        else:
            for comment in record.comments:
                if comment.startswith('Dx') or comment.startswith(' Dx'):
                    dxs = set(arr.strip() for arr in comment.split(': ')[1].split(','))
                    labels.append(dxs)
                    
        records.append(wfdb.rdrecord(record_file))
    return records, labels


def create_one_hot_labels(all_labels, target_classes, num_recordings):
    discard_index = list()
    labels = np.zeros((num_recordings, len(target_classes)))#, dtype=np.bool)
    for i in range(num_recordings):
        dxs = all_labels[i]
        flag = np.zeros((1,len(dxs)), dtype = bool)
        count = 0
        for dx in dxs:
            if dx in target_classes:
                j = target_classes.index(dx)
                labels[i, j] = 1
                flag [0 ,count] = True

            count += 1

    def __del__(self):
        self.f.close()

In [83]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import keras_nlp as knlp
from tensorflow.keras.layers import LayerNormalization, Dropout, Dense, Add, Input

def vgg_block(input, cnn_units):
    output = keras.layers.Conv1D(cnn_units, 3, padding='same', activation='relu')(input)
    output = keras.layers.BatchNormalization()(output)
    output = keras.layers.Conv1D(cnn_units, 3, padding='same', activation='relu')(output)
    output = keras.layers.BatchNormalization()(output)
    output = keras.layers.MaxPooling1D(2, padding='same')(output)
    return output

def transformer_encoder_block(inputs, att_dim, att_heads, dropout_rate, ff_dim):
    # Pre-Layer Normalization
    norm_input = LayerNormalization()(inputs)
    # Multi-head attention
    attention_output = knlp.layers.TransformerEncoder(att_dim, att_heads, dropout=dropout_rate)(norm_input)
    attention_output = Dropout(dropout_rate)(attention_output)
    # Add & Norm
    output = Add()([inputs, attention_output])
    output = LayerNormalization()(output)
    # Feed-forward
    ff_output = Dense(ff_dim, activation='relu')(output)
    ff_output = Dense(inputs.shape[-1])(ff_output)
    ff_output = Dropout(dropout_rate)(ff_output)
    # Add & Norm
    output = Add()([output, ff_output])
    return LayerNormalization()(output)


def get_crt_model(n_classes, cnn_units=64, vgg_blocks=2, rnn_units=64, transformer_encoders=2, 
                  att_dim=64, att_heads=8, dropout_rate=0.2, fnn_units=64, ff_dim=256):
    
    input_layer = keras.Input(shape=(4096, 12))
    output = input_layer

    classes3 = list()
    for x in classes2:
        if x not in classes3:
            classes3.append(x)

    output = keras.layers.Bidirectional(keras.layers.GRU(rnn_units, return_sequences=True))(output)

    if transformer_encoders > 0:
        # Dynamic Position Encoding
        dynamic_wavelength = compute_dynamic_wavelength(output.shape[1])  # ensure this function is defined
        positional_encoding = knlp.layers.SinePositionEncoding(max_wavelength=dynamic_wavelength)(output)
        output = Add()([output, positional_encoding])
        output = LayerNormalization()(output)

        for _ in range(transformer_encoders):
            output = transformer_encoder_block(output, att_dim, att_heads, dropout_rate, ff_dim)

        output = keras.layers.GlobalAveragePooling1D()(output)

    output = Dropout(0.2)(output)
    output = keras.layers.Dense(fnn_units, activation='relu')(output)
    output = keras.layers.Dense(fnn_units // 2, activation='relu')(output)
    output = keras.layers.Dense(n_classes, activation='softmax')(output)
    
    return keras.Model(inputs=input_layer, outputs=output)

In [84]:
val_split = 0.02
lr = 0.0001
batch_size = 32

def train():
    opt = tf.keras.optimizers.Adam(lr, clipnorm=1.0)
    callbacks = [tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                    factor=0.1,
                                    patience=7,
                                    min_lr=lr / 100),
                    tf.keras.callbacks.EarlyStopping(patience=9,  # Patience should be larger than the one in ReduceLROnPlateau
                                min_delta=0.00001)]

filepath = os.path.join('output', "model.h5")

    # If you are continuing an interrupted section, uncomment line bellow:
    #model = keras.models.load_model(PATH_TO_PREV_MODEL, compile=False)
    model = get_crt_model(train_seq.n_classes)
    model.compile(loss=tf.keras.losses.CategoricalCrossentropy(), optimizer=opt)
    # Create log
    callbacks += [tf.keras.callbacks.TensorBoard(log_dir='./logs', write_graph=False),
                    tf.keras.callbacks.CSVLogger('training.log', append=False)]  # Change append to true if continuing training
    # Save the BEST and LAST model
    #    callbacks += [tf.keras.callbacks.ModelCheckpoint('./backup_model_last.hdf5', overwrite=True),
    #                  tf.keras.callbacks.ModelCheckpoint('./backup_model_best.hdf5', save_best_only=True, overwrite=True)]
    # Train neural network
    history = model.fit(train_seq, 
                        epochs=70,
                        initial_epoch=0,  # If you are continuing a interrupted section change here
                        #callbacks=callbacks,
                        validation_data=valid_seq,
                        verbose=1)
    # Save final result
    model.save(path_to_model)

In [85]:
train()

Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
Epoch 26/70
Epoch 27/70
Epoch 28/70
Epoch 29/70
Epoch 30/70
Epoch 31/70
Epoch 32/70
Epoch 33/70
Epoch 34/70
Epoch 35/70
Epoch 36/70
Epoch 37/70
Epoch 38/70
Epoch 39/70
Epoch 40/70
Epoch 41/70
Epoch 42/70
Epoch 43/70
Epoch 44/70
Epoch 45/70
Epoch 46/70
Epoch 47/70
Epoch 48/70
Epoch 49/70
Epoch 50/70
Epoch 51/70
Epoch 52/70
Epoch 53/70
Epoch 54/70
Epoch 55/70
Epoch 56/70
Epoch 57/70
Epoch 58/70
Epoch 59/70
Epoch 60/70
Epoch 61/70
Epoch 62/70
Epoch 63/70
Epoch 64/70
Epoch 65/70
Epoch 66/70
Epoch 67/70
Epoch 68/70
Epoch 69/70
Epoch 70/70




INFO:tensorflow:Assets written to: final_model.hf5\assets


INFO:tensorflow:Assets written to: final_model.hf5\assets


In [86]:
import numpy as np
import warnings
import argparse
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam

output_file = "out"

def evaluate():
    seq = ECGSequence(path_to_hdf5, dataset_name, batch_size=batch_size)
    # Import model
    model = load_model(path_to_model, compile=False)
    model.compile(loss='binary_crossentropy', optimizer=Adam())
    y_score = model.predict(seq,  verbose=1)

    # Generate dataframe
    np.save("predict_outputs", y_score)

    print("Output predictions saved")

In [87]:
evaluate()

Output predictions saved


In [88]:
data = np.load('predict_outputs.npy')
print(data)

[[0.09314 0.09314 0.2532  0.2532  0.1536  0.1536 ]
 [0.09314 0.09314 0.2532  0.2532  0.1536  0.1536 ]
 [0.09314 0.09314 0.2532  0.2532  0.1536  0.1536 ]
 ...
 [0.09314 0.09314 0.2532  0.2532  0.1536  0.1536 ]
 [0.09314 0.09314 0.2532  0.2532  0.1536  0.1536 ]
 [0.09314 0.09314 0.2532  0.2532  0.1536  0.1536 ]]
