In [10]:
import h5py
import math
import pandas as pd
from tensorflow.keras.utils import Sequence
import numpy as np

class ECGSequence(Sequence):
    """Keras ``Sequence`` that serves batches from a 3-D HDF5 dataset.

    Batches are sliced lazily from the HDF5 dataset on each ``__getitem__``
    call; labels (optional) are loaded eagerly from a CSV file. Only the rows
    in ``[start_idx, end_idx)`` are exposed, which is how the train/validation
    split is implemented.

    NOTE(review): rows of the CSV are assumed to be aligned one-to-one with
    rows of the HDF5 dataset; nothing here verifies that the two have the same
    length -- confirm against the data files.
    """

    @classmethod
    def get_train_and_val(cls, path_to_hdf5, hdf5_dset, path_to_csv, batch_size=8, val_split=0.02):
        """Build a (train, validation) pair of sequences over the same files.

        The first ``ceil(n_samples * (1 - val_split))`` rows go to training,
        where ``n_samples`` is the number of rows in the CSV; the remaining
        rows (up to the end of the HDF5 dataset) go to validation.

        Returns:
            tuple: ``(train_seq, valid_seq)``.
        """
        n_samples = len(pd.read_csv(path_to_csv))
        n_train = math.ceil(n_samples*(1-val_split))
        train_seq = cls(path_to_hdf5, hdf5_dset, path_to_csv, batch_size, end_idx=n_train)
        valid_seq = cls(path_to_hdf5, hdf5_dset, path_to_csv, batch_size, start_idx=n_train)
        return train_seq, valid_seq

    def __init__(self, path_to_hdf5, hdf5_dset, path_to_csv=None, batch_size=8,
                 start_idx=0, end_idx=None):
        """Open the HDF5 file and (optionally) load the labels.

        Args:
            path_to_hdf5: path of the HDF5 file, opened read-only.
            hdf5_dset: name of the dataset inside the HDF5 file.
            path_to_csv: optional CSV with one label row per sample. When
                ``None`` the sequence yields inputs only.
            batch_size: maximum number of samples per batch.
            start_idx: first row (inclusive) served by this sequence.
            end_idx: last row (exclusive); defaults to the dataset length.
        """
        # Newer Keras versions expect the base-class initializer to run.
        super().__init__()
        # Define the handle before any I/O so __del__ stays safe if reading
        # the CSV or opening the HDF5 file raises.
        self.f = None
        if path_to_csv is None:
            self.y = None
        else:
            self.y = pd.read_csv(path_to_csv).values
        # Get tracings
        self.f = h5py.File(path_to_hdf5, "r")
        self.x = self.f[hdf5_dset]
        self.batch_size = batch_size
        if end_idx is None:
            end_idx = len(self.x)
        self.start_idx = start_idx
        self.end_idx = end_idx

    @property
    def n_classes(self):
        """Number of label columns in the CSV.

        Raises:
            AttributeError: if the sequence was built without a label CSV.
        """
        if self.y is None:
            raise AttributeError("n_classes is unavailable: no label CSV was provided")
        return self.y.shape[1]

    def __getitem__(self, idx):
        """Return batch ``idx``: inputs only, or ``(inputs, labels)`` when labels exist."""
        start = self.start_idx + idx * self.batch_size
        # The last batch may be shorter than batch_size.
        end = min(start + self.batch_size, self.end_idx)
        if self.y is None:
            return np.array(self.x[start:end, :, :])
        else:
            return np.array(self.x[start:end, :, :]), np.array(self.y[start:end])

    def __len__(self):
        """Number of batches; never negative, even for an empty or inverted range."""
        return max(0, math.ceil((self.end_idx - self.start_idx) / self.batch_size))

    def __del__(self):
        """Close the HDF5 file if it was successfully opened."""
        handle = getattr(self, "f", None)
        if handle is not None:
            handle.close()

In [11]:
import tensorflow as tf
import numpy as np
from tensorflow import keras
import keras_nlp as knlp

# Input data and output artifact locations, relative to the working directory.
path_to_hdf5 = "data/ecg_tracings.hdf5"  # HDF5 file opened by ECGSequence
dataset_name = "tracings"  # name of the dataset inside the HDF5 file
path_to_csv = "data/annotations/gold_standard.csv"  # label CSV read by ECGSequence
# NOTE(review): ".hf5" is not one of the usual Keras file extensions
# (".h5", ".hdf5", ".keras"); presumably ".hdf5" was intended -- confirm before
# changing, since train() saves to and evaluate() loads from this path.
path_to_model = "final_model.hf5"

def vgg_block(input, cnn_units):
    """Apply one VGG-style stage to a sequence tensor.

    The stage is two (Conv1D -> BatchNormalization) pairs followed by a
    max-pooling layer with pool size 2.

    Args:
        input: Keras tensor to transform.
        cnn_units: number of filters in each convolution.

    Returns:
        The Keras tensor produced by the pooling layer.
    """
    features = input
    # Two identical kernel-size-3 'same'-padded ReLU convolutions, each
    # followed by batch normalization.
    for _ in range(2):
        features = keras.layers.Conv1D(cnn_units, 3, padding='same', activation='relu')(features)
        features = keras.layers.BatchNormalization()(features)
    return keras.layers.MaxPooling1D(2, padding='same')(features)

def get_crt_model(n_classes,
                  cnn_units=64,
                  vgg_blocks=2,
                  rnn_units=64,
                  transformer_encoders=2,
                  att_dim=64,
                  att_heads=8, fnn_units=64,
                  input_shape=(4096, 12)):
    """Build the CNN -> bidirectional GRU -> Transformer classifier.

    Pipeline: ``vgg_blocks`` VGG-style convolution stages, a bidirectional GRU
    whose two directions are summed, an optional stack of Transformer encoders
    (with sine positional encoding added first), global average pooling over
    time, dropout, two dense layers and a softmax output.

    Args:
        n_classes: width of the final softmax layer.
        cnn_units: filters per convolution in each VGG block.
        vgg_blocks: number of VGG blocks to stack.
        rnn_units: GRU units per direction; also the feature width fed to the
            Transformer encoders because the directions are summed.
        transformer_encoders: number of Transformer encoder layers (0 skips
            the positional encoding and the encoders).
        att_dim: passed as the first positional argument of
            ``TransformerEncoder`` (its ``intermediate_dim``, i.e. the
            feed-forward hidden size).
        att_heads: number of attention heads per encoder.
        fnn_units: units in each of the two hidden dense layers.
        input_shape: per-sample input shape ``(timesteps, channels)``.

    Returns:
        An uncompiled ``keras.Model``.

    NOTE(review): the softmax output treats classes as mutually exclusive;
    train() pairs it with CategoricalCrossentropy while evaluate() compiles
    with binary_crossentropy -- confirm the labels are one-hot.
    """
    inputs = keras.Input(shape=input_shape)
    output = inputs

    for _ in range(vgg_blocks):
        output = vgg_block(output, cnn_units)

    output = keras.layers.Bidirectional(keras.layers.GRU(rnn_units, return_sequences=True), merge_mode='sum')(output)

    if transformer_encoders > 0:
        output = output + knlp.layers.SinePositionEncoding(max_wavelength=10000)(output)

        for _ in range(transformer_encoders):
            output = knlp.layers.TransformerEncoder(att_dim, att_heads)(output)

    # Pool over time unconditionally: previously this sat inside the branch
    # above, so with transformer_encoders=0 the dense head ran per timestep
    # and the model emitted a 3-D output instead of (batch, n_classes).
    output = keras.layers.GlobalAveragePooling1D()(output)

    output = keras.layers.Dropout(0.2)(output)
    output = keras.layers.Dense(fnn_units, activation='relu')(output)
    output = keras.layers.Dense(fnn_units, activation='relu')(output)

    output = keras.layers.Dense(n_classes, activation='softmax')(output)
    return keras.Model(inputs=inputs, outputs=output)


In [12]:
# Hyperparameters read as globals by train(); batch_size is also used by evaluate().
val_split = 0.02  # fraction of the samples held out for validation
lr = 0.001  # initial Adam learning rate; ReduceLROnPlateau floors it at lr / 100
batch_size = 64  # samples per batch for both training and prediction

def train():
    """Train the model on the configured dataset and save it to ``path_to_model``.

    Reads the module-level settings ``lr``, ``batch_size``, ``val_split``,
    ``path_to_hdf5``, ``dataset_name``, ``path_to_csv`` and ``path_to_model``.
    Writes TensorBoard logs to ``./logs`` and per-epoch metrics to
    ``training.log``.

    Returns:
        The Keras ``History`` object returned by ``model.fit`` (previously it
        was assigned to a local and discarded).
    """
    opt = tf.keras.optimizers.Adam(lr)
    callbacks = [
        tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                             factor=0.1,
                                             patience=7,
                                             min_lr=lr / 100),
        # Patience should be larger than the one in ReduceLROnPlateau
        tf.keras.callbacks.EarlyStopping(patience=9, min_delta=0.00001),
    ]

    train_seq, valid_seq = ECGSequence.get_train_and_val(
        path_to_hdf5, dataset_name, path_to_csv, batch_size, val_split)

    # If you are continuing an interrupted session, load the previous model
    # here instead of building a new one, e.g.:
    #   model = keras.models.load_model(PATH_TO_PREV_MODEL, compile=False)
    model = get_crt_model(train_seq.n_classes)
    model.compile(loss=tf.keras.losses.CategoricalCrossentropy(), optimizer=opt)

    # Create logs. Change append to True if continuing training.
    callbacks += [tf.keras.callbacks.TensorBoard(log_dir='./logs', write_graph=False),
                  tf.keras.callbacks.CSVLogger('training.log', append=False)]
    # To also keep the last/best model during training, append
    # tf.keras.callbacks.ModelCheckpoint callbacks to `callbacks` here.

    # Train neural network
    history = model.fit(train_seq,
                        epochs=70,
                        initial_epoch=0,  # Change here if continuing an interrupted session
                        callbacks=callbacks,
                        validation_data=valid_seq,
                        verbose=1)
    # Save final result
    model.save(path_to_model)
    return history

Epoch 1/70
Epoch 2/70
 1/13 [=>............................] - ETA: 37s - loss: 0.1959

KeyboardInterrupt: 

In [None]:
# Launch training: writes TensorBoard logs to ./logs, a CSV log to training.log,
# and saves the final model to path_to_model.
train()

In [13]:
import numpy as np
import warnings
import argparse
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam

# NOTE(review): not referenced by any visible cell -- evaluate() saves to the
# hard-coded name "predict_outputs" instead; confirm whether this was meant to
# be the save path.
output_file = "out"

def evaluate():
    """Run the saved model over every tracing and store the scores on disk.

    Reads the module-level ``path_to_hdf5``, ``dataset_name``, ``batch_size``
    and ``path_to_model``. No label CSV is given, so the sequence yields
    inputs only. The prediction array is written with ``np.save`` under the
    name "predict_outputs".
    """
    tracings = ECGSequence(path_to_hdf5, dataset_name, batch_size=batch_size)

    # Load the architecture and weights without the training configuration,
    # then compile with a fresh optimizer before predicting.
    net = load_model(path_to_model, compile=False)
    net.compile(optimizer=Adam(), loss='binary_crossentropy')
    predictions = net.predict(tracings, verbose=1)

    # Persist the raw prediction array.
    np.save("predict_outputs", predictions)

    print("Output predictions saved")

In [None]:
# Run inference with the saved model and write the predictions to disk.
evaluate()