In [1]:
import argparse
import os

os.environ['CUDA_VISIBLE_DEVICES'] = ''
# os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

import warnings

import biosppy as bp
import numpy as np
import pandas as pd
import scipy.io
import tensorflow as tf

from sklearn import metrics, preprocessing
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import (LSTM, Activation, BatchNormalization,
                                     Conv1D, Dense, Dropout, Embedding,
                                     Flatten, GlobalAveragePooling1D,
                                     MaxPooling1D)
from tensorflow.keras.models import Sequential, model_from_yaml
from tqdm.auto import tqdm
import eco2ai

warnings.filterwarnings("ignore")

from tensorflow.keras import backend as K

def recall_m(y_true, y_pred):
    """Recall metric built from Keras backend ops.

    Both the element-wise product ``y_true * y_pred`` and ``y_true`` are
    clipped to [0, 1] and rounded before being summed, so every entry
    contributes either 0 or 1 to the counts.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    actual = K.round(K.clip(y_true, 0, 1))
    # K.epsilon() keeps the ratio defined when there are no positive labels.
    return K.sum(hits) / (K.sum(actual) + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision metric built from Keras backend ops.

    Both the element-wise product ``y_true * y_pred`` and ``y_pred`` are
    clipped to [0, 1] and rounded before being summed, so every entry
    contributes either 0 or 1 to the counts.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted = K.round(K.clip(y_pred, 0, 1))
    # K.epsilon() keeps the ratio defined when nothing is predicted positive.
    return K.sum(hits) / (K.sum(predicted) + K.epsilon())

def f1(y_true, y_pred):
    """F1 metric: harmonic mean of ``precision_m`` and ``recall_m``.

    K.epsilon() is added to the denominator so the result stays defined
    when precision and recall are both zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    harmonic_half = (p * r) / (p + r + K.epsilon())
    return 2 * harmonic_half


def preprocess(fp, max_len=18286, sampling_rate=300.):
    """Load one ECG record from a .mat file and turn it into a model input.

    Steps: read the ``'val'`` entry of the .mat file, run it through
    ``bp.signals.ecg.ecg``, pad with zeros or truncate to ``max_len`` samples,
    take the first difference, and standardize along the time axis.

    Parameters
    ----------
    fp : str
        Path of the .mat file to load.
    max_len : int, optional
        Number of samples every record is padded or truncated to.
    sampling_rate : float, optional
        Sampling rate passed to the biosppy ECG pipeline.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, max_len, 1)``.
    """
    mat = scipy.io.loadmat(fp)

    # Collapse the stored array to a flat float vector.
    raw = np.asarray(mat['val'], dtype=float).flatten()
    out = bp.signals.ecg.ecg(signal=raw, sampling_rate=sampling_rate, show=False)

    # Second element of the biosppy result; presumably the filtered signal
    # (confirm against the biosppy return tuple).
    filtered = np.asarray(out[1])

    # Bring every record to exactly max_len samples.
    length = filtered.shape[0]
    if length < max_len:
        fixed = np.concatenate([filtered, np.zeros(max_len - length)])
    else:
        fixed = filtered[:max_len]

    # First difference along time, laid out as a single row. The first sample
    # has no predecessor, so its difference is defined as 0.
    X = np.zeros((1, max_len), dtype=np.float32)
    X[0, 1:] = np.diff(fixed)

    # Any NaN coming from the signal itself is replaced by 0.
    X[np.isnan(X)] = 0

    # Standardize the row, then add a trailing channel axis.
    X_train = preprocessing.scale(X, axis=1)
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)

    return X_train

class DataGenerator(tf.keras.utils.Sequence):
  """Keras ``Sequence`` that serves ``(x, y)`` batches from in-memory arrays.

  The sample indices are split with ``np.array_split`` into
  ``ceil(len(x_data) / batch_size)`` groups, so batches are of near-equal
  size and never larger than ``batch_size``. ``x_data`` and ``y_data`` must
  support indexing with an integer index array (e.g. NumPy arrays).
  """

  def __init__(self, x_data, y_data, batch_size):
    self.x, self.y = x_data, y_data
    self.batch_size = batch_size
    n_samples = len(x_data)
    # Keep the batch count as a plain int rather than a NumPy float.
    self.num_batches = int(np.ceil(n_samples / batch_size))
    if self.num_batches > 0:
      self.batch_idx = np.array_split(np.arange(n_samples), self.num_batches)
    else:
      # np.array_split rejects 0 sections; an empty dataset has no batches.
      self.batch_idx = []

  def __len__(self):
    """Number of batches per epoch."""
    return len(self.batch_idx)

  def __getitem__(self, idx):
    """Return the ``(batch_x, batch_y)`` pair for batch number ``idx``."""
    rows = self.batch_idx[idx]
    return self.x[rows], self.y[rows]

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class args:
    """Static run configuration, read as class attributes (e.g. ``args.batch_size``)."""

    # Upper bound on training epochs; lowered to 2 when debug mode is active.
    max_epochs = 100
    # Batch size handed to every DataGenerator.
    batch_size = 32
    # Directory used for the checkpoint and TensorBoard callbacks.
    log_dir = './logs'
    # 0 disables debug mode; a positive N subsamples each split to N rows.
    debug = 0

In [3]:
import time

In [4]:
# Processing time: the timed region runs from reading the label CSVs to
# building the batch generators.
start_processing = time.time()

# Dataset locations, defined once instead of being repeated in every path.
DATASET_DIR = '/home/huypham/Projects/ecg/dataset/cinc2017'
PROCESSED_DIR = os.path.join(DATASET_DIR, 'processed')
RAW_DIR = os.path.join(DATASET_DIR, 'raw/training')

train_df = pd.read_csv(os.path.join(PROCESSED_DIR, 'y_train.csv'))
val_df = pd.read_csv(os.path.join(PROCESSED_DIR, 'y_val.csv'))
test_df = pd.read_csv(os.path.join(PROCESSED_DIR, 'y_test.csv'))

# Debug mode: shorten training and keep only `args.debug` random rows per split.
if args.debug:
    args.max_epochs = 2
    train_df = train_df.sample(args.debug)
    val_df = val_df.sample(args.debug)
    test_df = test_df.sample(args.debug)


# All three splits read their records from the same raw directory; the `idx`
# column holds the record name without the '.mat' extension.
train_file = train_df.idx.apply(lambda x: os.path.join(RAW_DIR, x) + '.mat')
val_file = val_df.idx.apply(lambda x: os.path.join(RAW_DIR, x) + '.mat')
test_file = test_df.idx.apply(lambda x: os.path.join(RAW_DIR, x) + '.mat')


# One preprocessed array per record.
train_features = [preprocess(fp) for fp in tqdm(train_file, desc='train')]
val_features = [preprocess(fp) for fp in tqdm(val_file, desc='val')]
test_features = [preprocess(fp) for fp in tqdm(test_file, desc='test')]

# Stack the per-record arrays along the first (sample) axis.
X_train = np.concatenate(train_features, axis=0)
X_val = np.concatenate(val_features, axis=0)
X_test = np.concatenate(test_features, axis=0)

# Every column except `idx` is used as a target column.
y_train = train_df.drop(columns='idx').to_numpy()
y_val = val_df.drop(columns='idx').to_numpy()
y_test = test_df.drop(columns='idx').to_numpy()

train_generator = DataGenerator(X_train, y_train, batch_size=args.batch_size)
val_generator = DataGenerator(X_val, y_val, batch_size=args.batch_size)
test_generator = DataGenerator(X_test, y_test, batch_size=args.batch_size)

end_processing = time.time()

train: 100%|███████████████████████████████████████████████████████████████████████████████████████████| 5116/5116 [01:13<00:00, 69.62it/s]
val: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 1706/1706 [00:24<00:00, 69.42it/s]
test: 100%|████████████████████████████████████████████████████████████████████████████████████████████| 1706/1706 [00:24<00:00, 69.74it/s]


In [5]:
# Number of records in the train / val / test splits.
tuple(map(len, (train_file, val_file, test_file)))

(5116, 1706, 1706)

In [6]:
# The timed region covers model construction, weight loading, prediction and
# evaluation on the test set.
start_infer = time.time()

# Rebuild the architecture from its YAML description; the context manager
# closes the file even if reading fails.
# NOTE(review): model_from_yaml was dropped from newer TensorFlow releases
# over unsafe YAML deserialization -- only load trusted files, and confirm the
# installed TensorFlow version still provides it.
with open("/home/huypham/Projects/ecg/tmp/cinc2017_ruhi/model_v6v9_layer_cnn.yaml", "r") as yaml_file:
    loaded_model_yaml = yaml_file.read()
model = model_from_yaml(loaded_model_yaml)

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy', 'AUC', f1]
)
# print(model.summary())


# Training callbacks; only needed when the commented-out fit call below is enabled.
early_stopping = EarlyStopping(monitor='val_f1', patience=5, verbose=1, mode='max', restore_best_weights=True)
best_checkpoint = ModelCheckpoint(os.path.join(args.log_dir, 'model.hdf5'), save_best_only=True, monitor='val_f1', mode='max')
tsb = tf.keras.callbacks.TensorBoard(log_dir=args.log_dir)

# model.fit_generator(
#     train_generator,
#     epochs=args.max_epochs,
#     validation_data=val_generator,
#     callbacks=[early_stopping, best_checkpoint, tsb]
# )

# Load the best
model.load_weights('/home/huypham/Projects/ecg/tmp/cinc2017_ruhi/logs/model.hdf5')
probs = model.predict(test_generator)
# Threshold each probability independently at 0.5 to get 0/1 labels.
labels = np.where(probs > 0.5, 1, 0)
# test_f1 = metrics.f1_score(y_test, labels, average='micro')
# np.save(os.path.join(args.log_dir, 'test_probs'), probs)

# evaluate returns the loss followed by the compiled metrics, in order.
test_loss, test_accuracy, test_auc, test_f1 = model.evaluate(test_generator)
print('Test:')
print('\tLoss:', test_loss)
print('\tAccuracy:', test_accuracy)
print('\tAUC:', test_auc)
print('\tF1:', test_f1)

end_infer = time.time()

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
If using Keras pass *_constraint arguments to layers.


2023-06-17 22:09:46.575419: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2023-06-17 22:09:46.579710: E tensorflow/stream_executor/cuda/cuda_driver.cc:318] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2023-06-17 22:09:46.579726: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: huypc
2023-06-17 22:09:46.579729: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: huypc
2023-06-17 22:09:46.579780: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 530.41.3
2023-06-17 22:09:46.579793: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 530.41.3
2023-06-17 22:09:46.579797: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:310] kernel version seems to match DSO: 530.41.3
2023-06-17 22:09:46.579944: I tensorflow/core/platform/cpu_feature_guard

Test:
	Loss: 0.42606900190865554
	Accuracy: 0.8575615
	AUC: 0.9671117
	F1: 0.84881467


In [7]:
# Mean preprocessing wall-clock time per record, over all three splits.
processing_time = (end_processing - start_processing) / sum(map(len, (train_file, val_file, test_file)))
print('Processing Time:', processing_time)

Processing Time: 0.014379045305735175


In [8]:
# Mean wall-clock time per test record for the timed inference cell.
infer_time = (end_infer - start_infer) / len(test_file)
print('Infer Time:', infer_time)

Infer Time: 0.06598330158980482


In [15]:
import joblib
# Persist the ground-truth labels and thresholded predictions together.
joblib.dump(
    value=[y_test, labels],
    filename='/home/huypham/Projects/ecg/tmp/cinc2017_ruhi/y_test_label.joblib',
)

['/home/huypham/Projects/ecg/tmp/cinc2017_ruhi/y_test_label.joblib']

In [16]:
from imblearn import metrics as imetrics
def calculate_sensitivity_specificity(y_true, y_pred):
    """Print and return support-weighted sensitivity and specificity.

    Each label column is scored as its own binary problem with imblearn's
    ``sensitivity_score`` / ``specificity_score``; the per-column scores are
    then averaged, weighted by the column sums of ``y_true``.

    Parameters
    ----------
    y_true : array-like of shape (n_samples, n_labels)
        Ground-truth 0/1 indicators.
    y_pred : array-like of shape (n_samples, n_labels)
        Predicted 0/1 indicators.

    Returns
    -------
    tuple
        ``(sensitivity, specificity)`` weighted averages.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    supports = []
    sensitivity = []
    specificity = []

    for col in range(y_true.shape[1]):
        true = y_true[:, col]
        pred = y_pred[:, col]
        # Support = column sum of the ground truth for this label.
        supports.append(true.sum())
        sensitivity.append(imetrics.sensitivity_score(true, pred))
        specificity.append(imetrics.specificity_score(true, pred))

    sens = np.average(sensitivity, weights=supports)
    spec = np.average(specificity, weights=supports)
    print('Sensitivity:', sens)
    print('Specificity:', spec)
    return sens, spec

# Score the thresholded test predictions against the ground truth.
calculate_sensitivity_specificity(y_true=y_test, y_pred=labels)

Sensitivity: 0.8264947245017585
Specificity: 0.8567247228104179


(0.8264947245017585, 0.8567247228104179)

In [17]:
# Sum of the per-record processing time and inference time printed by the
# earlier cells, entered by hand as truncated values.
0.01437+0.06598

0.08035