In [23]:
import matplotlib.pyplot as plt
import os
import numpy as np
import shutil
import pandas as pd

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error

import tensorflow as tf
import keras_tuner as kt
from tensorflow.keras.models import Sequential, load_model # type: ignore
from tensorflow.keras.layers import Conv1D, Input, Dense, Dropout, GlobalMaxPooling1D, LayerNormalization, BatchNormalization, MaxPooling1D # type: ignore
from tensorflow.keras.optimizers import Adam # type: ignore
from tensorflow.keras.losses import BinaryCrossentropy # type: ignore
from tensorflow.keras.metrics import BinaryAccuracy # type: ignore
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau # type: ignore

print("Num GPUs Available: ", len(tf.config.list_physical_devices("GPU")))

FOLDER_FILTER = '../24h-ecg-normalized-filter/'
MODEL = "../model/"
FOLDER_STROKE_MODEL = "stroke-model/"
FOLDER_STROKE_MODEL_TUNING = "stroke_model_tuning/"
LABEL_FILE = "label.txt"
DATASET_STROKE = "../24h-ecg-quality"
DATASET_STROKE_UNQUALITY = "../24h-ecg-unquality"
METADATA = '../subjects.xlsx'

Num GPUs Available:  0


# Predict all rest dataset in FOLDER_FILTER

In [None]:
label_after_qc= os.path.join(FOLDER_FILTER, "label.txt")
loaded_model = load_model(os.path.join(MODEL, "qc_model.keras"))

with open(label_after_qc, 'w') as label_file:
    for file_name in os.listdir(FOLDER_FILTER):
        file_path = os.path.join(FOLDER_FILTER, file_name)
        
        if file_name.endswith('p_signal.txt'):
            with open(file_path, 'r') as signal_file:
                signal = np.loadtxt(signal_file)
                prediction = loaded_model.predict(np.expand_dims(signal, axis=0))
                label_file.write(f"{file_name} {prediction}\n")

print("Predictions complete and written to label.txt.")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3

# Copy all qualified datasets to QUALITY_DATASET

In [None]:
folders = {
    True: DATASET_STROKE,
    False: DATASET_STROKE_UNQUALITY
}

with open(label_after_qc, 'r') as label_file:
    for line in label_file:
        file_name, label = line.strip().split()
        label = float(label.strip('[]'))
        
        source_file_path = os.path.join(FOLDER_FILTER, file_name)
        destination_folder = folders[label > 0.5]
        destination_file_path = os.path.join(destination_folder, file_name)
        
        shutil.copy2(source_file_path, destination_file_path)

print("Qualified segments have been moved to equivalent folder!")

Qualified segments have been moved to equivalent folder!


# Write label file from subject.xlsx

In [4]:
label_file = os.path.join(DATASET_STROKE, LABEL_FILE)
label_file_unquality = os.path.join(DATASET_STROKE_UNQUALITY, LABEL_FILE)

In [16]:
label_file = os.path.join(DATASET_STROKE, LABEL_FILE)
label_file_unquality = os.path.join(DATASET_STROKE_UNQUALITY, LABEL_FILE)

if os.path.exists(label_file):
    os.remove(label_file)
    
if os.path.exists(label_file_unquality):
    os.remove(label_file_unquality)
    
def write_labels(dataset_path, label_file_path, metadata):
    with open(label_file_path, 'w') as f:
        for file_name in os.listdir(dataset_path):
            if file_name.endswith('_signal.txt'):
                patient_code = file_name.split('-')[0].upper()
                patient_data = metadata[metadata['subject_number'].str.upper() == patient_code]
                if not patient_data.empty:
                    patient_info = patient_data.iloc[0]
                    label_info = [
                        patient_info['GROUP'],
                        # patient_info['GROUP_TYPE'],
                        # patient_info['STROKE YR PATIENT MEDICAL HISTORY'],
                        patient_info['STROKE GROUP'],
                        # patient_info['STROKE SIDE'],
                        # patient_info['STROKE ETIOLOGY']
                    ]
                    f.write(f"{file_name} {' '.join(map(str, label_info))}\n")
                else:
                    print(file_name)

metadata = pd.read_excel(METADATA, sheet_name='subjects')
write_labels(DATASET_STROKE, label_file, metadata)
print("_________________")
write_labels(DATASET_STROKE_UNQUALITY, label_file_unquality, metadata)

print("Label files created successfully.")

s0268-06060109_44_p_signal.txt
s0268-06060109_11_p_signal.txt
s0268-06060109_43_p_signal.txt
s0268-06060109_16_p_signal.txt
s0268-06060109_8_p_signal.txt
s0268-06060109_56_p_signal.txt
s0268-06060109_51_p_signal.txt
s0268-06060109_67_p_signal.txt
s0268-06060109_32_p_signal.txt
s0268-06060109_98_p_signal.txt
s0268-06060109_60_p_signal.txt
s0268-06060109_35_p_signal.txt
s0268-06060109_75_p_signal.txt
s0268-06060109_20_p_signal.txt
s0268-06060109_72_p_signal.txt
s0268-06060109_27_p_signal.txt
s0268-06060109_50_p_signal.txt
s0268-06060109_57_p_signal.txt
s0268-06060109_17_p_signal.txt
s0268-06060109_10_p_signal.txt
s0268-06060109_45_p_signal.txt
s0268-06060109_9_p_signal.txt
s0268-06060109_26_p_signal.txt
s0268-06060109_73_p_signal.txt
s0268-06060109_21_p_signal.txt
s0268-06060109_74_p_signal.txt
s0268-06060109_100_p_signal.txt
s0268-06060109_34_p_signal.txt
s0268-06060109_61_p_signal.txt
s0268-06060109_99_p_signal.txt
s0268-06060109_33_p_signal.txt
s0268-06060109_66_p_signal.txt
s0268-060

# Building Stroke Model

In [17]:
input_dim = (6000, 5)

def load_data(data_folder, label_file):
    labels = {}
    with open(label_file, 'r') as f:
        for line in f:
            parts = line.strip().split()
            filename = parts[0]
            label = list(map(float, parts[1:]))
            labels[filename] = label
    
    signals = []
    signal_labels = []
    for filename in os.listdir(data_folder):
        if filename.endswith('_signal.txt'):
            file_path = os.path.join(data_folder, filename)
            signal = np.loadtxt(file_path).reshape(input_dim)
            if filename in labels:
                signals.append(signal)
                signal_labels.append(labels[filename])
    
    return np.array(signals), np.array(signal_labels)

label_file = os.path.join(DATASET_STROKE, LABEL_FILE)
X, y = load_data(DATASET_STROKE, label_file)

In [18]:
print(X.shape, y.shape)
for i in range(10):
    print(X[0][i], y[i])

(16619, 6000, 5) (16619, 2)
[ 3.69290573e-02  4.11215188e-02 -4.19246147e-03  5.25394991e-05
  3.10499791e-07] [0. 0.]
[ 2.91545190e-02  4.16469138e-02 -1.24923948e-02  5.56444970e-05
  4.74214794e-07] [0. 0.]
[ 2.52672498e-02  4.22344087e-02 -1.69671590e-02  6.20237950e-05
  6.53971971e-07] [0. 0.]
[ 2.42954325e-02  4.28873897e-02 -1.85919572e-02  6.87239364e-05
  6.84156079e-07] [1. 1.]
[ 2.52672498e-02  4.36088875e-02 -1.83416377e-02  7.57069166e-05
  7.10384661e-07] [1. 1.]
[ 2.62390671e-02  4.44015280e-02 -1.81624610e-02  8.29316296e-05
  7.32478487e-07] [1. 1.]
[ 2.72108844e-02  4.52675201e-02 -1.80566357e-02  9.03564863e-05
  7.50439954e-07] [0. 0.]
[ 2.91545190e-02  4.62086578e-02 -1.70541388e-02  9.79404287e-05
  7.64338057e-07] [0. 0.]
[ 3.01263362e-02  4.72263286e-02 -1.70999924e-02  1.05643247e-04
  7.74242038e-07] [1. 1.]
[ 3.10981535e-02  4.83215227e-02 -1.72233692e-02  1.13425269e-04
  7.80141246e-07] [0. 0.]


In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

In [29]:

checkpoint_path = os.path.join(MODEL + FOLDER_STROKE_MODEL, "stroke_checkpoint.keras")
model_path = os.path.join(MODEL + FOLDER_STROKE_MODEL, "stroke_model.keras")

if os.path.exists(checkpoint_path):
    os.remove(checkpoint_path)

if os.path.exists(model_path):
    os.remove(model_path)
    
epochs = 50
batch_size = 32

def build_model(hp):
    model = Sequential()
    
    for i in range(hp.Int('conv_layers', 1, 3)):
        if i == 0:
            model.add(Conv1D(
                filters=hp.Int(f'filters_{i}', min_value=16, max_value=64, step=4),
                kernel_size=hp.Int(f'kernel_size_{i}', min_value=10, max_value=30, step=1),
                strides=hp.Int(f'strides_{i}', min_value=1, max_value=12, step=1),
                activation='relu',
                input_shape=input_dim
            ))
        else:
            model.add(Conv1D(
                filters=hp.Int(f'filters_{i}', min_value=16, max_value=64, step=4),
                kernel_size=hp.Int(f'kernel_size_{i}', min_value=10, max_value=50, step=4),
                strides=hp.Int(f'strides_{i}', min_value=1, max_value=12, step=1),
                activation='relu'
            ))
        model.add(MaxPooling1D(pool_size=2))
        model.add(Dropout(rate=hp.Float('dropout', 0.2, 0.7, step=0.25)))
    
    model.add(GlobalMaxPooling1D())
    
    model.add(Dense(128, activation='relu', kernel_regularizer='l2'))
    model.add(Dropout(rate=hp.Float('dense_dropout', 0.1, 0.7, step=0.25)))
    
    model.add(Dense(2, activation='sigmoid'))
    
    model.compile(
        optimizer=Adam(learning_rate=hp.Float('learning_rate', 1e-5, 1e-2, sampling='log')),
        loss=BinaryCrossentropy(),
        metrics=[BinaryAccuracy()]
    )
    return model

tuner = kt.Hyperband(
    build_stroke_model,
    objective='binary_accuracy',
    max_epochs=50,
    factor=3,
    directory=MODEL,
    project_name='stroke_model_tuning'
)

model_checkpoint = ModelCheckpoint(checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.00001, verbose=1)
    # verbose=1 -> =====
    # verbose=2 -> Epoch 1/10

callbacks = [model_checkpoint, early_stopping, reduce_lr]

tuner.search(X_train, y_train, epochs=epochs, validation_data=(X_val, y_val), batch_size=batch_size, callbacks=callbacks)

best_model = tuner.get_best_models(num_models=1)[0]

# Evaluate the best model
score = best_model.evaluate(X_test, y_test, verbose=0)
print(f"Test loss: {score[0]}")
print(f"Test accuracy: {score[1]}")

# Save the best model
best_model.save(model_path)

Trial 12 Complete [00h 00m 04s]

Best binary_accuracy So Far: 0.5135388970375061
Total elapsed time: 00h 14m 41s

Search: Running Trial #13

Value             |Best Value So Far |Hyperparameter
24                |42                |num_filters_first_layer
17                |29                |first_kernel_size
8                 |3                 |stride
0.2               |0.45              |first_dropout_rate
54                |92                |num_filters_second_layer
27                |29                |second_kernel_size
0.45              |0.2               |second_dropout_rate
56                |50                |num_filters_third_layer
0.45              |0.2               |dropout_rate_dense
1.4113e-05        |0.00084688        |learning_rate
2                 |2                 |tuner/epochs
0                 |0                 |tuner/initial_epoch
3                 |3                 |tuner/bracket
0                 |0                 |tuner/round

Epoch 1/2


Traceback (most recent call last):
  File "/Users/macad/anaconda3/lib/python3.11/site-packages/keras_tuner/src/engine/base_tuner.py", line 274, in _try_run_and_update_trial
    self._run_and_update_trial(trial, *fit_args, **fit_kwargs)
  File "/Users/macad/anaconda3/lib/python3.11/site-packages/keras_tuner/src/engine/base_tuner.py", line 239, in _run_and_update_trial
    results = self.run_trial(trial, *fit_args, **fit_kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/macad/anaconda3/lib/python3.11/site-packages/keras_tuner/src/tuners/hyperband.py", line 427, in run_trial
    return super().run_trial(trial, *fit_args, **fit_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/macad/anaconda3/lib/python3.11/site-packages/keras_tuner/src/engine/tuner.py", line 314, in run_trial
    obj_value = self._build_and_fit_model(trial, *args, **copied_kwargs)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  

RuntimeError: Number of consecutive failures exceeded the limit of 3.
Traceback (most recent call last):
  File "/Users/macad/anaconda3/lib/python3.11/site-packages/keras_tuner/src/engine/base_tuner.py", line 274, in _try_run_and_update_trial
    self._run_and_update_trial(trial, *fit_args, **fit_kwargs)
  File "/Users/macad/anaconda3/lib/python3.11/site-packages/keras_tuner/src/engine/base_tuner.py", line 239, in _run_and_update_trial
    results = self.run_trial(trial, *fit_args, **fit_kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/macad/anaconda3/lib/python3.11/site-packages/keras_tuner/src/tuners/hyperband.py", line 427, in run_trial
    return super().run_trial(trial, *fit_args, **fit_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/macad/anaconda3/lib/python3.11/site-packages/keras_tuner/src/engine/tuner.py", line 314, in run_trial
    obj_value = self._build_and_fit_model(trial, *args, **copied_kwargs)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/macad/anaconda3/lib/python3.11/site-packages/keras_tuner/src/engine/tuner.py", line 233, in _build_and_fit_model
    results = self.hypermodel.fit(hp, model, *args, **kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/macad/anaconda3/lib/python3.11/site-packages/keras_tuner/src/engine/hypermodel.py", line 149, in fit
    return model.fit(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/macad/anaconda3/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 122, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/Users/macad/anaconda3/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 124, in error_handler
    del filtered_tb
ValueError: Exception encountered when calling Conv1D.call().

[1mNegative dimension size caused by subtracting 27 from 22 for '{{node sequential_1/conv1d_2_1/convolution}} = Conv2D[T=DT_FLOAT, data_format="NHWC", dilations=[1, 1, 1, 1], explicit_paddings=[], padding="VALID", strides=[1, 1, 8, 1], use_cudnn_on_gpu=true](sequential_1/conv1d_2_1/convolution/ExpandDims, sequential_1/conv1d_2_1/convolution/ExpandDims_1)' with input shapes: [?,1,22,54], [1,27,54,56].[0m

Arguments received by Conv1D.call():
  • inputs=tf.Tensor(shape=(None, 22, 54), dtype=float32)


In [28]:
checkpoint_path = os.path.join(MODEL + FOLDER_STROKE_MODEL, "stroke_checkpoint.keras")
model_path = os.path.join(MODEL + FOLDER_STROKE_MODEL, "stroke_model.keras")

if os.path.exists(checkpoint_path):
    os.remove(checkpoint_path)

if os.path.exists(model_path):
    os.remove(model_path)
    
epochs = 50
batch_size = 32
kernel_size = 20
stride = 2
input_dim = (6000, 5)
num_filters1 = 28
num_filters2 = 64
dropout_rate = 0.5
learning_rate = 0.001

def build_stroke_model():
    model = Sequential()
    model.add(Conv1D(filters=num_filters1, kernel_size=kernel_size, strides=stride, activation='relu', input_shape=input_dim))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(dropout_rate))
    
    model.add(Conv1D(filters=num_filters2, kernel_size=kernel_size, strides=stride, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(dropout_rate))
    
    model.add(Conv1D(filters=num_filters2, kernel_size=kernel_size, strides=stride, activation='relu'))
    model.add(GlobalMaxPooling1D())
    model.add(Dense(128, activation='relu', kernel_regularizer='l2'))
    model.add(Dropout(dropout_rate))
    
    model.add(Dense(2, activation='sigmoid'))
    
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss=BinaryCrossentropy(), metrics=[BinaryAccuracy()])
    return model

model = build_stroke_model()

# Define callbacks
model_checkpoint = ModelCheckpoint(checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.00001, verbose=1)

callbacks = [model_checkpoint, early_stopping, reduce_lr]

# Train the model
history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_val, y_val), callbacks=callbacks)

# Save the trained model
model.save(model_path)

Epoch 1/50
[1m333/333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 224ms/step - binary_accuracy: 0.5111 - loss: 1.0585
Epoch 1: val_loss improved from inf to 0.69482, saving model to ../model/stroke-model/stroke_checkpoint.keras
[1m333/333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 251ms/step - binary_accuracy: 0.5111 - loss: 1.0579 - val_binary_accuracy: 0.5429 - val_loss: 0.6948 - learning_rate: 0.0010
Epoch 2/50
[1m333/333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 252ms/step - binary_accuracy: 0.5179 - loss: 0.6838
Epoch 2: val_loss improved from 0.69482 to 0.68466, saving model to ../model/stroke-model/stroke_checkpoint.keras
[1m333/333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 267ms/step - binary_accuracy: 0.5179 - loss: 0.6837 - val_binary_accuracy: 0.5560 - val_loss: 0.6847 - learning_rate: 0.0010
Epoch 3/50
[1m333/333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 234ms/step - binary_accuracy: 0.6178 - loss: 0.6385
Epoch

KeyboardInterrupt: 