In [None]:
# Environment Setup
#%pip install importlib_metadata --force-reinstall
#%pip install keras
#%env SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True
#%pip install wfdb
#%pip install tqdm
# %pip install keras-tuner

In [None]:
# Required Libraries
import glob
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
tf.get_logger().setLevel('ERROR')
import keras_tuner as kt
from sklearn.model_selection import train_test_split
import wfdb
from tqdm import tqdm
from sklearn.metrics import confusion_matrix
import seaborn as sns

In [None]:
# Check GPU Availability
# Report how many GPU devices TensorFlow can see in this environment.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
# IPython shell escape: runs the `nvidia-smi` command-line tool and shows its output in the cell.
!nvidia-smi

# Data Collection

Description of Datasets Used:
 - CHF-RR Dataset: Congestive heart failure RR interval database. It comprises recordings from patients with heart failure: 29 records, CHF201 through CHF229.
 - NSR-RR Dataset: Normal sinus rhythm RR interval database. It comprises 54 normal sinus rhythm recordings, NSR001 through NSR054, from 54 subjects aged 28 to 76.

Both datasets are provided by PhysioBank.

**Note:** the download cell below fetches the `nsrdb` and `chfdb` databases, and later cells read their `ECG1`/`ECG2` signal channels, so confirm that those database names match the datasets described here.

In [None]:
# Data Collection

# Description of Datasets Used:
# CHF-RR Dataset: Congestive heart failure RR interval database. It comprises recordings from patients
# with heart failure: 29 records, CHF201 through CHF229.
# NSR-RR Dataset: Normal sinus rhythm RR interval database. It comprises 54 normal sinus rhythm recordings,
# NSR001 through NSR054, from 54 subjects aged 28 to 76.
# Both datasets are provided by PhysioBank.

# Downloading Data

# Function to download a database from PhysioBank
def download(database):
    """
    Fetch a PhysioBank database into a sub-folder of the current working directory.

    The destination folder is named after the database. After the download
    call returns, the destination path is printed and the folder's file
    listing is displayed.

    Parameters:
    database (str): Name of the database to download.
    """
    target_dir = os.path.join(os.getcwd(), database)
    wfdb.dl_database(database, dl_dir=target_dir)
    print(f"Downloaded {database} to {target_dir}")
    display(os.listdir(target_dir))

# Display available databases from PhysioBank
dbs = wfdb.get_dbs()
print("Available Databases from PhysioBank:")
display(dbs)

# Download the two databases used by the rest of the notebook.
# These calls are active, so they run every time this cell is executed;
# comment them out once the 'nsrdb' and 'chfdb' folders are present locally.
# NOTE(review): the dataset description refers to RR-interval databases, while
# later cells read ECG1/ECG2 signal channels from these records - confirm that
# 'nsrdb' and 'chfdb' are the intended database names.
download('nsrdb')  # Normal sinus rhythm records (labelled class 0 later on)
download('chfdb')  # Congestive heart failure records (labelled class 1 later on)


# Converting Data

In [None]:
# Converting Data

# Function to convert a wfdb record to a DataFrame
def wfdb_to_dataframe(record):
    """
    Build a DataFrame from the signal matrix of a wfdb record.

    The record's `p_signal` array supplies the values and its `sig_name`
    list supplies the column labels (one column per signal).

    Parameters:
    record (wfdb.Record): The wfdb record to convert.

    Returns:
    pd.DataFrame: DataFrame containing the signal data.
    """
    return pd.DataFrame(data=record.p_signal, columns=record.sig_name)

# Function to load a wfdb record and convert it to a DataFrame
def load_record(uri):
    """
    Read a wfdb record and return its signals as a DataFrame.

    The record is read with `wfdb.rdrecord` and then handed to
    `wfdb_to_dataframe` for conversion.

    Parameters:
    uri (str): URI of the wfdb record to load.

    Returns:
    pd.DataFrame: DataFrame containing the signal data.
    """
    return wfdb_to_dataframe(wfdb.rdrecord(uri))

# Example usage of load_record: load one record from each database and plot
# the first 212 samples of selected channels.
a = load_record('nsrdb/16265')
b = load_record('chfdb/chf03')  # loaded once and reused for both of its channels

for example_df, channel, plot_title in (
    (a, 'ECG1', 'nsrdb/16265 - ECG1'),
    (b, 'ECG1', 'chfdb/chf03 - ECG1'),
    (b, 'ECG2', 'chfdb/chf03 - ECG2'),
):
    plt.plot(example_df[channel][:212])
    # Label every figure so it can be read without looking at the code.
    plt.title(plot_title)
    plt.xlabel('Sample index')
    plt.ylabel('Amplitude')
    plt.show()

# Collect the header files ('.hea') found in each database directory
nsrdb_list = glob.glob("nsrdb/*.hea")
chfdb_list = glob.glob("chfdb/*.hea")

# Report how many header files each directory contains
print("Number of NSRDB files:", len(nsrdb_list))
print("Number of CHFDB files:", len(chfdb_list))


# Data Creation and Processing

In [None]:
# Data Creation and Processing

# Function to create a database from a list of files
def create_db(file_list, record_class, clip_duration=60, sampling_rate=212):
    """
    Create a database of fixed-length, labelled clips from a list of record files.

    Each record is loaded once with `load_record` (after stripping a trailing
    '.hea' from its path) and cut into consecutive, non-overlapping clips of
    `sampling_rate * clip_duration` rows. Rows left over at the end of a
    record that do not fill a whole clip are discarded. Every clip is
    flattened row by row, so the values of the different channels are
    interleaved within one output row.

    The sampling rate is taken from the argument as given; it is not read
    from the records themselves.

    All clips of all records are held in memory until the final array has
    been assembled.

    Parameters:
    file_list (list): List of file paths for the records.
    record_class (int): Class label for the records.
    clip_duration (int): Duration of each clip in seconds.
    sampling_rate (int): Sampling rate of the records.

    Returns:
    pd.DataFrame: One row per clip with columns 't0' ... 't{k-1}'
        (k = clip length * number of channels of the first record)
        followed by a 'class' column holding `record_class`.

    Raises:
    IndexError: If `file_list` is empty.
    ValueError: If a record yields clips whose width differs from the
        width implied by the first record's channel count.
    """
    if len(file_list) == 0:
        # Same exception type as indexing an empty list, but with a message
        # that points at the likely cause.
        raise IndexError(
            "create_db received an empty file_list; check that the records "
            "were downloaded and that the glob pattern matches them"
        )

    clip_len = sampling_rate * clip_duration

    num_channels = None
    clip_blocks = []
    for file_path in file_list:
        record_df = load_record(file_path.removesuffix('.hea'))
        if num_channels is None:
            # The first record fixes the width of the output table.
            num_channels = record_df.shape[1]

        num_clips = len(record_df) // clip_len
        if num_clips == 0:
            continue

        # Drop the incomplete tail, then view the record as one row per clip.
        # A row-major reshape yields exactly the per-clip flattened layout.
        samples = record_df.to_numpy()[:num_clips * clip_len]
        clip_blocks.append(samples.reshape(num_clips, clip_len * record_df.shape[1]))

    total_clips = sum(block.shape[0] for block in clip_blocks)

    # Preallocate the final array; +1 column for the class label
    all_clips = np.empty((total_clips, clip_len * num_channels + 1))

    row = 0
    for block in clip_blocks:
        all_clips[row:row + block.shape[0], :-1] = block
        row += block.shape[0]
    all_clips[:, -1] = record_class  # Last column is the class label

    # Convert the NumPy array directly to a DataFrame
    column_names = [f't{i}' for i in range(clip_len * num_channels)] + ['class']
    return pd.DataFrame(all_clips, columns=column_names)

# Locate the header files of both databases
nsrdb_list = glob.glob("nsrdb/*.hea")
chfdb_list = glob.glob("chfdb/*.hea")

# Clips cut from the 'nsrdb' records carry class label 0
nsrdb_df = create_db(nsrdb_list, 0)
print("Shape of NSRDB DataFrame:", nsrdb_df.shape)

# Clips cut from the 'chfdb' records carry class label 1
chfdb_df = create_db(chfdb_list, 1)
print("Shape of CHFDB DataFrame:", chfdb_df.shape)

# Stack both tables row-wise under a fresh 0..n-1 index
full_df = pd.concat([nsrdb_df, chfdb_df], ignore_index=True)
print("Shape of Full DataFrame:", full_df.shape)

# Separate features (X) from labels (y). `x_df` is the same object as
# `full_df`, so popping "class" removes that column from both names.
x_df = full_df
y_df = x_df.pop("class")

In [None]:
# Preview the first rows. The "class" column was popped out of `full_df`
# into `y_df` in the previous cell, so only the feature columns appear here.
full_df.head()

# Data Splitting

In [None]:
# Data Splitting

# Fixed seed so that Restart & Run All reproduces the same partitions.
SPLIT_SEED = 42

# Split the full dataset into training, validation, and testing sets.
# 30% of all rows are held out for testing; 30% of the remainder is used for
# validation. `stratify` keeps the class proportions equal across partitions.
# NOTE(review): rows are shuffled individually, so clips cut from the same
# recording can land in different partitions - consider splitting by record
# if that matters for the evaluation.
X_trainval, X_test, y_trainval, y_test = train_test_split(
    x_df, y_df, test_size=0.30, random_state=SPLIT_SEED, stratify=y_df
)
X_train, X_validate, y_train, y_validate = train_test_split(
    X_trainval, y_trainval, test_size=0.30, random_state=SPLIT_SEED, stratify=y_trainval
)

print("Training Data:", X_train.shape)
print("Validation Data:", X_validate.shape)
print("Testing Data:", X_test.shape)

# Model Definition

In [None]:
from tensorflow.keras.callbacks import TensorBoard
import datetime

# Every sample is fed to the network as a sequence with one value per step:
# (number of feature columns, 1).
input_shape = (X_train.shape[1], 1)

# TensorBoard callback writing to a timestamped folder under logs/fit/
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

keras_layers = tf.keras.layers

# Three Conv1D -> BatchNormalization -> MaxPooling1D stages, followed by a
# small dense head that ends in a single sigmoid unit.
layer_stack = [
    tf.keras.Input(shape=input_shape),
    # Stage 1
    keras_layers.Conv1D(32, 3, activation='relu', name='conv1d_2'),
    keras_layers.BatchNormalization(name='batch_normalization_2'),
    keras_layers.MaxPooling1D(2, name='max_pooling1d_2'),
    # Stage 2
    keras_layers.Conv1D(64, 5, activation='relu', name='conv1d_3'),
    keras_layers.BatchNormalization(name='batch_normalization_3'),
    keras_layers.MaxPooling1D(2, name='max_pooling1d_3'),
    # Stage 3
    keras_layers.Conv1D(32, 5, activation='relu', name='conv1d_4'),
    keras_layers.BatchNormalization(name='batch_normalization_4'),
    keras_layers.MaxPooling1D(2, name='max_pooling1d_4'),
    # Classification head
    keras_layers.Flatten(name='flatten_1'),
    keras_layers.Dense(32, activation='relu', name='dense_3'),
    keras_layers.Dropout(0.5, name='dropout_2'),
    keras_layers.Dense(1, activation='sigmoid', name='dense_5'),
]
model = tf.keras.Sequential(layer_stack)

model.summary()


# Model Training


In [None]:
from keras.optimizers import Adam

# Set the learning rate.
# The optimizer is taken from `tf.keras`, the same namespace the model was
# built with, rather than from the standalone `keras` package: mixing objects
# from the two can fail when the installed versions differ.
adam = tf.keras.optimizers.Adam(learning_rate=0.00005)

# Compile the model with the modified optimizer
model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])

# Train the model on the training data while validating on the dedicated validation set
history = model.fit(
    X_train,
    y_train,
    epochs=5,
    validation_data=(X_validate, y_validate),
    callbacks=[tensorboard_callback]
)

# Model Evaluation
# `score` holds the loss first, then the compiled metrics (accuracy here).
score = model.evaluate(X_test, y_test, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])

# Model Evaluation & Results

In [None]:
from sklearn.metrics import classification_report, roc_auc_score, roc_curve
from sklearn.metrics import precision_recall_fscore_support


# Generate Predictions
y_pred_prob = model.predict(X_test)

# If y_pred_prob is 3D, keep the first output of the last step for each sample
if y_pred_prob.ndim == 3:
    y_pred_prob = y_pred_prob[:, -1, 0]

# Flatten to one probability per sample. Leaving an (n, 1) column next to the
# 1-D `y_test` would make element-wise comparisons broadcast to (n, n).
y_pred_prob = np.ravel(y_pred_prob)

# Convert probabilities to class labels (threshold 0.5)
y_pred = (y_pred_prob > 0.5).astype(int)

# Generate Confusion Matrix (plotted further below)
cm = confusion_matrix(y_test, y_pred)

# Calculate additional performance metrics
precision, recall, f1_score, _ = precision_recall_fscore_support(y_test, y_pred, average='binary')
roc_auc = roc_auc_score(y_test, y_pred_prob)

# One figure with three side-by-side panels
fig, (ax_cm, ax_roc, ax_acc) = plt.subplots(1, 3, figsize=(12, 4))

# Panel 1: confusion matrix as an annotated heatmap
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax_cm)
ax_cm.set_title('Confusion Matrix')
ax_cm.set_xlabel('Predicted')
ax_cm.set_ylabel('Actual')

# Panel 2: ROC curve with the chance diagonal for reference
fpr, tpr, _ = roc_curve(y_test, y_pred_prob)
ax_roc.plot(fpr, tpr, label=f'ROC curve (area = {roc_auc:.2f})')
ax_roc.plot([0, 1], [0, 1], linestyle='--')
ax_roc.set_xlabel('False Positive Rate')
ax_roc.set_ylabel('True Positive Rate')
ax_roc.set_title('ROC Curve')
ax_roc.legend(loc='lower right')

# Panel 3: training vs. validation accuracy per epoch
ax_acc.plot(history.history['accuracy'])
ax_acc.plot(history.history['val_accuracy'])
ax_acc.set_title('Model Accuracy')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.legend(['Train', 'Validation'], loc='upper left')

fig.tight_layout()
plt.show()

# Per-class precision / recall / F1 table, five decimal places, followed by a blank line
report = classification_report(y_test, y_pred, target_names=['NSR', 'CHF'], digits=5)
print(report, end="\n\n")

# ROC AUC from the predicted probabilities, five decimal places, followed by a blank line
roc_auc = roc_auc_score(y_test, y_pred_prob)
print(f'ROC AUC: {roc_auc:.5f}', end="\n\n")

# Binary-average precision, recall and F1 computed earlier, one per line
summary_lines = [
    f'Precision: {precision:.5f}',
    f'Recall: {recall:.5f}',
    f'F1 Score: {f1_score:.5f}',
]
print("\n".join(summary_lines))
