In [213]:
import pandas as pd

def load_ecg_data_and_annotations(ecg_file_path, annotation_file_path):
    """Read an ECG recording and its annotation table from two CSV sources.

    Args:
        ecg_file_path: Location of the signal CSV (anything ``pd.read_csv`` accepts).
        annotation_file_path: Location of the annotation CSV.

    Returns:
        A ``(ecg_data, annotations)`` tuple of DataFrames, in that order.
    """
    sources = (ecg_file_path, annotation_file_path)
    return tuple(pd.read_csv(source) for source in sources)


In [214]:
import pandas as pd
from scipy.signal import butter, filtfilt

def _butter_filter(sequence):
    fs = 360  # Sampling frequency
    nyquist = 0.5 * fs
    low = 0.4 / nyquist
    high = 45 / nyquist

    b, a = butter(N=3, Wn=[low, high], btype='band')
    return filtfilt(b, a, sequence)

def apply_filter(ecg_data):
    """Return a copy of ``ecg_data`` with the 'MLII' and 'V1' columns band-pass filtered.

    Columns that are not present are skipped; every other column is copied
    through unchanged. The input DataFrame is not modified.
    """
    result = ecg_data.copy()
    # Only filter the leads that actually exist in this recording
    available_leads = [name for name in ('MLII', 'V1') if name in ecg_data.columns]
    for name in available_leads:
        result[name] = _butter_filter(ecg_data[name].values)
    return result

In [215]:
import numpy as np
from scipy.signal import find_peaks

def detect_r_peaks(ecg_lead, distance=180):
    """Locate local maxima in ``ecg_lead`` that are at least ``distance`` samples apart.

    Args:
        ecg_lead: 1-D signal to search.
        distance: Minimum spacing between accepted peaks, forwarded to
            ``scipy.signal.find_peaks``.

    Returns:
        Array of sample indices of the detected peaks.
    """
    # find_peaks returns (indices, properties); only the indices are needed
    peak_indices = find_peaks(ecg_lead, distance=distance)[0]
    return peak_indices

In [515]:
# Load record 207 (signal + annotations), then band-pass filter the signal leads
ecg_data_207, annotations_207 = load_ecg_data_and_annotations(
    ecg_file_path='../data/207/207.csv',
    annotation_file_path='../data/207/207annotations.csv',
)
filtered_ecg_data_207 = apply_filter(ecg_data_207)

In [516]:
# Minimum peak spacing in samples; the same value is reused below as the segment width
window_size = 180
r_peaks = detect_r_peaks(
    filtered_ecg_data_207['MLII'].values,
    distance=window_size,
)
r_peaks[:10]

array([  50,  296,  555,  836, 1047, 1304, 1588, 1806, 2029, 2343])

In [517]:
# Build one window per R-peak and label it with the nearest arrhythmia
# annotation that falls inside that window; peaks without one are skipped.
segment_data = []

for r_peak in r_peaks:
    start = max(0, r_peak - window_size // 2)
    end = min(len(filtered_ecg_data_207), r_peak + window_size // 2)

    # Annotations located in [start, end] (both ends inclusive) ...
    in_window = (annotations_207['Sample #'] >= start) & (annotations_207['Sample #'] <= end)
    relevant_annotations = annotations_207[in_window]
    # ... restricted to the beat types of interest
    of_interest = relevant_annotations['Type'].isin(['L', 'V', 'A', 'E', '!'])
    relevant_annotations = relevant_annotations[of_interest]

    if relevant_annotations.empty:
        continue

    # Pick the annotation with the smallest sample distance to the peak
    distance_order = (relevant_annotations['Sample #'] - r_peak).abs().argsort()
    closest_annotation = relevant_annotations.iloc[distance_order[:1]]
    label = closest_annotation['Type'].values[0]
    segment_data.append({'Start': start, 'End': end, 'Label': label})

segments = pd.DataFrame(segment_data)

In [None]:
# NOTE(review): this cell rebuilds `segments` with a different matching rule than
# the preceding cell (nearest annotation over the whole record instead of nearest
# inside the window); when both are run in order, this result replaces the earlier one.
segment_data = []

for r_peak in r_peaks:
    start = max(0, r_peak - window_size // 2)
    end = min(len(filtered_ecg_data_207), r_peak + window_size // 2)

    # Closest annotation to the R-peak
    distance_order = (annotations_207['Sample #'] - r_peak).abs().argsort()
    closest_annotation = annotations_207.iloc[distance_order[:1]]
    label = closest_annotation['Type'].values[0]

    # Keep the window only when that annotation is one of the target beat types
    if label not in ['L', 'V', 'A', 'E', '!']:
        continue
    segment_data.append({'Start': start, 'End': end, 'Label': label})

segments = pd.DataFrame(segment_data)
segments.tail()



In [518]:
# Keep only segments whose window was not truncated at either end of the record.
# This replaces an unconditional in-place drop of the last row, which removed one
# more row on every re-run of the cell and did not cover truncation at the start.
if not segments.empty:
    full_span = 2 * (window_size // 2)
    is_full_window = (
        (segments['End'] - segments['Start'] == full_span)
        # `End` is used as an inclusive index later, so it must be a valid sample index
        & (segments['End'] < len(filtered_ecg_data_207))
    )
    segments = segments[is_full_window].reset_index(drop=True)

In [519]:
from tensorflow.keras.utils import to_categorical

# Cut one window per labelled segment out of both filtered leads and build
# the model inputs (samples x timesteps x leads) and one-hot targets.
mlii_signal = filtered_ecg_data_207['MLII'].to_numpy()
v1_signal = filtered_ecg_data_207['V1'].to_numpy()

segments_feature_1 = []
segments_feature_2 = []
segment_labels = []

for row in segments.itertuples(index=False):
    start_index = int(row.Start)
    end_index = int(row.End)

    # `End` is treated as inclusive, hence the +1
    segments_feature_1.append(mlii_signal[start_index:end_index + 1])
    segments_feature_2.append(v1_signal[start_index:end_index + 1])
    segment_labels.append(row.Label)

combined_segments = [np.column_stack((mlII, v1)) for mlII, v1 in zip(segments_feature_1, segments_feature_2)]
# np.stack yields a numeric 3-D array and raises immediately if any window has a
# different length, instead of silently producing an object array of ragged rows.
combined_segments_array = np.stack(combined_segments)

label_mapping = {'L': 0, 'V': 1, 'A': 2, 'E': 3, '!': 4}
integer_labels = np.array([label_mapping[label] for label in segment_labels])
# Fix the width explicitly so the encoding keeps one column per class even when
# the highest-numbered class has no samples.
one_hot_labels = to_categorical(integer_labels, num_classes=len(label_mapping))

print(f"Combined Segments Shape: {combined_segments_array.shape}")
print(f"One-Hot Labels Shape: {one_hot_labels.shape}")


Combined Segments Shape: (719, 181, 2)
One-Hot Labels Shape: (719, 5)


In [520]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the segments as a test set, stratified on the one-hot label rows
split_options = dict(test_size=0.2, random_state=42, stratify=one_hot_labels)
train_x, test_x, train_y, test_y = train_test_split(
    combined_segments_array,
    one_hot_labels,
    **split_options,
)

In [521]:
# Report how many segments fall into each class (names listed in label-index order)
class_names = ['L', 'V', 'A', 'E', '!']
integer_labels_from_one_hot = np.argmax(one_hot_labels, axis=1)

# minlength guarantees one count per class name; without it, classes after the
# highest observed label would be dropped from the printout by zip().
class_counts = np.bincount(integer_labels_from_one_hot, minlength=len(class_names))

for class_name, count in zip(class_names, class_counts):
    print(f"Class {class_name}: {count}")


Class L: 282
Class V: 47
Class A: 75
Class E: 104
Class !: 211


In [522]:
from sklearn.preprocessing import StandardScaler
import numpy as np

# Standardise Train Set
# Flatten (samples, timesteps, features) to 2-D so each feature column is scaled
# with a single mean/std, then restore the 3-D layout.
nsamples, ntimesteps, nfeatures = train_x.shape
train_x_reshaped = train_x.reshape(-1, nfeatures)

scaler = StandardScaler().fit(train_x_reshaped)

train_x_standardised = scaler.transform(train_x_reshaped).reshape(nsamples, ntimesteps, nfeatures)

In [523]:
def get_metrics(results, metrics_names, metric_key):
    """Return the first result whose metric name contains ``metric_key``.

    Args:
        results: Sequence of metric values.
        metrics_names: Sequence of metric names, aligned with ``results``.
        metric_key: Substring to look for in each name.

    Returns:
        The matching value, or ``None`` when no name contains ``metric_key``.
    """
    matching_values = (
        value
        for name, value in zip(metrics_names, results)
        if metric_key in name
    )
    return next(matching_values, None)

In [527]:
# Candidate values per hyperparameter; the keys match the keyword arguments of
# build_and_train_model so a sampled configuration can be passed with **.
hyperparameter_space = dict(
    dropout_rate=[0.2, 0.3, 0.4, 0.5],
    lstm_units=[32, 64],
    batch_size=[16, 32, 64],
    learning_rate=[0.1, 0.01, 0.001],
    num_lstm_layers=[2, 3, 4],
    reg_learning_rate=[0.1, 0.01, 0.001],
)

In [536]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.metrics import Precision, Recall
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

def build_and_train_model(train_x, train_y, dropout_rate, lstm_units, batch_size, learning_rate, reg_learning_rate, num_lstm_layers, val_x=None, val_y=None, epochs=30):
    """Build a stacked-LSTM softmax classifier, train it, and evaluate it.

    Args:
        train_x: Training inputs; ``train_x.shape[1:]`` is used as the input shape.
        train_y: One-hot training targets; ``train_y.shape[1]`` sets the output width.
        dropout_rate: Rate of the Dropout layer placed before the output layer.
        lstm_units: Number of units in every LSTM layer.
        batch_size: Batch size used for training.
        learning_rate: Learning rate of the Adam optimizer.
        reg_learning_rate: L2 factor applied to the LSTM kernel/recurrent weights
            and to the output layer kernel.
        num_lstm_layers: Number of stacked LSTM layers (at least one is always built).
        val_x: Optional validation inputs.
        val_y: Optional validation targets (used together with ``val_x``).
        epochs: Number of training epochs (default 30).

    Returns:
        ``(model, accuracy, precision, recall)``. The metrics are computed on the
        validation data when ``val_x`` is given, otherwise on the training data.
    """
    # Values sampled with numpy arrive as numpy scalars; normalise the integer ones
    lstm_units = int(lstm_units)
    batch_size = int(batch_size)
    num_lstm_layers = int(num_lstm_layers)

    # `!= None` on an ndarray compares elementwise and cannot be truth-tested,
    # so the presence check has to be an identity test.
    has_validation = val_x is not None

    model = Sequential()
    # The first layer must emit the full sequence only if another LSTM consumes it;
    # with a single layer it has to return the last state for the Dense classifier.
    model.add(LSTM(lstm_units, return_sequences=num_lstm_layers > 1,
                   input_shape=(train_x.shape[1], train_x.shape[2]),
                   kernel_regularizer=l2(reg_learning_rate),
                   recurrent_regularizer=l2(reg_learning_rate)))

    for i in range(1, num_lstm_layers):
        is_last_lstm = i == num_lstm_layers - 1
        model.add(LSTM(lstm_units, return_sequences=not is_last_lstm,
                       kernel_regularizer=l2(reg_learning_rate),
                       recurrent_regularizer=l2(reg_learning_rate)))

    model.add(Dropout(dropout_rate))
    model.add(Dense(train_y.shape[1], activation='softmax', kernel_regularizer=l2(reg_learning_rate)))

    optimizer = Adam(learning_rate=learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy', Precision(), Recall()])
    model.fit(train_x, train_y,
              validation_data=(val_x, val_y) if has_validation else None,
              epochs=epochs, batch_size=batch_size, verbose=0)

    if has_validation:
        results = model.evaluate(val_x, val_y, verbose=0)
    else:
        results = model.evaluate(train_x, train_y, verbose=0)
    # NOTE(review): relies on `model.metrics_names` listing every compiled metric
    # by name -- confirm this holds for the installed Keras version.
    metrics_names = model.metrics_names

    accuracy = results[metrics_names.index('accuracy')]
    # Substring lookup, so the exact precision/recall metric name need not be known
    precision = get_metrics(results, metrics_names, 'precision')
    recall = get_metrics(results, metrics_names, 'recall')

    return model, accuracy, precision, recall


In [534]:
from sklearn.model_selection import StratifiedKFold
import numpy as np

# Random search over `hyperparameter_space`, scoring each sampled configuration
# by its mean validation accuracy across stratified folds of the training set.
n_splits = 5
kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

# StratifiedKFold needs 1-D class labels, not one-hot rows
y_labels = np.argmax(train_y, axis=1)

n_iterations = 10
best_score = 0
best_params = {}
# Defined up front so the final printout works even if no configuration beats best_score
best_metrics = {}

# Seeded generator so the sampled configurations are the same on every run
rng = np.random.default_rng(42)

for iteration in range(n_iterations):
    # .item() converts the sampled numpy scalar to a plain Python int/float
    chosen_params = {param: rng.choice(values).item() for param, values in hyperparameter_space.items()}
    print(f"Current Hyperparameters: {chosen_params}")

    accuracies = []
    precisions = []
    recalls = []
    for train_index, val_index in kf.split(train_x_standardised, y_labels):
        train_x_fold, val_x_fold = train_x_standardised[train_index], train_x_standardised[val_index]
        train_y_fold, val_y_fold = train_y[train_index], train_y[val_index]

        # The validation fold must be passed by keyword: positionally it would land
        # in `dropout_rate` / `lstm_units` and collide with **chosen_params.
        model, accuracy, precision, recall = build_and_train_model(
            train_x_fold, train_y_fold,
            val_x=val_x_fold, val_y=val_y_fold,
            **chosen_params,
        )
        accuracies.append(accuracy)
        precisions.append(precision)
        recalls.append(recall)

        print(f"Fold Scores: Acc - {accuracy} Pr - {precision} Re - {recall}")

    avg_accuracy = np.mean(accuracies)
    avg_precision = np.mean(precisions)
    avg_recall = np.mean(recalls)

    print(f"Current Mean Scores: Acc - {avg_accuracy} Pr - {avg_precision} Re - {avg_recall} , Current Hyperparameters: {chosen_params}")

    # Update best params etc.
    if avg_accuracy > best_score:
        best_score = avg_accuracy
        best_params = chosen_params
        best_metrics = {
            'accuracy': avg_accuracy,
            'precision': avg_precision,
            'recall': avg_recall
        }
        print(f"New best score: {avg_accuracy:.4f} with params: {best_params} and metrics: {best_metrics}")

# Final best results
print(f"Best score: {best_score:.4f}")
print(f"Best params: {best_params}")
print(f"Best metrics: {best_metrics}")

# TODO:
# Try out weighted classes
# Try out different window sizes


Current Hyperparameters: {'dropout_rate': 0.2, 'lstm_units': 64, 'batch_size': 16, 'learning_rate': 0.001, 'num_lstm_layers': 3, 'reg_learning_rate': 0.01}
Fold Scores: Acc - 0.886956512928009 Pr - 0.8938053250312805 Re - 0.8782608509063721 , Current Hyperparameters: {'dropout_rate': 0.2, 'lstm_units': 64, 'batch_size': 16, 'learning_rate': 0.001, 'num_lstm_layers': 3, 'reg_learning_rate': 0.01}
Fold Scores: Acc - 0.8695651888847351 Pr - 0.8839285969734192 Re - 0.8608695864677429 , Current Hyperparameters: {'dropout_rate': 0.2, 'lstm_units': 64, 'batch_size': 16, 'learning_rate': 0.001, 'num_lstm_layers': 3, 'reg_learning_rate': 0.01}
Fold Scores: Acc - 0.904347836971283 Pr - 0.9272727370262146 Re - 0.886956512928009 , Current Hyperparameters: {'dropout_rate': 0.2, 'lstm_units': 64, 'batch_size': 16, 'learning_rate': 0.001, 'num_lstm_layers': 3, 'reg_learning_rate': 0.01}
Fold Scores: Acc - 0.8782608509063721 Pr - 0.9082568883895874 Re - 0.8608695864677429 , Current Hyperparameters: {'

In [None]:
# Summarise the selected configuration, one "<title>: <value>" line per hyperparameter
summary_rows = [
    ("Dropout Rate", 'dropout_rate'),
    ("LSTM Layers", 'num_lstm_layers'),
    ("LSTM Units", 'lstm_units'),
    ("Batch Size", 'batch_size'),
    ("Learning Rate", 'learning_rate'),
    ("Regularizer Learning Rate", 'reg_learning_rate'),
]
for title, param_key in summary_rows:
    print(f"{title}: {best_params[param_key]}")

# Train final model with the best parameters
# (no validation data is passed, so the returned metrics refer to the training set)
model, accuracy, precision, recall = build_and_train_model(
    train_x_standardised,
    train_y,
    **best_params,
)

In [544]:
# Standardise Test Set
# Reuses the scaler fitted on the training data; the test set is only transformed.
test_nsamples, test_ntimesteps, test_nfeatures = test_x.shape
test_x_reshaped = test_x.reshape(-1, test_nfeatures)
test_x_standardised = scaler.transform(test_x_reshaped).reshape(
    test_nsamples, test_ntimesteps, test_nfeatures
)

In [550]:
# Evaluate on Test Set
# The unpacking order follows the compile() call: loss, then accuracy, precision, recall.
test_loss, test_accuracy, test_precision, test_recall = model.evaluate(test_x_standardised, test_y, verbose=1)

# F1 is the harmonic mean of precision and recall; define it as 0 when both are 0
# instead of raising ZeroDivisionError.
precision_recall_sum = test_precision + test_recall
if precision_recall_sum > 0:
    test_f1_score = 2 * (test_precision * test_recall) / precision_recall_sum
else:
    test_f1_score = 0.0

print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")
print(f"Test Precision: {test_precision}")
print(f"Test Recall: {test_recall}")
print(f"Test F1 Score: {test_f1_score}")

Test Loss: 0.806583821773529
Test Accuracy: 0.8333333134651184
Test Precision: 0.8417266011238098
Test Recall: 0.8125
Test F1 Score: 0.8268551151921768


In [557]:
from sklearn.metrics import classification_report

# Per-class precision/recall/F1 on the test set
report_class_names = ['L', 'V', 'A', 'E', '!']

predictions = model.predict(test_x_standardised)
# Convert softmax outputs and one-hot targets back to integer class indices
predicted_classes = np.argmax(predictions, axis=1)
true_classes = np.argmax(test_y, axis=1)

# Passing `labels` pins the label-to-name mapping; without it, the report raises
# when a class occurs in neither the true nor the predicted labels.
report = classification_report(
    true_classes,
    predicted_classes,
    labels=list(range(len(report_class_names))),
    target_names=report_class_names,
)
print(report)

              precision    recall  f1-score   support

           L       0.90      0.91      0.90        57
           V       0.67      0.22      0.33         9
           A       1.00      0.53      0.70        15
           E       0.72      0.86      0.78        21
           !       0.80      0.95      0.87        42

    accuracy                           0.83       144
   macro avg       0.82      0.70      0.72       144
weighted avg       0.84      0.83      0.82       144

