In [1]:
import wfdb
import numpy as np
import pywt
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv1D, LSTM, GRU, Dense, Input, Add, Flatten, Reshape, UpSampling1D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Define the new models
def build_1d_cnn(input_shape=(512, 1)):
    inp = Input(shape=input_shape)
    x = Conv1D(64, 15, padding='same', activation='relu')(inp)
    x = Conv1D(128, 15, padding='same', activation='relu')(x)
    x = Conv1D(256, 15, padding='same', activation='relu')(x)
    x = Conv1D(1, 15, padding='same', activation='tanh')(x)
    return Model(inp, x)

def build_lstm(input_shape=(512, 1)):
    inp = Input(shape=input_shape)
    x = LSTM(128, return_sequences=True)(inp)
    x = LSTM(64, return_sequences=True)(x)
    x = Dense(1, activation='tanh')(x)
    return Model(inp, x)

def build_gru(input_shape=(512, 1)):
    inp = Input(shape=input_shape)
    x = GRU(128, return_sequences=True)(inp)
    x = GRU(64, return_sequences=True)(x)
    x = Dense(1, activation='tanh')(x)
    return Model(inp, x)

def build_denoising_autoencoder(input_shape=(512, 1)):
    inp = Input(shape=input_shape)
    
    # Encoder
    encoded = Conv1D(128, 15, padding='same', activation='relu')(inp)
    encoded = Conv1D(64, 15, padding='same', activation='relu')(encoded)
    
    # Decoder with upsampling
    decoded = UpSampling1D(size=2)(encoded)
    decoded = Conv1D(64, 15, padding='same', activation='relu')(decoded)
    decoded = UpSampling1D(size=2)(decoded)
    decoded = Conv1D(1, 15, padding='same', activation='tanh')(decoded)
    
    return Model(inp, decoded)

# Ensemble method: averaging predictions from all models
def ensemble_predict(models, noisy_signal):
    predictions = [model.predict(noisy_signal) for model in models]
    ensemble_prediction = np.mean(predictions, axis=0)
    return ensemble_prediction.squeeze()

# Load ECG and noise data
def load_ecg_data_with_labels(record_numbers, segment_length=512):
    ecg_segments = []
    labels = []
    for rec_num in record_numbers:
        record = wfdb.rdrecord(f'M:\\Dissertation\\New folder\\mit-bih-arrhythmia-database-1.0.0/{rec_num}')
        annotation = wfdb.rdann(f'M:\\Dissertation\\New folder\\mit-bih-arrhythmia-database-1.0.0/{rec_num}', 'atr')
        
        for i in range(len(annotation.sample)):
            start = max(0, annotation.sample[i] - segment_length // 2)
            end = min(len(record.p_signal), start + segment_length)
            if end - start == segment_length:
                ecg_segments.append(record.p_signal[start:end, 0])  # Assuming MLII lead
                labels.append(annotation.symbol[i])
    
    return np.array(ecg_segments), np.array(labels)

def load_noise_data():
    em = wfdb.rdrecord(r'M:\\Dissertation\\New folder\\mit-bih-noise-stress-test-database-1.0.0\\em', sampfrom=0).p_signal[:, 0]
    bw = wfdb.rdrecord(r'M:\\Dissertation\\New folder\\mit-bih-noise-stress-test-database-1.0.0\\bw', sampfrom=0).p_signal[:, 0]
    ma = wfdb.rdrecord(r'M:\\Dissertation\\New folder\\mit-bih-noise-stress-test-database-1.0.0\\ma', sampfrom=0).p_signal[:, 0]
    return em, bw, ma

# Load ECG and noise data
ecg_records = [103, 105, 111, 116, 122, 205, 213, 219, 223, 230]  # Add more records as needed
ecg_segments, labels = load_ecg_data_with_labels(ecg_records)
em_noise, bw_noise, ma_noise = load_noise_data()

# Split data into 70% training, 15% validation, and 15% testing
X_train_val, X_test, y_train_val, y_test = train_test_split(ecg_segments, labels, test_size=0.15, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.1765, random_state=42)  # 0.1765 * 85% ≈ 15%

# Compute class weights to handle imbalance
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
class_weight_dict = {i: class_weights[i] for i in range(len(class_weights))}

# Extend and add noise to ECG segments
def extend_noise_signal(noise_signal, target_length):
    repeated_noise = np.tile(noise_signal, int(np.ceil(target_length / len(noise_signal))))
    return repeated_noise[:target_length]

def calculate_snr(signal, noise):
    signal_power = np.sum(np.square(signal))
    noise_power = np.sum(np.square(noise))
    snr = 10 * np.log10(signal_power / noise_power)
    return snr

def calculate_rmse(signal, denoised_signal):
    return np.sqrt(np.mean((signal - denoised_signal) ** 2))

# Add noise to ECG segments using multiple noise types and combinations
def add_combined_noise(ecg_segments, noises, target_snr_db):
    noisy_segments = []
    
    # Generate noise signal by combining all noise types
    combined_noise_signal = sum(extend_noise_signal(noise, ecg_segments.shape[1]) for noise in noises)
    
    for ecg_segment in ecg_segments:
        current_snr = calculate_snr(ecg_segment, combined_noise_signal[:len(ecg_segment)])
        scaling_factor = np.sqrt(np.sum(np.square(ecg_segment)) / (np.sum(np.square(combined_noise_signal)) * 10**(target_snr_db / 10)))
        scaled_noise = combined_noise_signal[:len(ecg_segment)] * scaling_factor
        noisy_segment = ecg_segment + scaled_noise
        noisy_segments.append(noisy_segment)
    return np.array(noisy_segments)

# Denoise function
def denoise_signal(models, noisy_signal):
    noisy_signal = np.expand_dims(noisy_signal, axis=-1)
    denoised_signal = ensemble_predict(models, noisy_signal)
    return denoised_signal.squeeze()

# Function to extract wavelet features
def extract_wavelet_features(ecg_slice):
    coeffs = pywt.wavedec(ecg_slice, 'db6', level=5)
    return coeffs[0]  # You may want to use more features from different levels

# Function to classify heartbeats using SVM
def classify_heartbeats(features, labels):
    clf = SVC(kernel='linear', class_weight=class_weight_dict)
    clf.fit(features, labels)
    return clf

# Add single noises to combined noises
noises = [em_noise, bw_noise, ma_noise]

snr_db = 0  # Example SNR value

# Prepare the clean ECG segments as target data for training
clean_ecg_segments_train = X_train.copy()

# Create noisy training data by combining all noise types
noisy_ecg_slices_train = add_combined_noise(X_train, noises, snr_db)
noisy_ecg_slices_val = add_combined_noise(X_val, noises, snr_db)

# Instantiate the models
models = [
    build_1d_cnn(),
    build_lstm(),
    build_gru(),
    build_denoising_autoencoder()
]

# Expand dimensions for training and validation
noisy_ecg_slices_train_expanded = np.expand_dims(noisy_ecg_slices_train, axis=-1)  # Expand dims for the models
noisy_ecg_slices_val_expanded = np.expand_dims(noisy_ecg_slices_val, axis=-1)  # Expand dims for validation
clean_ecg_segments_val_expanded = np.expand_dims(X_val, axis=-1)

# Train each model
for model in models:
    model.compile(optimizer=Adam(), loss='mse')
    model.fit(noisy_ecg_slices_train_expanded, np.expand_dims(clean_ecg_segments_train, axis=-1), 
              epochs=10, batch_size=32, validation_data=(noisy_ecg_slices_val_expanded, clean_ecg_segments_val_expanded))

# Evaluate the trained models on each noise type and combination
results = {}
for noise_name, noise_data in noises.items():
    noisy_ecg_slices_test = add_combined_noise(X_test, noises, snr_db)
    
    # Denoise and classify
    denoised_ecg_slices_test = denoise_signal(models, noisy_ecg_slices_test)
    
    # Calculate SNR and RMSE
    snr_values = [calculate_snr(ecg, denoised) for ecg, denoised in zip(X_test, denoised_ecg_slices_test)]
    rmse_values = [calculate_rmse(ecg, denoised) for ecg, denoised in zip(X_test, denoised_ecg_slices_test)]
    
    features_noisy = np.array([extract_wavelet_features(slice) for slice in noisy_ecg_slices_test])
    features_denoised = np.array([extract_wavelet_features(slice) for slice in denoised_ecg_slices_test])
    
    test_labels = y_test[:len(features_noisy)]
    
    model_noisy = classify_heartbeats(features_noisy, test_labels)
    model_denoised = classify_heartbeats(features_denoised, test_labels)
    
    # Predict the classes
    predictions_noisy = model_noisy.predict(features_noisy)
    predictions_denoised = model_denoised.predict(features_denoised)
    
    # Get the unique classes present in test_labels
    unique_classes = np.unique(test_labels)
    class_names = [name for i, name in enumerate(['N', 'V', 'A', 'L']) if i in unique_classes]
    
    # Evaluate accuracy for each class
    report_noisy = classification_report(test_labels, predictions_noisy, target_names=class_names, output_dict=True, zero_division=0)
    report_denoised = classification_report(test_labels, predictions_denoised, target_names=class_names, output_dict=True, zero_division=0)
    
    # Store accuracy and SNR, RMSE for each class
    results[noise_name] = {
        'noisy': {class_name: report_noisy[class_name]['precision'] for class_name in class_names},
        'denoised': {class_name: report_denoised[class_name]['precision'] for class_name in class_names},
        'snr': np.mean(snr_values),
        'rmse': np.mean(rmse_values)
    }

# Output the results for each class and noise condition
for noise_name, metrics in results.items():
    print(f"Noise type: {noise_name}")
    print(f"Average SNR after denoising: {metrics['snr']:.4f} dB")
    print(f"Average RMSE after denoising: {metrics['rmse']:.4f}")
    print("Noisy data accuracies:")
    for class_label, accuracy in metrics['noisy'].items():
        print(f"  {class_label}: {accuracy:.4f}")
    print("Denoised data accuracies:")
    for class_label, accuracy in metrics['denoised'].items():
        print(f"  {class_label}: {accuracy:.4f}")
    print("\n")


Epoch 1/10
[1m552/552[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m262s[0m 463ms/step - loss: 0.0956 - val_loss: 0.0457
Epoch 2/10
[1m552/552[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m256s[0m 464ms/step - loss: 0.0490 - val_loss: 0.0411
Epoch 3/10
[1m552/552[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m255s[0m 461ms/step - loss: 0.0450 - val_loss: 0.0334
Epoch 4/10
[1m552/552[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m264s[0m 464ms/step - loss: 0.0424 - val_loss: 0.0313
Epoch 5/10
[1m552/552[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m256s[0m 462ms/step - loss: 0.0399 - val_loss: 0.0307
Epoch 6/10
[1m552/552[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m251s[0m 455ms/step - loss: 0.0416 - val_loss: 0.0304
Epoch 7/10
[1m552/552[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m256s[0m 464ms/step - loss: 0.0380 - val_loss: 0.0300
Epoch 8/10
[1m552/552[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m256s[0m 463ms/step - loss: 0.0349 - val_loss: 0.0288
Epoch 10

ValueError: Dimensions must be equal, but are 512 and 2048 for '{{node compile_loss/mse/sub}} = Sub[T=DT_FLOAT](data_1, functional_7_1/conv1d_7_1/Tanh)' with input shapes: [?,512,1], [?,2048,1].

In [2]:
import wfdb
import numpy as np
import pywt
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv1D, LSTM, GRU, Dense, Input, Add, Flatten, Reshape
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Define the new models
def build_1d_cnn(input_shape=(512, 1)):
    inp = Input(shape=input_shape)
    x = Conv1D(64, 15, padding='same', activation='relu')(inp)
    x = Conv1D(128, 15, padding='same', activation='relu')(x)
    x = Conv1D(256, 15, padding='same', activation='relu')(x)
    x = Conv1D(1, 15, padding='same', activation='tanh')(x)
    return Model(inp, x)

def build_lstm(input_shape=(512, 1)):
    inp = Input(shape=input_shape)
    x = LSTM(128, return_sequences=True)(inp)
    x = LSTM(64, return_sequences=True)(x)
    x = Dense(1, activation='tanh')(x)
    return Model(inp, x)

def build_gru(input_shape=(512, 1)):
    inp = Input(shape=input_shape)
    x = GRU(128, return_sequences=True)(inp)
    x = GRU(64, return_sequences=True)(x)
    x = Dense(1, activation='tanh')(x)
    return Model(inp, x)

def build_denoising_autoencoder(input_shape=(512, 1)):
    inp = Input(shape=input_shape)
    
    # Encoder
    encoded = Conv1D(128, 15, padding='same', activation='relu')(inp)
    encoded = Conv1D(64, 15, padding='same', activation='relu')(encoded)
    
    # Decoder (without upsampling to maintain the same input/output size)
    decoded = Conv1D(64, 15, padding='same', activation='relu')(encoded)
    decoded = Conv1D(128, 15, padding='same', activation='relu')(decoded)
    decoded = Conv1D(1, 15, padding='same', activation='tanh')(decoded)
    
    return Model(inp, decoded)

# Ensemble method: averaging predictions from all models
def ensemble_predict(models, noisy_signal):
    predictions = [model.predict(noisy_signal) for model in models]
    ensemble_prediction = np.mean(predictions, axis=0)
    return ensemble_prediction.squeeze()

# Load ECG and noise data
def load_ecg_data_with_labels(record_numbers, segment_length=512):
    ecg_segments = []
    labels = []
    for rec_num in record_numbers:
        record = wfdb.rdrecord(f'M:\\Dissertation\\New folder\\mit-bih-arrhythmia-database-1.0.0/{rec_num}')
        annotation = wfdb.rdann(f'M:\\Dissertation\\New folder\\mit-bih-arrhythmia-database-1.0.0/{rec_num}', 'atr')
        
        for i in range(len(annotation.sample)):
            start = max(0, annotation.sample[i] - segment_length // 2)
            end = min(len(record.p_signal), start + segment_length)
            if end - start == segment_length:
                ecg_segments.append(record.p_signal[start:end, 0])  # Assuming MLII lead
                labels.append(annotation.symbol[i])
    
    return np.array(ecg_segments), np.array(labels)

def load_noise_data():
    em = wfdb.rdrecord(r'M:\\Dissertation\\New folder\\mit-bih-noise-stress-test-database-1.0.0\\em', sampfrom=0).p_signal[:, 0]
    bw = wfdb.rdrecord(r'M:\\Dissertation\\New folder\\mit-bih-noise-stress-test-database-1.0.0\\bw', sampfrom=0).p_signal[:, 0]
    ma = wfdb.rdrecord(r'M:\\Dissertation\\New folder\\mit-bih-noise-stress-test-database-1.0.0\\ma', sampfrom=0).p_signal[:, 0]
    return em, bw, ma

# Load ECG and noise data
ecg_records = [103, 105, 111, 116, 122, 205, 213, 219, 223, 230]  # Add more records as needed
ecg_segments, labels = load_ecg_data_with_labels(ecg_records)
em_noise, bw_noise, ma_noise = load_noise_data()

# Split data into 70% training, 15% validation, and 15% testing
X_train_val, X_test, y_train_val, y_test = train_test_split(ecg_segments, labels, test_size=0.15, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.1765, random_state=42)  # 0.1765 * 85% ≈ 15%

# Compute class weights to handle imbalance
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
class_weight_dict = {i: class_weights[i] for i in range(len(class_weights))}

# Extend and add noise to ECG segments
def extend_noise_signal(noise_signal, target_length):
    repeated_noise = np.tile(noise_signal, int(np.ceil(target_length / len(noise_signal))))
    return repeated_noise[:target_length]

def calculate_snr(signal, noise):
    signal_power = np.sum(np.square(signal))
    noise_power = np.sum(np.square(noise))
    snr = 10 * np.log10(signal_power / noise_power)
    return snr

def calculate_rmse(signal, denoised_signal):
    return np.sqrt(np.mean((signal - denoised_signal) ** 2))

# Add noise to ECG segments using multiple noise types and combinations
def add_combined_noise(ecg_segments, noises, target_snr_db):
    noisy_segments = []
    
    # Generate noise signal by combining all noise types
    combined_noise_signal = sum(extend_noise_signal(noise, ecg_segments.shape[1]) for noise in noises)
    
    for ecg_segment in ecg_segments:
        current_snr = calculate_snr(ecg_segment, combined_noise_signal[:len(ecg_segment)])
        scaling_factor = np.sqrt(np.sum(np.square(ecg_segment)) / (np.sum(np.square(combined_noise_signal)) * 10**(target_snr_db / 10)))
        scaled_noise = combined_noise_signal[:len(ecg_segment)] * scaling_factor
        noisy_segment = ecg_segment + scaled_noise
        noisy_segments.append(noisy_segment)
    return np.array(noisy_segments)

# Denoise function
def denoise_signal(models, noisy_signal):
    noisy_signal = np.expand_dims(noisy_signal, axis=-1)
    denoised_signal = ensemble_predict(models, noisy_signal)
    return denoised_signal.squeeze()

# Function to extract wavelet features
def extract_wavelet_features(ecg_slice):
    coeffs = pywt.wavedec(ecg_slice, 'db6', level=5)
    return coeffs[0]  # You may want to use more features from different levels

# Function to classify heartbeats using SVM
def classify_heartbeats(features, labels):
    clf = SVC(kernel='linear', class_weight=class_weight_dict)
    clf.fit(features, labels)
    return clf

# Add single noises to combined noises
noises = [em_noise, bw_noise, ma_noise]

snr_db = 0  # Example SNR value

# Prepare the clean ECG segments as target data for training
clean_ecg_segments_train = X_train.copy()

# Create noisy training data by combining all noise types
noisy_ecg_slices_train = add_combined_noise(X_train, noises, snr_db)
noisy_ecg_slices_val = add_combined_noise(X_val, noises, snr_db)

# Instantiate the models
models = [
    build_1d_cnn(),
    build_lstm(),
    build_gru(),
    build_denoising_autoencoder()
]

# Expand dimensions for training and validation
noisy_ecg_slices_train_expanded = np.expand_dims(noisy_ecg_slices_train, axis=-1)  # Expand dims for the models
noisy_ecg_slices_val_expanded = np.expand_dims(noisy_ecg_slices_val, axis=-1)  # Expand dims for validation
clean_ecg_segments_val_expanded = np.expand_dims(X_val, axis=-1)

# Train each model
for model in models:
    model.compile(optimizer=Adam(), loss='mse')
    model.fit(noisy_ecg_slices_train_expanded, np.expand_dims(clean_ecg_segments_train, axis=-1), 
              epochs=2, batch_size=32, validation_data=(noisy_ecg_slices_val_expanded, clean_ecg_segments_val_expanded))

# Evaluate the trained models on each noise type and combination
noisy_ecg_slices_test = add_combined_noise(X_test, noises, snr_db)

# Denoise and classify
denoised_ecg_slices_test = denoise_signal(models, noisy_ecg_slices_test)

# Calculate SNR and RMSE
snr_values = [calculate_snr(ecg, denoised) for ecg, denoised in zip(X_test, denoised_ecg_slices_test)]
rmse_values = [calculate_rmse(ecg, denoised) for ecg, denoised in zip(X_test, denoised_ecg_slices_test)]

features_noisy = np.array([extract_wavelet_features(slice) for slice in noisy_ecg_slices_test])
features_denoised = np.array([extract_wavelet_features(slice) for slice in denoised_ecg_slices_test])

test_labels = y_test[:len(features_noisy)]

model_noisy = classify_heartbeats(features_noisy, test_labels)
model_denoised = classify_heartbeats(features_denoised, test_labels)

# Predict the classes
predictions_noisy = model_noisy.predict(features_noisy)
predictions_denoised = model_denoised.predict(features_denoised)

# Get the unique classes present in test_labels
unique_classes = np.unique(test_labels)
class_names = [name for i, name in enumerate(['N', 'V', 'A', 'L']) if i in unique_classes]

# Evaluate accuracy for each class
report_noisy = classification_report(test_labels, predictions_noisy, target_names=class_names, output_dict=True, zero_division=0)
report_denoised = classification_report(test_labels, predictions_denoised, target_names=class_names, output_dict=True, zero_division=0)

# Output the results for the combined noise
results = {
    'noisy': {class_name: report_noisy[class_name]['precision'] for class_name in class_names},
    'denoised': {class_name: report_denoised[class_name]['precision'] for class_name in class_names},
    'snr': np.mean(snr_values),
    'rmse': np.mean(rmse_values)
}

print(f"Average SNR after denoising: {results['snr']:.4f} dB")
print(f"Average RMSE after denoising: {results['rmse']:.4f}")
print("Noisy data accuracies:")
for class_label, accuracy in results['noisy'].items():
    print(f"  {class_label}: {accuracy:.4f}")
print("Denoised data accuracies:")
for class_label, accuracy in results['denoised'].items():
    print(f"  {class_label}: {accuracy:.4f}")
print("\n")


Epoch 1/2
[1m552/552[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m190s[0m 339ms/step - loss: 0.0754 - val_loss: 0.0407
Epoch 2/2
[1m552/552[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m194s[0m 351ms/step - loss: 0.0486 - val_loss: 0.0344
Epoch 1/2
[1m552/552[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m468s[0m 822ms/step - loss: 0.0958 - val_loss: 0.0834
Epoch 2/2
[1m552/552[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m386s[0m 698ms/step - loss: 0.0738 - val_loss: 0.0503
Epoch 1/2
[1m552/552[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m386s[0m 684ms/step - loss: 0.1163 - val_loss: 0.0565
Epoch 2/2
[1m552/552[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m355s[0m 642ms/step - loss: 0.0521 - val_loss: 0.0335
Epoch 1/2
[1m552/552[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 220ms/step - loss: 0.0904 - val_loss: 0.0369
Epoch 2/2
[1m552/552[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m120s[0m 218ms/step - loss: 0.0402 - val_loss: 0.0319
[1m119/119[0m 

ValueError: The classes, ['"', '+', 'A', 'F', 'L', 'N', 'Q', 'V', 'x', '|', '~'], are not in class_weight

In [3]:
import wfdb
import numpy as np
import pywt
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv1D, LSTM, GRU, Dense, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Define the new models
def build_1d_cnn(input_shape=(512, 1)):
    inp = Input(shape=input_shape)
    x = Conv1D(64, 15, padding='same', activation='relu')(inp)
    x = Conv1D(128, 15, padding='same', activation='relu')(x)
    x = Conv1D(256, 15, padding='same', activation='relu')(x)
    x = Conv1D(1, 15, padding='same', activation='tanh')(x)
    return Model(inp, x)

def build_lstm(input_shape=(512, 1)):
    inp = Input(shape=input_shape)
    x = LSTM(128, return_sequences=True)(inp)
    x = LSTM(64, return_sequences=True)(x)
    x = Dense(1, activation='tanh')(x)
    return Model(inp, x)

def build_gru(input_shape=(512, 1)):
    inp = Input(shape=input_shape)
    x = GRU(128, return_sequences=True)(inp)
    x = GRU(64, return_sequences=True)(x)
    x = Dense(1, activation='tanh')(x)
    return Model(inp, x)

def build_denoising_autoencoder(input_shape=(512, 1)):
    inp = Input(shape=input_shape)
    
    # Encoder
    encoded = Conv1D(128, 15, padding='same', activation='relu')(inp)
    encoded = Conv1D(64, 15, padding='same', activation='relu')(encoded)
    
    # Decoder (without upsampling to maintain the same input/output size)
    decoded = Conv1D(64, 15, padding='same', activation='relu')(encoded)
    decoded = Conv1D(128, 15, padding='same', activation='relu')(decoded)
    decoded = Conv1D(1, 15, padding='same', activation='tanh')(decoded)
    
    return Model(inp, decoded)

# Ensemble method: averaging predictions from all models
def ensemble_predict(models, noisy_signal):
    predictions = [model.predict(noisy_signal) for model in models]
    ensemble_prediction = np.mean(predictions, axis=0)
    return ensemble_prediction.squeeze()

# Load ECG and noise data
def load_ecg_data_with_labels(record_numbers, segment_length=512):
    ecg_segments = []
    labels = []
    for rec_num in record_numbers:
        record = wfdb.rdrecord(f'M:\\Dissertation\\New folder\\mit-bih-arrhythmia-database-1.0.0/{rec_num}')
        annotation = wfdb.rdann(f'M:\\Dissertation\\New folder\\mit-bih-arrhythmia-database-1.0.0/{rec_num}', 'atr')
        
        for i in range(len(annotation.sample)):
            start = max(0, annotation.sample[i] - segment_length // 2)
            end = min(len(record.p_signal), start + segment_length)
            if end - start == segment_length:
                ecg_segments.append(record.p_signal[start:end, 0])  # Assuming MLII lead
                labels.append(annotation.symbol[i])
    
    return np.array(ecg_segments), np.array(labels)

def load_noise_data():
    em = wfdb.rdrecord(r'M:\\Dissertation\\New folder\\mit-bih-noise-stress-test-database-1.0.0\\em', sampfrom=0).p_signal[:, 0]
    bw = wfdb.rdrecord(r'M:\\Dissertation\\New folder\\mit-bih-noise-stress-test-database-1.0.0\\bw', sampfrom=0).p_signal[:, 0]
    ma = wfdb.rdrecord(r'M:\\Dissertation\\New folder\\mit-bih-noise-stress-test-database-1.0.0\\ma', sampfrom=0).p_signal[:, 0]
    return em, bw, ma

# Load ECG and noise data
ecg_records = [103, 105, 111, 116, 122, 205, 213, 219, 223, 230]  # Add more records as needed
ecg_segments, labels = load_ecg_data_with_labels(ecg_records)
em_noise, bw_noise, ma_noise = load_noise_data()

# Filter and map labels to integer categories
label_mapping = {'N': 0, 'V': 1, 'A': 2, 'L': 3}  # Modify this based on the classes you want to classify
mapped_labels = np.array([label_mapping.get(label, -1) for label in labels])
valid_indices = mapped_labels != -1  # Filter out invalid labels

# Filter data and labels to only include valid classes
ecg_segments = ecg_segments[valid_indices]
mapped_labels = mapped_labels[valid_indices]

# Split data into 70% training, 15% validation, and 15% testing
X_train_val, X_test, y_train_val, y_test = train_test_split(ecg_segments, mapped_labels, test_size=0.15, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.1765, random_state=42)  # 0.1765 * 85% ≈ 15%

# Compute class weights to handle imbalance
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
class_weight_dict = {i: class_weights[i] for i in range(len(class_weights))}

# Extend and add noise to ECG segments
def extend_noise_signal(noise_signal, target_length):
    repeated_noise = np.tile(noise_signal, int(np.ceil(target_length / len(noise_signal))))
    return repeated_noise[:target_length]

def calculate_snr(signal, noise):
    signal_power = np.sum(np.square(signal))
    noise_power = np.sum(np.square(noise))
    snr = 10 * np.log10(signal_power / noise_power)
    return snr

def calculate_rmse(signal, denoised_signal):
    return np.sqrt(np.mean((signal - denoised_signal) ** 2))

# Add noise to ECG segments using multiple noise types and combinations
def add_combined_noise(ecg_segments, noises, target_snr_db):
    noisy_segments = []
    
    # Generate noise signal by combining all noise types
    combined_noise_signal = sum(extend_noise_signal(noise, ecg_segments.shape[1]) for noise in noises)
    
    for ecg_segment in ecg_segments:
        current_snr = calculate_snr(ecg_segment, combined_noise_signal[:len(ecg_segment)])
        scaling_factor = np.sqrt(np.sum(np.square(ecg_segment)) / (np.sum(np.square(combined_noise_signal)) * 10**(target_snr_db / 10)))
        scaled_noise = combined_noise_signal[:len(ecg_segment)] * scaling_factor
        noisy_segment = ecg_segment + scaled_noise
        noisy_segments.append(noisy_segment)
    return np.array(noisy_segments)

# Denoise function
def denoise_signal(models, noisy_signal):
    noisy_signal = np.expand_dims(noisy_signal, axis=-1)
    denoised_signal = ensemble_predict(models, noisy_signal)
    return denoised_signal.squeeze()

# Function to extract wavelet features
def extract_wavelet_features(ecg_slice):
    coeffs = pywt.wavedec(ecg_slice, 'db6', level=5)
    return coeffs[0]  # You may want to use more features from different levels

# Function to classify heartbeats using SVM
def classify_heartbeats(features, labels):
    clf = SVC(kernel='linear', class_weight=class_weight_dict)
    clf.fit(features, labels)
    return clf

# Add single noises to combined noises
noises = [em_noise, bw_noise, ma_noise]

snr_db = 0  # Example SNR value

# Prepare the clean ECG segments as target data for training
clean_ecg_segments_train = X_train.copy()

# Create noisy training data by combining all noise types
noisy_ecg_slices_train = add_combined_noise(X_train, noises, snr_db)
noisy_ecg_slices_val = add_combined_noise(X_val, noises, snr_db)

# Instantiate the models
models = [
    build_1d_cnn(),
    build_lstm(),
    build_gru(),
    build_denoising_autoencoder()
]

# Expand dimensions for training and validation
noisy_ecg_slices_train_expanded = np.expand_dims(noisy_ecg_slices_train, axis=-1)  # Expand dims for the models
noisy_ecg_slices_val_expanded = np.expand_dims(noisy_ecg_slices_val, axis=-1)  # Expand dims for validation
clean_ecg_segments_val_expanded = np.expand_dims(X_val, axis=-1)

# Train each model
for model in models:
    model.compile(optimizer=Adam(), loss='mse')
    model.fit(noisy_ecg_slices_train_expanded, np.expand_dims(clean_ecg_segments_train, axis=-1), 
              epochs=1, batch_size=32, validation_data=(noisy_ecg_slices_val_expanded, clean_ecg_segments_val_expanded))

# Evaluate the trained models on each noise type and combination
noisy_ecg_slices_test = add_combined_noise(X_test, noises, snr_db)

# Denoise and classify
denoised_ecg_slices_test = denoise_signal(models, noisy_ecg_slices_test)

# Calculate SNR and RMSE
snr_values = [calculate_snr(ecg, denoised) for ecg, denoised in zip(X_test, denoised_ecg_slices_test)]
rmse_values = [calculate_rmse(ecg, denoised) for ecg, denoised in zip(X_test, denoised_ecg_slices_test)]

features_noisy = np.array([extract_wavelet_features(slice) for slice in noisy_ecg_slices_test])
features_denoised = np.array([extract_wavelet_features(slice) for slice in denoised_ecg_slices_test])

test_labels = y_test[:len(features_noisy)]

model_noisy = classify_heartbeats(features_noisy, test_labels)
model_denoised = classify_heartbeats(features_denoised, test_labels)

# Predict the classes
predictions_noisy = model_noisy.predict(features_noisy)
predictions_denoised = model_denoised.predict(features_denoised)

# Get the unique classes present in test_labels
unique_classes = np.unique(test_labels)
class_names = [name for i, name in enumerate(['N', 'V', 'A', 'L']) if i in unique_classes]

# Evaluate accuracy for each class
report_noisy = classification_report(test_labels, predictions_noisy, target_names=class_names, output_dict=True, zero_division=0)
report_denoised = classification_report(test_labels, predictions_denoised, target_names=class_names, output_dict=True, zero_division=0)

# Output the results for the combined noise
results = {
    'noisy': {class_name: report_noisy[class_name]['precision'] for class_name in class_names},
    'denoised': {class_name: report_denoised[class_name]['precision'] for class_name in class_names},
    'snr': np.mean(snr_values),
    'rmse': np.mean(rmse_values)
}

print(f"Average SNR after denoising: {results['snr']:.4f} dB")
print(f"Average RMSE after denoising: {results['rmse']:.4f}")
print("Noisy data accuracies:")
for class_label, accuracy in results['noisy'].items():
    print(f"  {class_label}: {accuracy:.4f}")
print("Denoised data accuracies:")
for class_label, accuracy in results['denoised'].items():
    print(f"  {class_label}: {accuracy:.4f}")
print("\n")


[1m529/529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m212s[0m 395ms/step - loss: 0.0722 - val_loss: 0.0401
[1m529/529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m455s[0m 829ms/step - loss: 0.1033 - val_loss: 0.0648
[1m529/529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m410s[0m 745ms/step - loss: 0.0992 - val_loss: 0.0455
[1m529/529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m121s[0m 222ms/step - loss: 0.0805 - val_loss: 0.0378
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 79ms/step
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 194ms/step
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 171ms/step
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 48ms/step
Average SNR after denoising: 0.8871 dB
Average RMSE after denoising: 0.1521
Noisy data accuracies:
  N: 0.9983
  V: 0.7321
  A: 0.0925
  L: 0.9099
Denoised data accuracies:
  N: 0.9927
  V: 0.5114
  A: 0.0590
  L: 0.7831




In [4]:
import wfdb
import numpy as np
import pywt
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv1D, LSTM, GRU, Dense, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Define the new models
def build_1d_cnn(input_shape=(512, 1)):
    inp = Input(shape=input_shape)
    x = Conv1D(64, 15, padding='same', activation='relu')(inp)
    x = Conv1D(128, 15, padding='same', activation='relu')(x)
    x = Conv1D(256, 15, padding='same', activation='relu')(x)
    x = Conv1D(1, 15, padding='same', activation='tanh')(x)
    return Model(inp, x)

def build_lstm(input_shape=(512, 1)):
    inp = Input(shape=input_shape)
    x = LSTM(128, return_sequences=True)(inp)
    x = LSTM(64, return_sequences=True)(x)
    x = Dense(1, activation='tanh')(x)
    return Model(inp, x)

def build_gru(input_shape=(512, 1)):
    inp = Input(shape=input_shape)
    x = GRU(128, return_sequences=True)(inp)
    x = GRU(64, return_sequences=True)(x)
    x = Dense(1, activation='tanh')(x)
    return Model(inp, x)

def build_denoising_autoencoder(input_shape=(512, 1)):
    inp = Input(shape=input_shape)
    
    # Encoder
    encoded = Conv1D(128, 15, padding='same', activation='relu')(inp)
    encoded = Conv1D(64, 15, padding='same', activation='relu')(encoded)
    
    # Decoder (without upsampling to maintain the same input/output size)
    decoded = Conv1D(64, 15, padding='same', activation='relu')(encoded)
    decoded = Conv1D(128, 15, padding='same', activation='relu')(decoded)
    decoded = Conv1D(1, 15, padding='same', activation='tanh')(decoded)
    
    return Model(inp, decoded)

# Ensemble method: averaging predictions from all models
def ensemble_predict(models, noisy_signal):
    predictions = [model.predict(noisy_signal) for model in models]
    ensemble_prediction = np.mean(predictions, axis=0)
    return ensemble_prediction.squeeze()

# Load ECG and noise data
def load_ecg_data_with_labels(record_numbers, segment_length=512):
    ecg_segments = []
    labels = []
    for rec_num in record_numbers:
        record = wfdb.rdrecord(f'M:\\Dissertation\\New folder\\mit-bih-arrhythmia-database-1.0.0/{rec_num}')
        annotation = wfdb.rdann(f'M:\\Dissertation\\New folder\\mit-bih-arrhythmia-database-1.0.0/{rec_num}', 'atr')
        
        for i in range(len(annotation.sample)):
            start = max(0, annotation.sample[i] - segment_length // 2)
            end = min(len(record.p_signal), start + segment_length)
            if end - start == segment_length:
                ecg_segments.append(record.p_signal[start:end, 0])  # Assuming MLII lead
                labels.append(annotation.symbol[i])
    
    return np.array(ecg_segments), np.array(labels)

def load_noise_data():
    em = wfdb.rdrecord(r'M:\\Dissertation\\New folder\\mit-bih-noise-stress-test-database-1.0.0\\em', sampfrom=0).p_signal[:, 0]
    bw = wfdb.rdrecord(r'M:\\Dissertation\\New folder\\mit-bih-noise-stress-test-database-1.0.0\\bw', sampfrom=0).p_signal[:, 0]
    ma = wfdb.rdrecord(r'M:\\Dissertation\\New folder\\mit-bih-noise-stress-test-database-1.0.0\\ma', sampfrom=0).p_signal[:, 0]
    return em, bw, ma

# Load ECG and noise data
ecg_records = [103, 105, 111, 116, 122, 205, 213, 219, 223, 230]  # Add more records as needed
ecg_segments, labels = load_ecg_data_with_labels(ecg_records)
em_noise, bw_noise, ma_noise = load_noise_data()

# Filter and map labels to integer categories
label_mapping = {'N': 0, 'V': 1, 'A': 2, 'L': 3}  # Modify this based on the classes you want to classify
mapped_labels = np.array([label_mapping.get(label, -1) for label in labels])
valid_indices = mapped_labels != -1  # Filter out invalid labels

# Filter data and labels to only include valid classes
ecg_segments = ecg_segments[valid_indices]
mapped_labels = mapped_labels[valid_indices]

# Split data into 70% training, 15% validation, and 15% testing
X_train_val, X_test, y_train_val, y_test = train_test_split(ecg_segments, mapped_labels, test_size=0.15, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.1765, random_state=42)  # 0.1765 * 85% ≈ 15%

# Compute class weights to handle imbalance
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
class_weight_dict = {i: class_weights[i] for i in range(len(class_weights))}

# Extend and add noise to ECG segments
def extend_noise_signal(noise_signal, target_length):
    repeated_noise = np.tile(noise_signal, int(np.ceil(target_length / len(noise_signal))))
    return repeated_noise[:target_length]

def calculate_snr(signal, noise):
    signal_power = np.sum(np.square(signal))
    noise_power = np.sum(np.square(noise))
    snr = 10 * np.log10(signal_power / noise_power)
    return snr

def calculate_rmse(signal, denoised_signal):
    return np.sqrt(np.mean((signal - denoised_signal) ** 2))

# Add noise to ECG segments using multiple noise types and combinations
def add_combined_noise(ecg_segments, noises, target_snr_db):
    noisy_segments = []
    
    # Generate noise signal by combining all noise types
    combined_noise_signal = sum(extend_noise_signal(noise, ecg_segments.shape[1]) for noise in noises)
    
    for ecg_segment in ecg_segments:
        current_snr = calculate_snr(ecg_segment, combined_noise_signal[:len(ecg_segment)])
        scaling_factor = np.sqrt(np.sum(np.square(ecg_segment)) / (np.sum(np.square(combined_noise_signal)) * 10**(target_snr_db / 10)))
        scaled_noise = combined_noise_signal[:len(ecg_segment)] * scaling_factor
        noisy_segment = ecg_segment + scaled_noise
        noisy_segments.append(noisy_segment)
    return np.array(noisy_segments)

# Denoise function
def denoise_signal(models, noisy_signal):
    noisy_signal = np.expand_dims(noisy_signal, axis=-1)
    denoised_signal = ensemble_predict(models, noisy_signal)
    return denoised_signal.squeeze()

# Function to extract wavelet features
def extract_wavelet_features(ecg_slice):
    coeffs = pywt.wavedec(ecg_slice, 'db6', level=5)
    return coeffs[0]  # You may want to use more features from different levels

# Function to classify heartbeats using SVM
def classify_heartbeats(features, labels):
    clf = SVC(kernel='linear', class_weight=class_weight_dict)
    clf.fit(features, labels)
    return clf

# Add single noises to combined noises
noises_dict = {
    'EM': [em_noise],
    'BW': [bw_noise],
    'MA': [ma_noise],
    'EM+MA': [em_noise, ma_noise],
    'EM+BW': [em_noise, bw_noise],
    'MA+BW': [ma_noise, bw_noise],
    'EM+BW+MA': [em_noise, bw_noise, ma_noise]
}

snr_db = 0  # Example SNR value

# Prepare the clean ECG segments as target data for training
clean_ecg_segments_train = X_train.copy()

# Create noisy training data by combining all noise types
noisy_ecg_slices_train = add_combined_noise(X_train, [em_noise, bw_noise, ma_noise], snr_db)
noisy_ecg_slices_val = add_combined_noise(X_val, [em_noise, bw_noise, ma_noise], snr_db)

# Instantiate the models
models = [
    build_1d_cnn(),
    build_lstm(),
    build_gru(),
    build_denoising_autoencoder()
]

# Expand dimensions for training and validation
noisy_ecg_slices_train_expanded = np.expand_dims(noisy_ecg_slices_train, axis=-1)  # Expand dims for the models
noisy_ecg_slices_val_expanded = np.expand_dims(noisy_ecg_slices_val, axis=-1)  # Expand dims for validation
clean_ecg_segments_val_expanded = np.expand_dims(X_val, axis=-1)

# Train each model
for model in models:
    model.compile(optimizer=Adam(), loss='mse')
    model.fit(noisy_ecg_slices_train_expanded, np.expand_dims(clean_ecg_segments_train, axis=-1), 
              epochs=1, batch_size=32, validation_data=(noisy_ecg_slices_val_expanded, clean_ecg_segments_val_expanded))

# Evaluate the trained models on each noise type and combination
results = {}

for noise_name, noise_data in noises_dict.items():
    noisy_ecg_slices_test = add_combined_noise(X_test, noise_data, snr_db)

    # Denoise and classify
    denoised_ecg_slices_test = denoise_signal(models, noisy_ecg_slices_test)

    # Calculate SNR and RMSE
    snr_values = [calculate_snr(ecg, denoised) for ecg, denoised in zip(X_test, denoised_ecg_slices_test)]
    rmse_values = [calculate_rmse(ecg, denoised) for ecg, denoised in zip(X_test, denoised_ecg_slices_test)]

    features_noisy = np.array([extract_wavelet_features(slice) for slice in noisy_ecg_slices_test])
    features_denoised = np.array([extract_wavelet_features(slice) for slice in denoised_ecg_slices_test])

    test_labels = y_test[:len(features_noisy)]

    model_noisy = classify_heartbeats(features_noisy, test_labels)
    model_denoised = classify_heartbeats(features_denoised, test_labels)

    # Predict the classes
    predictions_noisy = model_noisy.predict(features_noisy)
    predictions_denoised = model_denoised.predict(features_denoised)

    # Get the unique classes present in test_labels
    unique_classes = np.unique(test_labels)
    class_names = [name for i, name in enumerate(['N', 'V', 'A', 'L']) if i in unique_classes]

    # Evaluate accuracy for each class
    report_noisy = classification_report(test_labels, predictions_noisy, target_names=class_names, output_dict=True, zero_division=0)
    report_denoised = classification_report(test_labels, predictions_denoised, target_names=class_names, output_dict=True, zero_division=0)

    # Store accuracy and SNR, RMSE for each class
    results[noise_name] = {
        'snr': np.mean(snr_values),
        'rmse': np.mean(rmse_values),
        'noisy': {class_name: report_noisy[class_name]['precision'] for class_name in class_names},
        'denoised': {class_name: report_denoised[class_name]['precision'] for class_name in class_names},
    }

# Output the results for each noise condition
for noise_name, metrics in results.items():
    print(f"Noise type: {noise_name}")
    print(f"Average SNR after denoising: {metrics['snr']:.4f} dB")
    print(f"Average RMSE after denoising: {metrics['rmse']:.4f}")
    print("Noisy data accuracies:")
    for class_label, accuracy in metrics['noisy'].items():
        print(f"  {class_label}: {accuracy:.4f}")
    print("Denoised data accuracies:")
    for class_label, accuracy in metrics['denoised'].items():
        print(f"  {class_label}: {accuracy:.4f}")
    print("\n")


[1m529/529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m181s[0m 335ms/step - loss: 0.0801 - val_loss: 0.0412
[1m529/529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m336s[0m 623ms/step - loss: 0.1153 - val_loss: 0.0558
[1m529/529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m456s[0m 843ms/step - loss: 0.0966 - val_loss: 0.0513
[1m529/529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m175s[0m 317ms/step - loss: 0.0716 - val_loss: 0.0366
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 104ms/step
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 235ms/step
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 225ms/step
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 64ms/step
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 101ms/step
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 239ms/step
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 209ms/step
[1m

In [None]:
import wfdb
import numpy as np
import pywt
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv1D, LSTM, GRU, Dense, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Define the new models
def build_1d_cnn(input_shape=(512, 1)):
    inp = Input(shape=input_shape)
    x = Conv1D(64, 15, padding='same', activation='relu')(inp)
    x = Conv1D(128, 15, padding='same', activation='relu')(x)
    x = Conv1D(256, 15, padding='same', activation='relu')(x)
    x = Conv1D(1, 15, padding='same', activation='tanh')(x)
    return Model(inp, x)

def build_lstm(input_shape=(512, 1)):
    inp = Input(shape=input_shape)
    x = LSTM(128, return_sequences=True)(inp)
    x = LSTM(64, return_sequences=True)(x)
    x = Dense(1, activation='tanh')(x)
    return Model(inp, x)

def build_gru(input_shape=(512, 1)):
    inp = Input(shape=input_shape)
    x = GRU(128, return_sequences=True)(inp)
    x = GRU(64, return_sequences=True)(x)
    x = Dense(1, activation='tanh')(x)
    return Model(inp, x)

def build_denoising_autoencoder(input_shape=(512, 1)):
    inp = Input(shape=input_shape)
    
    # Encoder
    encoded = Conv1D(128, 15, padding='same', activation='relu')(inp)
    encoded = Conv1D(64, 15, padding='same', activation='relu')(encoded)
    
    # Decoder (without upsampling to maintain the same input/output size)
    decoded = Conv1D(64, 15, padding='same', activation='relu')(encoded)
    decoded = Conv1D(128, 15, padding='same', activation='relu')(decoded)
    decoded = Conv1D(1, 15, padding='same', activation='tanh')(decoded)
    
    return Model(inp, decoded)

# Ensemble method: averaging predictions from all models
def ensemble_predict(models, noisy_signal):
    predictions = [model.predict(noisy_signal) for model in models]
    ensemble_prediction = np.mean(predictions, axis=0)
    return ensemble_prediction.squeeze()

# Load ECG and noise data
def load_ecg_data_with_labels(record_numbers, segment_length=512):
    ecg_segments = []
    labels = []
    for rec_num in record_numbers:
        record = wfdb.rdrecord(f'M:\\Dissertation\\New folder\\mit-bih-arrhythmia-database-1.0.0/{rec_num}')
        annotation = wfdb.rdann(f'M:\\Dissertation\\New folder\\mit-bih-arrhythmia-database-1.0.0/{rec_num}', 'atr')
        
        for i in range(len(annotation.sample)):
            start = max(0, annotation.sample[i] - segment_length // 2)
            end = min(len(record.p_signal), start + segment_length)
            if end - start == segment_length:
                ecg_segments.append(record.p_signal[start:end, 0])  # Assuming MLII lead
                labels.append(annotation.symbol[i])
    
    return np.array(ecg_segments), np.array(labels)

def load_noise_data():
    em = wfdb.rdrecord(r'M:\\Dissertation\\New folder\\mit-bih-noise-stress-test-database-1.0.0\\em', sampfrom=0).p_signal[:, 0]
    bw = wfdb.rdrecord(r'M:\\Dissertation\\New folder\\mit-bih-noise-stress-test-database-1.0.0\\bw', sampfrom=0).p_signal[:, 0]
    ma = wfdb.rdrecord(r'M:\\Dissertation\\New folder\\mit-bih-noise-stress-test-database-1.0.0\\ma', sampfrom=0).p_signal[:, 0]
    return em, bw, ma

# Load ECG and noise data
ecg_records = [103, 105, 111, 116, 122, 205, 213, 219, 223, 230]  # Add more records as needed
ecg_segments, labels = load_ecg_data_with_labels(ecg_records)
em_noise, bw_noise, ma_noise = load_noise_data()

# Filter and map labels to integer categories
label_mapping = {'N': 0, 'V': 1, 'A': 2, 'L': 3}  # Modify this based on the classes you want to classify
mapped_labels = np.array([label_mapping.get(label, -1) for label in labels])
valid_indices = mapped_labels != -1  # Filter out invalid labels

# Filter data and labels to only include valid classes
ecg_segments = ecg_segments[valid_indices]
mapped_labels = mapped_labels[valid_indices]

# Split data into 70% training, 15% validation, and 15% testing
X_train_val, X_test, y_train_val, y_test = train_test_split(ecg_segments, mapped_labels, test_size=0.15, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.1765, random_state=42)  # 0.1765 * 85% ≈ 15%

# Compute class weights to handle imbalance
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
class_weight_dict = {i: class_weights[i] for i in range(len(class_weights))}

# Extend and add noise to ECG segments
def extend_noise_signal(noise_signal, target_length):
    repeated_noise = np.tile(noise_signal, int(np.ceil(target_length / len(noise_signal))))
    return repeated_noise[:target_length]

def calculate_snr(signal, noise):
    signal_power = np.sum(np.square(signal))
    noise_power = np.sum(np.square(noise))
    snr = 10 * np.log10(signal_power / noise_power)
    return snr

def calculate_rmse(signal, denoised_signal):
    return np.sqrt(np.mean((signal - denoised_signal) ** 2))

# Add noise to ECG segments using multiple noise types and combinations
def add_combined_noise(ecg_segments, noises, target_snr_db):
    noisy_segments = []
    
    # Generate noise signal by combining all noise types
    combined_noise_signal = sum(extend_noise_signal(noise, ecg_segments.shape[1]) for noise in noises)
    
    for ecg_segment in ecg_segments:
        current_snr = calculate_snr(ecg_segment, combined_noise_signal[:len(ecg_segment)])
        scaling_factor = np.sqrt(np.sum(np.square(ecg_segment)) / (np.sum(np.square(combined_noise_signal)) * 10**(target_snr_db / 10)))
        scaled_noise = combined_noise_signal[:len(ecg_segment)] * scaling_factor
        noisy_segment = ecg_segment + scaled_noise
        noisy_segments.append(noisy_segment)
    return np.array(noisy_segments)

# Denoise function
def denoise_signal(models, noisy_signal):
    noisy_signal = np.expand_dims(noisy_signal, axis=-1)
    denoised_signal = ensemble_predict(models, noisy_signal)
    return denoised_signal.squeeze()

# Function to extract wavelet features
def extract_wavelet_features(ecg_slice):
    coeffs = pywt.wavedec(ecg_slice, 'db6', level=5)
    return coeffs[0]  # You may want to use more features from different levels

# Function to classify heartbeats using SVM
def classify_heartbeats(features, labels):
    clf = SVC(kernel='linear', class_weight=class_weight_dict)
    clf.fit(features, labels)
    return clf

# Add single noises to combined noises
noises_dict = {
    'EM': [em_noise],
    'BW': [bw_noise],
    'MA': [ma_noise],
    'EM+MA': [em_noise, ma_noise],
    'EM+BW': [em_noise, bw_noise],
    'MA+BW': [ma_noise, bw_noise],
    'EM+BW+MA': [em_noise, bw_noise, ma_noise]
}

snr_db = 0  # Example SNR value

# Prepare the clean ECG segments as target data for training
clean_ecg_segments_train = X_train.copy()

# Create noisy training data by combining all noise types
noisy_ecg_slices_train = add_combined_noise(X_train, [em_noise, bw_noise, ma_noise], snr_db)
noisy_ecg_slices_val = add_combined_noise(X_val, [em_noise, bw_noise, ma_noise], snr_db)

# Instantiate the models
models = [
    build_1d_cnn(),
    build_lstm(),
    build_gru(),
    build_denoising_autoencoder()
]

# Expand dimensions for training and validation
noisy_ecg_slices_train_expanded = np.expand_dims(noisy_ecg_slices_train, axis=-1)  # Expand dims for the models
noisy_ecg_slices_val_expanded = np.expand_dims(noisy_ecg_slices_val, axis=-1)  # Expand dims for validation
clean_ecg_segments_val_expanded = np.expand_dims(X_val, axis=-1)

# Train each model
for model in models:
    model.compile(optimizer=Adam(), loss='mse')
    model.fit(noisy_ecg_slices_train_expanded, np.expand_dims(clean_ecg_segments_train, axis=-1), 
              epochs=5, batch_size=32, validation_data=(noisy_ecg_slices_val_expanded, clean_ecg_segments_val_expanded))

# Evaluate the trained models on each noise type and combination
results = {}

for noise_name, noise_data in noises_dict.items():
    noisy_ecg_slices_test = add_combined_noise(X_test, noise_data, snr_db)

    # Denoise and classify
    denoised_ecg_slices_test = denoise_signal(models, noisy_ecg_slices_test)

    # Calculate SNR and RMSE
    snr_values = [calculate_snr(ecg, denoised) for ecg, denoised in zip(X_test, denoised_ecg_slices_test)]
    rmse_values = [calculate_rmse(ecg, denoised) for ecg, denoised in zip(X_test, denoised_ecg_slices_test)]

    features_noisy = np.array([extract_wavelet_features(slice) for slice in noisy_ecg_slices_test])
    features_denoised = np.array([extract_wavelet_features(slice) for slice in denoised_ecg_slices_test])

    test_labels = y_test[:len(features_noisy)]

    model_noisy = classify_heartbeats(features_noisy, test_labels)
    model_denoised = classify_heartbeats(features_denoised, test_labels)

    # Predict the classes
    predictions_noisy = model_noisy.predict(features_noisy)
    predictions_denoised = model_denoised.predict(features_denoised)

    # Get the unique classes present in test_labels
    unique_classes = np.unique(test_labels)
    class_names = [name for i, name in enumerate(['N', 'V', 'A', 'L']) if i in unique_classes]

    # Evaluate accuracy for each class
    report_noisy = classification_report(test_labels, predictions_noisy, target_names=class_names, output_dict=True, zero_division=0)
    report_denoised = classification_report(test_labels, predictions_denoised, target_names=class_names, output_dict=True, zero_division=0)

    # Store accuracy and SNR, RMSE for each class
    results[noise_name] = {
        'snr': np.mean(snr_values),
        'rmse': np.mean(rmse_values),
        'noisy': {class_name: report_noisy[class_name]['precision'] for class_name in class_names},
        'denoised': {class_name: report_denoised[class_name]['precision'] for class_name in class_names},
    }

# Output the results for each noise condition
for noise_name, metrics in results.items():
    print(f"Noise type: {noise_name}")
    print(f"Average SNR after denoising: {metrics['snr']:.4f} dB")
    print(f"Average RMSE after denoising: {metrics['rmse']:.4f}")
    print("Noisy data accuracies:")
    for class_label, accuracy in metrics['noisy'].items():
        print(f"  {class_label}: {accuracy:.4f}")
    print("Denoised data accuracies:")
    for class_label, accuracy in metrics['denoised'].items():
        print(f"  {class_label}: {accuracy:.4f}")
    print("\n")


Epoch 1/5
[1m529/529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m179s[0m 333ms/step - loss: 0.0757 - val_loss: 0.0401
Epoch 2/5
[1m529/529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m184s[0m 347ms/step - loss: 0.0433 - val_loss: 0.0330
Epoch 3/5
[1m529/529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m184s[0m 347ms/step - loss: 0.0357 - val_loss: 0.0307
Epoch 4/5
[1m529/529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m228s[0m 431ms/step - loss: 0.0345 - val_loss: 0.0308
Epoch 5/5
[1m529/529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m231s[0m 438ms/step - loss: 0.0345 - val_loss: 0.0294
Epoch 1/5
[1m529/529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m440s[0m 819ms/step - loss: 0.1065 - val_loss: 0.0414
Epoch 2/5
[1m529/529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m332s[0m 627ms/step - loss: 0.0623 - val_loss: 0.0485
Epoch 3/5
[1m529/529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m324s[0m 612ms/step - loss: 0.0529 - val_loss: 0.0349
Epoch 4/5
[1m52