In [None]:
import os
import wfdb
import numpy as np
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras import layers
import pandas as pd

# Directories holding the unzipped datasets (local Windows paths)
arrhythmia_dataset_path = r'M:\Dissertation\New folder\mit-bih-arrhythmia-database-1.0.0'
noise_dataset_path = r'M:\Dissertation\New folder\mit-bih-noise-stress-test-database-1.0.0'

# Names of the MIT-BIH Arrhythmia records processed by this script
arrhythmia_records = '103 105 111 116 122 205 213 219 223 230'.split()

# Load one noise recording using WFDB
def load_noise(record_name, noise_dataset_path):
    """Read the WFDB record `record_name` located in `noise_dataset_path`.

    Only the first column of the record's `p_signal` array (the first
    channel) is returned.
    """
    noise_record_path = os.path.join(noise_dataset_path, record_name)
    physical_signal = wfdb.rdrecord(noise_record_path).p_signal
    first_channel = physical_signal[:, 0]
    return first_channel

# Load the three noise signals, read in the order 'ma', 'em', 'bw'
ma_noise, em_noise, bw_noise = [
    load_noise(noise_name, noise_dataset_path)
    for noise_name in ('ma', 'em', 'bw')
]

# Function to add noise to the ECG signal
def add_noise(ecg_signal, noise, snr_db):
    """Mix `noise` into `ecg_signal` at a target signal-to-noise ratio.

    The noise is multiplied by sqrt(k) with
    k = P_signal / (10 ** (snr_db / 10) * P_noise), where P is the mean
    squared amplitude, so the ratio of signal power to scaled-noise power
    equals `snr_db` decibels.

    Args:
        ecg_signal: array-like clean signal.
        noise: array-like noise. If its shape differs from the signal's it is
            repeated cyclically or truncated to match.
        snr_db: desired signal-to-noise ratio in dB.

    Returns:
        Array with the same shape as `ecg_signal` holding signal plus scaled
        noise. When the noise has zero power (or either input is empty) the
        signal is returned without any contribution from the noise instead of
        producing NaNs.
    """
    ecg_signal = np.asarray(ecg_signal)
    noise = np.asarray(noise)

    # Make the noise cover the signal exactly; np.resize repeats or truncates.
    if noise.shape != ecg_signal.shape:
        noise = np.resize(noise, ecg_signal.shape)

    if ecg_signal.size == 0:
        return ecg_signal.copy()

    signal_power = np.mean(ecg_signal ** 2)
    noise_power = np.mean(noise ** 2)

    if noise_power == 0:
        # Silent noise cannot be scaled to any SNR; dividing by its power
        # would give inf and then inf * 0 = NaN in the output.
        scale = 0.0
    else:
        k = signal_power / (10 ** (snr_db / 10) * noise_power)
        scale = np.sqrt(k)

    noisy_signal = ecg_signal + scale * noise
    return noisy_signal

# Noisy versions of every record: noisy_signals[record][noise_type] -> signal
noisy_signals = {}

# Noise sources layered (in application order) for each noise type
noise_recipes = {
    'EM': (em_noise,),
    'MA': (ma_noise,),
    'BW': (bw_noise,),
    'EM+MA': (em_noise, ma_noise),
    'EM+BW': (em_noise, bw_noise),
    'MA+BW': (ma_noise, bw_noise),
    'EM+MA+BW': (em_noise, ma_noise, bw_noise),
}

for record in arrhythmia_records:
    record_path = os.path.join(arrhythmia_dataset_path, record)
    ecg_record = wfdb.rdrecord(record_path)
    ecg_signal = ecg_record.p_signal[:, 0]  # first lead only
    n_samples = len(ecg_signal)

    contaminated = {}
    for noise_type, sources in noise_recipes.items():
        mixed = ecg_signal
        for source in sources:
            # Every layer is added at 0 dB relative to the signal built so far
            mixed = add_noise(mixed, source[:n_samples], 0)
        contaminated[noise_type] = mixed

    noisy_signals[record] = contaminated

# Define CGAN components
def build_generator():
    """Build the generator: a stack of three 1-D convolutions.

    Input is a (512, 1) segment. Two 64-filter ReLU convolutions are followed
    by a single-filter convolution with no activation; every convolution uses
    kernel size 3 with "same" padding.
    """
    stack = [layers.Input(shape=(512, 1))]
    stack += [
        layers.Conv1D(64, kernel_size=3, padding="same", activation="relu")
        for _ in range(2)
    ]
    stack.append(layers.Conv1D(1, kernel_size=3, padding="same"))
    return tf.keras.Sequential(stack)

def build_discriminator():
    """Build the discriminator for (512, 1) segments.

    Two 64-filter ReLU convolutions (kernel size 3, "same" padding) are
    flattened and fed to a single sigmoid unit.
    """
    stack = [layers.Input(shape=(512, 1))]
    stack += [
        layers.Conv1D(64, kernel_size=3, padding="same", activation="relu")
        for _ in range(2)
    ]
    stack.append(layers.Flatten())
    stack.append(layers.Dense(1, activation="sigmoid"))
    return tf.keras.Sequential(stack)

def build_cgan(generator, discriminator):
    """Compile the discriminator and build the stacked generator -> discriminator model.

    Args:
        generator: model applied to a (512, 1) input.
        discriminator: model applied to the generator's output; it is
            compiled in place and its `trainable` flag is set to False.

    Returns:
        The combined model, compiled with the Adam optimizer and binary
        cross-entropy loss, mapping the generator input to the
        discriminator's output.
    """
    # Compile the discriminator on its own before its `trainable` flag is cleared.
    discriminator.compile(optimizer='adam', loss='binary_crossentropy')
    # NOTE(review): presumably meant to freeze the discriminator only inside the
    # combined model; whether the standalone discriminator still updates in
    # train_on_batch after this depends on the Keras version -- confirm.
    discriminator.trainable = False

    # Chain generator and discriminator on a fresh (512, 1) input.
    noise_input = layers.Input(shape=(512, 1))
    generated_signal = generator(noise_input)
    valid = discriminator(generated_signal)

    cgan = tf.keras.models.Model(noise_input, valid)
    cgan.compile(optimizer='adam', loss='binary_crossentropy')

    return cgan

# Instantiate both networks (generator first), then wire them into the compiled CGAN
generator, discriminator = build_generator(), build_discriminator()
cgan = build_cgan(generator=generator, discriminator=discriminator)

# Function to train the CGAN
def train_cgan(generator, discriminator, cgan, X_train, X_train_noisy, epochs=10, batch_size=32):
    """Run `epochs` adversarial training steps, one random batch per step.

    Each step samples `batch_size` indices (with replacement), trains the
    discriminator on the clean segments (label 1) and on the generator's
    output for the matching noisy segments (label 0), then trains the
    combined model so the generator's output is scored as real.

    Args:
        generator: model producing a signal from a noisy segment.
        discriminator: compiled model scoring segments.
        cgan: compiled combined generator -> discriminator model.
        X_train: clean segments, indexed along the first axis.
        X_train_noisy: noisy segments paired index-for-index with `X_train`.
        epochs: number of single-batch training steps.
        batch_size: number of segments drawn per step.

    Raises:
        ValueError: if `X_train` is empty or the two arrays hold a different
            number of segments (the same indices are used for both).
    """
    n_segments = len(X_train)
    if n_segments == 0:
        raise ValueError("X_train is empty; there is nothing to train on")
    if len(X_train_noisy) != n_segments:
        raise ValueError(
            f"X_train has {n_segments} segments but X_train_noisy has "
            f"{len(X_train_noisy)}; they must be paired one-to-one"
        )

    real = np.ones((batch_size, 1))
    fake = np.zeros((batch_size, 1))

    for epoch in range(epochs):
        idx = np.random.randint(0, n_segments, batch_size)
        real_signals = X_train[idx]

        noise = X_train_noisy[idx]
        # verbose=0: the default prints a progress bar on every step, which
        # buries the loss messages below.
        gen_signals = generator.predict(noise, verbose=0)

        d_loss_real = discriminator.train_on_batch(real_signals, real)
        d_loss_fake = discriminator.train_on_batch(gen_signals, fake)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        g_loss = cgan.train_on_batch(noise, real)

        # Logged every 100 steps, so with the default epochs=10 only step 0 prints.
        if epoch % 100 == 0:
            print(f"Epoch: {epoch}, D Loss: {d_loss}, G Loss: {g_loss}")

# Function to segment the signal
def segment_signal(signal, segment_length=512):
    """Cut `signal` into consecutive, non-overlapping segments.

    Trailing samples that do not fill a whole segment are discarded. The
    result is shaped (n_segments, segment_length, 1).
    """
    leftover = len(signal) % segment_length
    usable_length = len(signal) - leftover
    whole_segments = usable_length // segment_length
    return signal[:usable_length].reshape((whole_segments, segment_length, 1))

# Segment and reshape signals before training CGAN
for record in noisy_signals:
    # The clean reference is identical for every noise type of a record, so it
    # is read from disk and segmented once per record rather than once per
    # noise type.
    ecg_record = wfdb.rdrecord(os.path.join(arrhythmia_dataset_path, record))
    ecg_signal = ecg_record.p_signal[:, 0]  # Use the first lead for simplicity
    X_train = segment_signal(ecg_signal)

    for noise_type in noisy_signals[record]:
        X_train_noisy = segment_signal(noisy_signals[record][noise_type])

        train_cgan(generator, discriminator, cgan, X_train, X_train_noisy)

# Placeholder for a Pan-Tompkins QRS detector (simplified version for demonstration)
def detect_qrs(signal):
    """Return stand-in QRS positions: every 256th sample index of `signal`.

    No detection is performed on the samples themselves; the indices
    0, 256, 512, ... below len(signal) are returned.
    """
    stride = 256
    n_samples = len(signal)
    return np.arange(0, n_samples, stride)

# Extract fixed-length raw-sample windows as features (stand-in for wavelet
# decomposition or another feature extraction method)
def extract_features(signal, qrs_indices, window_length=512):
    """Cut one `window_length`-sample window of `signal` starting at each index.

    Windows that would start before the signal or run past its end are
    skipped. Otherwise the final windows would be shorter than the others and
    the rows could not be stacked into a rectangular array.

    Args:
        signal: 1-D array-like of samples.
        qrs_indices: iterable of window start indices.
        window_length: number of samples per window.

    Returns:
        Array of shape (n_windows, window_length); (0, window_length) when no
        window fits.
    """
    signal = np.asarray(signal)
    n_samples = len(signal)
    windows = [
        signal[start:start + window_length]
        for start in qrs_indices
        if 0 <= start and start + window_length <= n_samples
    ]
    if not windows:
        return np.empty((0, window_length), dtype=signal.dtype)
    return np.array(windows)

def get_labels(record_name):
    """Map the 'atr' annotation symbols of `record_name` to integer class labels.

    'N' -> 0, 'V' -> 1, 'A' -> 2, 'L' -> 3. Annotations carrying any other
    symbol are skipped, so the result can be shorter than the annotation list.
    """
    symbol_to_class = {'N': 0, 'V': 1, 'A': 2, 'L': 3}
    annotation_path = os.path.join(arrhythmia_dataset_path, record_name)
    beat_symbols = wfdb.rdann(annotation_path, 'atr').symbol
    return np.array([symbol_to_class[s] for s in beat_symbols if s in symbol_to_class])

# Classify and evaluate the performance
# NOTE(review): labels come from the annotation file in beat order, while the
# windows below start at a fixed 256-sample stride, so label i is not
# guaranteed to describe window i -- confirm the intended alignment. Accuracy
# is also measured on the same windows the SVM was fitted on.
results = {}
for record in noisy_signals:
    labels = get_labels(record)
    results[record] = {}
    for noise_type, noisy_signal in noisy_signals[record].items():
        # The generator only accepts whole 512-sample segments. Reshaping the
        # raw signal with reshape(-1, 512, 1) fails whenever its length is not
        # a multiple of 512, so the trailing partial segment is dropped first.
        noisy_segments = segment_signal(noisy_signal)
        if len(noisy_segments) == 0:
            print(f"Record {record}, Noise {noise_type}: skipped (signal shorter than one segment)")
            continue
        denoised_signal = generator.predict(noisy_segments).flatten()

        # Keep only windows that fit inside the (possibly shorter) denoised
        # signal so both feature matrices are rectangular and row-aligned.
        qrs_indices = detect_qrs(noisy_signal)
        qrs_indices = qrs_indices[qrs_indices + 512 <= len(denoised_signal)]
        features_noisy = extract_features(noisy_signal, qrs_indices)
        features_denoised = extract_features(denoised_signal, qrs_indices)

        # The number of labels and the number of windows can differ; truncate
        # everything to the common length so fit/score receive matching sizes.
        n_samples = min(len(labels), len(features_noisy))
        y_true = labels[:n_samples]
        if n_samples == 0 or np.unique(y_true).size < 2:
            # SVC cannot be fitted without at least two classes.
            print(f"Record {record}, Noise {noise_type}: skipped (fewer than two classes to classify)")
            continue
        features_noisy = features_noisy[:n_samples]
        features_denoised = features_denoised[:n_samples]

        scaler = StandardScaler()
        features_noisy = scaler.fit_transform(features_noisy)
        features_denoised = scaler.transform(features_denoised)

        svm = SVC()
        svm.fit(features_noisy, y_true)
        acc_noisy = accuracy_score(y_true, svm.predict(features_noisy))
        acc_denoised = accuracy_score(y_true, svm.predict(features_denoised))

        improvement = acc_denoised - acc_noisy
        results[record][noise_type] = {'noisy_acc': acc_noisy, 'denoised_acc': acc_denoised, 'improvement': improvement}

        print(f"Record {record}, Noise {noise_type}, Noisy Acc: {acc_noisy}, Denoised Acc: {acc_denoised}, Improvement: {improvement}")

# Summarize the results in a DataFrame: one row per (record, noise type)
result_data = [
    {
        'Record': record,
        'Noise Type': noise_type,
        'Noisy Acc': metrics['noisy_acc'],
        'Denoised Acc': metrics['denoised_acc'],
        'Improvement': metrics['improvement'],
    }
    for record, per_noise in results.items()
    for noise_type, metrics in per_noise.items()
]

results_df = pd.DataFrame(result_data)

# Average every metric over the records, separately for each noise type
metric_columns = ['Noisy Acc', 'Denoised Acc', 'Improvement']
summary_df = (
    results_df
    .groupby('Noise Type')
    .agg({column: 'mean' for column in metric_columns})
    .reset_index()
)

# Display the summary results
print(summary_df)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step




Epoch: 0, D Loss: 0.6779403686523438, G Loss: [array(0.6844532, dtype=float32), array(0.6844532, dtype=float32)]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Epoch: 0, D Loss: 0.6925798058509827, G Loss: [array(0.69351906, dtype=float32), array(0.69351906, dtype=float32)]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 