<a href="https://colab.research.google.com/github/ma23193/Dissertation/blob/main/diser6(update).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [16]:
import os

# Function to list available records in a directory
def list_records(directory):
    records = [f.split('.')[0] for f in os.listdir(directory) if f.endswith('.dat')]
    return sorted(set(records))

# Paths to the folders containing the extracted datasets
arrhythmia_folder_path = '/content/drive/MyDrive/Dissertation/mit-bih-arrhythmia-database-1.0.0'
noise_folder_path = '/content/drive/MyDrive/Dissertation/mit-bih-noise-stress-test-database-1.0.0'

arrhythmia_records = list_records(arrhythmia_folder_path)
noise_records = list_records(noise_folder_path)

print(f"Available arrhythmia records: {arrhythmia_records}")
print(f"Available noise records: {noise_records}")


Available arrhythmia records: ['100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '111', '112', '113', '114', '115', '116', '117', '118', '119', '121', '122', '123', '124', '200', '201', '202', '203', '205', '207', '208', '209', '210', '212', '213', '214', '215', '217', '219', '220', '221', '222', '223', '228', '230', '231', '232', '233', '234']
Available noise records: ['118e00', '118e06', '118e12', '118e18', '118e24', '118e_6', '119e00', '119e06', '119e12', '119e18', '119e24', '119e_6', 'bw', 'em', 'ma']


In [23]:
import os
import numpy as np
import wfdb
import torch
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import MinMaxScaler

# Load ECG data
def load_ecg_data(record_path):
    record = wfdb.rdrecord(record_path)
    signal = record.p_signal[:, 0]  # Use only the first channel
    return signal

# Preprocess data
def preprocess_data(arrhythmia_folder, noise_folder, train_records, test_records):
    clean_signals = []
    noise_signals = []

    # Load arrhythmia records
    for record in train_records + test_records:
        record_path = os.path.join(arrhythmia_folder, record)
        if os.path.exists(record_path + '.dat'):
            signal = load_ecg_data(record_path)
            clean_signals.append(signal)

    # Load noise records
    for record in noise_records:
        record_path = os.path.join(noise_folder, record)
        if os.path.exists(record_path + '.dat'):
            signal = load_ecg_data(record_path)
            noise_signals.append(signal)

    if clean_signals:
        clean_signals = np.concatenate(clean_signals, axis=0)
    else:
        clean_signals = np.empty((0,))

    if noise_signals:
        noise_signals = np.concatenate(noise_signals, axis=0)
    else:
        noise_signals = np.empty((0,))

    min_length = min(len(clean_signals), len(noise_signals))
    clean_signals = clean_signals[:min_length]
    noise_signals = noise_signals[:min_length]

    if clean_signals.size > 0 and noise_signals.size > 0:
        # Create noisy signals by adding noise to clean signals
        noisy_signals = clean_signals + noise_signals
        # Normalize signals
        scaler = MinMaxScaler()
        clean_signals = scaler.fit_transform(clean_signals.reshape(-1, 1)).flatten()
        noisy_signals = scaler.transform(noisy_signals.reshape(-1, 1)).flatten()
    else:
        noisy_signals = np.empty((0,))

    return clean_signals, noisy_signals

# Define the records to use for training and testing
train_records = ['100', '101', '102', '103', '104', '105', '106', '107', '108', '109']
test_records = ['111', '112', '113', '114', '115', '116', '117', '118', '119', '121']

# Filter out unavailable records
train_records = [rec for rec in train_records if rec in arrhythmia_records]
test_records = [rec for rec in test_records if rec in arrhythmia_records]

# Paths to the folders containing the extracted datasets
arrhythmia_folder_path = '/content/drive/MyDrive/Dissertation/mit-bih-arrhythmia-database-1.0.0'
noise_folder_path = '/content/drive/MyDrive/Dissertation/mit-bih-noise-stress-test-database-1.0.0'

clean_data, noisy_data = preprocess_data(arrhythmia_folder_path, noise_folder_path, train_records, test_records)

print(f"Clean data shape: {clean_data.shape}")
print(f"Noisy data shape: {noisy_data.shape}")

class ECGDataset(Dataset):
    def __init__(self, clean_data, noisy_data, segment_length=2560):
        self.clean_data = clean_data
        self.noisy_data = noisy_data
        self.segment_length = segment_length

    def __len__(self):
        return len(self.clean_data) // self.segment_length

    def __getitem__(self, idx):
        start_idx = idx * self.segment_length
        end_idx = start_idx + self.segment_length
        clean_sample = self.clean_data[start_idx:end_idx]
        noisy_sample = self.noisy_data[start_idx:end_idx]
        return torch.Tensor(noisy_sample).unsqueeze(0), torch.Tensor(clean_sample).unsqueeze(0)

segment_length = 2560  # As specified in the paper
train_size = int(0.8 * len(clean_data))
test_size = len(clean_data) - train_size

train_clean = clean_data[:train_size]
test_clean = clean_data[train_size:]
train_noisy = noisy_data[:train_size]
test_noisy = noisy_data[train_size:]

train_dataset = ECGDataset(train_clean, train_noisy, segment_length=segment_length)
test_dataset = ECGDataset(test_clean, test_noisy, segment_length=segment_length)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

print(f"Train loader created with {len(train_loader.dataset)} samples.")
print(f"Test loader created with {len(test_loader.dataset)} samples.")


Clean data shape: (9750000,)
Noisy data shape: (9750000,)
Train loader created with 3046 samples.
Test loader created with 761 samples.


In [24]:
import torch
import torch.nn as nn

class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        # Encoder
        self.encoder = nn.Sequential(
            nn.Conv1d(1, 512, kernel_size=4, stride=2, padding=1),  # (N, 512, 1280)
            nn.PReLU(),
            nn.Conv1d(512, 512, kernel_size=4, stride=2, padding=1),  # (N, 512, 640)
            nn.PReLU(),
            nn.Conv1d(512, 512, kernel_size=4, stride=2, padding=1),  # (N, 512, 320)
            nn.PReLU(),
            nn.Conv1d(512, 512, kernel_size=4, stride=2, padding=1),  # (N, 512, 160)
            nn.PReLU(),
            nn.Conv1d(512, 256, kernel_size=4, stride=2, padding=1),  # (N, 256, 80)
            nn.PReLU(),
            nn.Conv1d(256, 128, kernel_size=4, stride=2, padding=1),  # (N, 128, 40)
            nn.PReLU(),
            nn.Conv1d(128, 64, kernel_size=4, stride=2, padding=1),  # (N, 64, 20)
            nn.PReLU()
        )

        # Decoder
        self.decoder = nn.Sequential(
            nn.ConvTranspose1d(64, 128, kernel_size=4, stride=2, padding=1),  # (N, 128, 40)
            nn.PReLU(),
            nn.ConvTranspose1d(128, 256, kernel_size=4, stride=2, padding=1),  # (N, 256, 80)
            nn.PReLU(),
            nn.ConvTranspose1d(256, 512, kernel_size=4, stride=2, padding=1),  # (N, 512, 160)
            nn.PReLU(),
            nn.ConvTranspose1d(512, 512, kernel_size=4, stride=2, padding=1),  # (N, 512, 320)
            nn.PReLU(),
            nn.ConvTranspose1d(512, 512, kernel_size=4, stride=2, padding=1),  # (N, 512, 640)
            nn.PReLU(),
            nn.ConvTranspose1d(512, 512, kernel_size=4, stride=2, padding=1),  # (N, 512, 1280)
            nn.PReLU(),
            nn.ConvTranspose1d(512, 1, kernel_size=4, stride=2, padding=1)  # (N, 1, 2560)
        )

    def forward(self, x):
        encoded = []
        for layer in self.encoder:
            x = layer(x)
            encoded.append(x.clone())  # Save the output for skip connections

        # Decoder with skip connections
        for i, layer in enumerate(self.decoder):
            if isinstance(layer, nn.ConvTranspose1d):
                x = layer(x)
                if i < len(self.decoder) - 1:
                    x = nn.functional.prelu(x)
                    x += encoded[-(i + 1)]  # Skip connection

        print(f"Generator output shape: {x.shape}")
        return x


In [27]:
import torch
import torch.nn as nn

class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        # Encoder layers
        self.encoder = nn.Sequential(
            nn.Conv1d(1, 512, kernel_size=4, stride=2, padding=1),  # (N, 512, 1280)
            nn.PReLU(),
            nn.Conv1d(512, 256, kernel_size=4, stride=2, padding=1),  # (N, 256, 640)
            nn.PReLU(),
            nn.Conv1d(256, 128, kernel_size=4, stride=2, padding=1),  # (N, 128, 320)
            nn.PReLU(),
            nn.Conv1d(128, 64, kernel_size=4, stride=2, padding=1),  # (N, 64, 160)
            nn.PReLU(),
            nn.Conv1d(64, 32, kernel_size=4, stride=2, padding=1),  # (N, 32, 80)
            nn.PReLU(),
            nn.Conv1d(32, 16, kernel_size=4, stride=2, padding=1),  # (N, 16, 40)
            nn.PReLU(),
            nn.Conv1d(16, 1, kernel_size=4, stride=2, padding=1)  # (N, 1, 20)
        )

        # Decoder layers (mirrored from encoder)
        self.decoder = nn.Sequential(
            nn.ConvTranspose1d(1, 16, kernel_size=4, stride=2, padding=1),  # (N, 16, 40)
            nn.PReLU(),
            nn.ConvTranspose1d(16, 32, kernel_size=4, stride=2, padding=1),  # (N, 32, 80)
            nn.PReLU(),
            nn.ConvTranspose1d(32, 64, kernel_size=4, stride=2, padding=1),  # (N, 64, 160)
            nn.PReLU(),
            nn.ConvTranspose1d(64, 128, kernel_size=4, stride=2, padding=1),  # (N, 128, 320)
            nn.PReLU(),
            nn.ConvTranspose1d(128, 256, kernel_size=4, stride=2, padding=1),  # (N, 256, 640)
            nn.PReLU(),
            nn.ConvTranspose1d(256, 512, kernel_size=4, stride=2, padding=1),  # (N, 512, 1280)
            nn.PReLU(),
            nn.ConvTranspose1d(512, 1, kernel_size=4, stride=2, padding=1)  # (N, 1, 2560)
        )

    def forward(self, x):
        encoded = []
        # Encoder pass
        for layer in self.encoder:
            x = layer(x)
            encoded.append(x)  # Store output for skip connections

        # Decoder pass with skip connections
        for i, layer in enumerate(self.decoder):
            x = layer(x)
            if i < len(self.decoder) - 1:  # Skip the last layer
                x = nn.PReLU()(x)  # Apply PReLU activation
                # Perform skip connection only if shapes match
                if encoded[-(i + 1)].shape[2] == x.shape[2]:
                    x += encoded[-(i + 1)]  # Skip connection

        return x


In [28]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Example dataset and dataloader (replace with your actual dataset)
train_dataset = TensorDataset(torch.randn(100, 1, 2560), torch.randn(100, 1, 2560))
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Initialize models and optimizers
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
generator = Generator().to(device)
discriminator = Discriminator().to(device)
criterion = nn.BCELoss()
optimizer_G = optim.Adam(generator.parameters(), lr=0.0002)
optimizer_D = optim.Adam(discriminator.parameters(), lr=0.0002)

num_epochs = 10

# Training loop
for epoch in range(num_epochs):
    for i, (noisy, clean) in enumerate(train_loader):
        noisy, clean = noisy.to(device), clean.to(device)

        ############################
        # Train Discriminator
        ############################
        optimizer_D.zero_grad()

        # Real batch
        real_labels = torch.ones(clean.size(0), 1).to(device)
        outputs_real = discriminator(torch.cat((noisy, clean), dim=1))
        d_loss_real = criterion(outputs_real, real_labels)

        # Fake batch
        fake_clean = generator(noisy)
        fake_inputs = torch.cat((noisy, fake_clean), dim=1)
        fake_labels = torch.zeros(noisy.size(0), 1).to(device)
        outputs_fake = discriminator(fake_inputs.detach())
        d_loss_fake = criterion(outputs_fake, fake_labels)

        d_loss = d_loss_real + d_loss_fake
        d_loss.backward()
        optimizer_D.step()

        ############################
        # Train Generator
        ############################
        optimizer_G.zero_grad()

        outputs_gen = discriminator(torch.cat((noisy, generator(noisy)), dim=1))
        g_loss = criterion(outputs_gen, real_labels)

        g_loss.backward()
        optimizer_G.step()

        ############################
        # Print training progress
        ############################
        if (i+1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], '
                  f'D Loss: {d_loss.item():.4f}, G Loss: {g_loss.item():.4f}')


Shape after conv layers: torch.Size([32, 81920])
Shape after linear layer: torch.Size([32, 1])
Shape after conv layers: torch.Size([32, 81920])
Shape after linear layer: torch.Size([32, 1])
Shape after conv layers: torch.Size([32, 81920])
Shape after linear layer: torch.Size([32, 1])
Shape after conv layers: torch.Size([32, 81920])
Shape after linear layer: torch.Size([32, 1])
Shape after conv layers: torch.Size([32, 81920])
Shape after linear layer: torch.Size([32, 1])
Shape after conv layers: torch.Size([32, 81920])
Shape after linear layer: torch.Size([32, 1])
Shape after conv layers: torch.Size([32, 81920])
Shape after linear layer: torch.Size([32, 1])
Shape after conv layers: torch.Size([32, 81920])
Shape after linear layer: torch.Size([32, 1])
Shape after conv layers: torch.Size([32, 81920])
Shape after linear layer: torch.Size([32, 1])
Shape after conv layers: torch.Size([4, 81920])
Shape after linear layer: torch.Size([4, 1])
Shape after conv layers: torch.Size([4, 81920])
Shap

In [29]:
def add_noise(signal, noise_type, snr_db):
    """
    Add noise of a specific type to the signal at a given SNR level.

    Args:
        signal (numpy array): Clean ECG signal.
        noise_type (str): Type of noise ('BW', 'EM', 'MA', 'EM+BW', 'MA+BW', 'MA+EM', 'MA+EM+BW').
        snr_db (float): Desired signal-to-noise ratio in dB.

    Returns:
        noisy_signal (numpy array): Noisy ECG signal.
    """
    noise = np.zeros_like(signal)
    if 'BW' in noise_type:
        noise += np.random.normal(0, 1, signal.shape)
    if 'EM' in noise_type:
        noise += np.random.normal(0, 1, signal.shape) * np.random.exponential(1, signal.shape)
    if 'MA' in noise_type:
        noise += np.convolve(np.random.normal(0, 1, signal.size), np.ones(10)/10, mode='same')

    signal_power = np.mean(signal**2)
    noise_power = np.mean(noise**2)
    noise_variance = signal_power / (10**(snr_db / 10))
    noise = noise * np.sqrt(noise_variance / noise_power)

    noisy_signal = signal + noise
    return noisy_signal


In [None]:
import numpy as np
import torch
import pandas as pd

def add_noise(signal, noise_type, snr_db):
    noise = np.zeros_like(signal)
    if 'BW' in noise_type:
        noise += np.random.normal(0, 1, signal.shape)
    if 'EM' in noise_type:
        noise += np.random.normal(0, 1, signal.shape) * np.random.exponential(1, signal.shape)
    if 'MA' in noise_type:
        noise += np.convolve(np.random.normal(0, 1, signal.size), np.ones(10)/10, mode='same')

    signal_power = np.mean(signal**2)
    noise_power = np.mean(noise**2)
    noise_variance = signal_power / (10**(snr_db / 10))
    noise = noise * np.sqrt(noise_variance / noise_power)

    noisy_signal = signal + noise
    return noisy_signal

def calculate_snr(clean, denoised):
    noise = clean - denoised
    signal_power = np.mean(clean**2)
    noise_power = np.mean(noise**2)
    snr = 10 * np.log10(signal_power / noise_power)
    return snr

def evaluate_model(generator, test_loader, noise_types, snr_levels):
    generator.eval()
    results = {noise_type: {snr: {"snr": [], "rmse": []} for snr in snr_levels} for noise_type in noise_types}

    with torch.no_grad():
        for noisy, clean in test_loader:
            clean = clean.numpy()
            for noise_type in noise_types:
                for snr_db in snr_levels:
                    noisy_signals = np.array([add_noise(c.flatten(), noise_type, snr_db) for c in clean])
                    noisy_tensor = torch.Tensor(noisy_signals).unsqueeze(1).to(device)
                    clean_tensor = torch.Tensor(clean).to(device)

                    denoised = generator(noisy_tensor).cpu().numpy().squeeze()

                    snr = calculate_snr(clean, denoised)
                    rmse = np.sqrt(np.mean((clean - denoised) ** 2))

                    results[noise_type][snr_db]["snr"].append(snr)
                    results[noise_type][snr_db]["rmse"].append(rmse)

    avg_results = {noise_type: {snr: {"snr": np.mean(results[noise_type][snr]["snr"]),
                                      "rmse": np.mean(results[noise_type][snr]["rmse"])}
                                for snr in snr_levels}
                   for noise_type in noise_types}

    table_data = []
    for snr_db in snr_levels:
        row = []
        for noise_type in noise_types:
            row.append(avg_results[noise_type][snr_db]['snr'])
            row.append(avg_results[noise_type][snr_db]['rmse'])
        table_data.append(row)

    columns = []
    for noise_type in noise_types:
        columns.append(f"{noise_type}_SNR")
        columns.append(f"{noise_type}_RMSE")

    df = pd.DataFrame(table_data, columns=columns, index=[f"{snr}dB" for snr in snr_levels])

    print(df)
    return df

# Define the noise types and SNR levels to evaluate
noise_types = ['BW', 'EM', 'MA', 'EM+BW', 'MA+BW', 'MA+EM', 'MA+EM+BW']
snr_levels = [0, 1, 2, 3, 4, 5]

# Evaluate the model and get the results in table form
avg_results_df = evaluate_model(generator, test_loader, noise_types, snr_levels)
