# In [None]: notebook cell 1 - denoising autoencoder trained on full-length MIT-BIH records
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import wfdb
import matplotlib.pyplot as plt

# Define the ECGDataset class
class ECGDataset(Dataset):
    """Index-aligned pairs of clean and noisy ECG signals.

    ``dataset[idx]`` returns ``(clean, noisy)``, where both elements are
    ``torch.float32`` tensors built from ``raw_signals[idx]`` and
    ``noisy_signals[idx]``. The reported length comes from ``raw_signals``
    alone; ``noisy_signals`` is assumed to be at least as long.
    """

    def __init__(self, raw_signals, noisy_signals):
        self.raw_signals, self.noisy_signals = raw_signals, noisy_signals

    def __len__(self):
        return len(self.raw_signals)

    def __getitem__(self, idx):
        # Convert lazily, one item at a time, instead of up front.
        clean = torch.tensor(self.raw_signals[idx], dtype=torch.float32)
        noisy = torch.tensor(self.noisy_signals[idx], dtype=torch.float32)
        return clean, noisy

# Define the Improved DAE model
class ImprovedDAE(nn.Module):
    """1-D convolutional denoising autoencoder.

    The encoder is three ``Conv1d`` layers (1 -> 16 -> 32 -> 64 channels,
    kernel 3, stride 2, padding 1), each followed by ReLU. The decoder
    mirrors it with three ``ConvTranspose1d`` layers (64 -> 32 -> 16 -> 1)
    and ends in ``Tanh``, so every output value lies in (-1, 1).

    Input is expected as ``(batch, 1, length)``; the output has the same
    shape as the input for any ``length >= 1``.
    """

    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv1d(1, 16, kernel_size=3, stride=2, padding=1),
            nn.ReLU(True),
            nn.Conv1d(16, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(True),
            nn.Conv1d(32, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(True)
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose1d(64, 32, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(True),
            nn.ConvTranspose1d(32, 16, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(True),
            nn.ConvTranspose1d(16, 1, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.Tanh()
        )

    def forward(self, x):
        """Denoise ``x`` and return a tensor of the same shape."""
        input_length = x.shape[-1]
        x = self.encoder(x)
        x = self.decoder(x)
        # Each encoder stage maps L -> ceil(L / 2) and each decoder stage
        # doubles the length, so the decoder emits 8 * ceil(L / 8) samples.
        # Crop the surplus so the output always lines up with the input
        # (a no-op when L is already a multiple of 8).
        return x[..., :input_length]

# Define the function to add noise to the signal
def add_noise(signal, noise, snr):
    """Mix ``noise`` into ``signal`` at a target signal-to-noise ratio.

    The noise is rescaled so that ``mean(signal ** 2)`` divided by the mean
    square of the scaled noise equals ``10 ** (snr / 10)``, i.e. ``snr`` is
    interpreted in decibels.

    Args:
        signal: Clean signal (array-like).
        noise: Noise of the same shape as ``signal`` (array-like).
        snr: Target signal-to-noise ratio in dB.

    Returns:
        ``signal`` plus the rescaled noise. If ``noise`` is all zeros the
        signal is returned unchanged, because silent noise cannot be scaled
        to any SNR.
    """
    signal = np.asarray(signal)
    noise = np.asarray(noise)
    signal_power = np.mean(signal ** 2)
    noise_power = np.mean(noise ** 2)
    if noise_power == 0:
        # Dividing by zero power would give an infinite factor and
        # 0 * inf = NaN in every sample; contribute no noise instead.
        scale = 0.0
    else:
        scale = np.sqrt((signal_power / noise_power) / (10 ** (snr / 10)))
    return signal + noise * scale

# Define the function to load MIT-BIH data
def load_mit_bih_data(records, noise_type, snr_levels, target_length=650000):
    """Load clean ECG records and build noisy copies at several SNR levels.

    Each record is read from ``mit-bih-arrhythmia-database-1.0.0/`` and the
    noise from ``mit-bih-noise-stress-test-database-1.0.0/`` (both relative
    to the working directory). Only the first column of each record's
    ``p_signal`` is used. Signal and noise are truncated to their common
    length (capped at ``target_length``) and then zero-padded at the end up
    to ``target_length``, so every returned row has ``target_length`` samples.

    Args:
        records: Iterable of arrhythmia record names (e.g. ``'103'``).
        noise_type: Name of the noise record (e.g. ``'bw'``).
        snr_levels: Iterable of SNR values (dB) to generate.
        target_length: Number of samples per returned signal.

    Returns:
        ``(raw, noisy)`` where ``raw`` is an array with one row per record
        and ``noisy`` maps each SNR level to an array of the same layout.
    """
    # The noise recording does not depend on the ECG record, so read it once
    # instead of re-reading the same file on every loop iteration.
    noise_record = wfdb.rdrecord(f'mit-bih-noise-stress-test-database-1.0.0/{noise_type}')
    full_noise = noise_record.p_signal[:, 0]

    raw_signals = []
    noisy_signals_dict = {snr: [] for snr in snr_levels}

    for record in records:
        raw_record = wfdb.rdrecord(f'mit-bih-arrhythmia-database-1.0.0/{record}')
        raw_signal = raw_record.p_signal[:, 0]  # Use the first channel for simplicity

        # Ensure the signals are of the same length
        min_length = min(len(raw_signal), len(full_noise), target_length)
        raw_signal = raw_signal[:min_length]
        noise_signal = full_noise[:min_length]

        # Pad signals to target length
        padding = target_length - min_length
        if padding > 0:
            raw_signal = np.pad(raw_signal, (0, padding), 'constant')
            noise_signal = np.pad(noise_signal, (0, padding), 'constant')

        raw_signals.append(raw_signal)

        for snr in snr_levels:
            noisy_signals_dict[snr].append(add_noise(raw_signal, noise_signal, snr))

    return np.array(raw_signals), {snr: np.array(noisy_signals_dict[snr]) for snr in snr_levels}

# Load the data
records = ['103', '105', '111', '116', '122', '205', '213', '219', '223', '230']
noise_types = ['bw', 'em', 'ma']
snr_levels = [0, 1, 2, 3, 4, 5]
# 649984 == 8 * 81248; presumably chosen as a multiple of 8 so the three
# stride-2 stages of the autoencoder reproduce the length - TODO confirm.
target_length = 649984

# Both dictionaries are keyed by noise type; the noisy one nests a per-SNR dict.
raw_signals = {}
noisy_signals_dict = {}
for noise_type in noise_types:
    loaded = load_mit_bih_data(records, noise_type, snr_levels, target_length)
    raw_signals[noise_type], noisy_signals_dict[noise_type] = loaded

# Create datasets and dataloaders
datasets = {}
for noise_type in noise_types:
    clean_for_type = raw_signals[noise_type]
    noisy_for_type = noisy_signals_dict[noise_type]
    for snr in snr_levels:
        datasets[(noise_type, snr)] = ECGDataset(clean_for_type, noisy_for_type[snr])

# One shuffled loader per (noise type, SNR) pair, in the same key order.
dataloaders = {}
for key, dataset in datasets.items():
    dataloaders[key] = DataLoader(dataset, batch_size=16, shuffle=True)

# Define the training function
def train_dae(model, dataloaders, num_epochs=10, lr=0.001):
    """Train ``model`` to map noisy signals back to their clean versions.

    The loaders are visited one after another: all ``num_epochs`` epochs are
    run on one ``(noise_type, snr)`` loader before moving to the next. A
    single Adam optimizer (and therefore its state) is shared across all of
    them. After each epoch the loss of the last batch is printed.

    Args:
        model: Module taking ``(batch, 1, length)`` input.
        dataloaders: Mapping ``(noise_type, snr) -> iterable`` of
            ``(clean, noisy)`` batches shaped ``(batch, length)``.
        num_epochs: Epochs to run per loader.
        lr: Adam learning rate.

    Returns:
        The same ``model`` instance, trained in place.
    """
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Make sure training-mode behaviour is active even if the caller handed
    # over a model that was previously switched to eval().
    model.train()

    for (noise_type, snr), dataloader in dataloaders.items():
        for epoch in range(num_epochs):
            loss = None
            for clean_batch, noisy_batch in dataloader:
                clean_batch = clean_batch.unsqueeze(1)  # Add channel dimension
                noisy_batch = noisy_batch.unsqueeze(1)

                outputs = model(noisy_batch)
                loss = criterion(outputs, clean_batch)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            if loss is None:
                # The loader produced no batches: there is nothing to report
                # and further epochs would be empty too.
                break

            print(f'[{noise_type} SNR {snr}] Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

    return model

# Initialize and train the model
dae_model = ImprovedDAE()
trained_model = train_dae(dae_model, dataloaders, num_epochs=10)

# Test the trained model on a sample
sample_noisy_signal = noisy_signals_dict['bw'][3][0]  # Example with 'bw' noise and SNR 3
# The model expects (batch, channel, length), so add both leading axes.
sample_noisy_signal = torch.tensor(sample_noisy_signal, dtype=torch.float32).unsqueeze(0).unsqueeze(0)
# Inference only: switch to eval mode and skip autograd bookkeeping, which
# would otherwise keep every intermediate activation of the full-length
# signal alive.
trained_model.eval()
with torch.no_grad():
    denoised_signal = trained_model(sample_noisy_signal).numpy().squeeze()

# Plot the original, noisy, and denoised signals
plt.figure(figsize=(15, 5))
plt.plot(raw_signals['bw'][0], label='Original Signal')
plt.plot(sample_noisy_signal.squeeze().numpy(), label='Noisy Signal')
plt.plot(denoised_signal, label='Denoised Signal')
plt.legend()
plt.show()


# In [None]: notebook cell 2 - same autoencoder trained on fixed-length windows with a train/test split
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
import wfdb
import os
import matplotlib.pyplot as plt

# Define the ECGDataset class
class ECGDataset(Dataset):
    """Dataset yielding ``(clean, noisy)`` ECG windows as float32 tensors.

    The two collections are indexed in lockstep: item ``idx`` pairs
    ``raw_signals[idx]`` with ``noisy_signals[idx]``.
    """

    def __init__(self, raw_signals, noisy_signals):
        self.raw_signals = raw_signals
        self.noisy_signals = noisy_signals

    def __len__(self):
        # Only the clean collection determines the reported size.
        return len(self.raw_signals)

    def __getitem__(self, idx):
        pair = (self.raw_signals[idx], self.noisy_signals[idx])
        return tuple(torch.tensor(values, dtype=torch.float32) for values in pair)

# Define the Improved DAE model
class ImprovedDAE(nn.Module):
    """Convolutional denoising autoencoder for single-channel 1-D signals.

    Three stride-2 ``Conv1d`` + ReLU stages (1 -> 16 -> 32 -> 64 channels)
    compress the signal; three stride-2 ``ConvTranspose1d`` stages
    (64 -> 32 -> 16 -> 1) expand it again. The final ``Tanh`` bounds the
    output to (-1, 1).

    Input shape is ``(batch, 1, length)`` and the output has the same shape,
    including for lengths that are not a multiple of 8 (for example 101).
    """

    def __init__(self):
        super(ImprovedDAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv1d(1, 16, kernel_size=3, stride=2, padding=1),
            nn.ReLU(True),
            nn.Conv1d(16, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(True),
            nn.Conv1d(32, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(True)
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose1d(64, 32, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(True),
            nn.ConvTranspose1d(32, 16, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(True),
            nn.ConvTranspose1d(16, 1, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.Tanh()
        )

    def forward(self, x):
        """Return the denoised signal, shaped exactly like ``x``."""
        original_length = x.size(-1)
        reconstructed = self.decoder(self.encoder(x))
        # The encoder rounds the length up at every halving, so the decoder
        # returns 8 * ceil(length / 8) samples: a 101-sample window comes
        # back with 104. Without trimming, the loss against the 101-sample
        # target fails with a shape mismatch.
        return reconstructed[..., :original_length]

# Define the function to add noise to the signal
def add_noise(signal, noise, snr):
    """Return ``signal`` with ``noise`` added at the requested SNR.

    The noise is multiplied by
    ``sqrt((P_signal / P_noise) / 10 ** (snr / 10))``, where ``P`` denotes the
    mean of the squared samples, so the mixture has a signal-to-noise ratio
    of ``snr`` decibels.

    Args:
        signal: Clean samples (array-like).
        noise: Noise samples with the same shape as ``signal``.
        snr: Desired signal-to-noise ratio in dB.

    Returns:
        The noisy signal. When ``noise`` has zero power (all samples are 0)
        there is nothing to scale, and the clean signal is returned as is.
    """
    signal = np.asarray(signal)
    noise = np.asarray(noise)
    noise_power = np.mean(noise ** 2)
    if noise_power == 0:
        # Guard the division below: a zero denominator would turn the
        # factor into inf and the whole output into NaN (0 * inf).
        return signal + noise * 0.0
    signal_power = np.mean(signal ** 2)
    factor = (signal_power / noise_power) / (10 ** (snr / 10))
    return signal + noise * np.sqrt(factor)

# Define the function to load MIT-BIH data
def load_mit_bih_data(records, noise_type, snr_levels, sample_length=101):
    """Cut ECG records into fixed-length windows and add noise to each one.

    Records are read from ``mit-bih-arrhythmia-database-1.0.0`` and the
    noise from ``mit-bih-noise-stress-test-database-1.0.0`` (relative to the
    working directory); only the first column of ``p_signal`` is used. Each
    record is truncated to the common length with the noise and split into
    consecutive, non-overlapping windows of ``sample_length`` samples;
    trailing samples that do not fill a whole window are dropped. The noise
    is scaled separately for every window and SNR level.

    Args:
        records: Iterable of arrhythmia record names.
        noise_type: Name of the noise record (e.g. ``'bw'``).
        snr_levels: Iterable of SNR values (dB) to generate.
        sample_length: Number of samples per window.

    Returns:
        ``(raw, noisy)`` where ``raw`` is an array with one row per window
        and ``noisy`` maps each SNR level to an array with matching rows.
    """
    # The noise recording is identical for every ECG record, so it is read
    # once up front rather than once per loop iteration.
    noise_record = wfdb.rdrecord(os.path.join('mit-bih-noise-stress-test-database-1.0.0', noise_type))
    full_noise = noise_record.p_signal[:, 0]

    raw_signals = []
    noisy_signals_dict = {snr: [] for snr in snr_levels}

    for record in records:
        raw_record = wfdb.rdrecord(os.path.join('mit-bih-arrhythmia-database-1.0.0', record))
        raw_signal = raw_record.p_signal[:, 0]  # Use the first channel for simplicity

        # Ensure the signals are of the same length
        min_length = min(len(raw_signal), len(full_noise))
        raw_signal = raw_signal[:min_length]
        noise_signal = full_noise[:min_length]

        for start_idx in range(0, min_length - sample_length + 1, sample_length):
            stop_idx = start_idx + sample_length
            raw_sample = raw_signal[start_idx:stop_idx]
            noise_sample = noise_signal[start_idx:stop_idx]
            raw_signals.append(raw_sample)

            for snr in snr_levels:
                noisy_signals_dict[snr].append(add_noise(raw_sample, noise_sample, snr))

    return np.array(raw_signals), {snr: np.array(noisy_signals_dict[snr]) for snr in snr_levels}

# Load the data
records = ['103', '105', '111', '116', '122', '205', '213', '219', '223', '230']
noise_types = ['bw', 'em', 'ma']
snr_levels = [0, 1, 2, 3, 4, 5]
sample_length = 101

raw_signals, noisy_signals_dict = {}, {}
for noise_type in noise_types:
    raw_signals[noise_type], noisy_signals_dict[noise_type] = load_mit_bih_data(records, noise_type, snr_levels, sample_length)

# Create datasets and dataloaders
train_size = 30000
test_size = 2000
# Every split is drawn with the same seed. For datasets of equal length this
# selects the same window indices each time, so a window held out for one
# (noise type, SNR) pair is not used for training under another pair.
split_seed = 0

datasets = {}
for noise_type in noise_types:
    for snr in snr_levels:
        dataset = ECGDataset(raw_signals[noise_type], noisy_signals_dict[noise_type][snr])
        # random_split requires the lengths to add up to len(dataset), so the
        # windows beyond train_size + test_size go into an explicit third
        # part that is simply not used.
        unused_size = len(dataset) - train_size - test_size
        if unused_size < 0:
            raise ValueError(
                f'Dataset for ({noise_type}, SNR {snr}) has {len(dataset)} windows; '
                f'at least {train_size + test_size} are required.'
            )
        train_dataset, test_dataset, unused_dataset = random_split(
            dataset,
            [train_size, test_size, unused_size],
            generator=torch.Generator().manual_seed(split_seed),
        )
        datasets[(noise_type, snr, 'train')] = train_dataset
        datasets[(noise_type, snr, 'test')] = test_dataset

dataloaders = {key: DataLoader(dataset, batch_size=64, shuffle=True) for key, dataset in datasets.items()}

# Define the training function
def train_dae(model, train_loaders, test_loaders, num_epochs=10, lr=0.001):
    """Train ``model`` on every loader in ``train_loaders``.

    Loaders are processed sequentially: all ``num_epochs`` epochs run on one
    ``(noise_type, snr, 'train')`` loader before the next one starts, with a
    single Adam optimizer shared across them. After each epoch the loss of
    the last batch is printed.

    Args:
        model: Module taking ``(batch, 1, length)`` input.
        train_loaders: Mapping ``(noise_type, snr, phase) -> iterable`` of
            ``(clean, noisy)`` batches; entries whose phase is not
            ``'train'`` are skipped.
        test_loaders: Accepted for interface compatibility; not used during
            training.
        num_epochs: Epochs to run per loader.
        lr: Adam learning rate.

    Returns:
        The same ``model`` instance, trained in place.
    """
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Iterate the loaders that were passed in rather than a module-level
    # variable, so the function trains on exactly what the caller supplied.
    for (noise_type, snr, phase), dataloader in train_loaders.items():
        if phase != 'train':
            continue

        for epoch in range(num_epochs):
            model.train()
            loss = None
            for clean_batch, noisy_batch in dataloader:
                clean_batch = clean_batch.unsqueeze(1)  # Add channel dimension
                noisy_batch = noisy_batch.unsqueeze(1)

                outputs = model(noisy_batch)
                loss = criterion(outputs, clean_batch)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            if loss is None:
                # No batches were produced; skip the report (there is no
                # loss to print) and the remaining, equally empty, epochs.
                break

            print(f'[{noise_type} SNR {snr}] Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

    return model

# Initialize and train the model
dae_model = ImprovedDAE()

# Partition the loaders by the phase stored in the third key component.
train_loaders, test_loaders = {}, {}
for key, loader in dataloaders.items():
    if key[2] == 'train':
        train_loaders[key] = loader
    elif key[2] == 'test':
        test_loaders[key] = loader

trained_model = train_dae(dae_model, train_loaders, test_loaders, num_epochs=10)

# Test the trained model on a sample
def test_dae(model, test_loader):
    model.eval()
    with torch.no_grad():
        for raw_signals, noisy_signals in test_loader:
            raw_signals = raw_signals.unsqueeze(1)
            noisy_signals = noisy_signals.unsqueeze(1)
            outputs = model(noisy_signals)
            raw_signals = raw_signals.squeeze().numpy()
            noisy_signals = noisy_signals.squeeze().numpy()
            outputs = outputs.squeeze().numpy()
            return raw_signals[0], noisy_signals[0], outputs[0]

# Test the model on a specific noise type and SNR level
raw_sample, noisy_sample, denoised_sample = test_dae(trained_model, test_loaders[('bw', 3, 'test')])

# Plot the original, noisy, and denoised signals
plt.figure(figsize=(15, 5))
# Draw the three traces in a fixed order so the legend lists them
# clean -> noisy -> denoised.
for trace, trace_label in (
    (raw_sample, 'Original Signal'),
    (noisy_sample, 'Noisy Signal'),
    (denoised_sample, 'Denoised Signal'),
):
    plt.plot(trace, label=trace_label)
plt.legend()
plt.show()
