In [1]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pywt
import os
import csv 
from tqdm import tqdm
import neurokit2 as nk

from ArrhythmiaModel import *

In [2]:
def preprocessing_data(data):
    band_passed_ecg =  nk.signal_filter(data, sampling_rate=360, lowcut=0.1, highcut=100, method='butterworth_zi', order = 2)
    emg = moving_average(band_passed_ecg, window_size=10)
    # Step 4: Downsample the filtered ECG signal 
    downsampled_ecg = nk.signal_resample(emg, sampling_rate=360, desired_sampling_rate=100)
    return downsampled_ecg

In [4]:
record_ranges = [(100,109),(111, 119),(121,124),(200,203),(205,205),(207,210),(212,215),(217,217),(219,223),(228,228),(230,232)]
print("Working with data")
seq_len = 500
stride = 500
mask_length = 30
data = np.array(load_multiple_records(record_ranges, seq_len, stride))
test_data = load_multiple_records([(233,234)], seq_len, stride)

for num in [3]: 
    print("="*20)
    print(f"Training the model with {num} R peaks")
    print("="*20)
    
    print("Preparing the data")
    train_loader, val_loader = prepare_data(data,num_rpeaks = num, seq_len=seq_len, mask_len = mask_length)
    test_dataset = ECGDataset(test_data, seq_len, num_peaks_to_mask = num)
    test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)
    
    print("Training the model")
    criterion = nn.MSELoss()
    mae_model = MAE1D_Mask()
    optimizer = optim.Adam(mae_model.parameters(), lr=0.001)    

    model_trainer = Trainer(model = mae_model, 
                            criterion = criterion, optimizer = optimizer,  
                            seq_len=seq_len, masking_length = mask_length, num_rpeak=num, 
                            scale = 'Minmax_05',
                            test_case = False)
    model_trainer.run(train_loader, val_loader, test_loader, epochs = 100)

Working with data


  0%|                                                                                                                                               | 0/11 [00:00<?, ?it/s]


NameError: name 'preprocessing_data' is not defined

# Fine- truning

In [11]:
from torch.utils.data import DataLoader, TensorDataset
from wesad_processing import *
WIN_SIZE = 10
def load_wesad_dataset(root_dir, test_subject):
    folder_ls = os.listdir(root_dir)
    for i in folder_ls:
        if i == ".ipynb_checkpoints" or i == '.DS_Store':
            folder_ls.remove(i)
    
    valid_ls = test_subject
    # Create the train list by excluding test_ls
    train_ls = [subject for subject in folder_ls if subject not in valid_ls]
    print("==========Loading Training set============")
    X_train, y_train = load_process_extract_ls(root_dir, train_ls,700, WIN_SIZE,WIN_SIZE,True)
    print("==========Loading Testing set============")
    X_test, y_test = load_process_extract_ls(root_dir,valid_ls,700, WIN_SIZE, WIN_SIZE,False)
    return X_train, X_test, y_train, y_test
    
class ECGClassificationDataset(Dataset):
    def __init__(self, X, Y):
        self.X = X  # shape: [N, L]
        self.Y = Y  # shape: [N]
    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        seq = self.X[idx].flatten()
        return torch.tensor(seq, dtype=torch.float32).unsqueeze(0), torch.tensor(self.Y[idx], dtype=torch.long)

In [17]:
class DownstreamClassifier(nn.Module):
    def __init__(self, encoder, num_classes=2):
        super(DownstreamClassifier, self).__init__()
        self.encoder = encoder

        # Freeze encoder parameters
        for param in self.encoder.parameters():
            param.requires_grad = False
        
        # Classifier head after GAP
        self.classifier = nn.Sequential(
            nn.Linear(256, 128),
            nn.LeakyReLU(negative_slope = 0.01),
            nn.Linear(128, num_classes)
        )

    def forward(self, x, mask=None):
        # If no mask given, create a zero-mask (no masking)
        if mask is None:
            mask = torch.zeros_like(x, dtype=torch.bool)

        x = x.squeeze(1)         # (B, L)
        mask = mask.squeeze(1)   # (B, L)

        # Pass through encoder
        z = self.encoder(x, mask, mask)  # -> (B, C, L)

        # Global Average Pooling over sequence dimension
        z = F.adaptive_avg_pool1d(z, 1)  # -> (B, C, 1)
        z = z.squeeze(-1)                # -> (B, C)

        logits = self.classifier(z)      # -> (B, num_classes)
        return logits


In [18]:
def evaluate(model, dataloader, criterion, device):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam([
        {"params": model.encoder.parameters(), "lr": 1e-8},     # smaller LR for encoder
        {"params": model.classifier.parameters(), "lr": 1e-3}   # larger LR for classifier
    ], weight_decay=1e-4)
    # --- Validation ---
    model.eval()
    
    all_val_preds = []
    all_val_labels = []

    with torch.no_grad():
        for x_val, y_val in dataloader:
            x_val, y_val = x_val.to(device), y_val.to(device)
            outputs = model(x_val)
            loss = criterion(outputs, y_val)
            
            preds = torch.argmax(outputs, dim=1)
            all_val_preds.extend(preds.cpu().numpy())
            all_val_labels.extend(y_val.cpu().numpy())
    
    prec_temp = precision_score(all_val_labels, all_val_preds)
    rec_temp = recall_score(all_val_labels, all_val_preds)
    f1_temp = f1_score(all_val_labels, all_val_preds,average='macro')
    acc_temp = accuracy_score(all_val_labels, all_val_preds)
    
    return prec_temp, rec_temp, f1_temp, acc_temp


In [19]:
def train_evaluate_downstream_classifier(
    model,
    train_loader,
    val_loader,
    test_loader,
    num_epochs=60,
    device = 'cuda'
):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam([
        {"params": model.encoder.parameters(), "lr": 1e-8},     # smaller LR for encoder
        {"params": model.classifier.parameters(), "lr": 1e-3}   # larger LR for classifier
    ], weight_decay=1e-4)
    model.to(device)
    val_losses = []

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        all_train_preds = []
        all_train_labels = []

        for x, y in train_loader:
            x, y = x.to(device), y.to(device)

            outputs = model(x)
            loss = criterion(outputs, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            preds = torch.argmax(outputs, dim=1)
            all_train_preds.extend(preds.cpu().numpy())
            all_train_labels.extend(y.cpu().numpy())

        train_accuracy = accuracy_score(all_train_labels, all_train_preds)
        train_f1_macro = f1_score(all_train_labels, all_train_preds, average='macro')

        # --- Validation ---
        model.eval()
        val_loss = 0.0
        all_val_preds = []
        all_val_labels = []

        with torch.no_grad():
            for x_val, y_val in val_loader:
                x_val, y_val = x_val.to(device), y_val.to(device)
                outputs = model(x_val)
                loss = criterion(outputs, y_val)
                val_loss += loss.item()

                preds = torch.argmax(outputs, dim=1)
                all_val_preds.extend(preds.cpu().numpy())
                all_val_labels.extend(y_val.cpu().numpy())

        avg_val_loss = val_loss / len(val_loader)
        val_accuracy = accuracy_score(all_val_labels, all_val_preds)
        val_f1_macro = f1_score(all_val_labels, all_val_preds, average='macro')
        val_losses.append(avg_val_loss)

        print(f"Epoch {epoch}: "
              f"Train Loss = {total_loss/len(train_loader):.4f}, "
              f"Train Acc = {train_accuracy:.4f}, "
              f"Train F1 = {train_f1_macro:.4f} | "
              f"Val Loss = {avg_val_loss:.4f}, "
              f"Val Acc = {val_accuracy:.4f}, "
              f"Val F1 = {val_f1_macro:.4f}")
        
    prec, rec, f1, acc = evaluate(model, test_loader, criterion, device)
    return prec, rec, f1, acc

In [20]:

def loso_training(root_dir, sample_rate, test_size, filename, load_file):
    prec, rec, acc, f1 = [], [], [], []
    folder_ls = [f for f in os.listdir(root_dir) if f not in (".ipynb_checkpoints", ".DS_Store")]
    start_index = np.arange(0, len(folder_ls) - test_size + 1, test_size)

    # Load pretrained model once
    auto_model = MAE1D_Mask()
    auto_model.load_state_dict(torch.load(
        'runs/seq500_rpeak3_0213_03092025/model_500_3_0213_03092025.pth', 
        weights_only=True
    ))
    encoder = auto_model.encoder

    for start in start_index:
        subj = folder_ls[start:start + test_size]
        print(f"***** Loop {start}: {subj} *****")

        # Load dataset
        X_train, X_test, y_train, y_test = load_wesad_dataset(root_dir, subj)
        train_dataset = ECGClassificationDataset(X_train, y_train)
        test_dataset = ECGClassificationDataset(X_test, y_test)

        # Split train/val
        train_size = int(0.8 * len(train_dataset))
        val_size = len(train_dataset) - train_size
        train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

        loaders = {
            'train': DataLoader(train_dataset, batch_size=128, shuffle=True),
            'val': DataLoader(val_dataset, batch_size=128, shuffle=False),
            'test': DataLoader(test_dataset, batch_size=128, shuffle=False)
        }

        # Train and evaluate downstream classifier
        model = DownstreamClassifier(encoder)
        prec_, rec_, f1_, acc_ = train_evaluate_downstream_classifier(
            model, loaders['train'], loaders['val'], loaders['test'], num_epochs=100, device='cuda'
        )
        prec.append(prec_); rec.append(rec_); f1.append(f1_); acc.append(acc_)
        print([start, acc_, f1_, rec_, prec_])

        # Save iteration results
        with open(filename, 'a', newline='') as f:
            csv.writer(f).writerow([start, acc_, f1_, rec_, prec_, subj])

    # Save mean results
    with open(filename, 'a', newline='') as f:
        csv.writer(f).writerow([np.mean(acc), np.mean(f1), np.mean(prec), np.mean(rec)])

    print(f"Accuracy: {np.mean(acc)}, F1: {np.mean(f1)}, Precision: {np.mean(prec)}, Recall: {np.mean(rec)}")
    return acc, f1, rec, prec


In [21]:
accuracy_ls, f1_score_ls, recall_score_ls, precision_score_ls = loso_training("/home/van/NamQuang/Dataset/WESAD_LOSO", 700,1,'test_loso.csv',  False)

***** Loop 0: ['S16'] *****
Working with: /home/van/NamQuang/Dataset/WESAD_LOSO/S9/S9.pkl
Working with: /home/van/NamQuang/Dataset/WESAD_LOSO/S13/S13.pkl
Working with: /home/van/NamQuang/Dataset/WESAD_LOSO/S8/S8.pkl
Working with: /home/van/NamQuang/Dataset/WESAD_LOSO/S4/S4.pkl
Working with: /home/van/NamQuang/Dataset/WESAD_LOSO/S11/S11.pkl
Working with: /home/van/NamQuang/Dataset/WESAD_LOSO/S15/S15.pkl
Working with: /home/van/NamQuang/Dataset/WESAD_LOSO/S3/S3.pkl
Working with: /home/van/NamQuang/Dataset/WESAD_LOSO/S2/S2.pkl
Working with: /home/van/NamQuang/Dataset/WESAD_LOSO/S6/S6.pkl
Working with: /home/van/NamQuang/Dataset/WESAD_LOSO/S7/S7.pkl
Working with: /home/van/NamQuang/Dataset/WESAD_LOSO/S14/S14.pkl
Working with: /home/van/NamQuang/Dataset/WESAD_LOSO/S10/S10.pkl
Working with: /home/van/NamQuang/Dataset/WESAD_LOSO/S5/S5.pkl
Working with: /home/van/NamQuang/Dataset/WESAD_LOSO/S17/S17.pkl
(4123, 1000, 1)
Working with: /home/van/NamQuang/Dataset/WESAD_LOSO/S16/S16.pkl
Epoch 0: Tra

KeyboardInterrupt: 