In [None]:
from google.colab import drive
drive.mount('/content/drive')

import os

os.chdir('/content/drive/MyDrive/HRP')

Mounted at /content/drive


In [None]:
import pandas as pd
from tqdm import tqdm

csv_file = "ecgredone_data.csv"


total_lines = sum(1 for _ in open(csv_file)) - 1


chunks = []
chunksize = 1000
with tqdm(desc="Reading CSV in chunks", total=total_lines, unit="lines") as pbar:
    for chunk in pd.read_csv(csv_file, chunksize=chunksize):
        chunks.append(chunk)
        pbar.update(len(chunk))



Reading CSV in chunks: 100%|██████████| 812700/812700 [02:01<00:00, 6700.36lines/s]


In [None]:
df_main = pd.concat(chunks, ignore_index=True)

In [None]:
df_main.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 812700 entries, 0 to 812699
Data columns (total 4 columns):
 #   Column  Non-Null Count   Dtype 
---  ------  --------------   ----- 
 0   Name    812700 non-null  object
 1   Key     812700 non-null  object
 2   Specs   541800 non-null  object
 3   Vals    812645 non-null  object
dtypes: object(4)
memory usage: 24.8+ MB


In [None]:
def get_dataframe(disease_code):
  d_ids = set(df_main[df_main['Vals'] == disease_code]['Name'])
  print(f'There are {len(d_ids)} patients in the dataset')
  df_d = df_main[df_main['Name'].isin(d_ids)]
  return df_d

In [None]:
df_sinus = get_dataframe('426783006')
df_sinus_a = get_dataframe('164889003')

There are 5908 patients in the dataset
There are 422 patients in the dataset


In [None]:
from tqdm import tqdm
import numpy as np

def create_formatted(df, disease=0, lead_target=0):
    """
    Preprocess the DataFrame to extract ECG leads and labels, returning PyTorch tensors.

    Parameters:
    - df: Pandas DataFrame containing ECG data.
    - disease: Integer label for the disease (e.g., 0 for Normal, 1 for Arrhythmia).
    - lead_target: Integer index of the target lead to use (e.g., 0 for Lead I).
                   If None, use all 12 leads.

    Returns:
    - examples: List of tuples (leads_ref, disease)
                where leads_ref is a PyTorch tensor of shape (num_leads, lead_length)
    """
    examples = []
    num_leads = 12
    lead_length = 5000  # Adjust if your signals have a different length

    for start_idx in tqdm(range(0, len(df), 18), desc=f"Processing {disease} samples"):
        try:
            leads_ref = []

            for lead_idx in range(num_leads):
                # If a specific lead is targeted, skip others
                if lead_target is not None and lead_idx != lead_target:
                    continue

                # Ensure we don't go out of bounds
                if start_idx + lead_idx >= len(df):
                    raise IndexError(f"Missing data for lead {lead_idx} at index {start_idx}. Skipping sample.")

                # Parse the signal
                row = df.iloc[start_idx + lead_idx]
                vals_str = row['Vals']
                lead_signal = np.array([float(x.strip()) for x in vals_str.strip('[]').split(',')])

                # Ensure the lead signal is of the correct length
                if lead_signal.shape[0] != lead_length:
                    if lead_signal.shape[0] < lead_length:
                        padding = lead_length - lead_signal.shape[0]
                        lead_signal = np.pad(lead_signal, (0, padding), 'constant')
                    else:
                        lead_signal = lead_signal[:lead_length]

                leads_ref.append(lead_signal)

            # Convert to a PyTorch tensor
            if leads_ref:  # Ensure leads_ref is not empty
                leads_ref = torch.tensor(np.array(leads_ref), dtype=torch.float32)  # Shape: (num_selected_leads, lead_length)
                label = torch.tensor(disease, dtype=torch.float32)  # Convert disease label to tensor
                examples.append((leads_ref, label))
            else:
                print(f"Warning: No leads extracted for sample starting at index {start_idx}. Skipping.")

        except IndexError as e:
            print(f"Warning: Incomplete sample at index {start_idx}. Skipping.")
            continue
        except Exception as e:
            print(f"Error processing sample at index {start_idx}: {e}")
            continue

    return examples


In [None]:
import torch
import torch.nn as nn

class FeatureNN(nn.Module):
    def __init__(self, input_size):
        super(FeatureNN, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)  # Binary classification
        self.dropout = nn.Dropout(0.3)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x


In [None]:
!pip install biosppy
!pip install scipy

Collecting biosppy
  Downloading biosppy-2.2.2-py2.py3-none-any.whl.metadata (5.7 kB)
Collecting bidict (from biosppy)
  Downloading bidict-0.23.1-py3-none-any.whl.metadata (8.7 kB)
Collecting shortuuid (from biosppy)
  Downloading shortuuid-1.0.13-py3-none-any.whl.metadata (5.8 kB)
Collecting pywavelets (from biosppy)
  Downloading pywavelets-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.0 kB)
Collecting mock (from biosppy)
  Downloading mock-5.1.0-py3-none-any.whl.metadata (3.0 kB)
Downloading biosppy-2.2.2-py2.py3-none-any.whl (149 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m149.4/149.4 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading bidict-0.23.1-py3-none-any.whl (32 kB)
Downloading mock-5.1.0-py3-none-any.whl (30 kB)
Downloading pywavelets-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.5/4.5 MB[0m [31m62.2 MB/s[0m eta [

In [None]:
import numpy as np
from scipy.signal import find_peaks, welch
from scipy.stats import skew, kurtosis

from biosppy.signals import ecg
import numpy as np

def extract_features(signal, fs=500):
    """
    Extract ECG-specific features from a single signal.
    """
    try:
        # Process the ECG signal
        out = ecg.ecg(signal=signal, sampling_rate=fs, show=False)
        r_peaks = out['rpeaks']

        # Calculate features
        rr_intervals = np.diff(r_peaks) / fs  # R-R intervals
        heart_rate = 60 / np.mean(rr_intervals) if len(rr_intervals) > 0 else 0.0

        energy = np.sum(signal**2)

        st_deviation = []
        for r_peak in r_peaks:
            st_idx = int(r_peak + 0.08 * fs)
            if st_idx < len(signal):
                st_deviation.append(signal[st_idx])
        st_deviation_mean = np.mean(st_deviation) if st_deviation else 0.0

        qrs_duration = np.mean(rr_intervals) if len(rr_intervals) > 0 else 0.0
        t_wave_amplitude = np.max(signal) - np.min(signal)

        # Combine features into an array
        features = [
            heart_rate,
            np.mean(rr_intervals) if len(rr_intervals) > 0 else 0.0,
            energy,
            st_deviation_mean,
            qrs_duration,
            t_wave_amplitude,
        ]

        # Debug feature dimensions
        for i, feature in enumerate(features):
            if isinstance(feature, np.ndarray):
                print(f"Feature {i} is not scalar. Shape: {feature.shape}")
            else:
                print(f"Feature {i} is scalar: {feature}")

        return np.array(features)

    except Exception as e:
        print(f"Error extracting features: {e}")
        return np.zeros(6)  # Return zeros on failure


In [None]:
def test_(model, test_data):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_data:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            predictions = (torch.sigmoid(outputs) > 0.5).float()
            correct += (predictions == labels.unsqueeze(1)).sum().item()
            total += labels.size(0)
    accuracy = correct / total
    print(f"Test Accuracy: {accuracy * 100:.2f}%")
    return accuracy


In [None]:
formatted_data = create_formatted(df_sinus[:18*5], disease=0, lead_target=0)
for i, (features, label) in enumerate(formatted_data[:5]):
    print(f"Sample {i}: Feature shape: {features.shape}, Label shape: {label.shape}")


Processing 0 samples: 100%|██████████| 5/5 [00:00<00:00, 633.16it/s]

Sample 0: Feature shape: torch.Size([1, 5000]), Label shape: torch.Size([])
Sample 1: Feature shape: torch.Size([1, 5000]), Label shape: torch.Size([])
Sample 2: Feature shape: torch.Size([1, 5000]), Label shape: torch.Size([])
Sample 3: Feature shape: torch.Size([1, 5000]), Label shape: torch.Size([])
Sample 4: Feature shape: torch.Size([1, 5000]), Label shape: torch.Size([])





In [None]:
for lead_num in range(12):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    feature_dim = 8  # Number of features extracted
    model = FeatureNN(input_size=feature_dim).to(device)

    criterion = nn.BCEWithLogitsLoss()  # Binary classification loss
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    print(f"Lead {lead_num}:")
    normal = create_formatted(df_sinus.iloc[:18*300], 0, lead_num)
    arythmia = create_formatted(df_sinus_a.iloc[:18*300], 1, lead_num)
    train_data = normal + arythmia
    train_loader = prepare_dataloader(train_data)

    for epoch in range(100):
        model.train()
        running_loss = 0.0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Extract features from the raw signals
            inputs = extract_features_batch(inputs)

            optimizer.zero_grad()
            outputs = model(inputs)
            labels = labels.unsqueeze(1)  # Ensure labels have shape [batch_size, 1]
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        if epoch % 25 == 0:
            print(f"Epoch {epoch+1}, Loss: {running_loss / len(train_loader)}")
    test_data = create_formatted(df_sinus.iloc[18*300:18*400], 0, lead_num) + \
                create_formatted(df_sinus_a.iloc[18*300:18*400], 1, lead_num)
    x = test_(model, test_data)
    print()
    del model


Lead 0:


Processing 0 samples: 100%|██████████| 300/300 [00:00<00:00, 718.80it/s]
Processing 1 samples: 100%|██████████| 300/300 [00:00<00:00, 712.62it/s]


Error extracting features: all the input arrays must have same number of dimensions, but the array at index 0 has 1 dimension(s) and the array at index 1 has 2 dimension(s)
Error extracting features: all the input arrays must have same number of dimensions, but the array at index 0 has 1 dimension(s) and the array at index 1 has 2 dimension(s)
Error extracting features: all the input arrays must have same number of dimensions, but the array at index 0 has 1 dimension(s) and the array at index 1 has 2 dimension(s)
Error extracting features: all the input arrays must have same number of dimensions, but the array at index 0 has 1 dimension(s) and the array at index 1 has 2 dimension(s)
Error extracting features: all the input arrays must have same number of dimensions, but the array at index 0 has 1 dimension(s) and the array at index 1 has 2 dimension(s)
Error extracting features: all the input arrays must have same number of dimensions, but the array at index 0 has 1 dimension(s) and th

RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x6 and 8x64)

In [None]:
import torch
from torch.utils.data import DataLoader, TensorDataset
from biosppy.signals import ecg
import numpy as np
from tqdm import tqdm

# Feature Extraction Function
def extract_ecg_features(signal, fs=300):
    """
    Extract ECG-specific features from the signal.
    """
    try:
        # Process the ECG signal and extract R-peaks
        out = ecg.ecg(signal=signal, sampling_rate=fs, show=False)
        r_peaks = out['rpeaks']

        # Calculate features
        rr_intervals = np.diff(r_peaks) / fs  # R-R intervals
        heart_rate = 60 / np.mean(rr_intervals) if len(rr_intervals) > 0 else 0.0
        energy = np.sum(signal**2)

        # ST segment deviation (example calculation at 0.08s post R-peak)
        st_deviation = []
        for r_peak in r_peaks:
            st_idx = int(r_peak + 0.08 * fs)
            if st_idx < len(signal):
                st_deviation.append(signal[st_idx])
        st_deviation_mean = np.mean(st_deviation) if st_deviation else 0.0

        # Other features
        qrs_duration = np.mean(rr_intervals) if len(rr_intervals) > 0 else 0.0
        t_wave_amplitude = np.max(signal) - np.min(signal)  # Approximation

        # Combine features into an array
        features = [
            heart_rate,
            np.mean(rr_intervals) if len(rr_intervals) > 0 else 0.0,
            energy,
            st_deviation_mean,
            qrs_duration,
            t_wave_amplitude,
        ]
        return np.array(features)

    except Exception as e:
        print(f"Error extracting features: {e}")
        return np.zeros(6)  # Return zeros if feature extraction fails

# Batch Feature Extraction Function
def extract_features_batch(signals):
    """
    Extract features for a batch of signals.
    """
    features = []
    for signal in signals:
        # Convert PyTorch tensor to NumPy if necessary
        if isinstance(signal, torch.Tensor):
            signal = signal.numpy()

        # Extract features
        extracted_features = extract_ecg_features(signal)

        # Ensure the feature vector is flattened
        extracted_features = np.squeeze(extracted_features)
        features.append(extracted_features)

    # Convert to PyTorch tensor
    features = torch.tensor(features, dtype=torch.float32)
    return features

# Create Formatted Data Function
def create_formatted(df, disease=0, lead_target=0):
    """
    Preprocess the DataFrame to extract ECG leads and labels with medical features.
    """
    examples = []
    fs = 300  # Sampling frequency

    for start_idx in tqdm(range(0, len(df), 18), desc=f"Processing {disease} samples"):
        try:
            leads_ref = []

            for lead_idx in range(12):  # Loop through leads
                if lead_target is not None and lead_idx != lead_target:
                    continue

                if start_idx + lead_idx >= len(df):
                    raise IndexError(f"Missing data for lead {lead_idx} at index {start_idx}. Skipping sample.")

                row = df.iloc[start_idx + lead_idx]
                vals_str = row['Vals']
                lead_signal = np.array([float(x.strip()) for x in vals_str.strip('[]').split(',')])
                lead_signal = np.nan_to_num(lead_signal)  # Clean invalid values

                # Extract features
                features = extract_ecg_features(lead_signal, fs=fs)
                leads_ref.append(features)  # Append the feature array (1D)

            # Combine features and append to examples
            if leads_ref:
                leads_ref = torch.tensor(np.array(leads_ref), dtype=torch.float32)  # Shape: (num_leads, num_features)
                label_tensor = torch.tensor([disease], dtype=torch.float32)  # Ensure label shape is (1,)
                examples.append((leads_ref, label_tensor))

        except Exception as e:
            print(f"Error processing sample at index {start_idx}: {e}")
            continue

    return examples

# DataLoader Preparation Function
def prepare_dataloader(data, batch_size=32, shuffle=True):
    """
    Prepare a DataLoader from formatted data.
    """
    try:
        features = torch.stack([item[0] for item in data])  # Stacking feature tensors
        labels = torch.stack([item[1] for item in data])    # Stacking label tensors
        dataset = TensorDataset(features, labels)
        return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

    except Exception as e:
        print(f"Error in prepare_dataloader: {e}")
        return None

# Example Debugging
if __name__ == "__main__":
    # Simulate a small DataFrame with sample data
    import pandas as pd
    test_df = pd.DataFrame({
        'Vals': [
            '[0.1, 0.2, 0.3, 0, 0.0]', '[0.2, 0.4, 0.6, 0, 0.1]',  # Add realistic signal strings
            '[0.1, 0.2, 0.3, 0, 0.0]', '[0.2, 0.4, 0.6,0, 0.1]'
        ] * 18  # Simulate 12 leads per sample
    })

    formatted_data = create_formatted(df_sinus[:18*5], disease=0, lead_target=0)
    for i, (features, label) in enumerate(formatted_data[:5]):
        print(f"Sample {i}: Feature shape: {features.shape}, Label shape: {label.shape}")

    dataloader = prepare_dataloader(formatted_data)
    for batch_features, batch_labels in dataloader:
        print(f"Batch features shape: {batch_features.shape}, Batch labels shape: {batch_labels.shape}")


Processing 0 samples: 100%|██████████| 5/5 [00:00<00:00, 20.24it/s]

Sample 0: Feature shape: torch.Size([1, 6]), Label shape: torch.Size([1])
Sample 1: Feature shape: torch.Size([1, 6]), Label shape: torch.Size([1])
Sample 2: Feature shape: torch.Size([1, 6]), Label shape: torch.Size([1])
Sample 3: Feature shape: torch.Size([1, 6]), Label shape: torch.Size([1])
Sample 4: Feature shape: torch.Size([1, 6]), Label shape: torch.Size([1])
Batch features shape: torch.Size([5, 1, 6]), Batch labels shape: torch.Size([5, 1])





In [None]:
import torch
import torch.nn as nn

class FeatureClassifier(nn.Module):
    def __init__(self, input_features=6):
        """
        Neural network for processing features and performing binary classification.
        Parameters:
        - input_features: Number of input features for each sample (default is 6).
        """
        super(FeatureClassifier, self).__init__()
        self.fc1 = nn.Linear(input_features, 64)  # Fully connected layer 1
        self.fc2 = nn.Linear(64, 32)  # Fully connected layer 2
        self.fc3 = nn.Linear(32, 1)  # Output layer for binary classification
        self.relu = nn.ReLU()  # Activation function
        self.dropout = nn.Dropout(0.3)  # Dropout to prevent overfitting

    def forward(self, x):
        """
        Forward pass of the network.
        Parameters:
        - x: Input tensor of shape (batch_size, 1, 6)

        Returns:
        - Output tensor of shape (batch_size, 1)
        """
        x = x.squeeze(1)  # Remove the singleton dimension: (batch_size, 6)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x


In [None]:
import torch.optim as optim

# Initialize the model, loss function, and optimizer
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = FeatureClassifier(input_features=6).to(device)
criterion = nn.BCEWithLogitsLoss()  # Binary Cross-Entropy Loss with Logits
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer

# Example training loop
def train_model(model, dataloader, epochs=10):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for batch_features, batch_labels in dataloader:
            # Move data to the correct device
            batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
            batch_labels = batch_labels.squeeze(1)  # Match output shape

            # Forward pass
            optimizer.zero_grad()
            outputs = model(batch_features)
            loss = criterion(outputs.squeeze(1), batch_labels)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss / len(dataloader):.4f}")

# Example dataloader usage
train_model(model, dataloader, epochs=10)


Epoch 1/10, Loss: 2158212.5000
Epoch 2/10, Loss: 3958796.7500
Epoch 3/10, Loss: 2967806.5000
Epoch 4/10, Loss: 2732744.7500
Epoch 5/10, Loss: 1801446.2500
Epoch 6/10, Loss: 470510.6562
Epoch 7/10, Loss: 0.0000
Epoch 8/10, Loss: 0.0000
Epoch 9/10, Loss: 127020.8125
Epoch 10/10, Loss: 101597.8594


In [None]:
def evaluate_model(model, dataloader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_features, batch_labels in dataloader:
            batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
            batch_labels = batch_labels.squeeze(1)  # Match output shape

            outputs = model(batch_features)
            predictions = torch.sigmoid(outputs) > 0.5  # Apply sigmoid and threshold
            correct += (predictions == batch_labels).sum().item()
            total += batch_labels.size(0)

    accuracy = correct / total
    print(f"Test Accuracy: {accuracy * 100:.2f}%")
    return accuracy

# Example evaluation
evaluate_model(model, dataloader)  # Use a separate test dataloader


Test Accuracy: 500.00%


5.0

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from biosppy.signals import ecg
import numpy as np
from tqdm import tqdm

from scipy.signal import butter, filtfilt

def preprocess_signal(signal, fs=500):
    """
    Preprocess ECG signal: Bandpass filter and normalization.
    """
    # Bandpass filter: 0.5 Hz to 50 Hz
    def bandpass_filter(signal, lowcut, highcut, fs, order=4):
        nyquist = 0.5 * fs
        low = lowcut / nyquist
        high = highcut / nyquist
        b, a = butter(order, [low, high], btype='band')
        return filtfilt(b, a, signal)

    # Apply bandpass filter
    filtered_signal = bandpass_filter(signal, lowcut=0.5, highcut=50, fs=fs)

    # Normalize signal
    normalized_signal = (filtered_signal - np.min(filtered_signal)) / (np.max(filtered_signal) - np.min(filtered_signal))

    return normalized_signal

def extract_ecg_features(signal, fs=500):
    """
    Extract ECG-specific features from the signal.
    Pads signals if they are too short for processing.
    """

    try:
        # Preprocess the signal: filter and normalize
        signal = preprocess_signal(signal, fs)

        # Process the ECG signal and extract R-peaks
        out = ecg.ecg(signal=signal, sampling_rate=fs, show=False)
        r_peaks = out['rpeaks']

        # Calculate features
        rr_intervals = np.diff(r_peaks) / fs
        heart_rate = 60 / np.mean(rr_intervals) if len(rr_intervals) > 0 else 0.0
        energy = np.sum(signal**2)

        st_deviation = []
        for r_peak in r_peaks:
            st_idx = int(r_peak + 0.08 * fs)
            if st_idx < len(signal):
                st_deviation.append(signal[st_idx])
        st_deviation_mean = np.mean(st_deviation) if st_deviation else 0.0

        qrs_duration = np.mean(rr_intervals) if len(rr_intervals) > 0 else 0.0
        t_wave_amplitude = np.max(signal) - np.min(signal)

        features = [
            heart_rate,
            np.mean(rr_intervals) if len(rr_intervals) > 0 else 0.0,
            energy,
            st_deviation_mean,
            qrs_duration,
            t_wave_amplitude,
        ]
        return np.array(features)

    except Exception as e:
        print(f"Error extracting features: {e}")
        return np.zeros(6)


# Create Formatted Data Function
def create_formatted(df, disease=0, lead_target=0):
    """
    Preprocess the DataFrame to extract ECG leads and labels with medical features.
    """
    examples = []
    fs = 500

    for start_idx in tqdm(range(0, len(df), 18), desc=f"Processing {disease} samples"):
        try:
            leads_ref = []

            for lead_idx in range(12):
                if lead_target is not None and lead_idx != lead_target:
                    continue

                if start_idx + lead_idx >= len(df):
                    raise IndexError(f"Missing data for lead {lead_idx} at index {start_idx}. Skipping sample.")

                row = df.iloc[start_idx + lead_idx]
                vals_str = row['Vals']
                lead_signal = np.array([float(x.strip()) for x in vals_str.strip('[]').split(',')])
                lead_signal = np.nan_to_num(lead_signal)

                features = extract_ecg_features(lead_signal, fs=fs)
                #print(features)
                leads_ref.append(features)

            if leads_ref:
                leads_ref = torch.tensor(np.array(leads_ref), dtype=torch.float32).flatten(0, -1)
                label_tensor = torch.tensor(disease, dtype=torch.float32)  # Ensure label is a scalar
                examples.append((leads_ref, label_tensor))

        except Exception as e:
            print(f"Error processing sample at index {start_idx}: {e}")
            continue

    return examples

# DataLoader Preparation Function
def prepare_dataloader(data, batch_size=32, shuffle=True):
    """
    Prepare a DataLoader from formatted data.
    """
    try:
        features = torch.stack([item[0] for item in data])
        labels = torch.tensor([item[1] for item in data]).unsqueeze(1)  # Ensure labels are shaped correctly
        dataset = TensorDataset(features, labels)
        return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

    except Exception as e:
        print(f"Error in prepare_dataloader: {e}")
        return None
# Revised Neural Network with Weight Initialization
class FeatureClassifier(nn.Module):
    def __init__(self, input_features=6):
        super(FeatureClassifier, self).__init__()
        self.fc1 = nn.Linear(input_features, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 16)
        self.fc4 = nn.Linear(16, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.4)  # Increase dropout for better regularization

        # Initialize weights
        for layer in [self.fc1, self.fc2, self.fc3, self.fc4]:
            nn.init.xavier_uniform_(layer.weight)
            nn.init.zeros_(layer.bias)

    def forward(self, x):
        x = x.squeeze(1)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.fc4(x)
        return x


# Training Loop with Improvements
for lead_num in range(12):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = FeatureClassifier(input_features=6).to(device)

    # Check class distribution for pos_weight
    normal = create_formatted(df_sinus.iloc[:18*300], 0, lead_num)
    arythmia = create_formatted(df_sinus_a.iloc[:18*300], 1, lead_num)
    train_data = normal + arythmia
    train_labels = [item[1].item() for item in train_data]
    class_counts = np.bincount(train_labels)
    pos_weight = torch.tensor([class_counts[0] / class_counts[1]]).to(device)

    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    optimizer = optim.Adam(model.parameters(), lr=0.0005, weight_decay=1e-5)  # Add L2 regularization
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=10, factor=0.5, verbose=True)

    print(f"Lead {lead_num}:")
    train_loader = prepare_dataloader(train_data)

    best_loss = float('inf')
    patience = 15
    patience_counter = 0

    for epoch in range(500):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()

            # Gradient clipping
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=2.0)

            optimizer.step()
            running_loss += loss.item()

            #print(outputs)

            predictions = (torch.sigmoid(outputs) > 0.5).float()
            correct_train += (predictions == labels).sum().item()
            total_train += labels.size(0)

        train_accuracy = correct_train / total_train
        scheduler.step(running_loss)

        # Early stopping
        if running_loss < best_loss:
            best_loss = running_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Stopping early at epoch {epoch+1}")
                break

        if epoch % 25 == 0:
            print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader):.4f}, Train Accuracy: {train_accuracy * 100:.2f}%")

    # Evaluate Model
    test_data = create_formatted(df_sinus.iloc[18*300:18*400], 0, lead_num) + create_formatted(df_sinus_a.iloc[18*300:18*400], 1, lead_num)
    test_loader = prepare_dataloader(test_data)

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            predictions = (torch.sigmoid(outputs) > 0.5).float()
            correct += (predictions == labels).sum().item()
            total += labels.size(0)

    accuracy = correct / total
    print(f"Lead {lead_num} Test Accuracy: {accuracy * 100:.2f}%\n")

    del model


Processing 0 samples: 100%|██████████| 300/300 [00:21<00:00, 13.76it/s]
Processing 1 samples: 100%|██████████| 300/300 [00:22<00:00, 13.33it/s]


Lead 0:
Epoch 1, Loss: 11.9393, Train Accuracy: 50.83%
Epoch 26, Loss: 0.7520, Train Accuracy: 50.83%
Epoch 51, Loss: 0.6918, Train Accuracy: 54.33%
Epoch 76, Loss: 0.6874, Train Accuracy: 54.17%
Epoch 101, Loss: 0.6904, Train Accuracy: 55.33%
Stopping early at epoch 108


Processing 0 samples: 100%|██████████| 100/100 [00:07<00:00, 13.50it/s]
Processing 1 samples: 100%|██████████| 100/100 [00:07<00:00, 13.22it/s]


Lead 0 Test Accuracy: 53.00%



Processing 0 samples: 100%|██████████| 300/300 [00:22<00:00, 13.37it/s]
Processing 1 samples: 100%|██████████| 300/300 [00:22<00:00, 13.34it/s]


Lead 1:
Epoch 1, Loss: 25.2071, Train Accuracy: 49.00%
Epoch 26, Loss: 0.8820, Train Accuracy: 48.33%
Epoch 51, Loss: 0.7207, Train Accuracy: 51.67%
Epoch 76, Loss: 0.6920, Train Accuracy: 55.67%
Epoch 101, Loss: 0.6923, Train Accuracy: 54.00%
Stopping early at epoch 115


Processing 0 samples: 100%|██████████| 100/100 [00:07<00:00, 13.78it/s]
Processing 1 samples: 100%|██████████| 100/100 [00:07<00:00, 13.73it/s]


Lead 1 Test Accuracy: 53.50%



Processing 0 samples: 100%|██████████| 300/300 [00:21<00:00, 14.17it/s]
Processing 1 samples: 100%|██████████| 300/300 [00:22<00:00, 13.53it/s]


Lead 2:
Epoch 1, Loss: 60.4616, Train Accuracy: 53.00%
Epoch 26, Loss: 2.0403, Train Accuracy: 46.83%
Epoch 51, Loss: 0.9115, Train Accuracy: 55.33%
Epoch 76, Loss: 0.8553, Train Accuracy: 49.67%
Epoch 101, Loss: 0.7144, Train Accuracy: 51.83%
Epoch 126, Loss: 0.7286, Train Accuracy: 54.67%
Stopping early at epoch 131


Processing 0 samples: 100%|██████████| 100/100 [00:07<00:00, 13.52it/s]
Processing 1 samples: 100%|██████████| 100/100 [00:07<00:00, 13.41it/s]


Lead 2 Test Accuracy: 49.00%



Processing 0 samples: 100%|██████████| 300/300 [00:21<00:00, 13.66it/s]
Processing 1 samples: 100%|██████████| 300/300 [00:22<00:00, 13.47it/s]


Lead 3:
Epoch 1, Loss: 85.5683, Train Accuracy: 51.67%
Epoch 26, Loss: 6.7684, Train Accuracy: 48.17%
Epoch 51, Loss: 1.3423, Train Accuracy: 50.00%
Epoch 76, Loss: 0.9522, Train Accuracy: 49.50%
Epoch 101, Loss: 0.8246, Train Accuracy: 53.33%
Epoch 126, Loss: 0.8129, Train Accuracy: 49.67%
Epoch 151, Loss: 0.7162, Train Accuracy: 55.00%
Stopping early at epoch 163


Processing 0 samples: 100%|██████████| 100/100 [00:07<00:00, 13.79it/s]
Processing 1 samples: 100%|██████████| 100/100 [00:07<00:00, 13.43it/s]


Lead 3 Test Accuracy: 50.00%



Processing 0 samples: 100%|██████████| 300/300 [00:21<00:00, 13.71it/s]
Processing 1 samples: 100%|██████████| 300/300 [00:22<00:00, 13.52it/s]


Lead 4:
Epoch 1, Loss: 79.7124, Train Accuracy: 49.83%
Epoch 26, Loss: 1.9808, Train Accuracy: 52.33%
Epoch 51, Loss: 0.9453, Train Accuracy: 50.83%
Epoch 76, Loss: 0.7544, Train Accuracy: 51.67%
Stopping early at epoch 88


Processing 0 samples: 100%|██████████| 100/100 [00:07<00:00, 13.47it/s]
Processing 1 samples: 100%|██████████| 100/100 [00:07<00:00, 13.51it/s]


Lead 4 Test Accuracy: 51.00%



Processing 0 samples: 100%|██████████| 300/300 [00:21<00:00, 14.16it/s]
Processing 1 samples: 100%|██████████| 300/300 [00:22<00:00, 13.59it/s]


Lead 5:
Epoch 1, Loss: 21.5249, Train Accuracy: 46.83%
Epoch 26, Loss: 1.3847, Train Accuracy: 50.17%
Epoch 51, Loss: 0.7932, Train Accuracy: 52.50%
Epoch 76, Loss: 0.7455, Train Accuracy: 55.50%
Epoch 101, Loss: 0.6856, Train Accuracy: 53.67%
Stopping early at epoch 107


Processing 0 samples: 100%|██████████| 100/100 [00:07<00:00, 13.73it/s]
Processing 1 samples: 100%|██████████| 100/100 [00:07<00:00, 13.44it/s]


Lead 5 Test Accuracy: 60.50%



Processing 0 samples: 100%|██████████| 300/300 [00:22<00:00, 13.57it/s]
Processing 1 samples: 100%|██████████| 300/300 [00:21<00:00, 13.69it/s]


Lead 6:
Epoch 1, Loss: 74.8474, Train Accuracy: 49.17%
Epoch 26, Loss: 0.7147, Train Accuracy: 49.50%
Stopping early at epoch 36


Processing 0 samples: 100%|██████████| 100/100 [00:07<00:00, 13.82it/s]
Processing 1 samples: 100%|██████████| 100/100 [00:07<00:00, 13.16it/s]


Lead 6 Test Accuracy: 50.00%



Processing 0 samples: 100%|██████████| 300/300 [00:22<00:00, 13.52it/s]
Processing 1 samples: 100%|██████████| 300/300 [00:22<00:00, 13.30it/s]


Lead 7:
Epoch 1, Loss: 59.5119, Train Accuracy: 52.33%
Epoch 26, Loss: 6.8039, Train Accuracy: 47.17%
Epoch 51, Loss: 1.0076, Train Accuracy: 49.67%
Epoch 76, Loss: 0.8355, Train Accuracy: 52.00%
Epoch 101, Loss: 0.7746, Train Accuracy: 53.33%
Stopping early at epoch 109


Processing 0 samples: 100%|██████████| 100/100 [00:07<00:00, 13.69it/s]
Processing 1 samples: 100%|██████████| 100/100 [00:07<00:00, 13.58it/s]


Lead 7 Test Accuracy: 50.00%



Processing 0 samples: 100%|██████████| 300/300 [00:21<00:00, 14.05it/s]
Processing 1 samples: 100%|██████████| 300/300 [00:21<00:00, 13.88it/s]


Lead 8:
Epoch 1, Loss: 38.9522, Train Accuracy: 50.17%
Epoch 26, Loss: 1.1387, Train Accuracy: 51.67%
Epoch 51, Loss: 0.7540, Train Accuracy: 53.33%
Epoch 76, Loss: 0.7349, Train Accuracy: 56.33%
Stopping early at epoch 100


Processing 0 samples: 100%|██████████| 100/100 [00:07<00:00, 13.89it/s]
Processing 1 samples: 100%|██████████| 100/100 [00:07<00:00, 13.74it/s]


Lead 8 Test Accuracy: 59.00%



Processing 0 samples: 100%|██████████| 300/300 [00:21<00:00, 14.15it/s]
Processing 1 samples: 100%|██████████| 300/300 [00:21<00:00, 14.02it/s]


Lead 9:
Epoch 1, Loss: 23.9316, Train Accuracy: 49.17%
Epoch 26, Loss: 0.8067, Train Accuracy: 54.17%
Epoch 51, Loss: 0.6824, Train Accuracy: 57.00%
Epoch 76, Loss: 0.6721, Train Accuracy: 57.33%
Stopping early at epoch 83


Processing 0 samples: 100%|██████████| 100/100 [00:07<00:00, 14.09it/s]
Processing 1 samples: 100%|██████████| 100/100 [00:07<00:00, 13.91it/s]


Lead 9 Test Accuracy: 65.50%



Processing 0 samples: 100%|██████████| 300/300 [00:21<00:00, 14.09it/s]
Processing 1 samples: 100%|██████████| 300/300 [00:21<00:00, 13.97it/s]


Lead 10:
Epoch 1, Loss: 22.8467, Train Accuracy: 52.17%
Epoch 26, Loss: 0.9991, Train Accuracy: 50.33%
Epoch 51, Loss: 0.7152, Train Accuracy: 56.00%
Epoch 76, Loss: 0.6806, Train Accuracy: 59.50%
Epoch 101, Loss: 0.6739, Train Accuracy: 60.67%
Stopping early at epoch 107


Processing 0 samples: 100%|██████████| 100/100 [00:07<00:00, 13.66it/s]
Processing 1 samples: 100%|██████████| 100/100 [00:07<00:00, 13.53it/s]


Lead 10 Test Accuracy: 62.50%



Processing 0 samples: 100%|██████████| 300/300 [00:21<00:00, 13.84it/s]
Processing 1 samples: 100%|██████████| 300/300 [00:22<00:00, 13.49it/s]


Lead 11:
Epoch 1, Loss: 15.3376, Train Accuracy: 50.17%
Epoch 26, Loss: 1.4267, Train Accuracy: 49.83%
Epoch 51, Loss: 0.7791, Train Accuracy: 52.17%
Stopping early at epoch 67


Processing 0 samples: 100%|██████████| 100/100 [00:07<00:00, 13.63it/s]
Processing 1 samples: 100%|██████████| 100/100 [00:07<00:00, 13.59it/s]

Lead 11 Test Accuracy: 50.00%




