In [3]:
import os
import wfdb
import numpy as np
from collections import defaultdict
from sklearn.utils import resample

In [6]:
# Segmentation settings (tune per dataset / experiment)
SAMPLE_RATE = 8000                           # samples per second
REMOVE_SAMPLES = int(0.15 * SAMPLE_RATE)     # leading samples dropped from each signal (0.15 s -> 1200)
WINDOW_SIZE = 8192                           # samples per segment (1.024 s at SAMPLE_RATE)
STEP_SIZE = WINDOW_SIZE // 2                 # offset between segment starts (4096 -> 50% overlap)
# Disabled example settings, kept for reference:
# NUM_CLASSES = 4
# SAMPLES_PER_CLASS = 260

In [9]:
def load_signal(record_path):
    """Load a WFDB record and return its samples as a flat 1-D array.

    Parameters
    ----------
    record_path : str
        Path of the record without a file extension, as passed to
        ``wfdb.rdsamp``.

    Returns
    -------
    The sample array returned by ``wfdb.rdsamp``, collapsed with
    ``flatten()``. The record metadata returned alongside it is discarded.
    """
    # NOTE(review): flatten() on a multi-channel record would interleave the
    # channels sample by sample - presumably every record here is
    # single-channel; confirm against the dataset.
    samples, _metadata = wfdb.rdsamp(record_path)
    flat_samples = samples.flatten()
    return flat_samples

def sliding_window(signal, window_size=None, step_size=None):
    """Cut a 1-D sequence into fixed-length, possibly overlapping segments.

    Parameters
    ----------
    signal : sequence supporting ``len()`` and slicing
        Samples to segment (e.g. a list or a 1-D NumPy array).
    window_size : int, optional
        Number of samples per segment. When omitted, the module-level
        ``WINDOW_SIZE`` is used (looked up at call time).
    step_size : int, optional
        Offset between the starts of consecutive segments. When omitted,
        the module-level ``STEP_SIZE`` is used (looked up at call time).

    Returns
    -------
    list
        Slices of ``signal``, each exactly ``window_size`` long, ordered by
        increasing start index. The list is empty when ``signal`` is shorter
        than one window; a trailing partial window is never produced.

    Raises
    ------
    ValueError
        If ``window_size`` or ``step_size`` is zero or negative.
    """
    # Resolve defaults lazily so edits to the configuration constants take
    # effect without re-defining this function.
    if window_size is None:
        window_size = WINDOW_SIZE
    if step_size is None:
        step_size = STEP_SIZE
    if window_size <= 0 or step_size <= 0:
        raise ValueError(
            "window_size and step_size must be positive, got "
            f"window_size={window_size}, step_size={step_size}"
        )

    # Every start in this range leaves room for a full window, so no
    # per-segment length check is needed.
    last_start = len(signal) - window_size
    return [
        signal[start:start + window_size]
        for start in range(0, last_start + 1, step_size)
    ]

def parse_label(record_path):
    """Extract the diagnosis text from a record's ``.hea`` header file.

    Scans ``record_path + ".hea"`` for the first line that starts with ``#``
    and contains the ``<diagnoses>:`` tag, then returns the text between that
    tag and the next ``<`` (or the end of the line), stripped of surrounding
    whitespace.

    Parameters
    ----------
    record_path : str
        Path of the record without the ``.hea`` extension.

    Returns
    -------
    str
        The diagnosis text, or ``"unknown"`` when no matching line exists.
    """
    tag = "<diagnoses>:"
    with open(record_path + ".hea", "r") as header:
        for raw_line in header:
            # Only '#'-prefixed lines are searched for the tag.
            if not raw_line.startswith("#"):
                continue
            _before, found, after_tag = raw_line.partition(tag)
            if not found:
                continue
            # The value runs up to the next '<', which opens the following tag.
            value, _sep, _rest = after_tag.partition("<")
            return value.strip()
    return "unknown"

def process_all_records(folder):
    """Segment every record in ``folder`` and group the segments by label.

    Each file in ``folder`` whose name ends with ``.hea`` identifies one
    record. For every record, in sorted file-name order, the signal is loaded
    with ``load_signal``, its first ``REMOVE_SAMPLES`` samples are dropped,
    the remainder is windowed with ``sliding_window``, and the resulting
    segments are appended under the label returned by ``parse_label``.

    Parameters
    ----------
    folder : str
        Directory containing the record files.

    Returns
    -------
    collections.defaultdict
        Maps each label to the list of segments from all records carrying
        that label. A label whose records yield no segments is still present,
        with an empty list.
    """
    header_ext = ".hea"
    all_segments = defaultdict(list)
    for rec_name in sorted(os.listdir(folder)):
        if not rec_name.endswith(header_ext):
            continue
        # Strip only the trailing extension. str.replace would also remove
        # ".hea" from the middle of a name ("a.heart.hea" -> "art").
        base = rec_name[:-len(header_ext)]
        record_path = os.path.join(folder, base)
        signal = load_signal(record_path)[REMOVE_SAMPLES:]
        label = parse_label(record_path)
        all_segments[label].extend(sliding_window(signal))
    return all_segments