In [2]:
import neurokit2 as nk
import wfdb
import pandas as pd
import os

# --- Path to your ECG data folder (where .dat and .hea files are) ---
folder_path = "ECG Data/"  # <-- Change if needed

# --- Get list of record names (without extension) ---
record_names = sorted(
    list(set([f.split(".")[0] for f in os.listdir(folder_path) if f.endswith(".hea")]))
)

# --- Limit to 43 samples if needed ---
record_names = record_names[:43]

# --- Initialize lists ---
ecg_features_list = []
failed_files = []
skipped_files = []

# --- Process each record ---
for rec in record_names:
    try:
        record_path = os.path.join(folder_path, rec)

        # Load the ECG signal using wfdb
        signal, info = wfdb.rdsamp(record_path)

        # Use Lead V5 (index 10 is typically V5 in PTB-XL)
        ecg_signal = signal[:, 10]

        # Check if signal is long enough (at least 5 seconds = 5000 samples)
        if len(ecg_signal) < 5000:
            print(f"[!] Skipping {rec}: too short ({len(ecg_signal)} samples)")
            skipped_files.append(rec)
            continue

        # Process ECG
        processed = nk.ecg_process(ecg_signal, sampling_rate=1000)

        # Extract interval-related features
        features = nk.ecg_intervalrelated(processed['ECG_Clean'], sampling_rate=1000)

        # Add record ID
        features['ecg_id'] = rec

        ecg_features_list.append(features)

    except Exception as e:
        print(f"[!] Error processing {rec}: {e}")
        failed_files.append(rec)

# --- Combine all into a single DataFrame ---
if ecg_features_list:
    df_ecg_features = pd.concat(ecg_features_list, ignore_index=True)
    print("\n✅ ECG feature extraction complete.")
    print("Final ECG Features Shape:", df_ecg_features.shape)
    print(df_ecg_features.head())
else:
    print("\n❌ No ECG features extracted. Check skipped/failed files.")

# --- Report skipped and failed files ---
print("\n[!] Skipped due to short length:", skipped_files)
print("[!] Failed during processing:", failed_files)


[!] Skipping 00001_lr: too short (1000 samples)
[!] Skipping 00002_lr: too short (1000 samples)
[!] Skipping 00003_lr: too short (1000 samples)
[!] Skipping 00004_lr: too short (1000 samples)
[!] Skipping 00005_lr: too short (1000 samples)
[!] Skipping 00006_lr: too short (1000 samples)
[!] Skipping 00007_lr: too short (1000 samples)
[!] Skipping 00008_lr: too short (1000 samples)
[!] Skipping 00009_lr: too short (1000 samples)
[!] Skipping 00010_lr: too short (1000 samples)
[!] Skipping 00011_lr: too short (1000 samples)
[!] Skipping 00012_lr: too short (1000 samples)
[!] Skipping 00013_lr: too short (1000 samples)
[!] Skipping 00014_lr: too short (1000 samples)
[!] Skipping 00015_lr: too short (1000 samples)
[!] Skipping 00016_lr: too short (1000 samples)
[!] Skipping 00017_lr: too short (1000 samples)
[!] Skipping 00018_lr: too short (1000 samples)
[!] Skipping 00019_lr: too short (1000 samples)
[!] Skipping 00020_lr: too short (1000 samples)
[!] Skipping 00021_lr: too short (1000 s

In [3]:
from scipy.stats import skew, kurtosis
import numpy as np

manual_features = []

for rec in record_names:
    try:
        record_path = os.path.join(folder_path, rec)
        signal, info = wfdb.rdsamp(record_path)

        ecg = signal[:, 10]  # Try lead V5

        if len(ecg) < 1000:
            print(f"[!] Still too short: {rec}")
            continue

        features = {
            "ecg_id": rec,
            "mean": np.mean(ecg),
            "std": np.std(ecg),
            "min": np.min(ecg),
            "max": np.max(ecg),
            "ptp": np.ptp(ecg),  # peak-to-peak
            "skew": skew(ecg),
            "kurtosis": kurtosis(ecg),
        }

        manual_features.append(features)

    except Exception as e:
        print(f"[!] Failed: {rec} — {e}")

df_manual_ecg = pd.DataFrame(manual_features)
print("Manual ECG Features Shape:", df_manual_ecg.shape)
print(df_manual_ecg.head())


Manual ECG Features Shape: (43, 8)
     ecg_id      mean       std    min    max    ptp      skew   kurtosis
0  00001_lr -0.000557  0.089204 -0.097  0.530  0.627  3.166090  12.147694
1  00002_lr  0.006375  0.228674 -0.492  1.839  2.331  4.559430  27.429426
2  00003_lr  0.005362  0.110647 -0.175  0.851  1.026  3.745405  19.571329
3  00004_lr -0.007380  0.394695 -1.269  2.221  3.490  2.007490   8.050312
4  00005_lr  0.000208  0.210904 -0.187  1.493  1.680  3.844276  19.030165


In [None]:
df_manual_ecg.to_csv("final_ecg_features.csv", index=False)