In [122]:
import wfdb
import numpy as np
import pandas as pd
import glob
import os
from scipy.signal import resample
import neurokit2 as nk
from scipy.signal import find_peaks
import matplotlib.pyplot as plt

In [2]:
# Code to load all the signals
basePath="ECG_Database"

## List files and folders insade basePath
patDir = sorted(
    [f.path for f in os.scandir(basePath) if f.is_dir()],
    key=lambda x: int(os.path.basename(x).replace("patient", ""))  # Extraer el número de paciente
)

## Dictionary to store each patient's records
patDict = {}

def generateName(i, extension):
        return f"record{i:03d}{extension}" 

for patient in patDir:
    patID=os.path.basename(patient) # patID = directory name
    archivos = os.listdir(patient)

    # Create 3 lists, one for each file type 
    dat = [f for f in archivos if f.endswith(".dat")]
    hea = [f for f in archivos if f.endswith(".hea")]
    xyz = [f for f in archivos if f.endswith(".xyz")]

    # Rename the files for clarity
    
    import os

    # Agrupar los archivos por nombre base y asignarles un número secuencial
    allFiles = []
    
    # Agrupar los archivos con el mismo nombre base
    for f in dat + hea + xyz: 
        allFiles.append(f)  # Asignar el nuevo nombre

    patDict[patID] = {"files": allFiles}

In [None]:
for patient, data in patDict.items():
    print(f"Paciente: {patient}")
    
    # Imprimir los archivos del paciente
    if "files" in data:
        print("Archivos:", ", ".join(data["files"]))
    else:
        print("No hay archivos registrados.")
    
    print("-" * 40)

In [134]:
def get_heart_rate(file_base):
    record = wfdb.rdrecord(file_base)
    signal = record.p_signal[:, 0]
    fs = record.fs
    _, rpeaks = nk.ecg_peaks(signal, sampling_rate=fs)
    hr = nk.ecg_rate(rpeaks, sampling_rate=fs)
    return np.mean(hr)


In [135]:
def get_rr_std(file_base):
    record = wfdb.rdrecord(file_base)
    signal = record.p_signal[:, 0]
    fs = record.fs
    _, rpeaks = nk.ecg_peaks(signal, sampling_rate=fs)
    rr_intervals = np.diff(rpeaks["ECG_R_Peaks"]) / fs
    return np.std(rr_intervals)

In [145]:
def get_qrs_duration(file_base):
    record = wfdb.rdrecord(file_base)
    signal = record.p_signal[:, 0]
    fs = record.fs
    cleaned = nk.ecg_clean(signal, sampling_rate=fs)
    _, rpeaks = nk.ecg_peaks(cleaned, sampling_rate=fs)
    delineate = nk.ecg_delineate(cleaned, rpeaks, sampling_rate=fs, method="dwt", show=False)
    q_peaks = np.array(delineate[1]["ECG_Q_Peaks"])
    s_peaks = np.array(delineate[1]["ECG_S_Peaks"])
    
    durations = (s_peaks - q_peaks) / fs
    durations = durations[~np.isnan(durations)]
    
    return np.mean(durations) if len(durations) > 0 else np.nan

In [146]:
basePath = "patient001"
file_path = os.path.join(basePath, "s0010_re.dat")
file_base = os.path.splitext(file_path)[0]
record = wfdb.rdrecord(file_base)
signal = record.p_signal[:, 0]
fs = record.fs
cleaned = nk.ecg_clean(signal, sampling_rate=fs)
_, rpeaks = nk.ecg_peaks(cleaned, sampling_rate=fs)
delineate = nk.ecg_delineate(cleaned, rpeaks, sampling_rate=fs, method="dwt", show=False)
print(rpeaks)

{'method_peaks': 'neurokit', 'method_fixpeaks': 'None', 'ECG_R_Peaks': array([  642,  1387,  2114,  2841,  3586,  4327,  5057,  5799,  6543,
        7265,  7991,  8727,  9451, 10162, 10885, 11612, 12332, 13049,
       13783, 14524, 15252, 15979, 16719, 17457, 18181, 18911, 19650,
       20381, 21098, 21832, 22569, 23295, 24019, 24757, 25490, 26214,
       26954, 27697, 28431, 29162, 29909, 30655, 31386, 32125, 32875,
       33617, 34348, 35096, 35853, 36587, 37317, 38064]), 'sampling_rate': 1000}


In [148]:
def get_qt_interval(file_base):
    record = wfdb.rdrecord(file_base)
    signal = record.p_signal[:, 0]
    fs = record.fs
    cleaned = nk.ecg_clean(signal, sampling_rate=fs)
    _, rpeaks = nk.ecg_peaks(cleaned, sampling_rate=fs)
    delineate = nk.ecg_delineate(cleaned, rpeaks, sampling_rate=fs, method="dwt", show=False)
    
    q_peaks = np.array(delineate[1]["ECG_Q_Peaks"])
    t_offsets = np.array(delineate[1]["ECG_T_Offsets"])
    
    qt = (t_offsets - q_peaks) / fs
    qt = qt[~np.isnan(qt)]
    
    return np.mean(qt) if len(qt) > 0 else np.nan

In [149]:
def get_qtc_bazett(file_base):
    record = wfdb.rdrecord(file_base)
    signal = record.p_signal[:, 0]
    fs = record.fs
    qt = get_qt_interval(file_base)
    _, rpeaks = nk.ecg_peaks(signal, sampling_rate=fs)
    rr = np.diff(rpeaks["ECG_R_Peaks"]) / fs
    rr_mean = np.mean(rr)
    
    if qt is not None and rr_mean > 0:
        return qt / np.sqrt(rr_mean)
    return np.nan

In [155]:
def get_p_duration(file_base):
    record = wfdb.rdrecord(file_base)
    signal = record.p_signal[:, 0]
    fs = record.fs
    cleaned = nk.ecg_clean(signal, sampling_rate=fs)
    _, rpeaks = nk.ecg_peaks(cleaned, sampling_rate=fs)
    delineate = nk.ecg_delineate(cleaned, rpeaks, sampling_rate=fs, method="dwt", show=False)
    
    p_onsets = np.array(delineate[1]["ECG_P_Onsets"])
    p_offsets = np.array(delineate[1]["ECG_P_Offsets"])
    
    durations = (p_offsets - p_onsets) / fs
    durations = durations[~np.isnan(durations)]
    
    return np.mean(durations) if len(durations) > 0 else np.nan


In [151]:
def get_pr_interval(file_base):
    record = wfdb.rdrecord(file_base)
    signal = record.p_signal[:, 0]
    fs = record.fs
    cleaned = nk.ecg_clean(signal, sampling_rate=fs)
    _, rpeaks = nk.ecg_peaks(cleaned, sampling_rate=fs)
    delineate = nk.ecg_delineate(cleaned, rpeaks, sampling_rate=fs, method="dwt", show=False)
    
    p_onsets = np.array(delineate[1]["ECG_P_Onsets"])
    q_peaks = np.array(delineate[1]["ECG_Q_Peaks"])
    
    pr = (q_peaks - p_onsets) / fs
    pr = pr[~np.isnan(pr)]
    
    return np.mean(pr) if len(pr) > 0 else np.nan


In [152]:
def extract_all_features(file_base):
    
    return {
        "heart_rate_mean": get_heart_rate(file_base),
        "rr_std": get_rr_std(file_base),
        "qrs_duration": get_qrs_duration(file_base),
        "qt_interval": get_qt_interval(file_base),
        "qtc_bazett": get_qtc_bazett(file_base),
        "p_duration": get_p_duration(file_base),
        "pr_interval": get_pr_interval(file_base)
    }

In [153]:

def extract(file_base):
    record = wfdb.rdrecord(file_base)
    signal = record.p_signal[:, 0]
    fs = record.fs
    _, processed = nk.ecg_process(signal, sampling_rate=fs)
    
    return type(processed[1])
    

In [None]:
for patient_id, data in patDict.items():
    archivos = data["files"]
    dat_files = [f for f in archivos if f.endswith(".dat")]
    hea_files = [f for f in archivos if f.endswith(".hea")]
    i=0
    for archivo in dat_files:
        basePath = "ECG_Database"
        file_path = os.path.join(basePath, patient_id, archivo)
        file_base = os.path.splitext(file_path)[0]
        features = extract_all_features(file_base)
        print(f"features of ({patient_id}) record ({archivo}):", features)
        

features of (patient001) record (s0010_re.dat): {'heart_rate_mean': np.float64(81.79366481231452), 'rr_std': np.float64(0.00921138823427905), 'qrs_duration': np.float64(0.08950980392156861), 'qt_interval': np.float64(0.39886274509803926), 'qtc_bazett': np.float64(0.46566551783780247), 'p_duration': np.float64(0.08486538461538462), 'pr_interval': np.float64(0.14431372549019608)}
features of (patient001) record (s0014lre.dat): {'heart_rate_mean': np.float64(74.9927829500251), 'rr_std': np.float64(0.743219920511232), 'qrs_duration': np.float64(0.09385975609756096), 'qt_interval': np.float64(0.37481097560975607), 'qtc_bazett': np.float64(0.38079783319749), 'p_duration': np.float64(0.08854545454545454), 'pr_interval': np.float64(0.1511951219512195)}
features of (patient001) record (s0016lre.dat): {'heart_rate_mean': np.float64(72.16485075487367), 'rr_std': np.float64(0.6632020373389688), 'qrs_duration': np.float64(0.0997236842105263), 'qt_interval': np.float64(0.3848874172185431), 'qtc_baze

  warn(


features of (patient009) record (s0035_re.dat): {'heart_rate_mean': np.float64(nan), 'rr_std': np.float64(1.726), 'qrs_duration': np.float64(0.09565909090909092), 'qt_interval': np.float64(0.3468409090909091), 'qtc_bazett': np.float64(0.22172431976644377), 'p_duration': np.float64(0.07057777777777778), 'pr_interval': np.float64(0.09518181818181817)}
features of (patient010) record (s0036lre.dat): {'heart_rate_mean': np.float64(89.43564949448842), 'rr_std': np.float64(0.027576461177472797), 'qrs_duration': np.float64(0.03258064516129033), 'qt_interval': np.float64(0.3243058823529412), 'qtc_bazett': np.float64(0.39557631136120497), 'p_duration': np.float64(0.09440935672514619), 'pr_interval': np.float64(0.1842529411764706)}
features of (patient010) record (s0042lre.dat): {'heart_rate_mean': np.float64(78.89557959869504), 'rr_std': np.float64(0.012844369280825836), 'qrs_duration': np.float64(0.04073999999999999), 'qt_interval': np.float64(0.2992133333333334), 'qtc_bazett': np.float64(0.34