### Importing the Libraries

In [1]:
import os
import csv
import sys
import math
import wfdb
import numpy as np
import pandas as pd
import biosppy.signals.ecg as ecg



### Importing the Dataset

In [2]:
dataset_path = 'dataset/WFDBRecords'
wfdb_records_dir = os.path.join(os.getcwd(), dataset_path)

In [3]:
condition_names_df = pd.read_csv('dataset/ConditionNames_SNOMED-CT.csv')

### Extracted Features Names

In [4]:
features = ['rpeaks', 'filtered', 'templates', 'heart_rates', 'heart_rate_ts','age', 'gender', 'class']
with open('features.csv', 'a', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(features)

### Noise Removal Function

In [5]:
def NLM_1dDarbon(signal, Nvar, P, PatchHW):
    if isinstance(P, int):
        P = P - 1
        Pvec = np.array(range(-P, P + 1))
    else:
        Pvec = P
    signal = np.array(signal)
    N = len(signal)
    denoised_sig = np.empty_like(signal)
    denoised_sig[:] = np.nan
    i_start = PatchHW + 1
    i_end = N - PatchHW
    denoised_sig[i_start: i_end] = 0
    Z = np.zeros_like(signal)
    cnt = np.zeros_like(signal)
    Npatch = 2 * PatchHW + 1
    h = 2 * Npatch * Nvar ** 2
    for idx in Pvec:
        k = np.arange(N)
        kplus = k + idx
        igood = np.where((kplus >= 0) & (kplus < N))
        SSD = np.zeros_like(k)
        SSD[igood] = (signal[k[igood]] - signal[kplus[igood]]) ** 2
        Sdx = np.cumsum(SSD)
        for ii in range(i_start, i_end):
            distance = Sdx[ii + PatchHW] - Sdx[ii - PatchHW - 1]
            w = math.exp(-distance / h)
            t = ii + idx
            if 0 <= t < N:
                denoised_sig[ii] += w * signal[t]
                Z[ii] += w
    denoised_sig = denoised_sig / (Z + sys.float_info.epsilon)
    denoised_sig[0: PatchHW + 1] = signal[0: PatchHW + 1]
    denoised_sig[-PatchHW:] = signal[-PatchHW:]
    return denoised_sig

Nvar = 1
P = 5
PatchHW = 2

### Feature Extraction

In [7]:
record_counter = 0
for level1_folder in os.listdir(wfdb_records_dir):
    level1_folder_path = os.path.join(wfdb_records_dir, level1_folder)
    for subfolder in os.listdir(level1_folder_path):
        subfolder_path = os.path.join(level1_folder_path, subfolder)
        with open(subfolder_path+'/RECORDS', 'r') as file:
            records_list = file.read().splitlines()
        for record_name in records_list:
            record_path = os.path.join(subfolder_path, record_name)
            signal, meta = wfdb.rdsamp(record_path)
            denoised_ecg_signal = np.zeros_like(signal)
            for lead in range(signal.shape[1]):
                denoised_ecg_signal[:, lead] = NLM_1dDarbon(signal[:, lead], Nvar, P, PatchHW)
            rpeaks = []
            filtered = []
            templates = []
            heart_rates = []
            heart_rate_ts = []
            for lead_idx in range(denoised_ecg_signal.shape[1]):
                ecg_object = ecg.ecg(
                    signal=denoised_ecg_signal[:, lead_idx], sampling_rate=500, show=False)
                values = []
                rpeaks.append(np.mean(ecg_object['rpeaks']))
                filtered.append(np.mean(ecg_object['filtered']))
                templates.append(np.mean(ecg_object['templates']))
                heart_rates.append(np.mean(ecg_object['heart_rate']))
                heart_rate_ts.append(np.mean(ecg_object['heart_rate_ts']))
            avg_rpeaks = np.mean(rpeaks)
            values.append(avg_rpeaks)
            avg_filtered = np.mean(filtered)
            values.append(avg_filtered)
            avg_templates = np.mean(templates)
            values.append(avg_templates)
            avg_heart_rate = np.mean(heart_rates)
            values.append(avg_heart_rate)
            avg_heart_rate_ts = np.mean(heart_rate_ts)
            values.append(np.mean(heart_rate_ts))
            classes = meta['comments'][2].split(':')[1].split(',')
            classes = [x.strip() for x in classes]
            age = meta['comments'][0].split(':')[1].strip()
            gender = meta['comments'][1].split(':')[1].strip()
            class_ = ''
            record_counter+=1
            for i in range(len(classes)):
                if int(classes[i]) in condition_names_df.iloc[:, -1].values:
                    class_ = condition_names_df.iloc[condition_names_df[condition_names_df.iloc[:, -1] == int(classes[i])].index[0], 0]
                    break
                else:
                    class_ = None
            values.append(age)
            values.append(gender)
            values.append(class_)
            print(record_counter,values)
            with open('features.csv', 'a', newline='') as file:
                writer = csv.writer(file)
                writer.writerow(values)

/home/saad-bscs/Downloads/ds3/dataset/WFDBRecords/31/313/JS30700.hea
1 [2386.0, 7.105427357601002e-19, 0.018900819675007553, 59.16911390816679, 5.275333333333333, '56', 'Male', 'SB']
/home/saad-bscs/Downloads/ds3/dataset/WFDBRecords/31/313/JS30701.hea
2 [2369.777777777778, 1.2434497875801753e-18, 0.014606982217250875, 58.86751051939105, 5.24625, '45', 'Female', 'SB']
/home/saad-bscs/Downloads/ds3/dataset/WFDBRecords/31/313/JS30702.hea
3 [2517.6666666666665, 7.105427357601002e-19, 0.006054984492492223, 57.470428752746, 5.558, '74', 'Male', 'SB']
/home/saad-bscs/Downloads/ds3/dataset/WFDBRecords/31/313/JS30703.hea
4 [2546.3333333333335, 1.4210854715202004e-18, 0.01115819284639538, 57.30312569023954, 5.618, '54', 'Female', 'SB']
/home/saad-bscs/Downloads/ds3/dataset/WFDBRecords/31/313/JS30704.hea
5 [2432.75, 7.993605777301127e-19, 0.009199711110040984, 56.327813957083514, 5.401142857142857, '80', 'Male', 'SB']
/home/saad-bscs/Downloads/ds3/dataset/WFDBRecords/31/313/JS30705.hea
6 [2443.55