In [None]:
!pip install wfdb
!pip install hrv-analysis
!pip install py-ecg-detectors
!pip install neurokit2

from google.colab import output
output.clear()
print("All packages installed successfully!")

All packages installed successfully!


In [None]:
import pprint
import os
import datetime

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import wfdb
import hrvanalysis
import ecgdetectors
import hrv
import neurokit2 as nk

## Congestive Heart Failure RR Interval Database

https://physionet.org/content/chf2db/1.0.0/

In [None]:
# DOWNLOAD_PATH = "https://physionet.org/content/chf2db/1.0.0/"
# SAVE_DIR = "/content/drive/MyDrive/Projects/HRV/dataset"
# !wget -r -N -c -np $DOWNLOAD_PATH -P $SAVE_DIR

# !wget -r -N -c -np https://physionet.org/files/chf2db/1.0.0/ -P /content/dataset

In [None]:
# # CHF2DB_COLAB_DIR = ...
# CHF2DB_GDRIVE_DIR = "/content/drive/MyDrive/Projects/HRV/dataset/physionet.org/files/"
# !cp -R /content/dataset/physionet.org/files/* $CHF2DB_GDRIVE_DIR

In [None]:
# List the record names of the 'chf2db' database as reported by wfdb.
rec = wfdb.get_record_list('chf2db')
print(rec)

['chf201', 'chf202', 'chf203', 'chf204', 'chf205', 'chf206', 'chf207', 'chf208', 'chf209', 'chf210', 'chf211', 'chf212', 'chf213', 'chf214', 'chf215', 'chf216', 'chf217', 'chf218', 'chf219', 'chf220', 'chf221', 'chf222', 'chf223', 'chf224', 'chf225', 'chf226', 'chf227', 'chf228', 'chf229']


In [None]:
# CHF2DB_DIR = "/content/dataset/physionet.org/files/chf2db/1.0.0/"
# Directory holding the chf2db record files. The default is the original
# Google Drive location; set the CHF2DB_DIR environment variable to point the
# notebook at another copy without editing this cell.
# NOTE(review): the default path presumably needs Google Drive to be mounted
# first; no mount call is visible in this notebook.
CHF2DB_DIR = os.environ.get(
    "CHF2DB_DIR",
    "/content/drive/MyDrive/Projects/HRV/dataset/physionet.org/files/chf2db/1.0.0/",
)

In [None]:
# Sampling frequency assumed for this database.
FS = 128
# Read only the header of record chf201 and show every attribute it carries.
h = wfdb.rdheader(os.path.join(CHF2DB_DIR, "chf201"))
vars(h)

{'record_name': 'chf201',
 'n_sig': 0,
 'fs': 128,
 'counter_freq': None,
 'base_counter': None,
 'sig_len': 0,
 'base_time': None,
 'base_date': None,
 'comments': ['Age: 55  Sex: M  NYHA class: III'],
 'sig_name': None,
 'p_signal': None,
 'd_signal': None,
 'e_p_signal': None,
 'e_d_signal': None,
 'file_name': None,
 'fmt': None,
 'samps_per_frame': None,
 'skew': None,
 'byte_offset': None,
 'adc_gain': None,
 'baseline': None,
 'units': None,
 'adc_res': None,
 'adc_zero': None,
 'init_value': None,
 'checksum': None,
 'block_size': None}

In [None]:
# Load the '.ecg' annotation file of record chf201 and list the attribute
# names available on the resulting annotation object.
ann = wfdb.rdann(os.path.join(CHF2DB_DIR, "chf201"), extension='ecg')
pprint.pprint(vars(ann).keys())

dict_keys(['record_name', 'extension', 'sample', 'symbol', 'subtype', 'chan', 'num', 'aux_note', 'fs', 'label_store', 'description', 'custom_labels', 'contained_labels', 'ann_len'])


In [None]:
# ann.__dict__

In [None]:
# Distinct annotation symbols present in the loaded record.
pd.unique(pd.Series(ann.symbol))
# index_wave = list(ann.symbol).index('~')
# index_wave

array(['N', '~', 'A', 'V'], dtype=object)

In [None]:
# Slide a fixed-length, overlapping window over the beat annotations of every
# chf2db record and compute HRV features (hrvanalysis) for each window.
# Result: `chf2db`, a dict mapping a window id to a flat dict of metadata and features.
DATABASE = "chf2db"

start_time = datetime.datetime.now()

# Sampling frequency used to convert annotation sample indices to seconds / ms.
FS = 128

# Window length: MENIT minutes -> SIZE_SEC seconds -> WINDOW_SIZE samples.
MENIT = 5
SIZE_SEC = int(60 * MENIT)
WINDOW_SIZE = SIZE_SEC * FS

# Despite its name END_SEC is expressed in samples (120 minutes * 60 s * FS).
# NOTE(review): windows are started while start < END_SEC, so the last window
# of a record may extend past END_SEC -- confirm that is intended.
END_SEC = int(60 * 120) * FS
# END_SEC = max(r_peaks)

# OVERLAP is the fraction of overlap between consecutive windows, in [0, 1)
OVERLAP = 0.5
if not 0 <= OVERLAP < 1:
    raise ValueError("OVERLAP must be in [0, 1), got {}".format(OVERLAP))

# Distance between consecutive window starts, in samples.
STEP_SIZE = int(WINDOW_SIZE * (1 - OVERLAP))
if STEP_SIZE < 1:
    # A zero step would never advance the window (infinite loop).
    raise ValueError("WINDOW_SIZE * (1 - OVERLAP) must be at least one sample")

chf2db = {}
# (window id, reason) pairs. Collected because output.clear() wipes the
# per-window log, which would otherwise hide every error message.
failed_windows = []

for record in wfdb.get_record_list(DATABASE):
    record_path = os.path.join(CHF2DB_DIR, record)
    ann = wfdb.rdann(record_path, extension='ecg')
    record_hea = wfdb.rdheader(record_path)

    r_peaks = np.array(ann.sample, dtype=np.float32)
    annotation = np.array(ann.symbol)

    # Header comment looks like 'Age: 55  Sex: M  NYHA class: III'.
    # An empty list makes the field lookups below fail inside the try block,
    # so a record without comments is reported instead of aborting the run.
    header_fields = record_hea.comments[0].split() if record_hea.comments else []

    for start_sec in range(0, END_SEC, STEP_SIZE):
        output.clear()
        end_sec = start_sec + WINDOW_SIZE
        window_id = "{0}-{1}-{2}-{3}".format(DATABASE, record, start_sec//FS, end_sec//FS)

        print("New Record           :", record)
        print("  Start Sec          : %d" % int(start_sec//FS))
        print("  End Sec            : %d" % int(end_sec//FS))

        cond = (start_sec <= r_peaks) & (r_peaks < end_sec)
        _r_peak = r_peaks[cond]  # boolean indexing returns a copy, safe to modify
        _annotation = annotation[cond]

        if _annotation.size == 0:
            # Nothing annotated in this window: no ratio or features can be computed.
            failed_windows.append((window_id, "no annotations in window"))
            print("  \nERROR at {}\n".format(window_id))
            continue

        # Count beats from scratch for every window. The same exact-match mask
        # drives both the counters and the ectopic-beat replacement below so
        # the two can never disagree.
        is_normal = _annotation == 'N'
        normal_beat_num = int(np.count_nonzero(is_normal))
        abnormal_beat_num = int(_annotation.size) - normal_beat_num
        normal_beats_ratio = normal_beat_num / (normal_beat_num + abnormal_beat_num)

        try:
            # replace every non-'N' annotation position with np.nan, then interpolate
            _r_peak[~is_normal] = np.nan
            _r_peak = hrvanalysis.preprocessing.interpolate_nan_values(_r_peak)

            # rr interval = distance between 2 consecutive r_peak points,
            # converted from samples to milliseconds
            rri = list(np.diff(_r_peak) * 1000 / FS)
            print("  len(_r_peak)           :", len(_r_peak))
            print("  len(rri)               :", len(rri))

            rri = hrvanalysis.preprocessing.remove_outliers(
                rri,
                low_rri = 300,
                high_rri = 2000,
            )
            nni = hrvanalysis.preprocessing.interpolate_nan_values(rri)

            # remove nan values if exists
            nni = [val for val in nni if np.isfinite(val)]
            print("  len(nni)               :", len(nni))
            time_domain = hrvanalysis.extract_features.get_time_domain_features(nni)
            print("  time_domain computed!")
            freq_domain = hrvanalysis.extract_features.get_frequency_domain_features(nni)
            print("  freq_domain computed!")
            geom = hrvanalysis.extract_features.get_geometrical_features(nni)
            print("  geometrical features computed!")
            poincare = hrvanalysis.extract_features.get_poincare_plot_features(nni)
            print("  poincare features computed!")
            csi_cvi = hrvanalysis.extract_features.get_csi_cvi_features(nni)
            print("  csi cvi computed!")
            # Function computing the sample entropy of the given data.
            # Must use this function on short term recordings, from 1 minute window.
            sampen = hrvanalysis.extract_features.get_sampen(nni)
            print("  sampen computed!")

            chf2db[window_id] = {
                "id": window_id,
                "start_secs": start_sec // FS,
                "end_secs": end_sec // FS,
                "db_source": DATABASE,
                "age": header_fields[1],
                "gender": header_fields[3],
                "fs": FS,
                "signal_length": np.nan,
                "recording_time_hours": np.nan,
                "recording_time_seconds": np.nan,
                "normal_beats": normal_beat_num,
                "abnormal_beats": abnormal_beat_num,
                "normal_beats_ratio": normal_beats_ratio,
                "rri_length": len(rri),
                "nni_length": len(nni),
                **time_domain,
                **freq_domain,
                **geom,
                **poincare,
                **csi_cvi,
                **sampen,
                "cardiac_info": " ".join(header_fields[4:7]),
                "risk": 1
            }
            print("  RECORD {} SAVED!\n".format(window_id))
        except Exception as exc:
            # Best effort: skip the window but remember why. KeyboardInterrupt
            # is deliberately not caught so the run can still be stopped.
            failed_windows.append((window_id, repr(exc)))
            print("  \nERROR at {}\n".format(window_id))
            print("  reason: {!r}".format(exc))

print(datetime.datetime.now() - start_time)

# Summarise skipped windows once, after the last output.clear().
if failed_windows:
    print("{} window(s) skipped:".format(len(failed_windows)))
    for failed_id, reason in failed_windows:
        print("  {}: {}".format(failed_id, reason))

New Record           : chf229
  Start Sec          : 7050
  End Sec            : 7350
  len(_r_peak)           : 453
  len(rri)               : 452
6 outlier(s) have been deleted.
The outlier(s) value(s) are : [2503.90625, 2503.90625, 2789.0625, 2789.0625, 5539.0625, 5539.0625]
  len(nni)               : 452
  time_domain computed!
  freq_domain computed!
  geometrical features computed!
  poincare features computed!
  csi cvi computed!
  sampen computed!
  RECORD chf2db-chf229-7050-7350 SAVED!

0:03:26.096599


In [None]:
# pprint.pprint(chf2db)

In [None]:
# One row per window id, one column per metadata/feature key.
# from_dict(orient="index") builds the frame row-wise directly, so every
# column keeps its own inferred dtype. Transposing pd.DataFrame(chf2db)
# instead leaves every column as dtype=object.
df_chf2db = pd.DataFrame.from_dict(chf2db, orient="index")
df_chf2db

Unnamed: 0,id,start_secs,end_secs,db_source,age,gender,fs,signal_length,recording_time_hours,recording_time_seconds,...,tinn,sd1,sd2,ratio_sd2_sd1,csi,cvi,Modified_csi,sampen,cardiac_info,risk
chf2db-chf201-0-300,chf2db-chf201-0-300,0,300,chf2db,55,M,128,,,,...,,18.171992,94.332966,5.191119,5.191119,4.438186,1958.77451,1.246316,NYHA class: III,1
chf2db-chf201-150-450,chf2db-chf201-150-450,150,450,chf2db,55,M,128,,,,...,,19.958477,124.257946,6.225823,6.225823,4.598572,3094.431998,0.561756,NYHA class: III,1
chf2db-chf201-300-600,chf2db-chf201-300-600,300,600,chf2db,55,M,128,,,,...,,17.33371,94.146,5.431382,5.431382,4.416813,2045.371561,0.834986,NYHA class: III,1
chf2db-chf201-450-750,chf2db-chf201-450-750,450,750,chf2db,55,M,128,,,,...,,13.399354,93.62172,6.987032,6.987032,4.30258,2616.551895,0.859493,NYHA class: III,1
chf2db-chf201-600-900,chf2db-chf201-600-900,600,900,chf2db,55,M,128,,,,...,,16.660719,102.970607,6.180442,6.180442,4.438527,2545.615406,0.769901,NYHA class: III,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
chf2db-chf229-6450-6750,chf2db-chf229-6450-6750,6450,6750,chf2db,58,?,128,,,,...,,7.871407,34.63226,4.399755,4.399755,3.639653,609.4938,1.402189,NYHA class: III,1
chf2db-chf229-6600-6900,chf2db-chf229-6600-6900,6600,6900,chf2db,58,?,128,,,,...,,9.959363,60.031112,6.027606,6.027606,3.980728,1447.375543,0.582688,NYHA class: III,1
chf2db-chf229-6750-7050,chf2db-chf229-6750-7050,6750,7050,chf2db,58,?,128,,,,...,,16.940619,56.689608,3.346372,3.346372,4.186553,758.817994,0.864019,NYHA class: III,1
chf2db-chf229-6900-7200,chf2db-chf229-6900-7200,6900,7200,chf2db,58,?,128,,,,...,,22.023742,57.645395,2.617421,2.617421,4.307776,603.528959,0.864587,NYHA class: III,1


In [None]:
# Persist the feature table in both formats. The index is dropped because the
# window id is already stored in the "id" column.
df_chf2db.to_csv(path_or_buf="df_chf2db.csv", index=False)
df_chf2db.to_excel(excel_writer="df_chf2db.xlsx", index=False)

In [None]:
# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

In [None]:
# Read the exported CSV back in and display it as a check on the export.
pd.read_csv("df_chf2db.csv")

Unnamed: 0,id,start_secs,end_secs,db_source,age,gender,fs,signal_length,recording_time_hours,recording_time_seconds,normal_beats,abnormal_beats,normal_beats_ratio,rri_length,nni_length,mean_nni,sdnn,sdsd,nni_50,pnni_50,nni_20,pnni_20,rmssd,median_nni,range_nni,cvsd,cvnni,mean_hr,max_hr,min_hr,std_hr,lf,hf,lf_hf_ratio,lfnu,hfnu,total_power,vlf,triangular_index,tinn,sd1,sd2,ratio_sd2_sd1,csi,cvi,Modified_csi,sampen,cardiac_info,risk
0,chf2db-chf201-0-300,0,300,chf2db,55,M,128,,,,384,0,1.000000,383,383,766.114556,67.929853,25.665417,14,3.664921,159,41.623037,25.666403,757.8125,343.75000,0.033502,0.088668,78.914734,93.658537,60.952381,6.797715,630.191978,214.774049,2.934209,74.581931,25.418069,3026.539198,2181.573171,15.958333,,18.171992,94.332966,5.191119,5.191119,4.438186,1958.774510,1.246316,NYHA class: III,1
1,chf2db-chf201-150-450,150,450,chf2db,55,M,128,,,,413,1,0.997585,413,413,724.916768,88.989825,28.191273,8,1.941748,103,25.000000,28.193340,718.7500,683.59375,0.038892,0.122759,84.174602,199.480519,60.952381,12.401861,381.593560,162.018998,2.355240,70.195869,29.804131,2460.030190,1916.417632,15.884615,,19.958477,124.257946,6.225823,6.225823,4.598572,3094.431998,0.561756,NYHA class: III,1
2,chf2db-chf201-300-600,300,600,chf2db,55,M,128,,,,456,1,0.997812,456,456,656.798246,67.690202,24.486615,2,0.439560,88,19.340659,24.493988,656.2500,558.59375,0.037293,0.103061,92.440962,199.480519,69.818182,11.223436,325.763601,111.793355,2.913980,74.450559,25.549441,1137.888960,700.332003,15.724138,,17.333710,94.146000,5.431382,5.431382,4.416813,2045.371561,0.834986,NYHA class: III,1
3,chf2db-chf201-450-750,450,750,chf2db,55,M,128,,,,457,0,1.000000,456,456,656.678317,66.875142,18.928713,7,1.538462,111,24.395604,18.928721,648.4375,382.81250,0.028825,0.101839,92.248818,109.714286,64.537815,8.747404,750.100002,136.731204,5.485946,84.582049,15.417951,2037.571914,1150.740709,14.250000,,13.399354,93.621720,6.987032,6.987032,4.302580,2616.551895,0.859493,NYHA class: III,1
4,chf2db-chf201-600-900,600,900,chf2db,55,M,128,,,,447,1,0.997768,447,447,669.934983,73.758137,23.535386,16,3.587444,144,32.286996,23.539460,664.0625,382.81250,0.035137,0.110097,90.583912,109.714286,64.537815,9.384220,613.239473,113.597229,5.398366,84.371011,15.628989,2340.402865,1613.566162,13.147059,,16.660719,102.970607,6.180442,6.180442,4.438527,2545.615406,0.769901,NYHA class: III,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1330,chf2db-chf229-6450-6750,6450,6750,chf2db,58,?,128,,,,523,3,0.994297,525,525,557.127976,25.113268,11.121223,0,0.000000,34,6.488550,11.121223,554.6875,140.62500,0.019962,0.045076,107.904586,116.363636,91.428571,4.660659,210.415347,16.150012,13.028805,92.871809,7.128191,498.584737,272.019379,6.730769,,7.871407,34.632260,4.399755,4.399755,3.639653,609.493800,1.402189,NYHA class: III,1
1331,chf2db-chf229-6600-6900,6600,6900,chf2db,58,?,128,,,,504,3,0.994083,506,506,584.563365,43.028615,14.070714,1,0.198020,81,16.039604,14.072380,570.3125,179.68750,0.024073,0.073608,103.176968,116.363636,86.292135,7.308964,117.211908,24.236592,4.836155,82.865430,17.134570,522.204767,380.756266,9.730769,,9.959363,60.031112,6.027606,6.027606,3.980728,1447.375543,0.582688,NYHA class: III,1
1332,chf2db-chf229-6750-7050,6750,7050,chf2db,58,?,128,,,,463,8,0.983015,470,470,622.190824,41.837162,23.932099,5,1.066098,105,22.388060,23.932470,632.8125,277.34375,0.038465,0.067242,96.877818,114.626866,74.926829,6.650656,356.503152,193.732462,1.840183,64.790999,35.209001,1123.165779,572.930165,8.545455,,16.940619,56.689608,3.346372,3.346372,4.186553,758.817994,0.864019,NYHA class: III,1
1333,chf2db-chf229-6900-7200,6900,7200,chf2db,58,?,128,,,,438,10,0.977679,447,447,622.221057,43.635059,31.111338,13,2.914798,98,21.973094,31.111383,625.0000,480.46875,0.050001,0.070128,97.004202,187.317073,74.926829,8.636140,593.366560,442.117503,1.342101,57.303302,42.696698,1459.118722,423.634658,9.717391,,22.023742,57.645395,2.617421,2.617421,4.307776,603.528959,0.864587,NYHA class: III,1


In [None]:
# Read the exported Excel workbook back in and display it as a check on the export.
pd.read_excel("df_chf2db.xlsx")

Unnamed: 0,id,start_secs,end_secs,db_source,age,gender,fs,signal_length,recording_time_hours,recording_time_seconds,normal_beats,abnormal_beats,normal_beats_ratio,rri_length,nni_length,mean_nni,sdnn,sdsd,nni_50,pnni_50,nni_20,pnni_20,rmssd,median_nni,range_nni,cvsd,cvnni,mean_hr,max_hr,min_hr,std_hr,lf,hf,lf_hf_ratio,lfnu,hfnu,total_power,vlf,triangular_index,tinn,sd1,sd2,ratio_sd2_sd1,csi,cvi,Modified_csi,sampen,cardiac_info,risk
0,chf2db-chf201-0-300,0,300,chf2db,55,M,128,,,,384,0,1.000000,383,383,766.114556,67.929853,25.665417,14,3.664921,159,41.623037,25.666403,757.8125,343.75000,0.033502,0.088668,78.914734,93.658537,60.952381,6.797715,630.191978,214.774049,2.934209,74.581931,25.418069,3026.539198,2181.573171,15.958333,,18.171992,94.332966,5.191119,5.191119,4.438186,1958.774510,1.246316,NYHA class: III,1
1,chf2db-chf201-150-450,150,450,chf2db,55,M,128,,,,413,1,0.997585,413,413,724.916768,88.989825,28.191273,8,1.941748,103,25.000000,28.193340,718.7500,683.59375,0.038892,0.122759,84.174602,199.480519,60.952381,12.401861,381.593560,162.018998,2.355240,70.195869,29.804131,2460.030190,1916.417632,15.884615,,19.958477,124.257946,6.225823,6.225823,4.598572,3094.431998,0.561756,NYHA class: III,1
2,chf2db-chf201-300-600,300,600,chf2db,55,M,128,,,,456,1,0.997812,456,456,656.798246,67.690202,24.486615,2,0.439560,88,19.340659,24.493988,656.2500,558.59375,0.037293,0.103061,92.440962,199.480519,69.818182,11.223436,325.763601,111.793355,2.913980,74.450559,25.549441,1137.888960,700.332003,15.724138,,17.333710,94.146000,5.431382,5.431382,4.416813,2045.371561,0.834986,NYHA class: III,1
3,chf2db-chf201-450-750,450,750,chf2db,55,M,128,,,,457,0,1.000000,456,456,656.678317,66.875142,18.928713,7,1.538462,111,24.395604,18.928721,648.4375,382.81250,0.028825,0.101839,92.248818,109.714286,64.537815,8.747404,750.100002,136.731204,5.485946,84.582049,15.417951,2037.571914,1150.740709,14.250000,,13.399354,93.621720,6.987032,6.987032,4.302580,2616.551895,0.859493,NYHA class: III,1
4,chf2db-chf201-600-900,600,900,chf2db,55,M,128,,,,447,1,0.997768,447,447,669.934983,73.758137,23.535386,16,3.587444,144,32.286996,23.539460,664.0625,382.81250,0.035137,0.110097,90.583912,109.714286,64.537815,9.384220,613.239473,113.597229,5.398366,84.371011,15.628989,2340.402865,1613.566162,13.147059,,16.660719,102.970607,6.180442,6.180442,4.438527,2545.615406,0.769901,NYHA class: III,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1330,chf2db-chf229-6450-6750,6450,6750,chf2db,58,?,128,,,,523,3,0.994297,525,525,557.127976,25.113268,11.121223,0,0.000000,34,6.488550,11.121223,554.6875,140.62500,0.019962,0.045076,107.904586,116.363636,91.428571,4.660659,210.415347,16.150012,13.028805,92.871809,7.128191,498.584737,272.019379,6.730769,,7.871407,34.632260,4.399755,4.399755,3.639653,609.493800,1.402189,NYHA class: III,1
1331,chf2db-chf229-6600-6900,6600,6900,chf2db,58,?,128,,,,504,3,0.994083,506,506,584.563365,43.028615,14.070714,1,0.198020,81,16.039604,14.072380,570.3125,179.68750,0.024073,0.073608,103.176968,116.363636,86.292135,7.308964,117.211908,24.236592,4.836155,82.865430,17.134570,522.204767,380.756266,9.730769,,9.959363,60.031112,6.027606,6.027606,3.980728,1447.375543,0.582688,NYHA class: III,1
1332,chf2db-chf229-6750-7050,6750,7050,chf2db,58,?,128,,,,463,8,0.983015,470,470,622.190824,41.837162,23.932099,5,1.066098,105,22.388060,23.932470,632.8125,277.34375,0.038465,0.067242,96.877818,114.626866,74.926829,6.650656,356.503152,193.732462,1.840183,64.790999,35.209001,1123.165779,572.930165,8.545455,,16.940619,56.689608,3.346372,3.346372,4.186553,758.817994,0.864019,NYHA class: III,1
1333,chf2db-chf229-6900-7200,6900,7200,chf2db,58,?,128,,,,438,10,0.977679,447,447,622.221057,43.635059,31.111338,13,2.914798,98,21.973094,31.111383,625.0000,480.46875,0.050001,0.070128,97.004202,187.317073,74.926829,8.636140,593.366560,442.117503,1.342101,57.303302,42.696698,1459.118722,423.634658,9.717391,,22.023742,57.645395,2.617421,2.617421,4.307776,603.528959,0.864587,NYHA class: III,1
