In [1]:
!pip install wfdb
!pip install hrv-analysis
!pip install py-ecg-detectors
!pip install neurokit2

from google.colab import output
output.clear()

print("Packages installed successfully!")

Packages installed successfully!


In [2]:
import pprint
import os
import datetime

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import wfdb
import hrvanalysis
import ecgdetectors
import hrv
import neurokit2 as nk

## Congestive Heart Failure RR Interval Database

https://physionet.org/content/chf2db/1.0.0/

In [3]:
# DOWNLOAD_PATH = "https://physionet.org/content/chf2db/1.0.0/"
# SAVE_DIR = "/content/drive/MyDrive/Projects/HRV/dataset"
# !wget -r -N -c -np $DOWNLOAD_PATH -P $SAVE_DIR

# !wget -r -N -c -np https://physionet.org/files/chf2db/1.0.0/ -P /content/dataset

In [4]:
# CHF2DB_COLAB_DIR = ...
CHF2DB_GDRIVE_DIR = "/content/drive/MyDrive/Projects/HRV/dataset/physionet.org/files/"
!cp -R /content/dataset/physionet.org/files/* $CHF2DB_GDRIVE_DIR

cp: cannot stat '/content/dataset/physionet.org/files/*': No such file or directory


In [5]:
rec = wfdb.get_record_list('chf2db')
print(rec)

['chf201', 'chf202', 'chf203', 'chf204', 'chf205', 'chf206', 'chf207', 'chf208', 'chf209', 'chf210', 'chf211', 'chf212', 'chf213', 'chf214', 'chf215', 'chf216', 'chf217', 'chf218', 'chf219', 'chf220', 'chf221', 'chf222', 'chf223', 'chf224', 'chf225', 'chf226', 'chf227', 'chf228', 'chf229']


In [6]:
# CHF2DB_DIR = "/content/dataset/physionet.org/files/chf2db/1.0.0/"
CHF2DB_DIR = "/content/drive/MyDrive/Projects/HRV/dataset/physionet.org/files/chf2db/1.0.0/"

In [7]:
FS = 128
h = wfdb.rdheader(os.path.join(CHF2DB_DIR, "chf201"))
h.__dict__

{'record_name': 'chf201',
 'n_sig': 0,
 'fs': 128,
 'counter_freq': None,
 'base_counter': None,
 'sig_len': 0,
 'base_time': None,
 'base_date': None,
 'comments': ['Age: 55  Sex: M  NYHA class: III'],
 'sig_name': None,
 'p_signal': None,
 'd_signal': None,
 'e_p_signal': None,
 'e_d_signal': None,
 'file_name': None,
 'fmt': None,
 'samps_per_frame': None,
 'skew': None,
 'byte_offset': None,
 'adc_gain': None,
 'baseline': None,
 'units': None,
 'adc_res': None,
 'adc_zero': None,
 'init_value': None,
 'checksum': None,
 'block_size': None}

In [8]:
ann = wfdb.rdann(os.path.join(CHF2DB_DIR, "chf201"), extension='ecg')
pprint.pprint(ann.__dict__.keys())

dict_keys(['record_name', 'extension', 'sample', 'symbol', 'subtype', 'chan', 'num', 'aux_note', 'fs', 'label_store', 'description', 'custom_labels', 'contained_labels', 'ann_len'])


In [9]:
# ann.__dict__

In [10]:
# for record in wfdb.get_record_list('chf2db'):
#     print(record)
#     ann = wfdb.rdann(os.path.join(CHF2DB_DIR, record), extension='ecg')
#     r_peaks = ann.sample
#     print("  len(sample):", len(r_peaks))

In [11]:
pd.Series(ann.symbol).unique()
# index_wave = list(ann.symbol).index('~')
# index_wave

array(['N', '~', 'A', 'V'], dtype=object)

In [None]:
start_time = datetime.datetime.now()

FS = 128

chf2db = {}
for record in wfdb.get_record_list('chf2db'):
    try:
        output.clear()
        print("Record:", record)
        ann = wfdb.rdann(os.path.join(CHF2DB_DIR, record), extension='ecg')
        record_hea = wfdb.rdheader(os.path.join(CHF2DB_DIR, record))

        r_peaks = np.array(ann.sample, dtype=np.float32)
        annotation = np.array(ann.symbol)

        # counting ectopic beats
        abnormal_beat_num = 0
        for _sym, _count in np.transpose(np.unique(annotation, return_counts=True)):
            if _sym != 'N':
                abnormal_beat_num += int(_count)
            if _sym == 'N':
                normal_beat_num = int(_count)
        normal_beats_ratio = normal_beat_num / (normal_beat_num + abnormal_beat_num)

        # check & replace ectopic beat with np.nan
        for idx, ann in enumerate(annotation):
            if str(ann).upper() != 'N':
                r_peaks[idx] = np.nan
        # interpolate
        r_peaks = hrvanalysis.preprocessing.interpolate_nan_values(r_peaks)

        # retrieve rr_interval from distance between 2 r_peak points
        rri = [r_peaks[i+1] - r_peaks[i] for i in range(len(r_peaks) - 1)]
        # convert the unit from freq_sample to milisecond
        rri = list(np.array(rri) * 1000 / FS)
        print("  len(r_peaks)           :", len(r_peaks))
        print("  len(rri)               :", len(rri))

        rri = hrvanalysis.preprocessing.remove_outliers(
            rri,
            low_rri = 300,
            high_rri = 2000,
        )
        nni = hrvanalysis.preprocessing.interpolate_nan_values(rri)

        # remove nan values if exists
        nni = [val for val in nni if np.isfinite(val)]
        print("  len(nni)               :", len(nni))
        time_domain = hrvanalysis.extract_features.get_time_domain_features(nni)
        print("  time_domain computed!")
        freq_domain = hrvanalysis.extract_features.get_frequency_domain_features(nni)
        print("  freq_domain computed!")
        geom = hrvanalysis.extract_features.get_geometrical_features(nni)
        print("  geometrical features computed!")
        poincare = hrvanalysis.extract_features.get_poincare_plot_features(nni)
        print("  poincare features computed!")
        csi_cvi = hrvanalysis.extract_features.get_csi_cvi_features(nni)
        print("  csi cvi computed!")
        # Function computing the sample entropy of the given data.
        # Must use this function on short term recordings, from 1 minute window.
        if len(nni) <= 200:
            # Ref: https://ieeexplore.ieee.org/document/8295257
            # Sample entropy (SampEn), a popularly used “regularity analysis” tool,
            # has restrictions in handling shortterm segments (largely N ≤ 200) of
            # heart rate variability (HRV) data
            sampen = hrvanalysis.extract_features.get_sampen(nni)
            print("  sampen computed!")
        else:
            print("  nn > 200")
            print("  nn is set to NaN")
            sampen = {"sampen": np.nan}


        chf2db[record] = {
            "id": "chf2db-" + str(record),
            "db_source": "chf2db",
            "age": record_hea.comments[0].split()[1],
            "gender": record_hea.comments[0].split()[3],
            "fs": FS,
            "signal_length": np.nan,
            "recording_time_hours": np.nan,
            "recording_time_seconds": np.nan,
            "normal_beats": normal_beat_num,
            "abnormal_beats": abnormal_beat_num,
            "normal_beats_ratio": normal_beats_ratio,
            "rri_length": len(rri),
            "nni_length": len(nni),
            **time_domain,
            **freq_domain,
            **geom,
            **poincare,
            **csi_cvi,
            **sampen,
            "cardiac_info": " ".join(record_hea.comments[0].split()[4:7]),
            "risk": 1
        }
        print("  RECORD {} SAVED!".format(record))
    except:
        print("\nERROR (%s)\n" %record)
        continue

print(datetime.datetime.now() - start_time)

Record: chf229
  len(r_peaks)           : 126159
  len(rri)               : 126158
1534 outlier(s) have been deleted.
The outlier(s) value(s) are : [296.875, 296.875, 2054.6875, 2054.6875, 2054.6875, 4339.84375, 4339.84375, 257.8125, 257.8125, 2468.75, 2468.75, 14324.21875, 14324.21875, 4261.71875, 4261.71875, 3566.40625, 3566.40625, 2613.28125, 2613.28125, 2445.3125, 2445.3125, 2519.53125, 2519.53125, 3011.71875, 3011.71875, 265.625, 265.625, 2703.125, 2703.125, 2167.96875, 2167.96875, 289.0625, 289.0625, 4652.34375, 4652.34375, 2898.4375, 2898.4375, 2320.3125, 2320.3125, 2320.3125, 2320.3125, 3199.21875, 3199.21875, 10972.65625, 10972.65625, 5468.75, 5468.75, 29460.9375, 29460.9375, 4148.4375, 4148.4375, 16273.4375, 16273.4375, 261.71875, 261.71875, 8734.375, 8734.375, 2007.8125, 2007.8125, 2007.8125, 242.1875, 242.1875, 10007.8125, 10007.8125, 10007.8125, 2140.625, 2140.625, 250.0, 250.0, 2281.25, 2281.25, 2746.09375, 2746.09375, 14640.625, 14640.625, 5460.9375, 5460.9375, 6035.1562

In [None]:
# pprint.pprint(chf2db)

In [None]:
df_chf2db = pd.DataFrame(chf2db).T
df_chf2db

Unnamed: 0,id,db_source,age,gender,fs,signal_length,recording_time_hours,recording_time_seconds,normal_beats,abnormal_beats,normal_beats_ratio,rri_length,nni_length,mean_nni,sdnn,sdsd,nni_50,pnni_50,nni_20,pnni_20,rmssd,median_nni,range_nni,cvsd,cvnni,mean_hr,max_hr,min_hr,std_hr,lf,hf,lf_hf_ratio,lfnu,hfnu,total_power,vlf,triangular_index,tinn,sd1,sd2,ratio_sd2_sd1,csi,cvi,Modified_csi,sampen,cardiac_info,risk
chf201,chf2db-chf201,chf2db,55,M,128,,,,112123,110,0.99902,112232,112232,738.033298,76.121752,21.692007,2313,2.060928,26247,23.386587,21.692007,734.375,910.15625,0.029392,0.103141,82.161982,199.480519,49.548387,8.594888,470.083438,136.09234,3.454151,77.54903,22.45097,1778.57634,1172.400562,22.885808,,15.338633,106.554064,6.946777,6.946777,4.417477,2960.829225,,NYHA class: III,1
chf202,chf2db-chf202,chf2db,59,F,128,,,,109059,468,0.995727,109526,109526,676.160191,92.413169,41.989039,6866,6.268888,34552,31.547135,41.98904,664.0625,1507.8125,0.062099,0.136673,90.358375,199.480519,33.174946,12.45669,979.473459,434.814718,2.252623,69.255579,30.744421,2331.929082,917.640905,22.596658,,29.69087,127.274662,4.28666,4.28666,4.781485,2182.33277,,NYHA class: III,1
chf203,chf2db-chf203,chf2db,68,M,128,,,,98884,1362,0.986413,100245,100245,749.741337,54.108314,37.7568,1428,1.424524,6813,6.796417,37.7568,757.8125,1695.3125,0.05036,0.072169,80.620167,196.923077,30.0,8.854603,433.250158,278.143839,1.557648,60.901576,39.098424,1036.423799,325.029802,10.915179,,26.698223,71.712093,2.686025,2.686025,4.486195,770.481893,,NYHA class: III,1
chf204,chf2db-chf204,chf2db,62,M,128,,,,96320,2561,0.9741,98880,98880,750.324849,60.842073,34.704728,3196,3.232233,22547,22.802617,34.704728,750.0,1535.15625,0.046253,0.081088,80.605518,199.480519,32.680851,8.452262,390.04126,252.693752,1.543533,60.684614,39.315386,1215.684028,572.949016,15.03421,,24.540073,82.469997,3.360626,3.360626,4.510292,1108.603143,,NYHA class: III,1
chf205,chf2db-chf205,chf2db,39,M,128,,,,133482,2774,0.979641,136255,136254,579.681412,41.598262,27.352421,1401,1.028234,14470,10.61995,27.352421,578.125,1605.46875,0.047185,0.071761,103.924801,199.480519,31.47541,6.447751,557.312563,206.019543,2.705144,73.010497,26.989503,1264.851002,501.518896,10.742195,,19.341153,55.558533,2.872555,2.872555,4.235353,638.379848,,NYHA class: III,1
chf206,chf2db-chf206,chf2db,38,F,128,,,,124376,11267,0.916936,135642,135642,554.326831,41.046289,15.614192,526,0.387788,7009,5.167317,15.614192,562.5,1564.941406,0.028168,0.074047,108.900286,164.497992,31.093117,9.095894,97.239002,65.028285,1.495334,59.925204,40.074796,220.825427,58.55814,7.721409,,11.040941,56.988537,5.161565,5.161565,4.002914,1176.600134,,NYHA class: III,1
chf207,chf2db-chf207,chf2db,62,M,128,,,,94562,17168,0.846344,111729,111729,621.319778,83.906504,57.573183,5247,4.696227,14627,13.091615,57.573184,614.583333,1691.40625,0.092663,0.135046,97.993808,199.480519,30.117647,12.298328,3463.477349,999.291739,3.465932,77.608258,22.391742,6220.612084,1757.842995,14.178807,,40.71057,111.459645,2.737855,2.737855,4.860945,1220.641449,,NYHA class: III,1
chf208,chf2db-chf208,chf2db,62,M,128,,,,105427,3851,0.96476,109277,109277,790.89659,54.826941,40.102197,4293,3.928585,21819,19.966873,40.102197,789.0625,1585.9375,0.050705,0.069323,76.281036,187.317073,31.47541,6.583129,503.776357,353.360936,1.425671,58.77429,41.22571,1170.208186,313.070893,11.880518,,28.356665,72.165687,2.544929,2.544929,4.515106,734.626073,,NYHA class: III,1
chf209,chf2db-chf209,chf2db,65,M,128,,,,108808,738,0.993263,109545,109542,617.677431,30.318701,22.830015,369,0.33686,2267,2.069545,22.830015,617.1875,1628.90625,0.036961,0.049085,97.380368,199.480519,31.093117,5.709408,358.547594,116.891054,3.067366,75.414061,24.585939,677.133507,201.694858,5.85473,,16.143332,39.722036,2.460585,2.460585,4.011145,390.95773,,NYHA class: III,1
chf210,chf2db-chf210,chf2db,43,M,128,,,,139696,2328,0.983608,142023,142023,576.219636,24.949494,12.832905,66,0.046472,8118,5.716016,12.832905,578.125,1519.53125,0.022271,0.043299,104.317133,199.480519,32.961373,4.529385,113.655536,26.912084,4.223216,80.854706,19.145294,262.723585,122.155964,6.475902,,9.074266,34.0971,3.75756,3.75756,3.694649,512.487603,,NYHA class: III,1


In [None]:
df_chf2db.to_csv("df_chf2db.csv", index=False)
df_chf2db.to_excel("df_chf2db.xlsx", index=False)

In [None]:
pd.set_option("display.max_columns", None)

In [None]:
pd.read_csv("df_chf2db.csv")

Unnamed: 0,id,db_source,age,gender,fs,signal_length,recording_time_hours,recording_time_seconds,normal_beats,abnormal_beats,normal_beats_ratio,rri_length,nni_length,mean_nni,sdnn,sdsd,nni_50,pnni_50,nni_20,pnni_20,rmssd,median_nni,range_nni,cvsd,cvnni,mean_hr,max_hr,min_hr,std_hr,lf,hf,lf_hf_ratio,lfnu,hfnu,total_power,vlf,triangular_index,tinn,sd1,sd2,ratio_sd2_sd1,csi,cvi,Modified_csi,sampen,cardiac_info,risk
0,chf2db-chf201,chf2db,55,M,128,,,,112123,110,0.99902,112232,112232,738.033298,76.121752,21.692007,2313,2.060928,26247,23.386587,21.692007,734.375,910.15625,0.029392,0.103141,82.161982,199.480519,49.548387,8.594888,470.083438,136.09234,3.454151,77.54903,22.45097,1778.57634,1172.400562,22.885808,,15.338633,106.554064,6.946777,6.946777,4.417477,2960.829225,,NYHA class: III,1
1,chf2db-chf202,chf2db,59,F,128,,,,109059,468,0.995727,109526,109526,676.160191,92.413169,41.989039,6866,6.268888,34552,31.547135,41.98904,664.0625,1507.8125,0.062099,0.136673,90.358375,199.480519,33.174946,12.45669,979.473459,434.814718,2.252623,69.255579,30.744421,2331.929082,917.640905,22.596658,,29.69087,127.274662,4.28666,4.28666,4.781485,2182.33277,,NYHA class: III,1
2,chf2db-chf203,chf2db,68,M,128,,,,98884,1362,0.986413,100245,100245,749.741337,54.108314,37.7568,1428,1.424524,6813,6.796417,37.7568,757.8125,1695.3125,0.05036,0.072169,80.620167,196.923077,30.0,8.854603,433.250158,278.143839,1.557648,60.901576,39.098424,1036.423799,325.029802,10.915179,,26.698223,71.712093,2.686025,2.686025,4.486195,770.481893,,NYHA class: III,1
3,chf2db-chf204,chf2db,62,M,128,,,,96320,2561,0.9741,98880,98880,750.324849,60.842073,34.704728,3196,3.232233,22547,22.802617,34.704728,750.0,1535.15625,0.046253,0.081088,80.605518,199.480519,32.680851,8.452262,390.04126,252.693752,1.543533,60.684614,39.315386,1215.684028,572.949016,15.03421,,24.540073,82.469997,3.360626,3.360626,4.510292,1108.603143,,NYHA class: III,1
4,chf2db-chf205,chf2db,39,M,128,,,,133482,2774,0.979641,136255,136254,579.681412,41.598262,27.352421,1401,1.028234,14470,10.61995,27.352421,578.125,1605.46875,0.047185,0.071761,103.924801,199.480519,31.47541,6.447751,557.312563,206.019543,2.705144,73.010497,26.989503,1264.851002,501.518896,10.742195,,19.341153,55.558533,2.872555,2.872555,4.235353,638.379848,,NYHA class: III,1
5,chf2db-chf206,chf2db,38,F,128,,,,124376,11267,0.916936,135642,135642,554.326831,41.046289,15.614192,526,0.387788,7009,5.167317,15.614192,562.5,1564.941406,0.028168,0.074047,108.900286,164.497992,31.093117,9.095894,97.239002,65.028285,1.495334,59.925204,40.074796,220.825427,58.55814,7.721409,,11.040941,56.988537,5.161565,5.161565,4.002914,1176.600134,,NYHA class: III,1
6,chf2db-chf207,chf2db,62,M,128,,,,94562,17168,0.846344,111729,111729,621.319778,83.906504,57.573183,5247,4.696227,14627,13.091615,57.573184,614.583333,1691.40625,0.092663,0.135046,97.993808,199.480519,30.117647,12.298328,3463.477349,999.291739,3.465932,77.608258,22.391742,6220.612084,1757.842995,14.178807,,40.71057,111.459645,2.737855,2.737855,4.860945,1220.641449,,NYHA class: III,1
7,chf2db-chf208,chf2db,62,M,128,,,,105427,3851,0.96476,109277,109277,790.89659,54.826941,40.102197,4293,3.928585,21819,19.966873,40.102197,789.0625,1585.9375,0.050705,0.069323,76.281036,187.317073,31.47541,6.583129,503.776357,353.360936,1.425671,58.77429,41.22571,1170.208186,313.070893,11.880518,,28.356665,72.165687,2.544929,2.544929,4.515106,734.626073,,NYHA class: III,1
8,chf2db-chf209,chf2db,65,M,128,,,,108808,738,0.993263,109545,109542,617.677431,30.318701,22.830015,369,0.33686,2267,2.069545,22.830015,617.1875,1628.90625,0.036961,0.049085,97.380368,199.480519,31.093117,5.709408,358.547594,116.891054,3.067366,75.414061,24.585939,677.133507,201.694858,5.85473,,16.143332,39.722036,2.460585,2.460585,4.011145,390.95773,,NYHA class: III,1
9,chf2db-chf210,chf2db,43,M,128,,,,139696,2328,0.983608,142023,142023,576.219636,24.949494,12.832905,66,0.046472,8118,5.716016,12.832905,578.125,1519.53125,0.022271,0.043299,104.317133,199.480519,32.961373,4.529385,113.655536,26.912084,4.223216,80.854706,19.145294,262.723585,122.155964,6.475902,,9.074266,34.0971,3.75756,3.75756,3.694649,512.487603,,NYHA class: III,1


In [None]:
pd.read_excel("df_chf2db.xlsx")

Unnamed: 0,id,db_source,age,gender,fs,signal_length,recording_time_hours,recording_time_seconds,normal_beats,abnormal_beats,normal_beats_ratio,rri_length,nni_length,mean_nni,sdnn,sdsd,nni_50,pnni_50,nni_20,pnni_20,rmssd,median_nni,range_nni,cvsd,cvnni,mean_hr,max_hr,min_hr,std_hr,lf,hf,lf_hf_ratio,lfnu,hfnu,total_power,vlf,triangular_index,tinn,sd1,sd2,ratio_sd2_sd1,csi,cvi,Modified_csi,sampen,cardiac_info,risk
0,chf2db-chf201,chf2db,55,M,128,,,,112123,110,0.99902,112232,112232,738.033298,76.121752,21.692007,2313,2.060928,26247,23.386587,21.692007,734.375,910.15625,0.029392,0.103141,82.161982,199.480519,49.548387,8.594888,470.083438,136.09234,3.454151,77.54903,22.45097,1778.57634,1172.400562,22.885808,,15.338633,106.554064,6.946777,6.946777,4.417477,2960.829225,,NYHA class: III,1
1,chf2db-chf202,chf2db,59,F,128,,,,109059,468,0.995727,109526,109526,676.160191,92.413169,41.989039,6866,6.268888,34552,31.547135,41.98904,664.0625,1507.8125,0.062099,0.136673,90.358375,199.480519,33.174946,12.45669,979.473459,434.814718,2.252623,69.255579,30.744421,2331.929082,917.640905,22.596658,,29.69087,127.274662,4.28666,4.28666,4.781485,2182.33277,,NYHA class: III,1
2,chf2db-chf203,chf2db,68,M,128,,,,98884,1362,0.986413,100245,100245,749.741337,54.108314,37.7568,1428,1.424524,6813,6.796417,37.7568,757.8125,1695.3125,0.05036,0.072169,80.620167,196.923077,30.0,8.854603,433.250158,278.143839,1.557648,60.901576,39.098424,1036.423799,325.029802,10.915179,,26.698223,71.712093,2.686025,2.686025,4.486195,770.481893,,NYHA class: III,1
3,chf2db-chf204,chf2db,62,M,128,,,,96320,2561,0.9741,98880,98880,750.324849,60.842073,34.704728,3196,3.232233,22547,22.802617,34.704728,750.0,1535.15625,0.046253,0.081088,80.605518,199.480519,32.680851,8.452262,390.04126,252.693752,1.543533,60.684614,39.315386,1215.684028,572.949016,15.03421,,24.540073,82.469997,3.360626,3.360626,4.510292,1108.603143,,NYHA class: III,1
4,chf2db-chf205,chf2db,39,M,128,,,,133482,2774,0.979641,136255,136254,579.681412,41.598262,27.352421,1401,1.028234,14470,10.61995,27.352421,578.125,1605.46875,0.047185,0.071761,103.924801,199.480519,31.47541,6.447751,557.312563,206.019543,2.705144,73.010497,26.989503,1264.851002,501.518896,10.742195,,19.341153,55.558533,2.872555,2.872555,4.235353,638.379848,,NYHA class: III,1
5,chf2db-chf206,chf2db,38,F,128,,,,124376,11267,0.916936,135642,135642,554.326831,41.046289,15.614192,526,0.387788,7009,5.167317,15.614192,562.5,1564.941406,0.028168,0.074047,108.900286,164.497992,31.093117,9.095894,97.239002,65.028285,1.495334,59.925204,40.074796,220.825427,58.55814,7.721409,,11.040941,56.988537,5.161565,5.161565,4.002914,1176.600134,,NYHA class: III,1
6,chf2db-chf207,chf2db,62,M,128,,,,94562,17168,0.846344,111729,111729,621.319778,83.906504,57.573183,5247,4.696227,14627,13.091615,57.573184,614.583333,1691.40625,0.092663,0.135046,97.993808,199.480519,30.117647,12.298328,3463.477349,999.291739,3.465932,77.608258,22.391742,6220.612084,1757.842995,14.178807,,40.71057,111.459645,2.737855,2.737855,4.860945,1220.641449,,NYHA class: III,1
7,chf2db-chf208,chf2db,62,M,128,,,,105427,3851,0.96476,109277,109277,790.89659,54.826941,40.102197,4293,3.928585,21819,19.966873,40.102197,789.0625,1585.9375,0.050705,0.069323,76.281036,187.317073,31.47541,6.583129,503.776357,353.360936,1.425671,58.77429,41.22571,1170.208186,313.070893,11.880518,,28.356665,72.165687,2.544929,2.544929,4.515106,734.626073,,NYHA class: III,1
8,chf2db-chf209,chf2db,65,M,128,,,,108808,738,0.993263,109545,109542,617.677431,30.318701,22.830015,369,0.33686,2267,2.069545,22.830015,617.1875,1628.90625,0.036961,0.049085,97.380368,199.480519,31.093117,5.709408,358.547594,116.891054,3.067366,75.414061,24.585939,677.133507,201.694858,5.85473,,16.143332,39.722036,2.460585,2.460585,4.011145,390.95773,,NYHA class: III,1
9,chf2db-chf210,chf2db,43,M,128,,,,139696,2328,0.983608,142023,142023,576.219636,24.949494,12.832905,66,0.046472,8118,5.716016,12.832905,578.125,1519.53125,0.022271,0.043299,104.317133,199.480519,32.961373,4.529385,113.655536,26.912084,4.223216,80.854706,19.145294,262.723585,122.155964,6.475902,,9.074266,34.0971,3.75756,3.75756,3.694649,512.487603,,NYHA class: III,1


## Coret2an

In [298]:
def fill_interpolate(RR, t, k, i):
    RR_len = len(RR)
    if k == 0:
        # single nan
        if i == 0:
            # at the beginning
            RR[i] = RR[i+1]
        elif i+1 == RR_len:
            # at the end
            RR[i] = RR[i-1]
    else:
        RR[i] = (RR[t+k+1] - RR[t-1])/(t + k + 1 - (t - 1)) * (i - t + 1) + RR[t-1]
    return RR

def nearest_interpolate(RR):
    RR = np.array(RR)
    nan_indexes = np.argwhere(np.isnan(t_RR))[:, 0]
    print("nan_indexes", nan_indexes)

    consecutive_indexes = []
    ci = []
    for idx, nan_idx in enumerate(nan_indexes):
        if nan_idx == nan_indexes[-1]:
            print("nan_indexes terakhir")
            ci.append(nan_idx)
            print(ci)
            consecutive_indexes.append(ci)
            ci = []
        elif len(ci) == 0 or nan_indexes[idx+1] - nan_idx == 1:
            print("masuk")
            ci.append(nan_idx)
            print(ci)
        elif nan_indexes[idx+1] - nan_idx > 1:
            print("nan_indexes tengah terakhir")
            ci.append(nan_idx)
            print(ci)
            consecutive_indexes.append(ci)
            ci = []
            print("reset")
            print(ci)

    print(consecutive_indexes)
    for ci in consecutive_indexes:
        t = ci[0]
        k = ci[-1] - t
        for i in range(t, t+k+1):
            RR = fill_interpolate(RR, t, k, i)

    # t = nan_indexes[0][0]
    # k = nan_indexes[-1][0] - t
    # print(t, k)
    # for i in nan_indexes:
    #     RR = fill_interpolate(RR, t, k, i[0])

    return RR

In [301]:
t_RR = [100, 200, 150, np.nan, np.nan, np.nan, 350, 100, 100, np.nan, np.nan, 400, 500, np.nan, np.nan]
t_RR = np.array(t_RR)

In [302]:
# first K or last K np.nan still give error
nearest_interpolate(t_RR)

nan_indexes [ 3  4  5  9 10 13 14]
masuk
[3]
masuk
[3, 4]
nan_indexes tengah terakhir
[3, 4, 5]
reset
[]
masuk
[9]
nan_indexes tengah terakhir
[9, 10]
reset
[]
masuk
[13]
nan_indexes terakhir
[13, 14]
[[3, 4, 5], [9, 10], [13, 14]]


IndexError: ignored