## Import Packages

In [1]:
!pip install wfdb
!pip install hrv-analysis
!pip install py-ecg-detectors
!pip install neurokit2

Collecting wfdb
  Downloading wfdb-4.1.2-py3-none-any.whl (159 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/160.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m160.0/160.0 kB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: wfdb
Successfully installed wfdb-4.1.2
Collecting hrv-analysis
  Downloading hrv_analysis-1.0.4-py3-none-any.whl (28 kB)
Collecting nolds>=0.4.1 (from hrv-analysis)
  Downloading nolds-0.5.2-py2.py3-none-any.whl (39 kB)
Installing collected packages: nolds, hrv-analysis
Successfully installed hrv-analysis-1.0.4 nolds-0.5.2
Collecting py-ecg-detectors
  Downloading py_ecg_detectors-1.3.4-py3-none-any.whl (25 kB)
Collecting gatspy (from py-ecg-detectors)
  Downloading gatspy-0.3.tar.gz (554 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m554.5/554.5 kB[0m [31m31.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25

In [1]:
import pprint
import os
import datetime

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import wfdb
import hrvanalysis
import ecgdetectors
import hrv
import neurokit2 as nk

## RR interval time series from healthy subjects

https://physionet.org/content/rr-interval-healthy-subjects/1.0.0/

In [3]:
# !wget -r -N -c -np https://physionet.org/files/rr-interval-healthy-subjects/1.0.0/ -P "/content/drive/MyDrive/Projects/HRV/dataset"

In [2]:
healthy_DIR = "/content/drive/MyDrive/Projects/HRV/dataset/physionet.org/files/rr-interval-healthy-subjects/1.0.0/"

In [3]:
healthy_db_clinical_info = pd.read_csv(os.path.join(healthy_DIR, "patient-info.csv")).set_index("File")

healthy_db_clinical_info

Unnamed: 0_level_0,Age (years),Gender
File,Unnamed: 1_level_1,Unnamed: 2_level_1
0,53.00,M
2,17.00,F
3,46.00,F
5,38.00,F
6,32.00,M
...,...,...
4118,5.00,F
4119,0.67,F
4120,4.50,M
4122,5.92,F


In [4]:
start_time = datetime.datetime.now()

healthy_db = {}
for record in os.listdir(healthy_DIR):
    try:
        if ".txt" not in record:
            continue
        print("Record:", record)
        record_path = os.path.join(healthy_DIR, record)
        with open(record_path, "r") as f:
            _rri = f.readlines()
        rri = []
        for i in range(len(_rri)):
            try:
                rri.append(int(_rri[i].strip('\n')))
            except:
                continue

        print("  len(rri)     :", len(rri))
        nni = hrvanalysis.get_nn_intervals(rri)
        time_domain = hrvanalysis.extract_features.get_time_domain_features(nni)
        freq_domain = hrvanalysis.extract_features.get_frequency_domain_features(nni)
        index = record.strip('.txt')
        healthy_db[index] = {
            "id": "rr_interval_healthy_subjects-" + str(index),
            "db_source": "rr-interval-healthy-subjects",
            "age": healthy_db_clinical_info.loc[int(index), "Age (years)"],
            "gender": healthy_db_clinical_info.loc[int(index), "Gender"],
            "fs": "?",
            "signal_length": "?",
            "recording_time_hours": "?",
            "recording_time_seconds": "?",
            "rri_length": len(rri),
            "nni_length": len(nni),
            **time_domain,
            **freq_domain,
            "cardiac_info": "Healthy",
            "risk": 0
        }
    except:
        print("\nERROR (%s)\n" %record)
        continue

print(datetime.datetime.now() - start_time)

Record: 419.txt
  len(rri)     : 89418
0 outlier(s) have been deleted.
739 ectopic beat(s) have been deleted with kamath rule.
Record: 4087.txt
  len(rri)     : 197369
239 outlier(s) have been deleted.
The outlier(s) value(s) are : [273, 242, 243, 273, 258, 258, 211, 297, 289, 289, 297, 297, 289, 297, 297, 297, 289, 164, 266, 211, 289, 203, 227, 227, 250, 125, 156, 297, 297, 234, 281, 274, 281, 281, 281, 132, 289, 282, 273, 289, 274, 266, 273, 297, 297, 289, 266, 297, 282, 266, 289, 290, 289, 281, 289, 297, 289, 297, 282, 289, 297, 297, 289, 289, 289, 289, 289, 297, 289, 289, 297, 289, 289, 289, 297, 281, 297, 297, 296, 297, 289, 297, 289, 289, 281, 297, 289, 297, 297, 297, 297, 297, 296, 266, 219, 282, 218, 290, 297, 297, 297, 296, 297, 297, 297, 297, 297, 297, 297, 297, 297, 290, 296, 289, 289, 297, 281, 273, 296, 297, 289, 297, 297, 273, 297, 297, 297, 297, 296, 297, 289, 297, 289, 289, 297, 297, 297, 297, 297, 296, 289, 297, 297, 296, 281, 297, 297, 282, 281, 289, 297, 289, 258, 29

  series_rr_intervals_cleaned = pd.Series(rr_intervals)


In [5]:
pprint.pprint(healthy_db)

{'000': {'age': 53.0,
         'cardiac_info': 'Healthy',
         'cvnni': 0.17366862718554654,
         'cvsd': 0.051270767561768904,
         'db_source': 'rr-interval-healthy-subjects',
         'fs': '?',
         'gender': 'M',
         'hf': 305.7844963594148,
         'hfnu': 22.975540099553253,
         'id': 'rr_interval_healthy_subjects-000',
         'lf': 1025.128705395353,
         'lf_hf_ratio': 3.3524548091883353,
         'lfnu': 77.02445990044676,
         'max_hr': 153.84615384615384,
         'mean_hr': 65.91003442457263,
         'mean_nni': 943.7062940540271,
         'median_nni': 976.0,
         'min_hr': 39.787798408488065,
         'nni_20': 53059,
         'nni_50': 21558,
         'nni_length': 80441,
         'pnni_20': 65.960964694182,
         'pnni_50': 26.800099453008453,
         'range_nni': 1118.0,
         'recording_time_hours': '?',
         'recording_time_seconds': '?',
         'risk': 0,
         'rmssd': 48.38454604902236,
         'rri_lengt

In [6]:
df_healthy = pd.DataFrame(healthy_db).T
df_healthy

Unnamed: 0,id,db_source,age,gender,fs,signal_length,recording_time_hours,recording_time_seconds,rri_length,nni_length,...,std_hr,lf,hf,lf_hf_ratio,lfnu,hfnu,total_power,vlf,cardiac_info,risk
419,rr_interval_healthy_subjects-419,rr-interval-healthy-subjects,14.0,M,?,?,?,?,89418,89418,...,17.459796,2351.490374,2188.112976,1.074666,51.799468,48.200532,7142.191201,2602.58785,Healthy,0
4087,rr_interval_healthy_subjects-4087,rr-interval-healthy-subjects,0.17,F,?,?,?,?,197369,197369,...,21.458721,356.783198,68.039608,5.243757,83.984003,16.015997,761.568964,336.746158,Healthy,0
4075,rr_interval_healthy_subjects-4075,rr-interval-healthy-subjects,1.67,M,?,?,?,?,100139,100139,...,17.35694,331.230711,152.158099,2.176885,68.522627,31.477373,830.071429,346.682619,Healthy,0
4015,rr_interval_healthy_subjects-4015,rr-interval-healthy-subjects,1.17,F,?,?,?,?,185500,185500,...,14.168282,185.927794,93.161429,1.995759,66.619482,33.380518,439.948043,160.85882,Healthy,0
4064,rr_interval_healthy_subjects-4064,rr-interval-healthy-subjects,0.083,M,?,?,?,?,168950,168950,...,18.408994,1009.124691,580.315238,1.738925,63.489326,36.510674,2134.36547,544.925541,Healthy,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4035,rr_interval_healthy_subjects-4035,rr-interval-healthy-subjects,0.42,F,?,?,?,?,197505,197505,...,14.975721,158.334119,40.187222,3.939912,79.756725,20.243275,396.952689,198.431349,Healthy,0
4046,rr_interval_healthy_subjects-4046,rr-interval-healthy-subjects,2.83,M,?,?,?,?,157528,157528,...,18.334455,482.686632,267.241031,1.806185,64.364426,35.635574,1208.347847,458.420184,Healthy,0
4066,rr_interval_healthy_subjects-4066,rr-interval-healthy-subjects,0.67,M,?,?,?,?,73180,73180,...,14.683994,252.747854,116.131105,2.176401,68.517829,31.482171,579.216754,210.337795,Healthy,0
4059,rr_interval_healthy_subjects-4059,rr-interval-healthy-subjects,0.25,M,?,?,?,?,190081,190081,...,14.564847,226.25215,50.95042,4.440634,81.619788,18.380212,484.145944,206.943374,Healthy,0


In [7]:
df_healthy.to_csv("df_healthy_rri.csv", index=False)
df_healthy.to_excel("df_healthy_rri.xlsx", index=False)

In [8]:
pd.read_csv("df_healthy_rri.csv")

Unnamed: 0,id,db_source,age,gender,fs,signal_length,recording_time_hours,recording_time_seconds,rri_length,nni_length,...,std_hr,lf,hf,lf_hf_ratio,lfnu,hfnu,total_power,vlf,cardiac_info,risk
0,rr_interval_healthy_subjects-419,rr-interval-healthy-subjects,14.000,M,?,?,?,?,89418,89418,...,17.459796,2351.490374,2188.112976,1.074666,51.799468,48.200532,7142.191201,2602.587850,Healthy,0
1,rr_interval_healthy_subjects-4087,rr-interval-healthy-subjects,0.170,F,?,?,?,?,197369,197369,...,21.458721,356.783198,68.039608,5.243757,83.984003,16.015997,761.568964,336.746158,Healthy,0
2,rr_interval_healthy_subjects-4075,rr-interval-healthy-subjects,1.670,M,?,?,?,?,100139,100139,...,17.356940,331.230711,152.158099,2.176885,68.522627,31.477373,830.071429,346.682619,Healthy,0
3,rr_interval_healthy_subjects-4015,rr-interval-healthy-subjects,1.170,F,?,?,?,?,185500,185500,...,14.168282,185.927794,93.161429,1.995759,66.619482,33.380518,439.948043,160.858820,Healthy,0
4,rr_interval_healthy_subjects-4064,rr-interval-healthy-subjects,0.083,M,?,?,?,?,168950,168950,...,18.408994,1009.124691,580.315238,1.738925,63.489326,36.510674,2134.365470,544.925541,Healthy,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
131,rr_interval_healthy_subjects-4035,rr-interval-healthy-subjects,0.420,F,?,?,?,?,197505,197505,...,14.975721,158.334119,40.187222,3.939912,79.756725,20.243275,396.952689,198.431349,Healthy,0
132,rr_interval_healthy_subjects-4046,rr-interval-healthy-subjects,2.830,M,?,?,?,?,157528,157528,...,18.334455,482.686632,267.241031,1.806185,64.364426,35.635574,1208.347847,458.420184,Healthy,0
133,rr_interval_healthy_subjects-4066,rr-interval-healthy-subjects,0.670,M,?,?,?,?,73180,73180,...,14.683994,252.747854,116.131105,2.176401,68.517829,31.482171,579.216754,210.337795,Healthy,0
134,rr_interval_healthy_subjects-4059,rr-interval-healthy-subjects,0.250,M,?,?,?,?,190081,190081,...,14.564847,226.252150,50.950420,4.440634,81.619788,18.380212,484.145944,206.943374,Healthy,0


In [9]:
pd.read_excel("df_healthy_rri.xlsx")

Unnamed: 0,id,db_source,age,gender,fs,signal_length,recording_time_hours,recording_time_seconds,rri_length,nni_length,...,std_hr,lf,hf,lf_hf_ratio,lfnu,hfnu,total_power,vlf,cardiac_info,risk
0,rr_interval_healthy_subjects-419,rr-interval-healthy-subjects,14.000,M,?,?,?,?,89418,89418,...,17.459796,2351.490374,2188.112976,1.074666,51.799468,48.200532,7142.191201,2602.587850,Healthy,0
1,rr_interval_healthy_subjects-4087,rr-interval-healthy-subjects,0.170,F,?,?,?,?,197369,197369,...,21.458721,356.783198,68.039608,5.243757,83.984003,16.015997,761.568964,336.746158,Healthy,0
2,rr_interval_healthy_subjects-4075,rr-interval-healthy-subjects,1.670,M,?,?,?,?,100139,100139,...,17.356940,331.230711,152.158099,2.176885,68.522627,31.477373,830.071429,346.682619,Healthy,0
3,rr_interval_healthy_subjects-4015,rr-interval-healthy-subjects,1.170,F,?,?,?,?,185500,185500,...,14.168282,185.927794,93.161429,1.995759,66.619482,33.380518,439.948043,160.858820,Healthy,0
4,rr_interval_healthy_subjects-4064,rr-interval-healthy-subjects,0.083,M,?,?,?,?,168950,168950,...,18.408994,1009.124691,580.315238,1.738925,63.489326,36.510674,2134.365470,544.925541,Healthy,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
131,rr_interval_healthy_subjects-4035,rr-interval-healthy-subjects,0.420,F,?,?,?,?,197505,197505,...,14.975721,158.334119,40.187222,3.939912,79.756725,20.243275,396.952689,198.431349,Healthy,0
132,rr_interval_healthy_subjects-4046,rr-interval-healthy-subjects,2.830,M,?,?,?,?,157528,157528,...,18.334455,482.686632,267.241031,1.806185,64.364426,35.635574,1208.347847,458.420184,Healthy,0
133,rr_interval_healthy_subjects-4066,rr-interval-healthy-subjects,0.670,M,?,?,?,?,73180,73180,...,14.683994,252.747854,116.131105,2.176401,68.517829,31.482171,579.216754,210.337795,Healthy,0
134,rr_interval_healthy_subjects-4059,rr-interval-healthy-subjects,0.250,M,?,?,?,?,190081,190081,...,14.564847,226.252150,50.950420,4.440634,81.619788,18.380212,484.145944,206.943374,Healthy,0


## Coretan

In [None]:
time_domain = hrvanalysis.extract_features.get_time_domain_features(nni)
freq_domain = hrvanalysis.extract_features.get_frequency_domain_features(nni)

In [None]:
time_domain_keys = [key for key in time_domain.keys()]
print(time_domain_keys)

['mean_nni', 'sdnn', 'sdsd', 'nni_50', 'pnni_50', 'nni_20', 'pnni_20', 'rmssd', 'median_nni', 'range_nni', 'cvsd', 'cvnni', 'mean_hr', 'max_hr', 'min_hr', 'std_hr']


In [None]:
freq_domain_keys = [key for key in freq_domain.keys()]
print(freq_domain_keys)

['lf', 'hf', 'lf_hf_ratio', 'lfnu', 'hfnu', 'total_power', 'vlf']


In [None]:
from hrvanalysis import remove_outliers, remove_ectopic_beats, interpolate_nan_values
from hrvanalysis import get_time_domain_features

 # nn_intervals_list contains integer values of NN-interval
rr_intervals_list = [1000, 1050, 1020, 1080, 1100, 1110, 1060]

# This remove outliers from signal
rr_intervals_without_outliers = remove_outliers(rr_intervals=rr_intervals_list,
                                                low_rri=300, high_rri=2000)
# This replace outliers nan values with linear interpolation
interpolated_rr_intervals = interpolate_nan_values(rr_intervals=rr_intervals_without_outliers,
                                                   interpolation_method="linear")

# This remove ectopic beats from signal
nn_intervals_list = remove_ectopic_beats(rr_intervals=interpolated_rr_intervals, method="malik")

# This replace ectopic beats nan values with linear interpolation
interpolated_nn_intervals = interpolate_nan_values(rr_intervals=nn_intervals_list)

time_domain_features = get_time_domain_features(nn_intervals_list)

time_domain_features

0 outlier(s) have been deleted.
0 ectopic beat(s) have been deleted with malik rule.


{'mean_nni': 1060.0,
 'sdnn': 40.414518843273804,
 'sdsd': 39.58114029012639,
 'nni_50': 1,
 'pnni_50': 16.666666666666668,
 'nni_20': 4,
 'pnni_20': 66.66666666666667,
 'rmssd': 40.824829046386306,
 'median_nni': 1060.0,
 'range_nni': 110,
 'cvsd': 0.038513989666402175,
 'cvnni': 0.03812690456912623,
 'mean_hr': 56.675032042084524,
 'max_hr': 60.0,
 'min_hr': 54.054054054054056,
 'std_hr': 2.019662082524272}