<a href="https://colab.research.google.com/github/mohit-bags/Arrhythmia-Detection/blob/main/RR_Interval_Extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Script for formatting the MIT-BIH Normal Sinus Rhythm Database (NSRDB)
Steps:

    1. Download the ZIP database from https://physionet.org/content/nsrdb/1.0.0/
    2. Open it with a zip-opener (WinZip, 7zip).
    3. Extract the folder of the same name (named 'mit-bih-normal-sinus-rhythm-database-1.0.0') to the same folder as this script.
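    4. Run this script.

Note: the code cells below do not read the manually extracted folder. They download the records with `wget` into `physionet.org/files/nsrdb/1.0.0` and read them from the location stored in `folder_path`.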

##### Credits:
    https://github.com/berndporr/py-ecg-detectors/blob/master/tester_MITDB.py

In [None]:
# Mirror the NSRDB 1.0.0 file listing from PhysioNet into ./physionet.org/files/nsrdb/1.0.0/.
# Flags: -r recurse into links, -N skip files that are not newer than the local copy,
# -c resume partially downloaded files, -np never ascend to the parent directory.
!wget -r -N -c -np https://physionet.org/files/nsrdb/1.0.0/

--2022-03-23 09:06:41--  https://physionet.org/files/nsrdb/1.0.0/
Resolving physionet.org (physionet.org)... 18.18.42.54
Connecting to physionet.org (physionet.org)|18.18.42.54|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: ‘physionet.org/files/nsrdb/1.0.0/index.html’

physionet.org/files     [ <=>                ]   8.58K  --.-KB/s    in 0s      

Last-modified header missing -- time-stamps turned off.
2022-03-23 09:06:42 (214 MB/s) - ‘physionet.org/files/nsrdb/1.0.0/index.html’ saved [8781]

Loading robots.txt; please ignore errors.
--2022-03-23 09:06:42--  https://physionet.org/robots.txt
Reusing existing connection to physionet.org:443.
HTTP request sent, awaiting response... 200 OK
Length: 22 [text/plain]
Saving to: ‘physionet.org/robots.txt’


2022-03-23 09:06:42 (7.31 MB/s) - ‘physionet.org/robots.txt’ saved [22/22]

--2022-03-23 09:06:42--  https://physionet.org/files/nsrdb/1.0.0/16265.atr
Reusing existing connection

In [None]:
# Location of the wget mirror and, inside it, of the NSRDB 1.0.0 record files.
root_path = "/content/physionet.org"
folder_path = root_path + "/files/nsrdb/1.0.0"

In [None]:
!pip install wfdb

Collecting wfdb
  Downloading wfdb-3.4.1-py3-none-any.whl (137 kB)
[K     |████████████████████████████████| 137 kB 5.1 MB/s 
Collecting matplotlib>=3.3.4
  Downloading matplotlib-3.5.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl (11.2 MB)
[K     |████████████████████████████████| 11.2 MB 49.2 MB/s 
Collecting fonttools>=4.22.0
  Downloading fonttools-4.31.2-py3-none-any.whl (899 kB)
[K     |████████████████████████████████| 899 kB 48.4 MB/s 
Installing collected packages: fonttools, matplotlib, wfdb
  Attempting uninstall: matplotlib
    Found existing installation: matplotlib 3.2.2
    Uninstalling matplotlib-3.2.2:
      Successfully uninstalled matplotlib-3.2.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
albumentations 0.1.12 requires imgaug<0.2.7,>=0.2.5, but you have imgaug 0.2.9 which is incompatible.[0m
Successfully installed font

In [None]:
import pandas as pd
import numpy as np
import wfdb
import os

# Full path of every signal file in the download folder.
# - endswith(".dat") matches the extension only (a substring test would also
#   accept names that merely contain ".dat" somewhere).
# - sorted() makes the participant numbering reproducible, because os.listdir
#   returns directory entries in arbitrary order.
data_files = sorted(
    os.path.join(folder_path, file)
    for file in os.listdir(folder_path)
    if file.endswith(".dat")
)


In [None]:
# Display the discovered record paths as a sanity check on the download.
data_files

['/content/physionet.org/files/nsrdb/1.0.0/16265.dat',
 '/content/physionet.org/files/nsrdb/1.0.0/17453.dat',
 '/content/physionet.org/files/nsrdb/1.0.0/16272.dat',
 '/content/physionet.org/files/nsrdb/1.0.0/17052.dat',
 '/content/physionet.org/files/nsrdb/1.0.0/18177.dat',
 '/content/physionet.org/files/nsrdb/1.0.0/16539.dat',
 '/content/physionet.org/files/nsrdb/1.0.0/19093.dat',
 '/content/physionet.org/files/nsrdb/1.0.0/19830.dat',
 '/content/physionet.org/files/nsrdb/1.0.0/19088.dat',
 '/content/physionet.org/files/nsrdb/1.0.0/16795.dat',
 '/content/physionet.org/files/nsrdb/1.0.0/16773.dat',
 '/content/physionet.org/files/nsrdb/1.0.0/18184.dat',
 '/content/physionet.org/files/nsrdb/1.0.0/19090.dat',
 '/content/physionet.org/files/nsrdb/1.0.0/16273.dat',
 '/content/physionet.org/files/nsrdb/1.0.0/16786.dat',
 '/content/physionet.org/files/nsrdb/1.0.0/16420.dat',
 '/content/physionet.org/files/nsrdb/1.0.0/19140.dat',
 '/content/physionet.org/files/nsrdb/1.0.0/16483.dat']

In [None]:
# Build two tables per record and export them as CSV:
#   * ECGs.csv   - one hour of ECG samples per participant
#   * Rpeaks.csv - positions of the beats annotated "N" inside that same hour,
#                  expressed relative to the start of the extracted hour.
#
# 460800 samples = 128 Hz * 3600 s, i.e. one hour of signal.
SAMPLING_RATE = 128
SAMPLES_PER_HOUR = SAMPLING_RATE * 3600
WINDOW_START = SAMPLES_PER_HOUR       # first sample kept (start of the 2nd hour)
WINDOW_STOP = 2 * SAMPLES_PER_HOUR    # first sample NOT kept -> exactly 1 h of data

dfs_ecg = []
dfs_rpeaks = []

for participant, file in enumerate(data_files):

    print("Participant: " + str(participant + 1) + "/" + str(len(data_files)))

    # wfdb is given the record path without the ".dat" extension.
    record_name = os.path.splitext(file)[0]
    participant_id = "MIT-Normal_%.2i" %(participant)

    # Get signal (column 1 of the sample matrix returned by rdsamp)
    data = pd.DataFrame({"ECG": wfdb.rdsamp(record_name)[0][:, 1]})
    data["Participant"] = participant_id
    data["Sample"] = range(len(data))  # index within the full recording, kept after slicing
    data["Sampling_Rate"] = SAMPLING_RATE
    data["Database"] = "MIT-Normal"

    # getting annotations: keep only the positions whose symbol is "N"
    anno = wfdb.rdann(record_name, 'atr')
    anno = anno.sample[np.where(np.array(anno.symbol) == "N")[0]]
    anno = pd.DataFrame({"Rpeaks": anno})
    anno["Participant"] = participant_id
    anno["Sampling_Rate"] = SAMPLING_RATE
    anno["Database"] = "MIT-Normal"

    # Select only 1h of recording (otherwise it's too big).
    # The signal and the annotations use the SAME half-open window
    # [WINDOW_START, WINDOW_STOP), so every rebased peak position is a valid
    # row index (0 .. SAMPLES_PER_HOUR - 1) of the extracted signal.
    data = data[WINDOW_START:WINDOW_STOP].reset_index(drop=True)
    in_window = (anno["Rpeaks"] >= WINDOW_START) & (anno["Rpeaks"] < WINDOW_STOP)
    anno = anno[in_window].reset_index(drop=True)
    anno["Rpeaks"] = anno["Rpeaks"] - WINDOW_START

    # Store with the rest
    dfs_ecg.append(data)
    dfs_rpeaks.append(anno)


# Save. to_csv returns None when given a path, so its result is deliberately not
# assigned: the two lists stay intact and the cell can be re-run safely.
pd.concat(dfs_ecg).to_csv("ECGs.csv", index=False)
pd.concat(dfs_rpeaks).to_csv("Rpeaks.csv", index=False)


# Quick test
#import neurokit2 as nk
#nk.events_plot(anno["Rpeaks"][anno["Rpeaks"] <= 1000], data["ECG"][0:1001])

Participant: 1/18
Participant: 2/18
Participant: 3/18
Participant: 4/18
Participant: 5/18
Participant: 6/18
Participant: 7/18
Participant: 8/18
Participant: 9/18
Participant: 10/18
Participant: 11/18
Participant: 12/18
Participant: 13/18
Participant: 14/18
Participant: 15/18
Participant: 16/18
Participant: 17/18
Participant: 18/18


In [None]:
# df_ecg = pd.concat(dfs_ecg).to_csv("/content/drive/MyDrive/Arrythmia /ECGs.csv", index=False)


In [None]:
# Preview the first participant's table only; displaying the whole list would
# render every per-participant frame in full.
dfs_ecg[0].head()

[          ECG    Participant   Sample  Sampling_Rate    Database
 0      -0.035  MIT-Normal_00   460800            128  MIT-Normal
 1      -0.055  MIT-Normal_00   460801            128  MIT-Normal
 2      -0.035  MIT-Normal_00   460802            128  MIT-Normal
 3      -0.065  MIT-Normal_00   460803            128  MIT-Normal
 4      -0.055  MIT-Normal_00   460804            128  MIT-Normal
 ...       ...            ...      ...            ...         ...
 921595 -0.015  MIT-Normal_00  1382395            128  MIT-Normal
 921596 -0.015  MIT-Normal_00  1382396            128  MIT-Normal
 921597  0.005  MIT-Normal_00  1382397            128  MIT-Normal
 921598  0.035  MIT-Normal_00  1382398            128  MIT-Normal
 921599  0.035  MIT-Normal_00  1382399            128  MIT-Normal
 
 [921600 rows x 5 columns],
           ECG    Participant   Sample  Sampling_Rate    Database
 0       0.025  MIT-Normal_01   460800            128  MIT-Normal
 1       0.025  MIT-Normal_01   460801        

### Pan-Tompkins RR Interval

In [None]:
!pip install hrv-analysis
!pip install neurokit2
!pip3 install py-ecg-detectors

Collecting hrv-analysis
  Downloading hrv_analysis-1.0.4-py3-none-any.whl (28 kB)
Collecting nolds>=0.4.1
  Downloading nolds-0.5.2-py2.py3-none-any.whl (39 kB)
Installing collected packages: nolds, hrv-analysis
Successfully installed hrv-analysis-1.0.4 nolds-0.5.2
Collecting neurokit2
  Downloading neurokit2-0.1.7-py2.py3-none-any.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 5.3 MB/s 
Installing collected packages: neurokit2
Successfully installed neurokit2-0.1.7
Collecting py-ecg-detectors
  Downloading py_ecg_detectors-1.2.0-py3-none-any.whl (24 kB)
Collecting gatspy
  Downloading gatspy-0.3.tar.gz (554 kB)
[K     |████████████████████████████████| 554 kB 7.4 MB/s 
Building wheels for collected packages: gatspy
  Building wheel for gatspy (setup.py) ... [?25l[?25hdone
  Created wheel for gatspy: filename=gatspy-0.3-py3-none-any.whl size=43818 sha256=9160208f9aaca17e1e3a86aee3795fd274c535e9a8d0b2264a26710803a3322a
  Stored in directory: /root/.cache/pip/wheels/1f

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import ModelCheckpoint
import matplotlib.pyplot as plt
from scipy import stats
from sklearn.model_selection import train_test_split
import sklearn
import itertools

import matplotlib as mpl
# The "seaborn" style sheet was renamed "seaborn-v0_8" in Matplotlib 3.6 and the
# old name is not available on recent releases, so use whichever one exists.
seaborn_style = "seaborn" if "seaborn" in mpl.style.available else "seaborn-v0_8"
mpl.style.use(seaborn_style)
plt.rcParams["figure.figsize"] = (13,4)

import matplotlib.pyplot as plt 
from os import listdir 
import requests
import matplotlib.pyplot as plt
from os import listdir, mkdir, system
from os.path import isfile, isdir, join, exists
import json
import os
from tqdm import tqdm
from matplotlib import collections as matcoll
import pywt

In [None]:
from ecgdetectors import Detectors
import neurokit2 as nk
from hrvanalysis import remove_outliers, remove_ectopic_beats, interpolate_nan_values
from hrvanalysis import get_time_domain_features


In [None]:
# Load the ECG export. It is written as "ECGs.csv" relative to the working
# directory, so it is read back from the same relative location instead of a
# hard-coded absolute path.
master_ecg = pd.read_csv("ECGs.csv")
list_of_patients = master_ecg['Participant'].unique()

In [None]:
# Run an R-peak detector over each participant's ECG and collect the detected
# peak positions (sample indices) in one long table: one row per peak.
detector = "pan_tompkins"  # or "stationary_wavelet_transform"
detectors = Detectors(128) #specify sampling rate

rr_frames = []
for i in tqdm(list_of_patients):
    df = master_ecg[master_ecg['Participant'] == i]

    # Plain array: the detectors only need the sample values, not the pandas index.
    unfiltered_ecg = df["ECG"].to_numpy()

    if detector == "pan_tompkins":
        rpeaks = detectors.pan_tompkins_detector(unfiltered_ecg)
    elif detector == "stationary_wavelet_transform":
        rpeaks = detectors.swt_detector(unfiltered_ecg)
    else:
        # Fail loudly instead of silently reusing `rpeaks` from a previous iteration.
        raise ValueError("Unknown detector: " + str(detector))

    # Name the columns here so that the CSV written from this frame carries the
    # headers ('Rpeaks', 'Participant') that are looked up when it is read back.
    rr_frames.append(pd.DataFrame({"Rpeaks": np.asarray(rpeaks), "Participant": i}))

# Concatenate once at the end (DataFrame.append was removed in pandas 2.0 and
# growing a frame inside the loop is quadratic).
if rr_frames:
    data_with_rr = pd.concat(rr_frames)
else:
    data_with_rr = pd.DataFrame(columns=["Rpeaks", "Participant"])
    


 56%|█████▌    | 10/18 [00:37<00:28,  3.58s/it]

In [None]:
# Export the detected peaks. The header aliases guarantee that the file carries
# the column names ('Rpeaks', 'Participant') that are looked up when it is read
# back, whatever the in-memory column labels are at this point.
data_with_rr.to_csv("RR_pan_tompkins.csv", index=False, header=['Rpeaks', 'Participant'])

In [None]:
# Label the two columns of the in-memory peak table: peak position, then participant id.
rr_column_names = ['Rpeaks', 'Participant']
data_with_rr.columns = rr_column_names

In [None]:
# Compare, per participant, RR-interval statistics (successive differences of
# the peak positions, in samples) between the detector output and the
# reference annotations.
# Both files are written relative to the working directory, so they are read
# back from the same relative location.
my_rr = pd.read_csv("RR_pan_tompkins.csv")
ori_rr = pd.read_csv("Rpeaks.csv")


def _rr_stats(peaks):
    """Return (mean, max, min) of the successive differences of `peaks`.

    Returns three NaNs when there are fewer than two peaks, because no
    interval can be formed in that case.
    """
    rr = np.diff(np.asarray(peaks))
    if rr.size == 0:
        return (np.nan, np.nan, np.nan)
    return (rr.mean(), rr.max(), rr.min())


comparison_rows = []
for i in tqdm(list_of_patients):
    mean_r1, max_r1, min_r1 = _rr_stats(my_rr.loc[my_rr['Participant'] == i, 'Rpeaks'])
    mean_r2, max_r2, min_r2 = _rr_stats(ori_rr.loc[ori_rr['Participant'] == i, 'Rpeaks'])
    comparison_rows.append([i, mean_r1, mean_r2, max_r1, max_r2, min_r1, min_r2])

# Build the frame once from the collected rows (keeps numeric dtypes and avoids
# growing the frame row by row).
df_comp = pd.DataFrame(
    comparison_rows,
    columns=['Participant','mean_pan_t','mean_ori','max_pan_t','max_ori','min_pan_t','min_ori'],
)


  

100%|██████████| 18/18 [00:00<00:00, 21.64it/s]


In [None]:
# Write the per-participant comparison table to disk without the row index.
comparison_csv = "compared.csv"
df_comp.to_csv(comparison_csv, index=False)