# NSRR HRV

In [None]:
import os
import glob
import yasa
import warnings
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from mne.io import read_raw_edf
from neurokit2 import ecg_clean, ecg_peaks
from neurokit2.misc import NeuroKitWarning
from neurokit2 import hrv_time, hrv_frequency
from helper_functions import get_sub_visit_hyp, get_all_edfs

# Define paths
root_dir = '/Volumes/NSRR/'

warnings.filterwarnings("ignore", category=NeuroKitWarning)

In [None]:
# NOTE: The HRV is pre-computed in MESA. This script is only used for CFS.
#
# For each dataset, this cell:
#   1. loads the ECG channel of every EDF recording and its hypnogram,
#   2. finds non-overlapping 5-min windows (10 consecutive 30-s epochs) in
#      which every epoch has a valid score (hypnogram value >= 0),
#   3. detects R-peaks and computes time- and frequency-domain HRV features
#      for each window with NeuroKit2,
#   4. takes the per-recording median across windows (at least 3 required),
#   5. exports one CSV per dataset, with "_ALL"-suffixed lowercase columns.

# Name of the ECG channel to keep in each dataset's EDF files.
ecg_channels = {"mesa": ["EKG"], "cfs": ["ECG1"]}

for dataset in ['cfs']:

    df = []

    # Extract all EDF files
    all_edfs, hypno_dir = get_all_edfs(dataset, root_dir)

    # Channel(s) to keep. A dataset without an entry fails here with a
    # KeyError instead of later with an unbound `include`.
    include = ecg_channels[dataset]

    for eeg_file in tqdm(all_edfs):

        # Extract subject, visit and hypno_file from fname
        sub, visit, hypno_file = get_sub_visit_hyp(eeg_file, dataset, hypno_dir)

        # Check that both the recording and its hypnogram exist
        if not os.path.isfile(eeg_file):
            warnings.warn("File not found %s" % eeg_file)
            continue
        if not os.path.isfile(hypno_file):
            warnings.warn("File not found %s" % hypno_file)
            continue

        # LOAD ECG DATA
        # Only the ECG channel is kept before loading, to limit memory usage.
        # `except Exception` (rather than a bare `except`) lets
        # KeyboardInterrupt / SystemExit stop the batch run.
        try:
            raw = read_raw_edf(eeg_file, preload=False, verbose=0)
            raw.drop_channels(np.setdiff1d(raw.info['ch_names'], include))
            raw.load_data()
        except Exception as err:
            warnings.warn("Could not load %s (%s) | SKIPPING FILE" % (eeg_file, err))
            continue

        sf = raw.info['sfreq']

        # LOAD HYPNOGRAM
        # Skip recordings without a single epoch scored > 0
        # (presumably: no sleep at all -- confirm against the YASA stage coding).
        hypno, _ = yasa.load_profusion_hypno(hypno_file)
        if not len(hypno[hypno > 0]):
            continue

        # Get VALID hypnograms, excluding Motion and Unscored
        # 1 = epoch has a valid score (value >= 0), 0 = otherwise
        hypno_ALL = (hypno >= 0).astype(int)  # 0 / 1

        # Extract EKG data and rescale by 1e6
        # (presumably Volts -> microvolts; MNE's unit convention -- confirm)
        data = raw.get_data()[0] * 1e6

        # Inverse polarity
        data *= -1

        # Find non-overlapping, continuous windows of 5 min made only of
        # validly scored epochs (any stage >= 0, not restricted to NREM/REM).
        # The range bound guarantees that each window is complete.
        idx_start, idx_stop = [], []
        thresh = 10  # 10 epochs = 5 min
        for i in range(0, hypno_ALL.size - thresh + 1, thresh):
            if (hypno_ALL[i:i + thresh] == 1).all():
                idx_start.append(i)
                idx_stop.append(i + thresh)

        if not len(idx_start):
            warnings.warn("No continuous epochs of 5 min were found for subject %s | SKIPPING SUBJECT" % sub)
            continue

        # Convert (start, stop) from 30-s epoch indices to sample indices
        epochs = np.vstack((idx_start, idx_stop)).T
        epochs_sample = (epochs * (30 * sf)).astype(int)

        #################################
        # HRV ALL (all validly scored epochs)
        #################################

        # Number of samples in a complete 5-min window
        n_samples_5min = sf * 5 * 60

        # Loop across 5 min epochs
        hrv_all_epochs = []

        for start, stop in epochs_sample:

            # 1) Epoching
            # Windows running past the end of the recording come out shorter
            # than 5 min (hypnogram longer than the signal) and are dropped.
            ecg_epoch = data[start:stop]
            if ecg_epoch.size != n_samples_5min:
                continue

            # 2) IBI detection
            try:
                ecg_cleaned = ecg_clean(ecg_epoch, sf, method="neurokit")
                signals, info = ecg_peaks(ecg_cleaned, sf, method="neurokit", correct_artifacts=True)
                rpeaks = info['ECG_R_Peaks']
            except Exception:
                # Peak detection failed on this window: skip it
                continue

            # 3) Check RR data
            rpeaks = np.unique(rpeaks)
            # We require at least 175 valid peaks, i.e. an average HR of
            # 35 bpm: (175 / 300) * 60
            if rpeaks.size < 175:
                continue

            # 4) Calculate HRV (time-domain and frequency-domain features)
            try:
                td = hrv_time(rpeaks, sf)
                fd = hrv_frequency(rpeaks, sf)
                hrv_all_epochs.append(pd.concat((td, fd), axis=1))
            except Exception:
                warnings.warn("Error calculating HRV features for %s" % sub)
                continue

        # Average across epochs using the median
        # Only if at least 3 epochs were found to average, otherwise skip subject
        if len(hrv_all_epochs) < 3:
            continue

        hrv_all_epochs = pd.concat(hrv_all_epochs)
        hrv_subj = hrv_all_epochs.median().to_dict()
        hrv_subj['dataset'] = dataset
        hrv_subj['subj'] = sub
        hrv_subj['visit'] = visit
        hrv_subj['n_epochs'] = hrv_all_epochs.shape[0]
        df.append(hrv_subj)

    # Without any row, the DataFrame has no 'dataset'/'subj'/'visit' columns
    # and set_index() would raise a KeyError: warn and skip the export instead.
    if not df:
        warnings.warn("No HRV features could be computed for dataset %s | SKIPPING EXPORT" % dataset)
        continue

    # Create and export dataframe for each study
    df = pd.DataFrame(df).set_index(['dataset', 'subj', 'visit'])
    df.columns = df.columns.str.lower()
    df = df.add_suffix("_ALL")
    # Make sure the output folder exists so the results are not lost at the
    # very end of a long run.
    os.makedirs("output/csv", exist_ok=True)
    df.to_csv("output/csv/df_HRV_ALL_%s.csv" % dataset)