# Extract R-peaks from ECG files

In [1]:
# Reload imported modules automatically before each cell execution, so that edits
# to the project code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
from pathlib import Path

import pandas as pd
from tqdm.auto import tqdm

from speech_study.process_ecg import process_ecg

In [3]:
# configure user
user = "jonas"  # supported values: "jonas", "mitchel"

# Root folder of the study data; its location differs per user.
if user.lower() == "jonas":
    BASE_PATH = Path("/users/jonvdrdo/jonas/data/aaa_contextaware/raw/uz_study/")
elif user.lower() == "mitchel":
    BASE_PATH = Path("D:/Data/EEG_Study_1/")
else:
    # Fail early with a clear message instead of a NameError on BASE_PATH below.
    raise ValueError(f"Unknown user {user!r}; expected 'jonas' or 'mitchel'")

# Folder that holds the aligned data.
DATA_PATH = BASE_PATH.joinpath("aligned_data")

# Folder in which the RR-interval files are stored, one sub-folder per user.
eeg_feat_stat_dir = DATA_PATH.joinpath("EEG1_study_feat_stats")

For more information about the ECG processing, see [this Python script](../speech_study/process_ecg.py).

## Process the `edf_aligned` files

In [4]:
# First, process the aligned EDF files.
# For every aligned ECG parquet file, run `process_ecg` on the ECG column and write
# the result to (1) the per-user feature-stat folder and (2) the folder of the ECG file.
for pqt in tqdm(list(BASE_PATH.rglob("*/edf_aligned/ecg*.parquet"))):
    print(pqt)

    # Read & process the ECG file
    df_parquet = (
        pd.read_parquet(pqt)
        .set_index("timestamp", drop=True)
        .rename(columns={"ECG_Raw": "ECG"})
    )
    df_rr = process_ecg(df_parquet["ECG"])

    # Both output files share the same content and file name, so compute them once.
    # The name keeps the last three `_`-separated parts of the ECG file name.
    df_rr_flat = df_rr.reset_index()
    rr_file_name = f"rr_intervals_{'_'.join(pqt.name.split('_')[-3:])}"

    # save the file in feat stat dir
    # (the user folder is two levels above the ECG file: <user>/edf_aligned/<file>)
    eeg_feat_stat_dir_user = eeg_feat_stat_dir.joinpath(pqt.parent.parent.name)
    # `parents=True` also creates a missing feat-stat root folder;
    # `exist_ok=True` makes re-running the cell safe.
    eeg_feat_stat_dir_user.mkdir(parents=True, exist_ok=True)
    df_rr_flat.to_parquet(
        eeg_feat_stat_dir_user.joinpath(rr_file_name),
        engine="fastparquet",
    )

    # save the file in the same folder as the ECG_file resides within
    df_rr_flat.to_parquet(
        pqt.parent.joinpath(rr_file_name),
        engine="fastparquet",
    )

# Remove the loop temporaries from the notebook namespace. A plain `del` would raise
# a NameError when no file matched the glob, as the names would never be bound.
for _tmp_name in (
    "df_parquet",
    "pqt",
    "eeg_feat_stat_dir_user",
    "df_rr",
    "df_rr_flat",
    "rr_file_name",
):
    globals().pop(_tmp_name, None)
del _tmp_name

  0%|          | 0/59 [00:00<?, ?it/s]

/users/jonvdrdo/jonas/data/aaa_contextaware/raw/uz_study/aligned_data/63/edf_aligned/ecg_2020_03_11.parquet
/users/jonvdrdo/jonas/data/aaa_contextaware/raw/uz_study/aligned_data/17/edf_aligned/ecg_2020_02_10.parquet
/users/jonvdrdo/jonas/data/aaa_contextaware/raw/uz_study/aligned_data/54/edf_aligned/ecg_2020_03_04.parquet
/users/jonvdrdo/jonas/data/aaa_contextaware/raw/uz_study/aligned_data/69/edf_aligned/ecg_2020_07_07.parquet
/users/jonvdrdo/jonas/data/aaa_contextaware/raw/uz_study/aligned_data/34/edf_aligned/ecg_2020_02_20.parquet
/users/jonvdrdo/jonas/data/aaa_contextaware/raw/uz_study/aligned_data/68/edf_aligned/ecg_2020_07_06.parquet
/users/jonvdrdo/jonas/data/aaa_contextaware/raw/uz_study/aligned_data/53/edf_aligned/ecg_2020_03_04.parquet
/users/jonvdrdo/jonas/data/aaa_contextaware/raw/uz_study/aligned_data/22/edf_aligned/ecg_2020_02_12.parquet
/users/jonvdrdo/jonas/data/aaa_contextaware/raw/uz_study/aligned_data/23/edf_aligned/ecg_2020_02_13.parquet
/users/jonvdrdo/jonas/data/a

## Process the non-aligned `edf` files

In [5]:
# First collect the users for which an aligned parquet file exists
# (the user folder is two levels above the ECG file: <user>/edf_aligned/<file>).
# A set gives constant-time membership checks in the loop below.
aligned_pqt_users = {
    aligned_pqt.parent.parent.name
    for aligned_pqt in BASE_PATH.rglob("*/edf_aligned/ecg*.parquet")
}


for pqt in tqdm(list(BASE_PATH.rglob("*/edf/ecg*.parquet"))):
    # As aligned data takes precedence over non-aligned data, we will only process
    # non-aligned `edf`-files when we do not have an aligned equivalent
    if pqt.parent.parent.name in aligned_pqt_users:
        continue
    print(pqt)

    # Read & process the ECG file
    df_parquet = (
        pd.read_parquet(pqt)
        .set_index("timestamp", drop=True)
        .rename(columns={"ECG_Raw": "ECG"})
    )
    # NOTE: `df_rr` is intentionally left in the namespace; the next cell displays it.
    df_rr = process_ecg(df_parquet["ECG"])

    # Both output files share the same content and file name, so compute them once.
    # The name keeps the last three `_`-separated parts of the ECG file name.
    df_rr_flat = df_rr.reset_index()
    rr_file_name = f"rr_intervals_{'_'.join(pqt.name.split('_')[-3:])}"

    # save the file in feat_stat_dir
    eeg_feat_stat_dir_user = eeg_feat_stat_dir.joinpath(pqt.parent.parent.name)
    # `parents=True` also creates a missing feat-stat root folder;
    # `exist_ok=True` makes re-running the cell safe.
    eeg_feat_stat_dir_user.mkdir(parents=True, exist_ok=True)
    df_rr_flat.to_parquet(
        eeg_feat_stat_dir_user.joinpath(rr_file_name),
        engine="fastparquet",
    )

    # save the file in the same folder as the ECG_file resides within
    df_rr_flat.to_parquet(
        pqt.parent.joinpath(rr_file_name),
        engine="fastparquet",
    )


  0%|          | 0/75 [00:00<?, ?it/s]

/users/jonvdrdo/jonas/data/aaa_contextaware/raw/uz_study/aligned_data/4/edf/ecg_2020_01_27.parquet
/users/jonvdrdo/jonas/data/aaa_contextaware/raw/uz_study/aligned_data/13/edf/ecg_2020_02_04.parquet
/users/jonvdrdo/jonas/data/aaa_contextaware/raw/uz_study/aligned_data/20/edf/ecg_2020_02_12.parquet
/users/jonvdrdo/jonas/data/aaa_contextaware/raw/uz_study/aligned_data/11/edf/ecg_2020_02_03.parquet
/users/jonvdrdo/jonas/data/aaa_contextaware/raw/uz_study/aligned_data/37/edf/ecg_2020_02_21.parquet
/users/jonvdrdo/jonas/data/aaa_contextaware/raw/uz_study/aligned_data/8/edf/ecg_2020_01_31.parquet
/users/jonvdrdo/jonas/data/aaa_contextaware/raw/uz_study/aligned_data/9/edf/ecg_2020_01_31.parquet
/users/jonvdrdo/jonas/data/aaa_contextaware/raw/uz_study/aligned_data/15/edf/ecg_2020_02_06.parquet
/users/jonvdrdo/jonas/data/aaa_contextaware/raw/uz_study/aligned_data/14/edf/ecg_2020_02_05.parquet
/users/jonvdrdo/jonas/data/aaa_contextaware/raw/uz_study/aligned_data/10/edf/ecg_2020_02_03.parquet
/us

In [6]:
# Inspect the result left over from the last file processed by the loop above.
df_rr

Unnamed: 0_level_0,r_peak_agreement,RR_interval_ms,HRV_ms
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-02-04 10:05:25.041000+01:00,0.1,,
2020-02-04 10:05:25.843000+01:00,1.0,,
2020-02-04 10:05:26.695000+01:00,1.0,852.0,
2020-02-04 10:05:27.582000+01:00,1.0,887.0,35.0
2020-02-04 10:05:28.435000+01:00,1.0,853.0,-34.0
...,...,...,...
2020-02-04 11:27:41.591000+01:00,1.0,765.0,50.0
2020-02-04 11:27:42.318000+01:00,1.0,727.0,-38.0
2020-02-04 11:27:43.029000+01:00,1.0,711.0,-16.0
2020-02-04 11:27:43.751000+01:00,1.0,722.0,11.0
