# ECG Data Processing – Per Subject

This notebook performs ECG data processing per subject. This includes R-peak detection and outlier removal, followed by the computation of continuous HRV data. The processed heart rate, R-peak, and continuous HRV data are then exported as Excel files in a `processed` subfolder of each subject folder.

## Setup and Helper Functions

In [None]:
import re
from pathlib import Path

import pandas as pd
import numpy as np
import pingouin as pg

import matplotlib.pyplot as plt
import seaborn as sns

from tqdm.auto import tqdm

import biopsykit as bp

from cft_analysis.io import load_ecg_raw_data_folder
from cft_analysis.hrv import hrv_continuous


%load_ext autoreload
%autoreload 2
%matplotlib widget

In [None]:
# Start from a clean slate: discard any figures that are still open.
plt.close("all")

# Colour palette provided by BioPsyKit; used as the seaborn theme palette below.
palette = bp.colors.fau_palette
sns.set_theme(context="notebook", style="ticks", palette=palette)

# Global matplotlib defaults applied to every figure created in this notebook.
plt.rcParams.update(
    {
        "figure.figsize": (10, 5),
        "pdf.fonttype": 42,  # font type used when figures are saved as PDF
        "mathtext.default": "regular",
    }
)

# Last expression of the cell: show the palette as cell output.
palette

## Data Import

In [None]:
# Root folder of the study data (relative to this notebook) and its ECG subfolder.
data_path = Path("../../../../Data")
ecg_path = data_path.joinpath("ecg")

# One entry per subject folder, consumed by the processing loop further down (the notebook
# previously never defined this name, so the loop failed on a fresh kernel).
# A folder counts as a subject folder if it sits directly below `ecg_path` and contains the
# "raw" subfolder from which the processing loop loads the recordings.
# NOTE(review): assumes subject folders live directly below `ecg_path` -- confirm the layout.
subject_dirs = sorted(
    raw_dir.parent for raw_dir in ecg_path.glob("*/raw") if raw_dir.is_dir()
)

# Flag indicating whether to ignore already existing processing results and re-process
# the data (True) or to skip subjects that were already processed (False).
overwrite_results = False

In [None]:
# Phase names handed to `load_ecg_raw_data_folder` (as `phase_names`) when the raw
# recordings of a subject are loaded.
# NOTE(review): presumably listed in recording order -- confirm against the loader.
file_mapping = [
    "Pre",
    "MIST1",
    "MIST2",
    "MIST3",
    "Post",
]

## Data Processing

In [None]:
# Process the ECG recordings of every subject and export the results as Excel files
# into the "processed" subfolder of the respective subject folder.
for subject_dir in tqdm(subject_dirs):
    # The folder name serves as subject ID (plot title and suffix of the output file names).
    subject_id = subject_dir.name

    ecg_path_raw = subject_dir.joinpath("raw")
    ecg_path_proc = subject_dir.joinpath("processed")
    bp.utils.file_handling.mkdirs(ecg_path_proc)

    hr_result_filename = ecg_path_proc.joinpath(f"hr_result_{subject_id}.xlsx")
    rpeaks_result_filename = ecg_path_proc.joinpath(f"rpeaks_result_{subject_id}.xlsx")
    hrv_cont_filename = ecg_path_proc.joinpath(f"hrv_continuous_{subject_id}.xlsx")

    # Skip subjects that were already processed unless re-processing is requested.
    # Only the heart rate export is used as the "already processed" marker.
    # (Fixed: this check previously referenced the undefined name `hr_result_path`.)
    if overwrite_results or not hr_result_filename.exists():
        # The loader result is indexed positionally: [0] is passed on as ECG data,
        # [1] as sampling rate.
        dataset_dict = load_ecg_raw_data_folder(ecg_path_raw, phase_names=file_mapping)
        ep = bp.signals.ecg.EcgProcessor(data=dataset_dict[0], sampling_rate=dataset_dict[1])
        ep.ecg_process(title=subject_id)

        # Continuous HRV, computed separately for each entry of the R-peak dictionary.
        dict_hrv_continuous = {
            key: hrv_continuous(rpeaks)
            for key, rpeaks in tqdm(list(ep.rpeaks.items()), desc="HRV")
        }

        # Save HR data, R-peak data, and continuous HRV data to file.
        bp.io.ecg.write_hr_phase_dict(ep.heart_rate, hr_result_filename)
        bp.io.ecg.write_pandas_dict_excel(ep.rpeaks, rpeaks_result_filename)
        bp.io.ecg.write_pandas_dict_excel(dict_hrv_continuous, hrv_cont_filename)