# ECG Processing & Feature Computation

In [None]:
import json
import re
from pathlib import Path

import pandas as pd
import numpy as np
import pingouin as pg

import matplotlib.pyplot as plt
import seaborn as sns

from tqdm.auto import tqdm

import biopsykit as bp

from cft_analysis.datasets import CftDatasetRaw
from cft_analysis.feature_extraction.hrv import hrv_continuous_dict
from cft_analysis.datasets.helper import load_subject_data_dicts, load_subject_continuous_hrv_data


%load_ext autoreload
%autoreload 2
%matplotlib widget

In [None]:
# Close any figures that are still open from a previous run of the notebook.
plt.close("all")

# Use the FAU palette shipped with biopsykit as the default seaborn palette.
palette = bp.colors.fau_palette
sns.set_theme(context="notebook", style="ticks", palette=palette)

# Matplotlib defaults, applied in a single update instead of key by key.
plt.rcParams.update(
    {
        "figure.figsize": (10, 5),
        "pdf.fonttype": 42,
        "mathtext.default": "regular",
    }
)

# Show the palette as the cell output.
palette

## Data Import

In [None]:
# Build the path to the data folder from the project configuration file.
# `read_text` opens and closes the file itself, so no file handle is left open
# (passing `Path.open()` directly to `json.load` never closed it).
config_dict = json.loads(Path("../../config.json").read_text(encoding="utf-8"))
# The configured "base_path" entry is resolved relative to the parent directory.
base_path = Path("..").joinpath(config_dict["base_path"])

In [None]:
# Target folders for all exported results; ECG results get their own subfolder.
export_path = Path("../../data")
export_path_ecg = export_path / "ecg"

# Hand both folders to biopsykit's `mkdirs` helper.
bp.utils.file_handling.mkdirs([export_path, export_path_ecg])

In [None]:
# Flag controlling re-processing: set to True to ignore already existing processing
# results and re-process the data; leave at False to keep existing results.
overwrite_results = False

In [None]:
# Instantiate the raw CFT dataset from the base path read from the config file.
dataset = CftDatasetRaw(base_path)
# Show the dataset as the cell output.
dataset

## Process ECG Signal

In [None]:
# Process the ECG recording of every subject and write heart rate, R-peak and
# continuous HRV results to the export paths provided by the dataset subset.
for subset in tqdm(dataset.groupby("subject")):
    subject_id = subset.index["subject"][0]

    export_paths = subset.setup_export_paths()
    # Skip subjects whose results already exist, unless re-processing was requested.
    # (The previous `or` condition skipped every subject while `overwrite_results`
    # was False and never overwrote existing results when it was True.)
    if not overwrite_results and export_paths["hr_result"].exists():
        continue

    ecg_data = subset.ecg
    ep = bp.signals.ecg.EcgProcessor(data=ecg_data, sampling_rate=subset.sampling_rate)
    ep.ecg_process(title=subject_id)

    # continuous HRV data derived from the processed ECG
    dict_hrv_continuous = hrv_continuous_dict(ep)

    # save HR data, R-Peak data and continuous HRV data to file
    # (re-uses the export paths obtained above instead of querying them a second time)
    bp.io.ecg.write_hr_phase_dict(ep.heart_rate, export_paths["hr_result"])
    bp.io.ecg.write_pandas_dict_excel(ep.rpeaks, export_paths["rpeaks_result"])
    bp.io.ecg.write_pandas_dict_excel(dict_hrv_continuous, export_paths["hrv_cont"])

In [None]:
# Load the per-subject heart rate and R-peak dictionaries for the whole dataset
# (presumably the files written by the processing loop above -- verify in the helper).
hr_subject_data_dict, rpeaks_subject_data_dict = load_subject_data_dicts(dataset)
# Load the per-subject continuous HRV data in the same way.
hrv_subject_data_dict = load_subject_continuous_hrv_data(dataset)

## Compute HR(V) Parameters over MIST

In [None]:
# get condition list from dataset (passed as "add_conditions" parameter to the computations below)
condition_list = dataset.condition_list

# HRV parameters of interest (used for the HRV-above-baseline computation and for reshaping the HRV results)
hrv_columns = ["HRV_SDNN", "HRV_RMSSD", "HRV_pNN50", "HRV_pNN20"]

### Create `MIST` Object

In [None]:
# Subphase names mapped to their durations; the same mapping is shared by all MIST phases.
# NOTE(review): durations are presumably in seconds and 0 for "FB" presumably means
# "remainder of the phase" -- confirm against the biopsykit protocol documentation.
subphases = dict(BL=60, RP_CFI=120, AT=240, FB=0)

# Study structure: "Pre" and "Post" have no sub-structure, the "MIST" part
# consists of the three phases MIST1..MIST3.
structure = {
    "Pre": None,
    "MIST": {f"MIST{phase_number}": subphases for phase_number in range(1, 4)},
    "Post": None,
}

mist = bp.protocols.MIST(name="CFT", structure=structure)
# Show the protocol object as the cell output.
mist

### Add Heart Rate Data

In [None]:
# Attach the per-subject heart rate and R-peak dictionaries loaded above to the MIST object.
mist.add_hr_data(hr_data=hr_subject_data_dict, rpeak_data=rpeaks_subject_data_dict)

### Compute *Ensemble Time-Series*

In [None]:
# Parameters of the ensemble computation: "Pre" is given as the normalization
# phase and only the three MIST phases are selected.
ensemble_params = {
    "normalize_to": "Pre",
    "select_phases": ["MIST1", "MIST2", "MIST3"],
}

mist.compute_hr_ensemble("hr_ensemble", select_phases=True, params=ensemble_params)

### Compute *Aggregated Heart Rate Results*

The following parameters are computed:

* *Aggregated Heart Rate Results* are computed for both absolute and relative heart rate (relative heart rate: normalized to `Pre` phase on a subject-level basis)
* Relative amount of heart rate above baseline (baseline: `Pre` phase)

In [None]:
def _mist_subphase_params(**extra):
    """Return a fresh parameter dict for the subphase-wise MIST computations.

    Entries passed via ``extra`` come first, followed by the phase selection,
    the subphase definition and the condition list shared by all computations.
    """
    return {
        **extra,
        "select_phases": ["MIST1", "MIST2", "MIST3"],
        "split_into_subphases": subphases,
        "add_conditions": condition_list,
    }


# Processing steps that are switched on for every computation in this cell.
subphase_flags = {
    "select_phases": True,
    "split_into_subphases": True,
    "add_conditions": True,
}

# absolute heart rate
mist.compute_hr_results(
    "hr_mean",
    resample_sec=False,
    normalize_to=False,
    params=_mist_subphase_params(),
    **subphase_flags,
)

# heart rate normalized to the "Pre" phase
mist.compute_hr_results(
    "hr_mean_normalized",
    resample_sec=False,
    normalize_to=True,
    params=_mist_subphase_params(normalize_to="Pre"),
    **subphase_flags,
)

# heart rate above baseline, with "Pre" as baseline phase
mist.compute_hr_above_baseline(
    "hr_above_bl_glo",
    "Pre",
    params=_mist_subphase_params(),
    **subphase_flags,
)

### Compute *HRV* Parameters

The following HRV parameters are computed:

* Time-domain and non-linear HRV parameters are computed both over whole phases and over the MIST subphases
* The relative amount of HRV parameters above baseline is computed per subphase from continuous HRV data (baseline: `Pre` phase)

In [None]:
# ignore neurokit warnings
import warnings

from neurokit2 import NeuroKitWarning

warnings.filterwarnings("ignore", category=NeuroKitWarning)

# HRV parameters over whole phases
phase_params = {"add_conditions": condition_list}
phase_hrv_params = {"hrv_types": ["hrv_time", "hrv_nonlinear"]}
mist.compute_hrv_results(
    "hrv_phases", add_conditions=True, params=phase_params, hrv_params=phase_hrv_params
)

# HRV parameters over the subphases of the three MIST phases
subphase_params = {
    "split_into_subphases": subphases,
    "select_phases": ["MIST1", "MIST2", "MIST3"],
    "add_conditions": condition_list,
}
subphase_hrv_params = {"hrv_types": ["hrv_time", "hrv_nonlinear"]}
mist.compute_hrv_results(
    "hrv_subphases",
    split_into_subphases=True,
    select_phases=True,
    add_conditions=True,
    params=subphase_params,
    hrv_params=subphase_hrv_params,
)

# HRV parameters above baseline ("Pre" phase), computed from the continuous HRV data
above_baseline_params = {
    "select_phases": ["MIST1", "MIST2", "MIST3"],
    "split_into_subphases": subphases,
    "add_conditions": condition_list,
}
mist.compute_hrv_above_baseline(
    "hrv_above_bl_glo",
    "Pre",
    hrv_subject_data_dict,
    select_phases=True,
    split_into_subphases=True,
    add_conditions=True,
    hrv_columns=hrv_columns,
    params=above_baseline_params,
)

## Compute CFT Parameters

In [None]:
from cft_analysis.feature_extraction.cft import cft_parameter_per_phase

In [None]:
# Condition list restricted to the subset of the dataset with condition "CFT".
cft_condition_list = dataset.get_subset(condition="CFT").condition_list

# CFT parameters per phase, computed from the heart rate data of all subjects.
cft_params = cft_parameter_per_phase(hr_subject_data_dict, cft_condition_list)
# Show the first rows as the cell output.
cft_params.head()

In [None]:
from cft_analysis.utils.data_reshaping import reshape_hr_data, reshape_hrv_data, reshape_time_above_bl_glo, reshape_cft_params

In [None]:
# Reshape every result set with its project-specific helper so that the results
# can be concatenated into one table below.
# NOTE(review): the helpers presumably all return frames sharing the index levels
# used in the concatenation step -- confirm in cft_analysis.utils.data_reshaping.
hr_data = reshape_hr_data(mist)
hrv_data = reshape_hrv_data(mist, hrv_columns)
above_bl_data = reshape_time_above_bl_glo(mist)
cft_params_data = reshape_cft_params(cft_params, condition_list)

In [None]:
# Map each feature category name to its reshaped data; the keys are used as the
# "category" index level when the entries are concatenated.
concat_dict = {"HR": hr_data, "Time_BL_Glo": above_bl_data, "HRV": hrv_data, "CFT": cft_params_data}

In [None]:
# Stack all feature categories into one frame: the dict keys become the new
# outermost index level named "category", which is then moved to its final
# position before the index is sorted.
index_level_order = ["condition", "subject", "phase", "subphase", "category", "type"]
data_concat = (
    pd.concat(concat_dict, names=["category"])
    .reorder_levels(index_level_order)
    .sort_index()
)

In [None]:
# Show the merged feature table as the cell output.
data_concat

### Export Data

In [None]:
# Write the MIST protocol object to "mist_cft.json" in the export folder
# (what exactly is serialized is defined by biopsykit's `to_file`).
mist.to_file(export_path.joinpath("mist_cft.json"))

In [None]:
# Export the heart rate ensemble time-series into the ECG export folder.
mist.export_hr_ensemble(export_path_ecg)

In [None]:
# Write the CFT parameters and the merged feature table as CSV files
# into the ECG export folder.
cft_parameter_file = export_path_ecg / "cft_parameter.csv"
merged_features_file = export_path_ecg / "cft_hr_features_merged.csv"

cft_params.to_csv(cft_parameter_file)
data_concat.to_csv(merged_features_file)