# Subject Exclusion

This notebook assesses whether any subjects need to be excluded from further analysis.

## Imports and Helper Functions

In [91]:
import re
from pathlib import Path

import pandas as pd
import numpy as np
import pingouin as pg

import matplotlib.pyplot as plt
import seaborn as sns

from scipy.stats import zscore

import biopsykit as bp

%load_ext autoreload
%autoreload 2
%matplotlib widget

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [92]:
# Start from a clean slate: close any figures left open from earlier runs.
plt.close("all")

# Use the FAU color palette from biopsykit for all seaborn/matplotlib plots.
palette = bp.colors.fau_palette
sns.set_theme(context="notebook", style="ticks", palette=palette)

# Global matplotlib defaults used throughout this notebook.
plt.rcParams.update(
    {
        'figure.figsize': (10, 5),
        'pdf.fonttype': 42,
        'mathtext.default': "regular",
    }
)

# Display the palette as the cell output.
palette

## Data Import

In [93]:
# Input: processed data, one sub-folder per modality.
base_path = Path("../../../data/processed")
ecg_path = base_path / "ecg"
saliva_path = base_path / "saliva"
quest_path = base_path / "questionnaire"

# Output: data with excluded subjects removed (folder is created via biopsykit's mkdirs helper).
export_path = Path("../../../data/for_analysis")
bp.utils.file_handling.mkdirs(export_path)

### HR(V)

In [94]:
# Normalized mean heart rate (file: cft_hr_mean_normalized.csv)
hr_mean_normalized = bp.io.load_wide_format_csv(ecg_path / "cft_hr_mean_normalized.csv")

# Merged HR(V) measures (file: cft_measures_merged.csv)
hr_measures = bp.io.load_wide_format_csv(ecg_path / "cft_measures_merged.csv")

# CFT parameters; the index columns are passed explicitly for this file
cft_parameter = bp.io.load_wide_format_csv(
    ecg_path / "cft_parameter.csv",
    index_cols=["condition", "subject", "phase"],
)

# Heart rate ensemble data, loaded from an Excel file as a dictionary of dataframes
hr_ensemble = bp.io.load_pandas_dict_excel(ecg_path / "cft_hr_ensemble.xlsx")

### Saliva

In [95]:
# Raw cortisol values per saliva sample (file: cortisol_samples.csv)
cort_samples = bp.io.load_wide_format_csv(saliva_path / "cortisol_samples.csv")

# Features derived from the cortisol samples (file: cortisol_features.csv)
cort_features = bp.io.load_wide_format_csv(saliva_path / "cortisol_features.csv")

### Questionnaire

In [96]:
# Questionnaire data (file: questionnaire_data.csv)
quest_data = bp.io.load_wide_format_csv(quest_path / "questionnaire_data.csv")

## Check for Subject Exclusion Criteria

### High Initial Cortisol Levels

Subjects are excluded if they have a high initial cortisol level (sample `S0`).

Exclusion criteria: $\geq 3\sigma$

In [97]:
# Select only the initial saliva sample ("S0"); the "sample" index level is dropped by xs.
cort_samples_s0 = cort_samples.xs(key="S0", level="sample")

In [98]:
# Flag subjects whose initial cortisol level (S0) lies more than 3 standard deviations above the mean.
s0_is_outlier = zscore(cort_samples_s0) > 3.0
# The mask has no "sample" level; as the displayed result shows, all samples of a flagged subject are kept.
cort_samples.where(s0_is_outlier).dropna()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,cortisol
condition,subject,sample,Unnamed: 3_level_1
CFT,Vp27,S0,9.7073
CFT,Vp27,S1,10.3513
CFT,Vp27,S2,10.649
CFT,Vp27,S3,12.04
CFT,Vp27,S4,14.0805
CFT,Vp27,S5,13.9135
CFT,Vp27,S6,13.1495


**Conclusion**:

Remove `Vp27`

### Heart Rate Outlier

Subjects are excluded if their heart rate response is an outlier, i.e., does not represent the population.


Exclusion criteria: $\geq 3\sigma$

In [99]:
# Heart rate of the "AT" subphase, reshaped so that each phase becomes its own column.
hr_at_wide = hr_mean_normalized.xs("AT", level="subphase").unstack("phase")
# z-score along the rows (scipy default axis=0), i.e. separately for each column; flag values above 3.
hr_mask = zscore(hr_at_wide) > 3
# A subject counts as an outlier if it is flagged in at least one column; show all of its data.
hr_mean_normalized.where(hr_mask.any(axis=1)).dropna().unstack("phase")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Heart_Rate,Heart_Rate,Heart_Rate
Unnamed: 0_level_1,Unnamed: 1_level_1,phase,MIST1,MIST2,MIST3
condition,subject,subphase,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
CFT,Vp10,AT,5.113052,72.902709,88.850159
CFT,Vp10,BL,0.581179,10.483095,26.881095
CFT,Vp10,FB,19.066959,35.185565,56.648804
CFT,Vp10,RP_CFI,-8.587217,10.425869,23.947121


**Conclusion**:

Remove `Vp10`

### CFT Non-Responder

Subjects are excluded if they do not respond to the CFT at all, i.e., do not show a decrease in heart rate relative to Baseline.

Exclusion criteria: $\text{HR}_{CFI} > 0$

In [100]:
# Restrict the data to the CFT condition and, within it, to the "RP_CFI" subphase.
hr_cft_condition = hr_mean_normalized.xs("CFT", level="condition")
hr_mean_cft = hr_cft_condition.xs("RP_CFI", level="subphase")
# Per subject: True only if the value is positive in every row of that subject's group.
is_non_responder = hr_mean_cft.groupby("subject").apply(lambda subject_hr: (subject_hr > 0).all())
hr_mean_cft.where(is_non_responder).dropna()

Unnamed: 0_level_0,Unnamed: 1_level_0,Heart_Rate
subject,phase,Unnamed: 2_level_1
Vp07,MIST1,6.941204
Vp07,MIST2,6.671656
Vp07,MIST3,31.80132


**Conclusion**:

Remove `Vp07`

## Apply Subject Exclusion

In [101]:
# Subjects to remove from all datasets, one per exclusion check above:
#   Vp27 - high initial cortisol level (flagged by the S0 z-score check; its output lists Vp27)
#   Vp07 - CFT non-responder
#   Vp10 - heart rate outlier
# NOTE: the variable name (sic, "exluded") is kept because the following cells reference it.
exluded_subjects = ["Vp27", "Vp07", "Vp10"]

### HR(V)

In [102]:
# Remove the excluded subjects from each HR(V) dataframe via biopsykit's helper.
hr_mean_normalized = bp.utils.data_processing.exclude_subjects(
    exluded_subjects, hr_mean_normalized=hr_mean_normalized
)
hr_measures = bp.utils.data_processing.exclude_subjects(
    exluded_subjects, hr_measures=hr_measures
)
cft_parameter = bp.utils.data_processing.exclude_subjects(
    exluded_subjects, cft_parameter=cft_parameter
)
# The ensemble dataframes are handled by dropping columns named after the excluded subjects;
# errors="ignore" skips subjects that are not present in a given dataframe.
hr_ensemble = {
    name: frame.drop(columns=exluded_subjects, errors="ignore")
    for name, frame in hr_ensemble.items()
}



### Saliva

In [103]:
# Remove the excluded subjects from the cortisol samples and features.
cort_samples = bp.utils.data_processing.exclude_subjects(
    exluded_subjects, cort_samples=cort_samples
)
cort_features = bp.utils.data_processing.exclude_subjects(
    exluded_subjects, cort_features=cort_features
)

### Questionnaire

In [104]:
# Remove the excluded subjects from the questionnaire data.
quest_data = bp.utils.data_processing.exclude_subjects(
    exluded_subjects, quest_data=quest_data
)

## Export Data with Excluded Subjects

In [105]:
# Write the HR(V) data to the export folder, reusing the input file names.
hr_mean_normalized.to_csv(export_path / "cft_hr_mean_normalized.csv")
hr_measures.to_csv(export_path / "cft_measures_merged.csv")
cft_parameter.to_csv(export_path / "cft_parameter.csv")
bp.io.write_pandas_dict_excel(hr_ensemble, export_path / "cft_hr_ensemble.xlsx")

In [106]:
# Write the cortisol data to the export folder, reusing the input file names.
cort_samples.to_csv(export_path / "cortisol_samples.csv")
cort_features.to_csv(export_path / "cortisol_features.csv")

In [107]:
# Write the questionnaire data to the export folder, reusing the input file name.
quest_data.to_csv(export_path / "questionnaire_data.csv")