# Subject Exclusion

This notebook assesses whether any subjects need to be excluded from further analysis.

## Imports and Helper Functions

In [None]:
import json
import re
from pathlib import Path

import pandas as pd
import numpy as np
import pingouin as pg

import matplotlib.pyplot as plt
import seaborn as sns

from scipy.stats import zscore

import biopsykit as bp

from cft_analysis.datasets import CftDatasetProcessed

%load_ext autoreload
%autoreload 2
%matplotlib widget

In [None]:
# Close every figure that is still open so the notebook starts from a clean slate.
plt.close("all")

# Use BioPsyKit's `fau_palette` as the default colour cycle for all plots.
palette = bp.colors.fau_palette
sns.set_theme(context="notebook", style="ticks", palette=palette)

# Matplotlib defaults shared by all figures in this notebook.
plt.rcParams.update(
    {
        'figure.figsize': (10, 5),
        'pdf.fonttype': 42,  # embed TrueType fonts in exported PDFs
        'mathtext.default': "regular",
    }
)

# Show the palette as the cell output.
palette

## Data Import

In [None]:
# Root data directory, given relative to the notebook's working directory.
# The dataset is loaded from here and the exclusion list produced at the end of
# this notebook is written back into the same folder.
base_path = Path("../../data")

In [None]:
# Load the processed dataset rooted at `base_path`.
# NOTE(review): `exclude_subjects=False` presumably keeps every subject in the dataset
# (i.e. no previously saved exclusion list is applied), so the checks below see all
# of them — confirm against `CftDatasetProcessed`.
dataset = CftDatasetProcessed(base_path, exclude_subjects=False)
dataset

## Check for Subject Exclusion Criteria

In [None]:
# Running list of subject IDs flagged by the checks below; it is converted to a
# Series and written to CSV in the final section of this notebook.
subject_ids_to_exclude = []

### High Initial Cortisol Levels

Subjects are excluded if they have a high initial cortisol level (sample `S0`).

Exclusion criteria: $\geq 3\sigma$

In [None]:
# Cortisol data of the loaded dataset. The `xs(..., level="sample")` call in the
# following cell shows that its index carries a "sample" level.
cort_samples = dataset.cortisol

In [None]:
# Cross-section of the initial sample "S0"; `xs` removes the "sample" level from
# the resulting index.
cort_samples_s0 = cort_samples.xs("S0", level="sample")

In [None]:
# Flag S0 values whose z-score exceeds 3, i.e. more than three standard deviations
# above the mean of all S0 values.
cort_s0_is_high = zscore(cort_samples_s0) > 3.0

# Blank out everything that is not flagged and drop the resulting NaN rows.
# NOTE(review): the mask is built from the S0 rows only but applied to the full
# cortisol table; this presumably relies on pandas aligning the mask on the shared
# index levels — confirm.
cort_exclude = cort_samples.where(cort_s0_is_high).dropna()
cort_exclude

**Conclusion**:

Remove `Vp22`

In [None]:
# Unique subject IDs flagged by the cortisol check.
subject_ids = list(cort_exclude.index.get_level_values("subject").unique())

# Add every flagged subject that is not on the exclusion list yet. Each ID is checked
# on its own so that an already-listed subject does not block newly flagged ones, and
# re-running the cell adds no duplicates.
new_subject_ids = [s for s in subject_ids if s not in subject_ids_to_exclude]
subject_ids_to_exclude.extend(new_subject_ids)
subject_ids_to_exclude

### Heart Rate Outlier

Subjects are excluded if their heart rate response is an outlier, i.e., does not represent the population.


Exclusion criteria: $\text{HR} \geq 3\sigma$

In [None]:
# Heart rate of the "AT" subphase, restricted to the "HR_Norm" entries of the
# "type" index level.
hr_data = (
    dataset.get_subset(subphase="AT")
    .heart_rate
    .xs("HR_Norm", level="type")
)

In [None]:
# One column per phase, so the z-score is computed for each phase separately.
hr_per_phase = hr_data.unstack("phase")

# True where a value lies more than three standard deviations above its column mean.
# NOTE(review): the `.any`/`.where` calls below and the later `.index` access need
# `zscore` to hand back a pandas object rather than a bare ndarray — confirm with the
# installed SciPy version.
hr_mask = zscore(hr_per_phase) > 3

# Keep only the rows that are flagged in at least one phase.
hr_has_outlier = hr_mask.any(axis=1)
hr_exclude = hr_mask.where(hr_has_outlier).dropna()
hr_exclude

**Conclusion**:

Remove `Vp10`

In [None]:
# Unique subject IDs flagged by the heart rate outlier check.
subject_ids = list(hr_exclude.index.get_level_values("subject").unique())

# Add every flagged subject that is not on the exclusion list yet. Each ID is checked
# on its own so that an already-listed subject does not block newly flagged ones, and
# re-running the cell adds no duplicates.
new_subject_ids = [s for s in subject_ids if s not in subject_ids_to_exclude]
subject_ids_to_exclude.extend(new_subject_ids)
subject_ids_to_exclude

### CFT Non-Responder

Subjects are excluded if they do not respond to the CFT at all, i.e., do not show a decrease in heart rate relative to Baseline.

Exclusion criteria: $\text{HR}_{CFI} > 0$

In [None]:
# "HR_Norm" heart rate of the "CFT" condition during the "RP_CFI" subphase.
hr_data_cft = (
    dataset.get_subset(condition="CFT", subphase="RP_CFI")
    .heart_rate
    .xs("HR_Norm", level="type")
)

# One column per phase; a row is flagged only if its value is positive in every phase.
hr_cft_per_phase = hr_data_cft.unstack("phase")
hr_cft_all_positive = (hr_cft_per_phase > 0).all(axis=1)

# Blank out the non-flagged rows and drop them.
hr_cft_exclude = hr_cft_per_phase.where(hr_cft_all_positive).dropna()
hr_cft_exclude

**Conclusion**:

Remove `Vp07`

In [None]:
# Unique subject IDs flagged by the CFT non-responder check.
subject_ids = list(hr_cft_exclude.index.get_level_values("subject").unique())

# Add every flagged subject that is not on the exclusion list yet. Each ID is checked
# on its own so that an already-listed subject does not block newly flagged ones, and
# re-running the cell adds no duplicates.
new_subject_ids = [s for s in subject_ids if s not in subject_ids_to_exclude]
subject_ids_to_exclude.extend(new_subject_ids)
subject_ids_to_exclude

## Save Results from Subject Exclusion

In [None]:
# Wrap the collected IDs in a Series named "subject"; the name becomes the header
# of the single CSV column.
subject_ids_to_exclude = pd.Series(subject_ids_to_exclude, name="subject")

# Write the list next to the data, without the positional index.
export_path = base_path / "excluded_subjects.csv"
subject_ids_to_exclude.to_csv(export_path, index=False)