# Cortisol Data Cleaning

In [None]:
from pathlib import Path
import json

import pandas as pd
import numpy as np
import pingouin as pg

import biopsykit as bp
from biopsykit.utils.time import time_to_timedelta
from biopsykit.utils.dataframe_handling import multi_xs, int_from_str_idx, camel_to_snake

from fau_colors import cmaps

import matplotlib.pyplot as plt
import seaborn as sns

from carwatch_analysis.datasets import CarWatchDatasetProcessed
from carwatch_analysis.data_cleaning.saliva import (
    clean_measurable_range,
    clean_statistical_outlier,
)

%matplotlib widget
%load_ext autoreload
%autoreload 2

In [None]:
# Close any figures left open by a previous run of this notebook.
plt.close("all")

# Build the default seaborn palette from the `faculties` colormap of fau_colors.
palette = sns.color_palette(cmaps.faculties)
sns.set_theme(context="notebook", style="ticks", palette=palette)

# Notebook-wide matplotlib defaults, applied in one go.
plt.rcParams.update(
    {
        "figure.figsize": (8, 4),
        "pdf.fonttype": 42,
        "mathtext.default": "regular",
    }
)

# Round pingouin output to four decimals.
pg.options["round"] = 4

# Last expression of the cell: display the palette.
palette

## Setup Paths

In [None]:
# Key into config.json selecting which deployment's settings (e.g. "base_path") are used below.
deploy_type = "develop"

In [None]:
# Build the path to the data folder from the project configuration.
# The config file is read inside a context manager so its handle is closed
# deterministically instead of being left to the garbage collector.
with Path("../../../config.json").open(encoding="utf-8") as config_file:
    config_dict = json.load(config_file)

# The configured base path is resolved relative to the parent directory.
base_path = Path("..").joinpath(config_dict[deploy_type]["base_path"])
base_path

In [None]:
# Instantiate the processed CARWatch dataset rooted at `base_path` and display it.
dataset = CarWatchDatasetProcessed(base_path)
dataset

## Load Data

### Saliva

In [None]:
# Cortisol samples as provided by the dataset object.
cort_samples = dataset.cortisol_samples
# Keep an untouched copy so the pre-cleaning CAR count can be reported later.
cort_samples_before = cort_samples.copy()
# Preview the first rows.
cort_samples.head()

## Data Cleaning

In [None]:
# Pivot the 'sample' index level into columns and count the remaining rows;
# this row count is what gets reported as the number of CARs.
cars_before_wide = cort_samples_before.unstack("sample")
print(f"Number of CARs before cleaning: {cars_before_wide.shape[0]}")

### Remove CARs with values outside of measurable range (<0.33 nmol/l, >82.80 nmol/l)

In [None]:
# Apply the project's measurable-range cleaning step; the result replaces `cort_samples`.
# NOTE(review): thresholds are presumably the ones named in the section heading
# (<0.33 nmol/l, >82.80 nmol/l) — confirm in clean_measurable_range.
cort_samples = clean_measurable_range(cort_samples)

### Remove Statistical Outliers ($> 3 \sigma$)

Remove CARs where any cortisol sample differs by more than 3 standard deviations from the mean.

In [None]:
# Apply the project's statistical-outlier cleaning step; the result replaces `cort_samples`.
# NOTE(review): presumably uses the 3-sigma criterion described above — confirm in
# clean_statistical_outlier.
cort_samples = clean_statistical_outlier(cort_samples)

In [None]:
# Preview the first rows after both cleaning steps.
cort_samples.head()

## Export

In [None]:
# Export directory two levels above the working directory; create it if missing.
export_path = Path("../..") / "exports"
export_path.mkdir(exist_ok=True)

In [None]:
# Write the cleaned cortisol samples to CSV inside the export directory.
cleaned_csv_path = export_path / "cortisol_samples_cleaned.csv"
cort_samples.to_csv(cleaned_csv_path)