# Sample Time Analysis

In [None]:
from pathlib import Path
import json

import pandas as pd
import numpy as np
import scipy.stats as stats
import pingouin as pg

import biopsykit as bp
from biopsykit.stats import StatsPipeline
from biopsykit.io import load_long_format_csv
from biopsykit.utils.dataframe_handling import multi_xs

from fau_colors import cmaps

import matplotlib.pyplot as plt
import seaborn as sns

from carwatch_analysis.io import load_cortisol_samples_reporting_times
from carwatch_analysis.datasets import CarWatchDatasetProcessed
from carwatch_analysis.data_processing.sample_times import (
    compute_cumulative_sampling_delay,
    categorize_sampling_adherence,
)
from carwatch_analysis.stats import create_unique_night_id, median_iqr_saliva_samples
from carwatch_analysis.plotting import multi_boxplot_sampling_delay, multi_paired_plot_sampling_delay

%matplotlib widget
%load_ext autoreload
%autoreload 2

In [None]:
# Close all figures that are still open (e.g. from a previous run of the notebook).
plt.close("all")

# Seaborn theme based on the "faculties" colormap from fau_colors.
palette = sns.color_palette(cmaps.faculties)
theme_kwargs = dict(context="talk", style="ticks", palette=palette)
sns.set_theme(**theme_kwargs)

# Matplotlib defaults, applied after the seaborn theme so they are not overridden by it.
plt.rcParams.update(
    {
        "figure.figsize": (10, 5),
        "pdf.fonttype": 42,
        "mathtext.default": "regular",
    }
)

# Number of decimals pingouin rounds its result tables to.
pg.options["round"] = 4

# Switch for exporting results (presumably meant to be consulted by the export steps -- verify).
export = True

# Display the palette as cell output.
palette

## Setup Paths

In [None]:
# Top-level key of config.json whose "base_path" entry is used to locate the data folder.
deploy_type = "develop"

In [None]:
# Build the path to the data folder from the project configuration.
# The config file is read inside a context manager so that the file handle is
# closed deterministically instead of being left to the garbage collector.
with Path("../../../config.json").open(encoding="utf-8") as config_file:
    config_dict = json.load(config_file)
data_path = Path("..").joinpath(config_dict[deploy_type]["base_path"])
data_path

In [None]:
# Dataset object rooted at the configured data folder; the bare name displays it as cell output.
dataset = CarWatchDatasetProcessed(data_path)
dataset

In [None]:
# Input/output folders, all relative to the repository base folder.
base_path = Path("../..")
export_path = base_path.joinpath("exports")
result_path = base_path.joinpath("results")
stats_path = result_path.joinpath("statistics")

img_path = result_path.joinpath("plots")

# Location of the paper folder is stored in a separate JSON file; read it inside a
# context manager so the file handle is closed right after parsing.
with Path("../paper_path.json").open(encoding="utf-8") as paper_path_file:
    paper_path = Path(json.load(paper_path_file)["paper_path"])
paper_img_path = paper_path.joinpath("img")

# Make sure all output folders exist before anything is written to them.
bp.utils.file_handling.mkdirs([result_path, stats_path, img_path, paper_img_path])

### Cortisol Samples

In [None]:
# Load the cortisol sample table from the CSV file in the exports folder.
cort_path = export_path.joinpath("cortisol_samples_processed_all_reporting_types.csv")
cort_samples = load_cortisol_samples_reporting_times(cort_path)
# rename condition for table and plotting
cort_samples = cort_samples.rename({"Spontaneous": "Spontaneous Awakening"}, level="condition")

# Preview the first rows as cell output.
cort_samples.head()

In [None]:
# Condition labels.
conditions = ["Spontaneous Awakening", "Known Alarm", "Unknown Alarm"]

# Coarse reporting type labels.
reporting_types_coarse = ["Naive", "Selfreport", "App", "Sensor + Selfreport", "Sensor + App"]

# Fine-grained reporting type labels (long form).
reporting_types_fine = [
    "Naive",
    "Selfreport without App",
    "Selfreport with App",
    "App",
    "Sensor + Selfreport without App",
    "Sensor + Selfreport with App",
    "Sensor + App",
]

# Map every long label to its abbreviated form ("without" -> "w/o", "with" -> "w/").
# "without" has to be replaced first because it contains "with".
rename_mapper_reporting_types = {
    label: label.replace("without", "w/o").replace("with", "w/") for label in reporting_types_fine
}
reporting_types_fine_rename = list(rename_mapper_reporting_types.values())

# From here on the fine-grained list holds the abbreviated labels.
reporting_types_fine = reporting_types_fine_rename

# Apply the same abbreviation to the "reporting_type" index level of the data.
cort_samples = cort_samples.rename(rename_mapper_reporting_types, level="reporting_type")
cort_samples

In [None]:
# Collects the result tables of this notebook by name; written to an Excel file in the export cell.
dict_sample_times = {}

## Sample Times

### All Log Types

#### Median & IQR

In [None]:
# Column that is summarized; defined once and reused below instead of repeating the literal.
variable = "time_diff_to_naive_min"
data_desc = median_iqr_saliva_samples(cort_samples, variable, ["reporting_type", "sample"])
# Keep every reporting type except the first entry of the list ("Naive"), in list order.
data_desc = data_desc.reindex(reporting_types_fine[1:])
data_desc.columns = data_desc.columns.set_names(variable, level=-1)
data_desc = data_desc.round(2)
# NOTE(review): the key says "Mean" although the table comes from `median_iqr_saliva_samples`;
# left unchanged because it presumably ends up as a sheet name in the exported Excel file.
dict_sample_times["Sampling_Delay_Mean_IQR"] = data_desc

data_desc

#### Cumulative Sampling Delay

In [None]:
# Sampling delay of all reporting types except "Naive", with one column per sample.
delay_per_sample = cort_samples["time_diff_to_naive_min"].drop("Naive", level="reporting_type")
delay_per_sample = delay_per_sample.unstack("sample")

cum_sampling_delay = compute_cumulative_sampling_delay(delay_per_sample)
# Order the rows like the list of fine-grained reporting types (without its first entry, "Naive").
cum_sampling_delay = cum_sampling_delay.reindex(reporting_types_fine[1:])
cum_sampling_delay = cum_sampling_delay.round(2)
dict_sample_times["Cumulative_Sampling_Delay"] = cum_sampling_delay

cum_sampling_delay

#### Boxplots

In [None]:
data_plot = cort_samples.drop("Naive", level="reporting_type")

# Reporting types shown in the plot: all fine-grained types except the ones listed here.
excluded_types = {"Naive", "Selfreport w/ App", "Sensor + Selfreport w/ App"}
reporting_type_order = [rt for rt in reporting_types_fine if rt not in excluded_types]

fig, axs = multi_boxplot_sampling_delay(
    data_plot, order=reporting_type_order, figsize=(12, 4), palette=cmaps.faculties_light
)

# Honor the notebook-wide `export` switch instead of always writing the figure to disk.
if export:
    for path in [img_path, paper_img_path]:
        bp.utils.file_handling.export_figure(
            fig, filename="img_boxplot_sampling_delay", base_dir=path, formats=["png", "pdf"], dpi=300
        )

#### Paired Plots

In [None]:
data_plot = cort_samples.copy()

# Reporting types shown in the plot: all fine-grained types except the ones listed here.
excluded_types = {"Naive", "Selfreport w/ App", "Sensor + Selfreport w/ App"}
reporting_type_order = [rt for rt in reporting_types_fine if rt not in excluded_types]

fig, axs = multi_paired_plot_sampling_delay(data_plot, order=reporting_type_order, figsize=(12, 4), dpi=300)

# Honor the notebook-wide `export` switch instead of always writing the figure to disk.
if export:
    for path in [img_path, paper_img_path]:
        bp.utils.file_handling.export_figure(
            fig, "img_pairedplot_sampling_delay", path, formats=["png", "pdf"], dpi=300
        )

#### Compare Highest App-based Sampling Delays with Selfreport Sampling Delays

Get the three CARs (identified by their `night_id`) with the highest $\Delta t_{S0}$ for the *App* reporting type (see the paired plot above).

In [None]:
# Sampling delay of the "App" reporting type, one column per sample.
app_delay = cort_samples.xs("App", level="reporting_type")["time_diff_to_naive_min"]
app_delay = app_delay.unstack("sample")

# Sort by the delay of sample S0 (largest first) and keep the night ids of the top three.
s0_delay_desc = app_delay["S0"].sort_values(ascending=False)
cars_max_delay = s0_delay_desc.iloc[:3].index.get_level_values("night_id")
cars_max_delay

In [None]:
# Restrict the sampling delays to the nights selected in `cars_max_delay`.
max_delay_selfreport = cort_samples["time_diff_to_naive_min"].reindex(cars_max_delay, level="night_id")
# The reporting type labels were abbreviated when the data was loaded, so there is no plain
# "Selfreport" label in the index; select "Selfreport w/o App", the self-report type that is
# compared against "App" in the rest of the notebook.
max_delay_selfreport = max_delay_selfreport.reindex(["Selfreport w/o App", "App"], level="reporting_type").unstack(
    ["sample", "reporting_type"]
)
# Columns are (sample, reporting_type); drop nights with missing values and sort by the App delay of S0.
max_delay_selfreport = max_delay_selfreport.sort_index(axis=1).dropna().sort_values(by=("S0", "App"), ascending=False)
max_delay_selfreport = max_delay_selfreport.round(2)
dict_sample_times["Max_Sampling_Delay_SR_App"] = max_delay_selfreport

max_delay_selfreport

#### Histogram

In [None]:
col = "time_diff_to_naive_min"

# All fine-grained reporting types except "Naive".
reporting_type_order = [rt for rt in reporting_types_fine if rt != "Naive"]

# Round the delays to whole numbers so they line up with the bin width of 1 used below.
data_hist = cort_samples.reindex(reporting_type_order, level="reporting_type")
data_hist[col] = np.around(data_hist[col])

# 2 x 3 grid with shared axes: one panel per reporting type.
fig, axs = plt.subplots(nrows=2, ncols=3, figsize=(12, 5), sharex=True, sharey=True)
axs = axs.flatten()

grouper = data_hist.groupby("reporting_type")

for ax, reporting_type in zip(axs, reporting_type_order):
    data_type = grouper.get_group(reporting_type).reset_index()
    sns.histplot(data=data_type, x=col, stat="percent", binwidth=1, ax=ax)
    ax.minorticks_on()
    ax.set_title(reporting_type, fontsize="small")

fig.tight_layout()

### Selfreport without App vs. App

#### Paired Plots

In [None]:
# The two reporting types that are compared in this section.
reporting_type_order = ["Selfreport w/o App", "App"]

data_plot = cort_samples.reindex(reporting_type_order, level="reporting_type")
# Capitalize the name of the "sample" index level.
data_plot.index = data_plot.index.rename({"sample": "Sample"})

fig, axs = multi_paired_plot_sampling_delay(data_plot, order=reporting_type_order)

fig.tight_layout()

#### By Delay Groups

In [None]:
def _percent_per_delay_group(type_data):
    """Return, for each delay group, the percentage of the rows of ``type_data`` that belong to it."""
    n_rows = len(type_data)
    return type_data.groupby("delay_group").apply(lambda group: len(group) / n_rows * 100)


delay_group_data = cort_samples.reindex(["Selfreport w/o App", "App"], level="reporting_type")
delay_group_data = categorize_sampling_adherence(delay_group_data)

# Percentages are computed separately within each reporting type.
delay_group_percent = delay_group_data.groupby("reporting_type").apply(_percent_per_delay_group)
delay_group_percent = delay_group_percent.round(2)
dict_sample_times["Adherence_App_Selfreport"] = delay_group_percent

delay_group_percent

### Selfreport without App vs. with App

#### Data Preparation

In [None]:
# Keep only the two self-report reporting types.
selfreport_types = ["Selfreport w/o App", "Selfreport w/ App"]
cort_samples_selfreport = cort_samples.reindex(selfreport_types, level="reporting_type")
cort_samples_selfreport.head()

#### Boxplot

In [None]:
fig, ax = plt.subplots()

# Sampling delay per sample, with one box per self-report reporting type.
bp.plotting.feature_boxplot(
    data=cort_samples_selfreport.reset_index(),
    x="sample",
    y="time_diff_to_naive_min",
    hue="reporting_type",
    hue_order=["Selfreport w/o App", "Selfreport w/ App"],
    legend_orientation="horizontal",
    legend_loc="upper center",
    rect=(0, 0, 1, 0.9),
    ax=ax,
    palette=cmaps.faculties_light,
)
# Raw string: "\D" is not a valid escape sequence, so the non-raw literal triggers an
# invalid-escape warning; the resulting label text is unchanged.
ax.set_ylabel(r"$\Delta t$ [min]")
ax.set_xlabel("Sample")

# Honor the notebook-wide `export` switch instead of always writing the figure to disk.
if export:
    for path in [img_path, paper_img_path]:
        bp.utils.file_handling.export_figure(fig, "img_boxplot_sampling_delay_selfreport", path, ["pdf", "png"])

### Sensor

#### Data Preparation

In [None]:
# The two sensor-based reporting types analyzed in this section.
reporting_order = ["Sensor + Selfreport w/o App", "Sensor + App"]

# Select the rows of these reporting types from the "reporting_type" index level.
data_sensor = multi_xs(cort_samples, reporting_order, level="reporting_type")
data_sensor.head()

#### Median & IQR

In [None]:
# Summary of the sampling delay for the sensor-based reporting types
# (the list argument presumably names the grouping levels -- verify against the helper).
# NOTE: this rebinds `data_desc`; the table is only displayed here, not added to `dict_sample_times`.
data_desc = median_iqr_saliva_samples(data_sensor, "time_diff_to_naive_min", ["reporting_type", "sample"])
data_desc.head()

#### By Delay Groups

In [None]:
delay_group_data = data_sensor.reindex(reporting_order, level="reporting_type")
delay_group_data = categorize_sampling_adherence(delay_group_data)

# Within each reporting type: percentage of rows that fall into each delay group.
per_reporting_type = delay_group_data.groupby("reporting_type")
delay_group_percent = per_reporting_type.apply(
    lambda df: df.groupby("delay_group").apply(lambda d: len(d) / len(df) * 100)
).round(2)
dict_sample_times["Adherence_Sensor_App_Selfreport"] = delay_group_percent
delay_group_percent

## Export

In [None]:
# Write all collected result tables into one Excel file in the results folder.
# Guarded by the notebook-wide `export` switch so the notebook can be run without writing results.
if export:
    bp.io.write_pandas_dict_excel(dict_sample_times, result_path.joinpath("sample_time_results.xlsx"))