# Sample Time Analysis – All Log Types

In [None]:
from pathlib import Path
import json

import pandas as pd
import numpy as np
import scipy.stats as stats
import pingouin as pg

import biopsykit as bp
from biopsykit.stats import StatsPipeline
from biopsykit.io import load_long_format_csv
from biopsykit.utils.dataframe_handling import multi_xs

from fau_colors import cmaps

import matplotlib.pyplot as plt
import seaborn as sns

from carwatch_analysis.io import load_cortisol_samples_log_times
from carwatch_analysis.datasets import CarWatchDatasetProcessed

from carwatch_analysis.stats import (
    create_unique_night_id,
    median_iqr_saliva_samples
)

%matplotlib widget
%load_ext autoreload
%autoreload 2

In [None]:
# close any figures that are still open before configuring the plotting defaults
plt.close("all")

palette = sns.color_palette(cmaps.faculties)

# base seaborn theme, plus a variant with a smaller font scale that the
# multi-panel unit-digit figures further down switch to
theme_kwargs = dict(context="talk", style="ticks", palette=palette)
theme_kwargs_grid = {**theme_kwargs, "font_scale": 0.8}
sns.set_theme(**theme_kwargs)

# matplotlib defaults shared by all figures of this notebook
plt.rcParams.update(
    {
        "figure.figsize": (10, 5),
        "pdf.fonttype": 42,
        "mathtext.default": "regular",
    }
)

# NOTE(review): this flag is not read by any cell visible here; the export loops run unconditionally
export = True

pg.options["round"] = 4

# bare expression: displays the palette as the cell output
palette

## Setup Paths

In [None]:
# key into config.json that selects which deployment's "base_path" entry is used
deploy_type = "develop"

In [None]:
# build path to data folder
# read_text() opens and closes the config file itself, so no file handle is left open
config_dict = json.loads(Path("../../../config.json").read_text(encoding="utf-8"))
data_path = Path("..").joinpath(config_dict[deploy_type]["base_path"])
# bare expression: displays the resolved data path as the cell output
data_path

In [None]:
# instantiate the processed dataset from the data folder; the bare name displays it as the cell output
dataset = CarWatchDatasetProcessed(data_path)
dataset

In [None]:
# folders for inputs (exports) and outputs (results, statistics, plots), relative to the notebook
base_path = Path("../..")
export_path = base_path.joinpath("exports")
result_path = base_path.joinpath("results")
stats_path = result_path.joinpath("statistics")

img_path = result_path.joinpath("plots")

# location of the paper folder is configured in paper_path.json;
# read_text() closes the file itself, so no file handle is left open
paper_path = Path(json.loads(Path("../paper_path.json").read_text(encoding="utf-8"))["paper_path"])
paper_img_path = paper_path.joinpath("img")

# make sure all output folders exist before anything is written to them
bp.utils.file_handling.mkdirs([result_path, stats_path, img_path, paper_img_path])

## Load Data

### Cortisol Samples

In [None]:
# load the cortisol samples with their log times from the exports folder
cort_path = export_path / "cortisol_samples_processed_all_log_types.csv"
cort_samples = load_cortisol_samples_log_times(cort_path).rename(
    {"Spontaneous": "Spontaneous Awakening"}, level="condition"
)  # use the long label for the spontaneous condition in the "condition" index level
cort_samples.head()

In [None]:
# condition labels (after the "Spontaneous" -> "Spontaneous Awakening" rename)
conditions = [
    "Spontaneous Awakening",
    "Known Alarm",
    "Unknown Alarm",
]

# fine-grained log types; "Naive" comes first so that `log_types_fine[1:]` skips it
log_types_fine = [
    "Naive",
    "Selfreport without App",
    "Selfreport with App",
    "App",
    "Sensor + Selfreport without App",
    "Sensor + Selfreport with App",
    "Sensor + App",
]

# coarse log types (no distinction between self-reports with and without app)
log_types_coarse = [
    "Naive",
    "Selfreport",
    "App",
    "Sensor + Selfreport",
    "Sensor + App",
]

## Sample Times

### All Log Types

#### Median & IQR

In [None]:
variable = "time_diff_to_naive_min"

# descriptive statistics per log type and sample, with rows ordered like
# `log_types_fine` minus its first entry ("Naive")
data_desc = median_iqr_saliva_samples(cort_samples, variable, ["log_type", "sample"]).reindex(log_types_fine[1:])
# label the innermost column level with the name of the analysed variable
data_desc.columns = data_desc.columns.set_names(variable, level=-1)
# NOTE(review): the file name says "mean_iqr" although the helper is called median_iqr_* - confirm before renaming
data_desc.to_csv(result_path / "log_time_difference_mean_iqr.csv")

data_desc

#### Cumulated Sampling Delay

In [None]:
# one column per sample, "Naive" rows removed
delay_by_sample = cort_samples["time_diff_to_naive_min"].drop("Naive", level="log_type").unstack("sample")
# cumulated delay: difference between the "S4" and the "S0" columns
cum_sampling_delay = (delay_by_sample["S4"] - delay_by_sample["S0"]).to_frame(name="cum_sampling_delay")
cum_sampling_delay = cum_sampling_delay.reindex(log_types_fine[1:], level="log_type")
# median and interquartile range per log type (displayed as the cell output)
cum_sampling_delay.groupby("log_type").agg(["median", stats.iqr]).round(2)

#### Boxplots

In [None]:
# One boxplot panel per log type showing time_diff_to_naive_min for each sample.
fig, axs = plt.subplots(figsize=(12, 4), ncols=4, sharey=True)

data_plot = cort_samples.drop("Naive", level="log_type")
# capitalised level name so that the x-axis label reads "Sample"
data_plot.index = data_plot.index.rename({"sample": "Sample"})
data_group = data_plot.groupby("log_type")

# four of the seven fine log types are shown (one per axis)
order = log_types_fine.copy()
order.remove("Naive")
order.remove("Selfreport with App")
order.remove("Sensor + Selfreport with App")

for i, (key, ax) in enumerate(zip(order, axs)):
    df = data_group.get_group(key)
    bp.plotting.feature_boxplot(
        data=df.reset_index(), x="Sample", y="time_diff_to_naive_min", ax=ax
    )
    ax.set_title(key)

    # y-axis is shared, so only the left-most panel is labelled.
    # Raw string: "\D" is not a valid escape sequence in a normal string literal.
    ax.set_ylabel(r"$\Delta s$ [min]" if i == 0 else None)

fig.tight_layout()
#for path in [img_path, paper_img_path]:
    #fig.savefig(path.joinpath("img_log_time_differences.pdf"), transparent=True)
#    bp.utils.file_handling.export_figure(fig, "img_boxplot_sampling_delay", path, formats=["png", "pdf"])

#### Paired Plots

In [None]:
# One paired plot per log type: time_diff_to_naive_min across samples, lines connect the same night_id.
fig, axs = plt.subplots(figsize=(12, 5), ncols=4, sharey=True)

data_plot = cort_samples.copy()
# capitalised level name so that the x-axis label reads "Sample"
data_plot.index = data_plot.index.rename({"sample": "Sample"})
data_group = data_plot.groupby("log_type")

# four of the seven fine log types are shown (one per axis)
order = log_types_fine.copy()
order.remove("Naive")
order.remove("Selfreport with App")
order.remove("Sensor + Selfreport with App")

for key, ax in zip(order, axs):
    df = data_group.get_group(key)
    pg.plot_paired(
        data=df.reset_index(),
        dv="time_diff_to_naive_min",
        within="Sample",
        subject="night_id",
        pointplot_kwargs={"alpha": 0.5},
        boxplot_in_front=True,
        ax=ax,
    )
    ax.set_title(key, fontsize="smaller")

# Raw string: "\D" is not a valid escape sequence in a normal string literal.
axs[0].set_ylabel(r"$\Delta s$ [min]")

fig.tight_layout()
for path in [img_path, paper_img_path]:
    #fig.savefig(path.joinpath("img_pair_plot_sampling_delay.pdf"), transparent=True)
    bp.utils.file_handling.export_figure(fig, "img_pairedplot_sampling_delay", path, formats=["png", "pdf"])

#### Compare Highest App-based Sampling Delays with Selfreport Sampling Delays

Get the three CARs with the highest $\Delta s_0$ for *App* (see Paired Plot)

In [None]:
# night ids of the three "App" entries with the largest "S0" value of time_diff_to_naive_min
cars_max_delay = (
    cort_samples.xs("App", level="log_type")["time_diff_to_naive_min"]
    .unstack("sample")["S0"]
    .sort_values(ascending=False)
    .iloc[0:3]
    .index.get_level_values("night_id")
)
cars_max_delay

In [None]:
# Side-by-side delays of the selected nights for the two log types, one column per
# (sample, log_type) pair, sorted by the "App" delay of sample "S0" (largest first).
# NOTE(review): "Selfreport" is a label from `log_types_coarse`; confirm it is present in the
# log_type level of this data set - nights without a value in every column are dropped by dropna().
max_delay_selfreport = (
    cort_samples["time_diff_to_naive_min"]
    .reindex(cars_max_delay, level="night_id")
    .reindex(["Selfreport", "App"], level="log_type")
    .unstack(["sample", "log_type"])
    .sort_index(axis=1)
    .dropna()
    .sort_values(by=("S0", "App"), ascending=False)
)
max_delay_selfreport

#### Histogram

In [None]:
# Figure 1: histograms of time_diff_to_naive_min for all log types overlaid in one axis.
fig, ax = plt.subplots(figsize=(12, 3))

col = "time_diff_to_naive_min"

log_type_order = log_types_fine.copy()
log_type_order.remove("Naive")

data_hist = cort_samples.reindex(log_type_order, level="log_type")
# round to whole numbers so that the values line up with the bin width of 1 used below
data_hist[col] = np.around(data_hist[col])

sns.histplot(
    data=data_hist.reset_index(),
    x=col,
    hue="log_type",
    hue_order=log_type_order,
    stat="percent",
    binwidth=1,
    common_norm=False,  # normalise every log type on its own
    ax=ax,
)
ax.minorticks_on()
# get_legend_handles_labels() returns (handles, labels) in this order
handles, labels = ax.get_legend_handles_labels()
fig.tight_layout()


# Figure 2: the same histograms, one panel per log type.
fig, axs = plt.subplots(figsize=(12, 5), nrows=2, ncols=3, sharey=True, sharex=True)
axs = axs.flatten()

grouper = data_hist.groupby("log_type")

for log_type, ax in zip(log_type_order, axs):
    df = grouper.get_group(log_type)
    sns.histplot(
        data=df.reset_index(), x=col, stat="percent", ax=ax, binwidth=1
    )
    ax.minorticks_on()
    ax.set_title(log_type, fontsize="small")

fig.tight_layout()

### Selfreport without App vs. App

#### Paired Plots

In [None]:
# Paired plots of time_diff_to_naive_min across samples for "Selfreport without App" and "App".
fig, axs = plt.subplots(ncols=2, sharey=True)

data_plot = cort_samples.reindex(["Selfreport without App", "App"], level="log_type")
# capitalised level name so that the x-axis label reads "Sample"
data_plot.index = data_plot.index.rename({"sample": "Sample"})
data_group = data_plot.groupby("log_type")

order = ["Selfreport without App", "App"]

for key, ax in zip(order, axs):
    df = data_group.get_group(key)
    pg.plot_paired(
        data=df.reset_index(),
        dv="time_diff_to_naive_min",
        within="Sample",
        subject="night_id",
        pointplot_kwargs={"alpha": 0.5},
        boxplot_in_front=True,
        ax=ax,
    )
    # the title is set once; underscores in the key (if any) are shown as " + "
    ax.set_title(key.replace("_", " + "))

# Raw string: "\D" is not a valid escape sequence in a normal string literal.
axs[0].set_ylabel(r"$\Delta s$ [min]")

fig.tight_layout()

#### By Delay Groups

In [None]:
# time_diff_to_naive_min of sample "S0" for the two log types of interest
wo_s0_data = cort_samples.reindex(["Selfreport without App", "App"], level="log_type")
wo_s0_data = wo_s0_data.xs("S0", level="sample")["time_diff_to_naive_min"]
# Split at 5: values <= 5 are "Adherent", values > 5 are "Non-adherent".
# Open-ended outer edges keep the bins strictly increasing for any data; edges derived from the
# data's min/max make pd.cut raise when all values lie on one side of 5 (or an extreme equals 5).
wo_s0_group = pd.cut(
    wo_s0_data,
    bins=[-np.inf, 5, np.inf],
    labels=["Adherent", "Non-adherent"],
)
wo_s0_group.name = "delay_group"
wo_s0_data = pd.DataFrame(wo_s0_data).join(wo_s0_group).set_index("delay_group", append=True)
# percentage of rows per delay group, computed separately within every log type
delay_group_percent = wo_s0_data.groupby("log_type").apply(
    lambda df: df.groupby("delay_group").apply(lambda d: len(d) / len(df) * 100)
)
delay_group_percent

### Selfreport without App vs. with App

#### Data Preparation

In [None]:
# keep only the two self-report log types (in this order) for the comparison below
cort_samples_selfreport = cort_samples.reindex(["Selfreport without App", "Selfreport with App"], level="log_type")
cort_samples_selfreport.head()

#### Boxplot

In [None]:
data_analysis = cort_samples_selfreport.copy()

# analysis steps, executed in this order
pipeline_steps = [
    ("prep", "normality"),
    ("prep", "equal_var"),
    ("test", "mixed_anova"),
    ("posthoc", "pairwise_ttests"),
]
# design: `sample` is the within factor, `log_type` the between factor, `night_id` the subject
pipeline_params = dict(
    dv="time_diff_to_naive_min",
    within="sample",
    between="log_type",
    subject="night_id",
    multicomp={"method": "bonf"},
)

pipeline = StatsPipeline(steps=pipeline_steps, params=pipeline_params)

pipeline.apply(data_analysis)
# show the preparatory checks and the test result; post-hoc results are not displayed here
pipeline.display_results(prep=True, posthoc=False)

In [None]:
# Boxplot of time_diff_to_naive_min per sample, split by self-report log type; exported as pdf and png.
fig, ax = plt.subplots()

bp.plotting.feature_boxplot(
    data=cort_samples_selfreport.reset_index(),
    x="sample",
    y="time_diff_to_naive_min",
    hue="log_type",
    hue_order=["Selfreport without App", "Selfreport with App"],
    legend_orientation="horizontal",
    legend_loc="upper center",
    rect=(0, 0, 1, 0.9),  # presumably reserves the top 10 % of the figure for the legend - TODO confirm
    ax=ax,
)
# Raw string: "\D" is not a valid escape sequence in a normal string literal.
ax.set_ylabel(r"$\Delta s$ [min]")
ax.set_xlabel("Sample")

for path in [img_path, paper_img_path]:
    bp.utils.file_handling.export_figure(fig, "img_sampling_delay_selfreport", path, ["pdf", "png"])

### Sensor

#### Data Preparation

In [None]:
# the two sensor-based log types that are compared in this section
log_order = ["Sensor + Selfreport without App", "Sensor + App"]

# select the rows of these log types from the "log_type" index level
data_sensor = multi_xs(cort_samples, log_order, level="log_type")
data_sensor.head()

#### Median & IQR

In [None]:
# descriptive statistics of time_diff_to_naive_min per log type and sample for the sensor-based log types
data_desc = median_iqr_saliva_samples(data_sensor, "time_diff_to_naive_min", ["log_type", "sample"])
data_desc.head()

#### By Delay Groups

In [None]:
# time_diff_to_naive_min of sample "S0" for the sensor-based log types
wo_s0_data = data_sensor.xs("S0", level="sample")["time_diff_to_naive_min"]
# Split at 5: values <= 5 are "Adherent", values > 5 are "Non-adherent".
# Open-ended outer edges keep the bins strictly increasing for any data; edges derived from the
# data's min/max make pd.cut raise when all values lie on one side of 5 (or an extreme equals 5).
wo_s0_group = pd.cut(
    wo_s0_data,
    bins=[-np.inf, 5, np.inf],
    labels=["Adherent", "Non-adherent"],
)

wo_s0_group.name = "delay_group"
wo_s0_data = pd.DataFrame(wo_s0_data).join(wo_s0_group).set_index("delay_group", append=True)

# percentage of rows per delay group, computed separately within every log type,
# with the rows ordered like `log_order`
delay_group_percent = wo_s0_data.groupby("log_type").apply(
    lambda df: df.groupby("delay_group").apply(lambda d: len(d) / len(df) * 100)
).reindex(log_order)
delay_group_percent

## Awakening and Sampling Time Unit Digits

In [None]:
# log types and condition order used by the unit-digit tables and figures of this section
# NOTE(review): "Selfreport" is a label from `log_types_coarse`; confirm it exists in the log_type level of cort_samples
log_type_order = ["Selfreport", "App"]
condition_order = ["Known Alarm", "Spontaneous Awakening", "Unknown Alarm"]

### Data Preparation

In [None]:
# unit digit (0-9) of the minutes component of the sampling time and of the wake onset
sample_unit_digit = cort_samples["sample_time"].dt.components["minutes"] % 10
wake_onset_unit_digit = cort_samples["wake_onset"].dt.components["minutes"] % 10

cort_samples = cort_samples.assign(
    sample_minute=sample_unit_digit,
    wake_onset_minute=wake_onset_unit_digit,
)

cort_samples.head()

### Awakening Times

#### Percents

In [None]:
# share (in percent, rounded to integers) of every wake-onset unit digit per condition and log type;
# one column per unit digit, digits that never occur in a group are filled with 0
wo_minutes = (
    cort_samples["wake_onset_minute"]
    .reindex(log_type_order, level="log_type")
    .groupby(["condition", "log_type"])
    .value_counts(normalize=True)
    .mul(100)
    .unstack(["wake_onset_minute"])
    .fillna(0)
    .round(0)
)
wo_minutes

#### Histogram Plots

In [None]:
from matplotlib.ticker import MultipleLocator

# Unit-digit histograms of the wake onset: one row (sub-figure) per condition, one column per log type.
sns.set_theme(**theme_kwargs_grid)

fig = plt.figure(figsize=(8, 9), constrained_layout=True)
fig.suptitle("Awakening Times")

subfigs = fig.subfigures(nrows=3, ncols=1, hspace=0.05)

data_analysis = cort_samples.reindex(log_type_order, level="log_type")
grouper_condition = data_analysis.groupby("condition")

# common y-range for all panels
ylims = [0, 65]

for condition, subfig in zip(condition_order, subfigs):
    subfig.suptitle(condition, fontsize="medium")
    grouper_log_type = grouper_condition.get_group(condition).groupby("log_type")

    # 1x2 grid of axes inside the sub-figure, one axis per log type
    axs = subfig.subplots(nrows=1, ncols=2, gridspec_kw={"wspace": 0.1})
    for log_type, ax in zip(log_type_order, axs):
        df = grouper_log_type.get_group(log_type)
        # one bar per unit digit 0..9
        sns.histplot(
            data=df.reset_index(),
            x="wake_onset_minute",
            stat="percent",
            bins=10,
            binrange=[0, 9],
            discrete=True,
            ax=ax,
        )
        ax.set_xticks(np.arange(0, 10))
        ax.yaxis.set_major_locator(MultipleLocator(20))
        ax.yaxis.set_minor_locator(MultipleLocator(10))
        ax.set(
            title=log_type,
            xlabel="Unit Digit [min]",
            ylabel="Frequency [%]",
            ylim=ylims,
        )

for path in [img_path, paper_img_path]:
    bp.utils.file_handling.export_figure(fig, "img_unit_digits_awakening_time", path, ["pdf", "png"])

### Sampling Times

#### Percents

In [None]:
# share (in percent, rounded to integers) of every sampling-time unit digit per log type;
# one column per unit digit, digits that never occur in a group are filled with 0
sample_minutes = (
    cort_samples["sample_minute"]
    .reindex(log_type_order, level="log_type")
    .groupby(["log_type"])
    .value_counts(normalize=True)
    .mul(100)
    .unstack(["sample_minute"])
    .fillna(0)
    .round(0)
)
sample_minutes

#### Histogram Plots

##### All Conditions Combined

In [None]:
from matplotlib.ticker import MultipleLocator

# Unit-digit histograms of the sampling times over all conditions: one panel per log type.
sns.set_theme(**theme_kwargs_grid)

fig = plt.figure(figsize=(8, 3), constrained_layout=True)
fig.suptitle("Sampling Times")

subfig = fig.subfigures(nrows=1, ncols=1, hspace=0.05)
subfig.suptitle("All Conditions", fontsize="medium")

data_analysis = cort_samples.reindex(log_type_order, level="log_type")
grouper = data_analysis.groupby("log_type")

# common y-range for both panels
ylims = [0, 45]

axs = subfig.subplots(nrows=1, ncols=2, gridspec_kw={"wspace": 0.1})
for log_type, ax in zip(log_type_order, axs):
    df = grouper.get_group(log_type)

    # one bar per unit digit 0..9
    sns.histplot(
        data=df.reset_index(),
        x="sample_minute",
        stat="percent",
        bins=10,
        binrange=[0, 9],
        discrete=True,
        ax=ax,
    )
    ax.set_xticks(np.arange(0, 10))
    ax.yaxis.set_major_locator(MultipleLocator(20))
    ax.yaxis.set_minor_locator(MultipleLocator(10))
    ax.set(
        title=log_type,
        xlabel="Unit Digit [min]",
        ylabel="Frequency [%]",
        ylim=ylims,
    )


for path in [img_path, paper_img_path]:
    bp.utils.file_handling.export_figure(fig, "img_unit_digits_sampling_time", path, ["pdf", "png"])

##### Per Condition

In [None]:
from matplotlib.ticker import MultipleLocator

sns.set_theme(**theme_kwargs_grid)

fig = plt.figure(figsize=(8, 9), constrained_layout=True)
fig.suptitle("Sampling Times")

subfigs = fig.subfigures(nrows=3, ncols=1, hspace=0.05)

data_analysis = cort_samples.reindex(log_type_order, level="log_type")
grouper_condition = data_analysis.groupby("condition")

ylims = [0, 45]

for condition, subfig in zip(condition_order, subfigs):
    subfig.suptitle(condition, fontsize="medium")
    grouper_log_type = grouper_condition.get_group(condition).groupby("log_type")
    
    # create 1x3 subplots per subfig
    axs = subfig.subplots(nrows=1, ncols=2, gridspec_kw={"wspace": 0.1})
    for log_type, ax in zip(log_type_order, axs):
        df = grouper_log_type.get_group(log_type)
        sns.histplot(
            data=df.reset_index(), 
            x="sample_minute", 
            stat="percent", 
            bins=10, 
            binrange=[0,9], 
            discrete=True, 
            ax=ax
        )
        ax.set_xticks(np.arange(0,10))
        ax.yaxis.set_major_locator(MultipleLocator(20))
        ax.yaxis.set_minor_locator(MultipleLocator(10))
        ax.set_title(log_type)
        ax.set_xlabel("Unit Digit [min]")
        ax.set_ylabel("Frequency [%]")
        ax.set_ylim(ylims)

#for path in [img_path, paper_img_path]:
#    bp.utils.file_handling.export_figure(fig, "img_unit_digits_sampling_time", path, ["pdf", "png"])