# Sample Time Analysis

In [11]:
from pathlib import Path
import json

import pandas as pd
import numpy as np
import scipy.stats as stats
import pingouin as pg

import biopsykit as bp
from biopsykit.stats import StatsPipeline
from biopsykit.io import load_long_format_csv
from biopsykit.utils.dataframe_handling import multi_xs

from fau_colors import cmaps

import matplotlib.pyplot as plt
import seaborn as sns

from carwatch_analysis.io import load_cortisol_samples_log_times
from carwatch_analysis.datasets import CarWatchDatasetProcessed
from carwatch_analysis.data_processing.sample_times import (
    compute_cumulative_sampling_delay,
    categorize_sampling_adherence,
)
from carwatch_analysis.stats import create_unique_night_id, median_iqr_saliva_samples
from carwatch_analysis.plotting import multi_boxplot_sampling_delay, multi_paired_plot_sampling_delay

%matplotlib widget
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [12]:
# Close any figures that are still open before (re-)configuring plotting.
plt.close("all")

# Seaborn theme built on the `faculties` colormap from fau_colors.
palette = sns.color_palette(cmaps.faculties)
theme_kwargs = {"context": "talk", "style": "ticks", "palette": palette}
sns.set_theme(**theme_kwargs)

# Matplotlib defaults, set after the seaborn theme has been applied.
plt.rcParams.update(
    {
        "figure.figsize": (10, 5),
        "pdf.fonttype": 42,
        "mathtext.default": "regular",
    }
)

export = True

# Set pingouin's "round" option to 4.
pg.options["round"] = 4

# Show the palette as the cell output.
palette

## Setup Paths

In [13]:
# Key into config.json that selects which entry's "base_path" is used for the data folder.
deploy_type = "develop"

In [14]:
# Build the path to the data folder from the project configuration.
# read_text() opens and closes the file itself, so no file handle is leaked.
config_dict = json.loads(Path("../../../config.json").read_text(encoding="utf-8"))
data_path = Path("..").joinpath(config_dict[deploy_type]["base_path"])
data_path

PosixPath('/Users/Richer/Documents/PhD/Projects/HealthPsychology/CARWatch/Data')

In [15]:
# Instantiate the processed CARWatch dataset from the data folder and display it
# (the cell output lists one row per subject/night combination).
dataset = CarWatchDatasetProcessed(data_path)
dataset

Unnamed: 0,subject,night
0,AB19E,0
1,AB19E,1
2,AB31R,0
3,AB31R,1
4,AC12E,0
...,...,...
229,VE19A,1
230,VS09S,0
231,VS09S,1
232,WM13K,0


In [16]:
# Project-relative folders for exported data, result tables and plots.
base_path = Path("../..")
export_path = base_path.joinpath("exports")
result_path = base_path.joinpath("results")
stats_path = result_path.joinpath("statistics")

img_path = result_path.joinpath("plots")

# Location of the paper repository, read from a local JSON file.
# read_text() opens and closes the file itself, so no file handle is leaked.
paper_path_config = json.loads(Path("../paper_path.json").read_text(encoding="utf-8"))
paper_path = Path(paper_path_config["paper_path"])
paper_img_path = paper_path.joinpath("img")

# Make sure all output folders exist before anything is written to them.
bp.utils.file_handling.mkdirs([result_path, stats_path, img_path, paper_img_path])

### Cortisol Samples

In [17]:
# Load the processed cortisol samples (all log types) from the export folder.
cort_path = export_path.joinpath("cortisol_samples_processed_all_log_types.csv")
cort_samples = load_cortisol_samples_log_times(cort_path)
# Rename the "Spontaneous" condition to "Spontaneous Awakening" in the "condition" index level.
cort_samples = cort_samples.rename({"Spontaneous": "Spontaneous Awakening"}, level="condition")
cort_samples.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,date,wake_onset,sample_time,cortisol,time_diff_to_wake_onset,time_diff_min,time_diff_to_naive_min
subject,night,night_id,condition,log_type,sample,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
AB19E,0,AB19E_0,Known Alarm,Naive,S0,2019-11-18,0 days 05:45:00,0 days 05:45:00,2.8,0 days 00:00:00,0.0,0.0
AB19E,0,AB19E_0,Known Alarm,Naive,S1,2019-11-18,0 days 05:45:00,0 days 06:00:00,5.59,0 days 00:15:00,15.0,0.0
AB19E,0,AB19E_0,Known Alarm,Naive,S2,2019-11-18,0 days 05:45:00,0 days 06:15:00,13.29,0 days 00:30:00,30.0,0.0
AB19E,0,AB19E_0,Known Alarm,Naive,S3,2019-11-18,0 days 05:45:00,0 days 06:30:00,13.46,0 days 00:45:00,45.0,0.0
AB19E,0,AB19E_0,Known Alarm,Naive,S4,2019-11-18,0 days 05:45:00,0 days 06:45:00,12.65,0 days 01:00:00,60.0,0.0


In [18]:
# Condition labels (after renaming "Spontaneous" to "Spontaneous Awakening").
conditions = ["Spontaneous Awakening", "Known Alarm", "Unknown Alarm"]
# Fine-grained log types; "Naive" comes first so that `log_types_fine[1:]` selects all others.
log_types_fine = [
    "Naive",
    "Selfreport without App",
    "Selfreport with App",
    "App",
    "Sensor + Selfreport without App",
    "Sensor + Selfreport with App",
    "Sensor + App",
]
# Coarse log types (no "with App"/"without App" distinction for self-reports).
log_types_coarse = ["Naive", "Selfreport", "App", "Sensor + Selfreport", "Sensor + App"]

In [None]:
# Collects the result tables of this notebook by name; written to an Excel file in the export cell.
dict_sample_times = {}

## Sample Times

### All Log Types

#### Median & IQR

In [None]:
# Descriptive statistics of the `time_diff_to_naive_min` column per log type and sample.
variable = "time_diff_to_naive_min"
data_desc = median_iqr_saliva_samples(cort_samples, variable, ["log_type", "sample"])
# log_types_fine[1:] skips the leading "Naive" entry and fixes the row order.
data_desc = data_desc.reindex(log_types_fine[1:])
data_desc.columns = data_desc.columns.set_names(variable, level=-1)
data_desc = data_desc.round(2)
# NOTE(review): the key says "Mean", but the table presumably holds medians (see helper name).
# Left unchanged because the key is presumably used as the sheet name in the Excel export.
dict_sample_times["Sampling_Delay_Mean_IQR"] = data_desc

data_desc

#### Cumulative Sampling Delay

In [None]:
# Delay columns of all log types except "Naive", reshaped to one column per sample.
delay_without_naive = cort_samples["time_diff_to_naive_min"].drop("Naive", level="log_type")
delay_per_sample = delay_without_naive.unstack("sample")

# Accumulate the delay and bring the rows into the order of `log_types_fine` (without "Naive").
cum_sampling_delay = compute_cumulative_sampling_delay(delay_per_sample)
cum_sampling_delay = cum_sampling_delay.reindex(log_types_fine[1:])
cum_sampling_delay = cum_sampling_delay.round(2)
dict_sample_times["Cumulative_Sampling_Delay"] = cum_sampling_delay

cum_sampling_delay

#### Boxplots

In [None]:
# Log types shown in the plot: everything except "Naive" and the two "with App" self-report variants.
excluded_log_types = {"Naive", "Selfreport with App", "Sensor + Selfreport with App"}
log_type_order = [name for name in log_types_fine if name not in excluded_log_types]

data_plot = cort_samples.drop("Naive", level="log_type")
fig, axs = multi_boxplot_sampling_delay(data_plot, order=log_type_order, figsize=(12, 4))

# Export the figure to both the results folder and the paper folder.
for path in (img_path, paper_img_path):
    bp.utils.file_handling.export_figure(
        fig,
        filename="img_boxplot_sampling_delay",
        base_dir=path,
        formats=["png", "pdf"],
        dpi=300,
    )

#### Paired Plots

In [None]:
# Log types shown in the plot: everything except "Naive" and the two "with App" self-report variants.
excluded_log_types = {"Naive", "Selfreport with App", "Sensor + Selfreport with App"}
log_type_order = [name for name in log_types_fine if name not in excluded_log_types]

# Work on a copy so that the plotting helper cannot modify `cort_samples`.
data_plot = cort_samples.copy()
fig, axs = multi_paired_plot_sampling_delay(data_plot, order=log_type_order, figsize=(12, 4))

# Export the figure to both the results folder and the paper folder.
for path in (img_path, paper_img_path):
    bp.utils.file_handling.export_figure(
        fig,
        filename="img_pairedplot_sampling_delay",
        base_dir=path,
        formats=["png", "pdf"],
        dpi=300,
    )

#### Compare Highest App-based Sampling Delays with Selfreport Sampling Delays

Get the three CARs with the highest $\Delta s_0$ for *App* (see Paired Plot)

In [None]:
# "App" rows of the delay column, reshaped to one column per sample.
app_delay_per_sample = cort_samples.xs("App", level="log_type")["time_diff_to_naive_min"].unstack("sample")

# Keep the three entries with the largest S0 value and extract their night_id labels.
largest_s0_delays = app_delay_per_sample["S0"].sort_values(ascending=False).head(3)
cars_max_delay = largest_s0_delays.index.get_level_values("night_id")
cars_max_delay

In [None]:
# Restrict the delay column to the nights selected in `cars_max_delay`.
max_delay_selfreport = cort_samples["time_diff_to_naive_min"].reindex(cars_max_delay, level="night_id")
# NOTE(review): "Selfreport" is not one of the fine-grained labels in `log_types_fine`
# ("Selfreport without App" / "Selfreport with App"); if `cort_samples` only contains the
# fine-grained labels, this selects no self-report rows -- confirm the intended label.
max_delay_selfreport = max_delay_selfreport.reindex(["Selfreport", "App"], level="log_type").unstack(
    ["sample", "log_type"]
)
# Sort the (sample, log_type) columns, drop incomplete rows and order by the S0 delay of "App".
max_delay_selfreport = max_delay_selfreport.sort_index(axis=1).dropna().sort_values(by=("S0", "App"), ascending=False)
max_delay_selfreport = max_delay_selfreport.round(2)
dict_sample_times["Max_Sampling_Delay_SR_App"] = max_delay_selfreport

max_delay_selfreport

#### Histogram

In [None]:
col = "time_diff_to_naive_min"

# One histogram panel per log type, "Naive" excluded.
log_type_order = [name for name in log_types_fine if name != "Naive"]

data_hist = cort_samples.reindex(log_type_order, level="log_type")
# Round to whole numbers; the histogram below uses a bin width of 1.
data_hist[col] = np.around(data_hist[col])

fig, axs = plt.subplots(nrows=2, ncols=3, figsize=(12, 5), sharex=True, sharey=True)
axs = axs.flatten()

grouper = data_hist.groupby("log_type")

for ax, log_type in zip(axs, log_type_order):
    df = grouper.get_group(log_type).reset_index()
    sns.histplot(data=df, x=col, stat="percent", binwidth=1, ax=ax)
    ax.minorticks_on()
    ax.set_title(log_type, fontsize="small")

fig.tight_layout()

### Selfreport without App vs. App

#### Paired Plots

In [None]:
# The two log types compared in this section, in plotting order.
log_type_order = ["Selfreport without App", "App"]

data_plot = cort_samples.reindex(log_type_order, level="log_type")
# Capitalize the "sample" index level name.
data_plot.index = data_plot.index.rename({"sample": "Sample"})

fig, axs = multi_paired_plot_sampling_delay(data_plot, order=log_type_order)
fig.tight_layout()

#### By Delay Groups

In [None]:
def _percent_per_delay_group(log_type_rows):
    """Return, per delay group, its share (in percent) of the rows in ``log_type_rows``."""
    n_total = len(log_type_rows)
    return log_type_rows.groupby("delay_group").apply(lambda rows: len(rows) / n_total * 100)


delay_group_data = categorize_sampling_adherence(
    cort_samples.reindex(["Selfreport without App", "App"], level="log_type")
)

# Percentage of rows per delay group, computed separately for each log type.
delay_group_percent = delay_group_data.groupby("log_type").apply(_percent_per_delay_group)
delay_group_percent = delay_group_percent.round(2)
dict_sample_times["Adherence_App_Selfreport"] = delay_group_percent

delay_group_percent

### Selfreport without App vs. with App

#### Data Preparation

In [None]:
# Keep only the two self-report log types (without and with app) for the comparison below.
cort_samples_selfreport = cort_samples.reindex(["Selfreport without App", "Selfreport with App"], level="log_type")
cort_samples_selfreport.head()

#### Boxplot

In [None]:
# data_analysis = cort_samples_selfreport.copy()
#
# pipeline = StatsPipeline(
#    steps=[
#        ("prep", "normality"),
#        ("prep", "equal_var"),
#        ("test", "mixed_anova"),
#        ("posthoc", "pairwise_ttests")
#    ],
#    params={
#        "dv": "time_diff_to_naive_min",
#        "within": "sample",
#        "between": "log_type",
#        "subject": "night_id",
#        "multicomp": {"method": "bonf"}
#    }
# )
#
# pipeline.apply(data_analysis)
# pipeline.display_results(prep=True, posthoc=False)

In [None]:
# Boxplot of `time_diff_to_naive_min` per sample, with one hue per self-report log type.
fig, ax = plt.subplots()

bp.plotting.feature_boxplot(
    data=cort_samples_selfreport.reset_index(),
    x="sample",
    y="time_diff_to_naive_min",
    hue="log_type",
    hue_order=["Selfreport without App", "Selfreport with App"],
    legend_orientation="horizontal",
    legend_loc="upper center",
    rect=(0, 0, 1, 0.9),
    ax=ax,
)
# Raw string: "\D" is not a valid escape sequence, so a plain string literal triggers an
# invalid-escape warning; the label text itself is unchanged.
ax.set_ylabel(r"$\Delta s$ [min]")
ax.set_xlabel("Sample")

# Export the figure to both the results folder and the paper folder.
for path in [img_path, paper_img_path]:
    bp.utils.file_handling.export_figure(fig, "img_boxplot_sampling_delay_selfreport", path, ["pdf", "png"])

### Sensor

#### Data Preparation

In [None]:
# Sensor-based log types analyzed in this section.
log_order = ["Sensor + Selfreport without App", "Sensor + App"]

# Select the rows of these log types from the "log_type" index level.
data_sensor = multi_xs(cort_samples, log_order, level="log_type")
data_sensor.head()

#### Median & IQR

In [None]:
# Descriptive statistics of `time_diff_to_naive_min` per log type and sample, sensor data only.
# Rebinding `data_desc` does not affect the table already stored in `dict_sample_times`.
data_desc = median_iqr_saliva_samples(data_sensor, "time_diff_to_naive_min", ["log_type", "sample"])
data_desc.head()

#### By Delay Groups

In [None]:
def _percent_per_delay_group(log_type_rows):
    """Return, per delay group, its share (in percent) of the rows in ``log_type_rows``."""
    n_total = len(log_type_rows)
    return log_type_rows.groupby("delay_group").apply(lambda rows: len(rows) / n_total * 100)


delay_group_data = categorize_sampling_adherence(data_sensor.reindex(log_order, level="log_type"))

# Percentage of rows per delay group, computed separately for each log type.
delay_group_percent = delay_group_data.groupby("log_type").apply(_percent_per_delay_group)
delay_group_percent = delay_group_percent.round(2)
dict_sample_times["Adherence_Sensor_App_Selfreport"] = delay_group_percent
delay_group_percent

## Export

In [None]:
# Write all collected result tables to a single Excel file in the results folder.
# NOTE(review): the `export` flag set during setup is not checked here -- confirm this is intended.
bp.io.write_pandas_dict_excel(dict_sample_times, result_path.joinpath("sample_time_results.xlsx"))