# Saliva Data Analysis

In [None]:
from pathlib import Path

import pandas as pd
import numpy as np

import biopsykit as bp
from biopsykit.stats import StatsPipeline

from carwatch_analysis.saliva_helper import import_cortisol_raw, import_cortisol_features, analysis_saliva_raw, analysis_saliva_features
from carwatch_analysis.general_helper import describe_groups_df

from statannot import add_stat_annotation

import pingouin as pg

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib widget
%load_ext autoreload
%autoreload 2

In [None]:
# Round pingouin result tables to three decimals
pg.options['round'] = 3

# Colour palette taken from biopsykit; used by the seaborn theme below
palette = bp.colors.fau_palette

# Seaborn theme settings: the default variant and one with an enlarged font scale
theme_kwargs = dict(context="talk", style="ticks", palette=palette)
theme_kwargs_scale = dict(theme_kwargs, font_scale=1.25)

sns.set_theme(**theme_kwargs)
plt.rcParams.update({
    'figure.figsize': (10, 5),
    "mathtext.default": "regular",
    # font type 42 embeds TrueType fonts in exported PDFs
    'pdf.fonttype': 42,
})
plt.close('all')

pad = 0.2

# When True, the plotting cells also write their figures to `plot_path`
export = True
palette

In [None]:
# Folder layout, relative to the location of this notebook:
#   exports/             -> cleaned input data
#   results/plots/       -> exported figures
#   results/statistics/  -> exported statistics tables
base_path = Path("../..")
export_path = base_path / "exports"
result_path = base_path / "results"
plot_path = result_path / "plots"
stats_path = result_path / "statistics"

# Create the output folders via biopsykit's helper
bp.utils.file_handling.mkdirs([result_path, plot_path, stats_path])

## Import

### Raw Cortisol

In [None]:
# Load the cleaned raw cortisol samples and preview the first rows
cort_samples = import_cortisol_raw(export_path / "cortisol_samples_cleaned.csv")
cort_samples.head()

### Cortisol Features

In [None]:
# Load the cleaned cortisol features and preview the first rows
cort_features = import_cortisol_features(export_path / "cortisol_features_cleaned.csv")
cort_features.head()

## Plots and Statistics

In [None]:
# Axis labelling shared by the cortisol line plots
xlabel_lp = "Time after Awakening [min]"
# One tick label every 15 minutes: 0, 15, 30, 45, 60
xticklabels_lp = list(range(0, 75, 15))
ylabel_lp = "Cortisol [nmol/l]"
#ylim = [5, 16]

### Weekend vs. Weekday

#### Class Distribution

In [None]:
# Class distribution of the raw cortisol samples with respect to "weekend"
# (project helper from carwatch_analysis.general_helper; its return value is the cell output)
describe_groups_df(cort_samples, "weekend")

In [None]:
# Order of the conditions in the stacked bar chart
order = ["Spontaneous", "Known Alarm", "Unknown Alarm"]

# Table of "weekend" stacked per condition (biopsykit's stack_groups_percent helper)
df_stacked = bp.utils.dataframe_handling.stack_groups_percent(
    cort_samples,
    hue="condition",
    stacked="weekend",
    order=order,
)

display(df_stacked.T)

fig, ax = plt.subplots()
bp.plotting.stacked_barchart(
    data=df_stacked,
    order=order,
    ylabel="Recorded Nights [%]",
    ax=ax,
)
fig.tight_layout()

if export:
    fig.savefig(plot_path / "img_barchart_weekdays.pdf", transparent=True)

#### Raw Cortisol

##### Plots

In [None]:
# Cortisol over the samples, one line per "weekend" group
x, y = 'sample', 'cortisol'
hue = style = 'weekend'

# Aliases for the dependent variable and the grouping variable
dv, group = y, hue

fig, ax = plt.subplots(figsize=(10, 5))
bp.plotting.lineplot(
    cort_samples,
    x=x,
    y=y,
    hue=hue,
    style=style,
    ax=ax,
    xlabel=xlabel_lp,
    xticklabels=xticklabels_lp,
    ylabel=ylabel_lp,
)

fig.tight_layout()
if export:
    fig.savefig(plot_path / "img_car_weekend.pdf", transparent=True)

##### Statistics

In [None]:
# Statistics on the raw cortisol samples for "weekend": run the project's analysis helper,
# export the results to an Excel file, and display them without post-hoc tests
pipeline = analysis_saliva_raw(cort_samples, variable="weekend")
pipeline.export_statistics(stats_path / "analysis_cortisol_raw_weekend.xlsx")
pipeline.display_results(posthoc=False)

#### Features

##### Plots

In [None]:
# Boxplots of the cortisol features, grouped by "weekend"; one subplot per feature family
x, y, hue = 'saliva_feature', 'cortisol', 'weekend'

cats = ["auc", "slope", "max_inc"]

notch = False

fig, axs = plt.subplots(ncols=len(cats), figsize=(12, 5))
for cat, ax in zip(cats, axs):
    # keep only the entries whose column label contains `cat`
    data_plot = cort_features.unstack().filter(like=cat).stack()
    sns.boxplot(data=data_plot.reset_index(), x=x, y=y, hue=hue, notch=notch, ax=ax)
    # remember the legend entries, then drop the per-axes legend
    handles, labels = ax.get_legend_handles_labels()
    ax.legend().remove()

# one shared legend in the space reserved on the right of the figure
fig.legend(handles, labels, loc='upper right', bbox_to_anchor=(1.0, 1.0))
fig.tight_layout(rect=(0, 0, 0.85, 1.0))
if export:
    fig.savefig(plot_path / "img_boxplots_car_features_weekend.pdf", transparent=True)

##### Statistics

In [None]:
# Statistics on the cortisol features for "weekend": run the project's analysis helper,
# export the results to an Excel file, and display them without post-hoc tests
pipeline = analysis_saliva_features(cort_features, variable="weekend")
pipeline.export_statistics(stats_path / "analysis_cortisol_features_weekend.xlsx")
pipeline.display_results(posthoc=False)

### Chronotype

#### Class Distribution

In [None]:
# Class distribution of the raw cortisol samples with respect to "chronotype"
# (project helper from carwatch_analysis.general_helper; its return value is the cell output)
describe_groups_df(cort_samples, "chronotype")

#### Raw Cortisol

##### Plots

In [None]:
# Cortisol over the samples, one line per chronotype
x, y = 'sample', 'cortisol'
hue = style = 'chronotype'

# fixed order of the chronotype groups in the plot
hue_order = ['Morning', 'Intermediate', 'Evening']

fig, ax = plt.subplots()
bp.plotting.lineplot(
    cort_samples,
    x=x,
    y=y,
    hue=hue,
    hue_order=hue_order,
    style=style,
    xlabel=xlabel_lp,
    xticklabels=xticklabels_lp,
    ylabel=ylabel_lp,
    ax=ax,
)
fig.tight_layout()
if export:
    fig.savefig(plot_path / "img_car_chronotype.pdf", transparent=True)

##### Statistics

In [None]:
# Statistics on the raw cortisol samples for "chronotype": run the project's analysis helper,
# export the results to an Excel file, and display them including post-hoc tests
pipeline = analysis_saliva_raw(cort_samples, variable="chronotype")
pipeline.export_statistics(stats_path / "analysis_cortisol_raw_chronotype.xlsx")
pipeline.display_results(posthoc=True)

#### Features

##### Plots

In [None]:
# Boxplots of the cortisol features, grouped by chronotype; one subplot per feature family
x, y, hue = 'saliva_feature', 'cortisol', 'chronotype'

cats = ["auc", "slope", "max_inc"]
# fixed order of the chronotype groups within each box group
order = ['Morning', 'Intermediate', 'Evening']

notch = False

fig, axs = plt.subplots(ncols=len(cats), figsize=(12, 5))
for cat, ax in zip(cats, axs):
    # keep only the entries whose column label contains `cat`
    data_plot = cort_features.unstack().filter(like=cat).stack()
    sns.boxplot(
        data=data_plot.reset_index(),
        x=x,
        y=y,
        hue=hue,
        hue_order=order,
        notch=notch,
        ax=ax,
    )
    # remember the legend entries, then drop the per-axes legend
    handles, labels = ax.get_legend_handles_labels()
    ax.legend().remove()

# one shared legend in the space reserved on the right of the figure
fig.legend(handles, labels, loc='upper right', bbox_to_anchor=(1.0, 1.0))
fig.tight_layout(rect=(0, 0, 0.85, 1.0))

if export:
    fig.savefig(plot_path / "img_boxplot_car_features_chronotype.pdf", transparent=True)

##### Statistics

In [None]:
# Statistics on the cortisol features for "chronotype", requesting the 'welch_anova' test type;
# export the results to an Excel file and display them without post-hoc tests
pipeline = analysis_saliva_features(
    cort_features,
    variable="chronotype",
    test_type='welch_anova',
)
pipeline.export_statistics(stats_path / "analysis_cortisol_features_chronotype.xlsx")
pipeline.display_results(posthoc=False)

In [None]:
def _regress_cortisol_on_meq(df):
    """Fit a pingouin linear regression with 'MEQ' as predictor and 'cortisol' as outcome, dropping NaNs."""
    return pg.linear_regression(df['MEQ'], df['cortisol'], remove_na=True)


# One regression per saliva feature; the combined result table is the cell output
cort_features.reset_index().groupby("saliva_feature").apply(_regress_cortisol_on_meq)

### Within Ideal Bedtime

#### Class Distribution

In [None]:
# Class distribution of the raw cortisol samples with respect to "within_ideal_bed_time"
# (project helper from carwatch_analysis.general_helper; its return value is the cell output)
describe_groups_df(cort_samples, "within_ideal_bed_time")

#### Raw Cortisol

##### Plots

In [None]:
# Cortisol over the samples, one line per "within_ideal_bed_time" group
x, y = 'sample', 'cortisol'
hue = style = 'within_ideal_bed_time'

fig, ax = plt.subplots()

bp.plotting.lineplot(
    cort_samples,
    x=x,
    y=y,
    hue=hue,
    style=style,
    xlabel=xlabel_lp,
    xticklabels=xticklabels_lp,
    ylabel=ylabel_lp,
    ax=ax,
)

fig.tight_layout()
if export:
    fig.savefig(plot_path / "img_car_within_bedtime.pdf", transparent=True)

##### Statistics

In [None]:
# Statistics on the raw cortisol samples for "within_ideal_bed_time": run the project's analysis
# helper, export the results to an Excel file, and display them without post-hoc tests
pipeline = analysis_saliva_raw(cort_samples, variable="within_ideal_bed_time")
pipeline.export_statistics(stats_path / "analysis_cortisol_raw_ideal_bedtime.xlsx")
pipeline.display_results(posthoc=False)

#### Features

##### Plots

In [None]:
# Boxplots of the cortisol features, grouped by "within_ideal_bed_time";
# one subplot per feature family listed in `cats`
x = 'saliva_feature'
y = 'cortisol'
hue = 'within_ideal_bed_time'

cats = ["auc", "slope", "max_inc"]

notch = False

fig, axs = plt.subplots(figsize=(12,5), ncols=len(cats))
for ax, cat in zip(axs, cats):
    # keep only the entries whose column label contains `cat`
    data_plot = cort_features.unstack().filter(like=cat).stack()
    sns.boxplot(data=data_plot.reset_index(), x=x, y=y, hue=hue, ax=ax, notch=notch)
    # remember the legend entries, then drop the per-axes legend
    h, l = ax.get_legend_handles_labels()
    ax.legend().remove()

# one shared legend in the space reserved on the right of the figure
fig.legend(h,l, loc='upper right', bbox_to_anchor=(1.0, 1.0))
fig.tight_layout(rect=(0, 0, 0.85, 1.0))
if export:
    # Fixed file name: this figure was previously saved as "img_boxplot_car_wakeup_source.pdf"
    # (copy-paste slip), which labelled the bedtime figure as a wake-up source figure.
    fig.savefig(plot_path.joinpath("img_boxplot_car_features_within_bedtime.pdf"), transparent=True)

##### Statistics

In [None]:
# Statistics on the cortisol features for "within_ideal_bed_time": run the project's analysis
# helper, export the results to an Excel file, and display them without post-hoc tests
pipeline = analysis_saliva_features(cort_features, variable="within_ideal_bed_time")
pipeline.export_statistics(stats_path / "analysis_cortisol_features_ideal_bedtime.xlsx")
pipeline.display_results(posthoc=False)

### Wakeup Source

#### Raw Cortisol

##### Plots

In [None]:
# Cortisol over the samples, one line per wake-up source
x = 'sample'
y = 'cortisol'

hue = 'wakeup_source'
style = 'wakeup_source'

# fixed order of the wake-up source groups in the plot
hue_order = ['Spontaneous', 'Alarm', 'Not Specified']

fig, ax = plt.subplots()
# Pass the shared axis labels and tick labels like every other cortisol line plot in this
# notebook; this cell previously omitted them, so its axes were labelled differently.
bp.plotting.lineplot(
    cort_samples,
    x=x,
    y=y,
    hue=hue,
    hue_order=hue_order,
    style=style,
    xlabel=xlabel_lp,
    xticklabels=xticklabels_lp,
    ylabel=ylabel_lp,
    ax=ax,
)
fig.tight_layout()
if export:
    fig.savefig(plot_path.joinpath("img_car_wakeup_source.pdf"), transparent=True)

##### Statistics

In [None]:
# Statistics on the raw cortisol samples for "wakeup_source": run the project's analysis helper,
# export the results to an Excel file, and display them without post-hoc tests
pipeline = analysis_saliva_raw(cort_samples, variable="wakeup_source")
pipeline.export_statistics(stats_path / "analysis_cortisol_raw_wakeup_source.xlsx")
pipeline.display_results(posthoc=False)

#### Features

##### Plots

In [None]:
# Distinct values of the 'wakeup_source' index level of the feature table
cort_features.index.get_level_values('wakeup_source').unique()

In [None]:
# Boxplots of the cortisol features, grouped by wake-up source; one subplot per feature family
x, y, hue = 'saliva_feature', 'cortisol', 'wakeup_source'

notch = False

# fixed order of the wake-up source groups within each box group
order = ['Alarm', 'Spontaneous', 'Not Specified']
cats = ["auc", "slope", "max_inc"]

fig, axs = plt.subplots(ncols=len(cats), figsize=(12, 5))
for cat, ax in zip(cats, axs):
    # keep only the entries whose column label contains `cat`
    data_plot = cort_features.unstack().filter(like=cat).stack()
    sns.boxplot(
        data=data_plot.reset_index(),
        x=x,
        y=y,
        hue=hue,
        hue_order=order,
        notch=notch,
        ax=ax,
    )
    # remember the legend entries, then drop the per-axes legend
    handles, labels = ax.get_legend_handles_labels()
    ax.legend().remove()

# one shared legend in the space reserved on the right of the figure
fig.legend(handles, labels, loc='upper right', bbox_to_anchor=(1.0, 1.0))
fig.tight_layout(rect=(0, 0, 0.85, 1.0))
if export:
    fig.savefig(plot_path / "img_boxplot_car_features_wakeup_source.pdf", transparent=True)

##### Statistics

In [None]:
# Statistics on the cortisol features for "wakeup_source": run the project's analysis helper,
# export the results to an Excel file, and display them without post-hoc tests
pipeline = analysis_saliva_features(cort_features, variable="wakeup_source")
pipeline.export_statistics(stats_path / "analysis_cortisol_features_wakeup_source.xlsx")
pipeline.display_results(posthoc=False)

### Condition

#### Class Distribution

In [None]:
# Class distribution of the raw cortisol samples with respect to "condition"
# (project helper from carwatch_analysis.general_helper; its return value is the cell output)
describe_groups_df(cort_samples, "condition")

#### Raw Cortisol

##### Plots

In [None]:
# Cortisol over the samples, one line per condition
x, y = 'sample', 'cortisol'
hue = style = 'condition'

# fixed order of the conditions in the plot
hue_order = ['Spontaneous', 'Known Alarm', 'Unknown Alarm']

fig, ax = plt.subplots()
bp.plotting.lineplot(
    cort_samples,
    x=x,
    y=y,
    hue=hue,
    hue_order=hue_order,
    style=style,
    xlabel=xlabel_lp,
    xticklabels=xticklabels_lp,
    ylabel=ylabel_lp,
    ax=ax,
)

fig.tight_layout()
if export:
    fig.savefig(plot_path / "img_car_condition.pdf", transparent=True)

##### Statistics

In [None]:
# Statistics on the raw cortisol samples for "condition": run the project's analysis helper,
# export the results to an Excel file, and display them without post-hoc tests
pipeline = analysis_saliva_raw(cort_samples, variable="condition")
pipeline.export_statistics(stats_path / "analysis_cortisol_raw_condition.xlsx")
pipeline.display_results(posthoc=False)

#### Features

##### Plots

In [None]:
# Display the cortisol feature table as the cell output
cort_features

In [None]:
# Boxplots of selected cortisol features per condition, annotated with pairwise
# independent t-tests (statannot) for the pairs listed in `boxplot_pairs`.
x = 'saliva_feature'
y = 'cortisol'
hue = 'condition'

notch = False

# Hue order for this figure. It was previously not defined in this cell, so `hue_order=order`
# picked up whatever an earlier cell had left in `order` (most recently the wake-up source
# levels 'Alarm' / 'Spontaneous' / 'Not Specified'), which matches neither the condition
# levels nor the `box_pairs` below.
order = ['Spontaneous', 'Known Alarm', 'Unknown Alarm']

# substrings used to select the features shown in each subplot
cats = ["auc_i", "slope", "max"]

# per subplot: pairs of (feature, condition) boxes to be compared and annotated
boxplot_pairs = {
    'auc_i': [(('auc_i', 'Spontaneous'),('auc_i', 'Unknown Alarm'))],
    'slope': [(('slopeS0S3', 'Known Alarm'), ('slopeS0S3', 'Unknown Alarm')), (('slopeS0S3', 'Spontaneous'), ('slopeS0S3', 'Unknown Alarm')), (('slopeS0S4', 'Known Alarm'), ('slopeS0S4', 'Unknown Alarm')), (('slopeS0S4', 'Spontaneous'), ('slopeS0S4', 'Unknown Alarm'))],
    'max': [(('max_inc', 'Known Alarm'), ('max_inc', 'Unknown Alarm')), (('max_inc', 'Spontaneous'), ('max_inc', 'Unknown Alarm'))]
}

# per subplot: tick labels replacing the raw feature names on the x axis
xticklabels = {
    'auc_i': ["$AUC_I$"],
    'slope': ["$a_{S0S3}$", "$a_{S0S4}$"],
    'max': ["Max", "$inc_{max}$"]
}

# per subplot: y axis label including the unit
ylabels = {
    'auc_i': r"Cortisol AUC $\left[\frac{nmol \cdot min}{l} \right]$",
    'slope': r"Cortisol Slope $\left[\frac{nmol}{l \cdot min} \right]$",
    'max': "Cortisol [nmol/l]"
}

fig, axs = plt.subplots(figsize=(15,5), ncols=len(cats))
for ax, cat in zip(axs, cats):
    # keep only the entries whose column label contains `cat`
    data_plot = cort_features.unstack().filter(like=cat).stack()
    sns.boxplot(data=data_plot.reset_index(), x=x, y=y, hue=hue, hue_order=order, ax=ax, notch=notch)
    box_pairs = boxplot_pairs.get(cat, [])
    if len(box_pairs) > 0:
        # uncorrected independent t-tests; significance stars at p < 0.001 / 0.01 / 0.05
        add_stat_annotation(ax=ax, data=data_plot.reset_index(), box_pairs=box_pairs, x=x, y=y, hue=hue, hue_order=order, test='t-test_ind', comparisons_correction=None, pvalue_thresholds=[[1e-3, "***"], [1e-2, "**"], [0.05, "*"]])

    # remember the legend entries, then drop the per-axes legend
    h, l = ax.get_legend_handles_labels()
    ax.set_ylabel(ylabels[cat])
    ax.set_xlabel(None)
    ax.set_xticklabels(xticklabels[cat])
    ax.legend().remove()

# one shared legend in the space reserved on the right of the figure
fig.legend(h,l, loc='upper right', bbox_to_anchor=(1.0, 1.0))
fig.tight_layout(rect=(0, 0, 0.84, 1.0))
if export:
    fig.savefig(plot_path.joinpath("img_boxplots_car_condition.pdf"), transparent=True)

##### Statistics

In [None]:
# Statistics on the cortisol features for "condition", requesting the 'anova' test type;
# export the results to an Excel file and display them including post-hoc tests
pipeline = analysis_saliva_features(
    cort_features,
    variable="condition",
    test_type='anova',
)
pipeline.export_statistics(stats_path / "analysis_cortisol_features_condition.xlsx")
pipeline.display_results(posthoc=True)

### Pairplots

In [None]:
# Pairwise relationships between the cortisol samples (one column per sample), coloured by condition
g = sns.pairplot(
    data=(
        cort_samples['cortisol']
        .unstack('sample')
        .reset_index('condition')
    ),
    hue='condition',
    corner=True,
)

In [None]:
# Same pair plot on natural-log-transformed cortisol values, with KDE contours on the lower triangle
g = sns.pairplot(
    data=np.log(cort_samples['cortisol'].unstack('sample')).reset_index('condition'),
    hue='condition',
    corner=True,
)
g = g.map_lower(sns.kdeplot, levels=3, color=".2", alpha=0.5)

In [None]:
# Pairwise relationships between the cortisol features (one column per feature), coloured by condition
g = sns.pairplot(
    data=(
        cort_features['cortisol']
        .unstack('saliva_feature')
        .reset_index('condition')
    ),
    hue='condition',
    corner=True,
)
#g = g.map_lower(sns.kdeplot, levels=3, color=".2", alpha=0.5)