# Saliva Analysis

In [None]:
from pathlib import Path

import pandas as pd
import numpy as np

import biopsykit as bp
from biopsykit.stats import StatsPipeline

from carwatch_analysis.saliva_helper import analysis_saliva_raw, analysis_saliva_features

from statannot import add_stat_annotation

import pingouin as pg

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib widget
#%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
# Global plot appearance: seaborn "ticks" style combined with the "talk" context.
sns.set(context='talk', style='ticks')
# Default figure size; (15, 5) was used as an alternative before.
plt.rcParams['figure.figsize'] = (10, 5)
# Close all currently open figures.
plt.close('all')

# Flag checked by the plotting cells before they call ``fig.savefig``.
save_fig = False

# Use biopsykit's ``fau_palette`` as the default seaborn palette and display it.
palette = bp.colors.fau_palette
sns.set_palette(palette)
palette

In [None]:
# Set pingouin's 'round' option to 3 (presumably the number of decimals used in
# its result tables — confirm against the pingouin documentation).
pg.options['round'] = 3

In [None]:
# Output locations, given relative to the current working directory.
export_path = Path("../../exports")
plot_path = export_path / "plots"
stats_path = export_path / "statistics"

# Hand both output folders to biopsykit's ``mkdirs`` helper
# (presumably creates them if they do not exist yet — confirm).
bp.utils.file_handling.mkdirs([plot_path, stats_path])

## Import

In [None]:
cort_samples = pd.read_csv(export_path / "cortisol_samples_cleaned.csv")
# Give every night a unique id ("<subject>_<night>") to allow repeated-measures
# analyses; it is inserted as the third column.
cort_samples.insert(
    loc=2,
    column='night_id',
    value=cort_samples['subject'] + '_' + cort_samples['night'].astype(str),
)
cort_samples = cort_samples.drop(columns='time')
# Every remaining column except 'cortisol' becomes an index level.
cort_samples = cort_samples.set_index(cort_samples.columns.drop('cortisol').tolist())

cort_samples.head()

In [None]:
cort_features = pd.read_csv(export_path / "cortisol_features_cleaned.csv")
# Remove the "cortisol_" part from the biomarker names.
cort_features['biomarker'] = cort_features['biomarker'].str.replace("cortisol_", "")
# All columns but the last one form the index; the last column holds the values.
cort_features = cort_features.set_index(cort_features.columns[:-1].tolist())
cort_features.head()

## Plots and Statistics

In [None]:
# Axis labels and tick labels shared by the cortisol line plots.
xlabel_lp = "Time after Awakening [min]"
ylabel_lp = "Cortisol [nmol/l]"
# Five tick labels in steps of 15: 0, 15, 30, 45, 60.
xticklabels_lp = list(range(0, 75, 15))

### Weekend vs. Weekday

#### Class Distribution

In [None]:
# Unstack the innermost index level, average across the resulting columns,
# and summarize these row means per "weekend" group.
(
    cort_samples
    .unstack()
    .mean(axis=1)
    .groupby("weekend")
    .describe()
)

In [None]:
# Order in which the conditions are arranged on the x-axis.
order = ["Spontaneous", "Known Alarm", "Unknown Alarm"]

fig, ax = plt.subplots()
# Count the rows of cort_samples per (condition, weekend) combination.
# NOTE(review): size() counts sample rows, not unique nights; the percentages below
# equal night percentages only if every night contributes the same number of rows — confirm.
df_nights = pd.DataFrame(cort_samples.groupby(["condition", "weekend"]).size(), columns=["nights"])
# Convert the counts to percentages within each condition, then reshape so that the
# conditions form the rows and the weekend categories form the columns.
# NOTE(review): the index layout returned by groupby().apply() depends on the pandas version — confirm.
df_nights = df_nights.groupby('condition').apply(lambda x: 100 * (x / x.sum())).T.stack().T
# Drop the outer column level (presumably the "nights" label — TODO confirm).
df_nights.columns = df_nights.columns.droplevel(0)

# Arrange the rows in the display order defined at the top of the cell.
df_nights = df_nights.reindex(order)

# Stacked bar chart: one bar per condition, one segment per weekend category.
ax = df_nights.plot(kind='bar', stacked=True, ax=ax, rot=0)
ax.legend().set_title(None)
ax.set_ylabel("Recorded CARs [%]")
fig.tight_layout()

# The figure is only written to disk when save_fig is enabled.
if save_fig:
    fig.savefig(plot_path.joinpath("img_barchart_weekdays.pdf"), transparent=True)

#### Raw Cortisol

##### Plots

In [None]:
# Plot variables; hue and line style both encode the "weekend" variable.
x, y = 'sample', 'cortisol'
hue = style = 'weekend'

# Aliases for the dependent variable and the grouping factor.
dv, group = y, hue

fig, ax = plt.subplots(figsize=(10, 5))
bp.plotting.lineplot(
    cort_samples,
    x=x,
    y=y,
    hue=hue,
    style=style,
    ax=ax,
    xlabel=xlabel_lp,
    xticklabels=xticklabels_lp,
    ylabel=ylabel_lp,
)
fig.tight_layout()

# The figure is only written to disk when save_fig is enabled.
if save_fig:
    fig.savefig(plot_path / "img_car_weekend.pdf", transparent=True)

##### Statistics

In [None]:
# Run the raw-cortisol analysis helper for the "weekend" variable, export its
# statistics to an Excel file, and display the results with posthoc=False.
pipeline = analysis_saliva_raw(
    cort_samples,
    variable="weekend",
)
pipeline.export_statistics(
    stats_path / "analysis_cortisol_raw_weekend.xlsx"
)
pipeline.display_results(posthoc=False)

#### Features

##### Plots

In [None]:
# Plot variables: one box per biomarker, split by the "weekend" variable.
x, y, hue = 'biomarker', 'cortisol', 'weekend'

# Substrings that select the feature columns shown in each panel.
cats = ["auc", "slope", "max_inc"]

notch = False

fig, axs = plt.subplots(ncols=len(cats), figsize=(12, 5))
for ax, cat in zip(axs, cats):
    # Keep only the columns whose label contains ``cat`` and go back to long format.
    data_plot = cort_features.unstack().filter(like=cat).stack()
    sns.boxplot(
        data=data_plot.reset_index(),
        x=x,
        y=y,
        hue=hue,
        notch=notch,
        ax=ax,
    )
    # Remember the legend entries, then remove the per-panel legend.
    handles, labels = ax.get_legend_handles_labels()
    ax.legend().remove()

# One shared legend in the upper right; the layout rect leaves room for it.
fig.legend(handles, labels, loc='upper right', bbox_to_anchor=(1.0, 1.0))
fig.tight_layout(rect=(0, 0, 0.85, 1.0))
if save_fig:
    fig.savefig(plot_path / "img_boxplots_car_features_weekend.pdf", transparent=True)

##### Statistics

In [None]:
# Run the cortisol-feature analysis helper for the "weekend" variable, export its
# statistics to an Excel file, and display the results with posthoc=False.
pipeline = analysis_saliva_features(
    cort_features,
    variable="weekend",
)
pipeline.export_statistics(
    stats_path / "analysis_cortisol_features_weekend.xlsx"
)
pipeline.display_results(posthoc=False)

### Chronotype

#### Class Distribution

In [None]:
# Unstack the innermost index level, average across the resulting columns,
# and summarize these row means per "chronotype" group.
(
    cort_samples
    .unstack()
    .mean(axis=1)
    .groupby("chronotype")
    .describe()
)

#### Raw Cortisol

##### Plots

In [None]:
# Plot variables; hue and line style both encode the "chronotype" variable.
x, y = 'sample', 'cortisol'
hue = style = 'chronotype'

# Order of the chronotype groups passed to the plot.
order = ['Morning', 'Intermediate', 'Evening']

fig, ax = plt.subplots()
bp.plotting.lineplot(
    cort_samples,
    x=x,
    y=y,
    hue=hue,
    order=order,
    style=style,
    xlabel=xlabel_lp,
    xticklabels=xticklabels_lp,
    ylabel=ylabel_lp,
    ax=ax,
)
fig.tight_layout()

# The figure is only written to disk when save_fig is enabled.
if save_fig:
    fig.savefig(plot_path / "img_car_chronotype.pdf", transparent=True)

##### Statistics

In [None]:
# Run the raw-cortisol analysis helper for the "chronotype" variable, export its
# statistics to an Excel file, and display the results with posthoc=False.
pipeline = analysis_saliva_raw(
    cort_samples,
    variable="chronotype",
)
pipeline.export_statistics(
    stats_path / "analysis_cortisol_raw_chronotype.xlsx"
)
pipeline.display_results(posthoc=False)

#### Features

##### Plots

In [None]:
# Plot variables: one box per biomarker, split by the "chronotype" variable.
x, y, hue = 'biomarker', 'cortisol', 'chronotype'

# Substrings that select the feature columns shown in each panel.
cats = ["auc", "slope", "max_inc"]
# Order of the chronotype groups within each biomarker.
order = ['Morning', 'Intermediate', 'Evening']

notch = False

fig, axs = plt.subplots(ncols=len(cats), figsize=(12, 5))
for ax, cat in zip(axs, cats):
    # Keep only the columns whose label contains ``cat`` and go back to long format.
    data_plot = cort_features.unstack().filter(like=cat).stack()
    sns.boxplot(
        data=data_plot.reset_index(),
        x=x,
        y=y,
        hue=hue,
        hue_order=order,
        notch=notch,
        ax=ax,
    )
    # Remember the legend entries, then remove the per-panel legend.
    handles, labels = ax.get_legend_handles_labels()
    ax.legend().remove()

# One shared legend in the upper right; the layout rect leaves room for it.
fig.legend(handles, labels, loc='upper right', bbox_to_anchor=(1.0, 1.0))
fig.tight_layout(rect=(0, 0, 0.85, 1.0))

if save_fig:
    fig.savefig(plot_path / "img_boxplot_car_features_chronotype.pdf", transparent=True)

##### Statistics

In [None]:
# Run the cortisol-feature analysis helper for the "chronotype" variable, export its
# statistics to an Excel file, and display the results with posthoc=False.
pipeline = analysis_saliva_features(
    cort_features,
    variable="chronotype",
)
pipeline.export_statistics(
    stats_path / "analysis_cortisol_features_chronotype.xlsx"
)
pipeline.display_results(posthoc=False)

### Within Ideal Bedtime

#### Class Distribution

In [None]:
# Unstack the innermost index level, average across the resulting columns,
# and summarize these row means per "within_ideal_bed_time" group.
(
    cort_samples
    .unstack()
    .mean(axis=1)
    .groupby("within_ideal_bed_time")
    .describe()
)

#### Raw Cortisol

##### Plots

In [None]:
# Plot variables; hue and line style both encode "within_ideal_bed_time".
x, y = 'sample', 'cortisol'
hue = style = 'within_ideal_bed_time'

fig, ax = plt.subplots()
bp.plotting.lineplot(
    cort_samples,
    x=x,
    y=y,
    hue=hue,
    style=style,
    xlabel=xlabel_lp,
    xticklabels=xticklabels_lp,
    ylabel=ylabel_lp,
    ax=ax,
)
fig.tight_layout()

# The figure is only written to disk when save_fig is enabled.
if save_fig:
    fig.savefig(plot_path / "img_car_within_bedtime.pdf", transparent=True)

##### Statistics

In [None]:
# Run the raw-cortisol analysis helper for the "within_ideal_bed_time" variable, export
# its statistics to an Excel file, and display the results with posthoc=False.
pipeline = analysis_saliva_raw(
    cort_samples,
    variable="within_ideal_bed_time",
)
pipeline.export_statistics(
    stats_path / "analysis_cortisol_raw_ideal_bedtime.xlsx"
)
pipeline.display_results(posthoc=False)

#### Features

##### Plots

In [None]:
# Box plots of the cortisol features, one panel per feature family, with the boxes
# split by the "within_ideal_bed_time" variable.
x = 'biomarker'
y = 'cortisol'
hue = 'within_ideal_bed_time'

# Substrings that select the feature columns shown in each panel.
cats = ["auc", "slope", "max_inc"]

notch = False

fig, axs = plt.subplots(figsize=(12, 5), ncols=len(cats))
for ax, cat in zip(axs, cats):
    # Keep only the columns whose label contains ``cat`` and go back to long format
    # (assumes the biomarker names end up as column labels after unstack() — TODO confirm).
    data_plot = cort_features.unstack().filter(like=cat).stack()
    sns.boxplot(data=data_plot.reset_index(), x=x, y=y, hue=hue, ax=ax, notch=notch)
    # Remember the legend entries, then remove the per-panel legend.
    h, l = ax.get_legend_handles_labels()
    ax.legend().remove()

# One shared legend in the upper right; the layout rect leaves room for it.
fig.legend(h, l, loc='upper right', bbox_to_anchor=(1.0, 1.0))
fig.tight_layout(rect=(0, 0, 0.85, 1.0))
if save_fig:
    # Fixed file name: this figure used to be written as "img_boxplot_car_wakeup_source.pdf",
    # a copy-paste leftover that labelled the bedtime figure as a wakeup-source figure.
    fig.savefig(plot_path.joinpath("img_boxplot_car_features_within_bedtime.pdf"), transparent=True)

##### Statistics

In [None]:
# Run the cortisol-feature analysis helper for the "within_ideal_bed_time" variable, export
# its statistics to an Excel file, and display the results with posthoc=False.
pipeline = analysis_saliva_features(
    cort_features,
    variable="within_ideal_bed_time",
)
pipeline.export_statistics(
    stats_path / "analysis_cortisol_features_ideal_bedtime.xlsx"
)
pipeline.display_results(posthoc=False)

### Wakeup Source

#### Raw Cortisol

##### Plots

In [None]:
# Line plot of the raw cortisol samples; hue and line style both encode "wakeup_source".
x = 'sample'
y = 'cortisol'

hue = 'wakeup_source'
style = 'wakeup_source'

# Order of the wakeup-source groups passed to the plot.
order = ['Spontaneous', 'Alarm', 'Not Specified']

fig, ax = plt.subplots()
# Pass the shared axis labels and tick labels, as all other cortisol line plots in this
# notebook do; this cell previously omitted them.
bp.plotting.lineplot(
    cort_samples,
    x=x,
    y=y,
    hue=hue,
    order=order,
    style=style,
    xlabel=xlabel_lp,
    xticklabels=xticklabels_lp,
    ylabel=ylabel_lp,
    ax=ax,
)
fig.tight_layout()
# The figure is only written to disk when save_fig is enabled.
if save_fig:
    fig.savefig(plot_path.joinpath("img_car_wakeup_source.pdf"), transparent=True)

##### Statistics

In [None]:
# Run the raw-cortisol analysis helper for the "wakeup_source" variable, export its
# statistics to an Excel file, and display the results with posthoc=False.
pipeline = analysis_saliva_raw(
    cort_samples,
    variable="wakeup_source",
)
pipeline.export_statistics(
    stats_path / "analysis_cortisol_raw_wakeup_source.xlsx"
)
pipeline.display_results(posthoc=False)

#### Features

##### Plots

In [None]:
# Box plots of the cortisol features, one panel per feature family, with the boxes
# split by the "wakeup_source" variable.
x = 'biomarker'
y = 'cortisol'
hue = 'wakeup_source'

notch = False

# Substrings that select the feature columns shown in each panel.
cats = ["auc", "slope", "max_inc"]
# Define the hue order here instead of relying on whatever ``order`` another cell left
# behind; otherwise the result depends on which cell was executed last.
order = ['Spontaneous', 'Alarm', 'Not Specified']

fig, axs = plt.subplots(figsize=(12, 5), ncols=len(cats))
for ax, cat in zip(axs, cats):
    # Keep only the columns whose label contains ``cat`` and go back to long format
    # (assumes the biomarker names end up as column labels after unstack() — TODO confirm).
    data_plot = cort_features.unstack().filter(like=cat).stack()
    sns.boxplot(data=data_plot.reset_index(), x=x, y=y, hue=hue, hue_order=order, ax=ax, notch=notch)
    # Remember the legend entries, then remove the per-panel legend.
    h, l = ax.get_legend_handles_labels()
    ax.legend().remove()

# One shared legend in the upper right; the layout rect leaves room for it.
fig.legend(h, l, loc='upper right', bbox_to_anchor=(1.0, 1.0))
fig.tight_layout(rect=(0, 0, 0.85, 1.0))
if save_fig:
    fig.savefig(plot_path.joinpath("img_boxplot_car_features_wakeup_source.pdf"), transparent=True)

##### Statistics

In [None]:
# Run the cortisol-feature analysis helper for the "wakeup_source" variable, export its
# statistics to an Excel file, and display the results with posthoc=False.
pipeline = analysis_saliva_features(
    cort_features,
    variable="wakeup_source",
)
pipeline.export_statistics(
    stats_path / "analysis_cortisol_features_wakeup_source.xlsx"
)
pipeline.display_results(posthoc=False)

### Condition

#### Class Distribution

In [None]:
# Unstack the innermost index level, average across the resulting columns,
# and summarize these row means per "condition" group.
(
    cort_samples
    .unstack()
    .mean(axis=1)
    .groupby("condition")
    .describe()
)

#### Raw Cortisol

##### Plots

In [None]:
# Plot variables; hue and line style both encode the "condition" variable.
x, y = 'sample', 'cortisol'
hue = style = 'condition'

# Order of the condition groups passed to the plot.
order = ['Spontaneous', 'Known Alarm', 'Unknown Alarm']

fig, ax = plt.subplots()
bp.plotting.lineplot(
    cort_samples,
    x=x,
    y=y,
    hue=hue,
    order=order,
    style=style,
    xlabel=xlabel_lp,
    xticklabels=xticklabels_lp,
    ylabel=ylabel_lp,
    ax=ax,
)
fig.tight_layout()

# The figure is only written to disk when save_fig is enabled.
if save_fig:
    fig.savefig(plot_path / "img_car_condition.pdf", transparent=True)

##### Statistics

In [None]:
# Run the raw-cortisol analysis helper for the "condition" variable, export its
# statistics to an Excel file, and display the results with posthoc=False.
pipeline = analysis_saliva_raw(
    cort_samples,
    variable="condition",
)
pipeline.export_statistics(
    stats_path / "analysis_cortisol_raw_condition.xlsx"
)
pipeline.display_results(posthoc=False)

#### Features

##### Plots

In [None]:
# Annotated box plots of the cortisol features, one panel per feature family, with the
# boxes split by the "condition" variable.
x = 'biomarker'
y = 'cortisol'
hue = 'condition'

notch = False

# Substrings that select the feature columns shown in each panel.
cats = ["auc_i", "slope", "max_inc"]
# Define the hue order here instead of relying on whatever ``order`` another cell left
# behind; otherwise the result depends on which cell was executed last.
order = ['Spontaneous', 'Known Alarm', 'Unknown Alarm']

# Pairs of (biomarker, condition) boxes that receive a significance annotation, per panel.
boxplot_pairs = {
    'auc_i': [
        (('auc_i', 'Spontaneous'), ('auc_i', 'Unknown Alarm')),
    ],
    'slope': [
        (('slope03', 'Known Alarm'), ('slope03', 'Unknown Alarm')),
        (('slope03', 'Spontaneous'), ('slope03', 'Unknown Alarm')),
        (('slope04', 'Known Alarm'), ('slope04', 'Unknown Alarm')),
        (('slope04', 'Spontaneous'), ('slope04', 'Unknown Alarm')),
    ],
    'max_inc': [
        (('max_inc', 'Known Alarm'), ('max_inc', 'Unknown Alarm')),
        (('max_inc', 'Spontaneous'), ('max_inc', 'Unknown Alarm')),
    ],
}

# Tick labels (mathtext) replacing the raw biomarker names, per panel.
xticklabels = {
    'auc_i': ["$AUC_I$"],
    'slope': ["$a_{S0S3}$", "$a_{S0S4}$"],
    'max_inc': ["$inc_{max}$"],
}

# y-axis label of each panel.
ylabels = {
    'auc_i': r"Cortisol AUC $\left[\frac{nmol \cdot min}{l} \right]$",
    'slope': r"Cortisol Slope $\left[\frac{nmol}{l \cdot min} \right]$",
    'max_inc': "Cortisol [nmol/l]",
}

fig, axs = plt.subplots(figsize=(15, 5), ncols=len(cats))
for ax, cat in zip(axs, cats):
    # Keep only the columns whose label contains ``cat`` and go back to long format
    # (assumes the biomarker names end up as column labels after unstack() — TODO confirm).
    data_plot = cort_features.unstack().filter(like=cat).stack()
    # Flatten the index once; the same frame feeds the box plot and the annotations.
    data_long = data_plot.reset_index()
    sns.boxplot(data=data_long, x=x, y=y, hue=hue, hue_order=order, ax=ax, notch=notch)

    # Annotate the selected pairs using statannot's independent t-test without
    # multiple-comparison correction.
    add_stat_annotation(
        ax=ax,
        data=data_long,
        box_pairs=boxplot_pairs[cat],
        x=x,
        y=y,
        hue=hue,
        hue_order=order,
        test='t-test_ind',
        comparisons_correction=None,
        pvalue_thresholds=[[1e-3, "***"], [1e-2, "**"], [0.05, "*"]],
    )

    # Remember the legend entries, then remove the per-panel legend.
    h, l = ax.get_legend_handles_labels()
    ax.set_ylabel(ylabels[cat])
    ax.set_xlabel(None)
    ax.set_xticklabels(xticklabels[cat])
    ax.legend().remove()

# One shared legend in the upper right; the layout rect leaves room for it.
fig.legend(h, l, loc='upper right', bbox_to_anchor=(1.0, 1.0))
fig.tight_layout(rect=(0, 0, 0.84, 1.0))
if save_fig:
    fig.savefig(plot_path.joinpath("img_boxplots_car_condition.pdf"), transparent=True)

##### Statistics

In [None]:
# Run the cortisol-feature analysis helper for the "condition" variable with
# test_type='anova', export its statistics to an Excel file, and display the
# results with posthoc=False.
pipeline = analysis_saliva_features(
    cort_features,
    variable="condition",
    test_type='anova',
)
pipeline.export_statistics(
    stats_path / "analysis_cortisol_features_condition.xlsx"
)
pipeline.display_results(posthoc=False)

### Pairplots

In [None]:
# Pair plot of the cortisol values with one column per 'sample' level, colored by
# condition; corner=True draws only the lower triangle.
g = sns.pairplot(
    data=cort_samples['cortisol'].unstack('sample').reset_index('condition'),
    hue='condition',
    corner=True,
)

In [None]:
# Same pair plot on the natural logarithm of the cortisol values.
g = sns.pairplot(
    data=np.log(cort_samples['cortisol'].unstack('sample')).reset_index('condition'),
    hue='condition',
    corner=True,
)
# Overlay KDE contours (3 levels) on the lower-triangle panels.
g = g.map_lower(sns.kdeplot, levels=3, color=".2", alpha=0.5)

In [None]:
# Pair plot of the cortisol features with one column per biomarker, colored by condition.
g = sns.pairplot(
    data=cort_features['cortisol'].unstack('biomarker').reset_index('condition'),
    hue='condition',
    corner=True,
)
# Optional KDE contour overlay, currently disabled:
#g = g.map_lower(sns.kdeplot, levels=3, color=".2", alpha=0.5)