# Saliva Analysis

In [None]:
from pathlib import Path

import pandas as pd
import numpy as np

import biopsykit as bp

import pingouin as pg

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib widget
#%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
# Global plot styling: seaborn "ticks" style with the "talk" context.
sns.set(style='ticks', context='talk')
# Alternative default figure size, kept for reference:
#plt.rcParams['figure.figsize'] = (15,5)
plt.rcParams['figure.figsize'] = (10,5)
# Close all currently open matplotlib figures.
plt.close('all')

# Use the `fau_palette` from biopsykit's color module as the seaborn default palette.
palette = bp.colors.fau_palette
sns.set_palette(palette)
# Bare expression: the palette becomes the cell output.
palette

In [None]:
# Set pingouin's global 'round' option to 3 (presumably the number of decimals in its result tables — see pingouin docs).
pg.options['round'] = 3

In [None]:
# Folder holding the exported data files (relative to the working directory).
export_path = Path("..", "..", "exports")
# Sub-folder that receives the figures saved by this notebook.
plot_path = export_path / "plots"

## Import

In [None]:
# Load the cleaned cortisol samples from the export folder.
cort_samples = pd.read_csv(export_path.joinpath("cortisol_samples_cleaned.csv"))
# Assign each night a unique ID (subject + '_' + night) to allow repeated-measures analyses
# and insert it as the third column.
# NOTE(review): the concatenation assumes 'subject' holds strings — confirm against the CSV.
cort_samples.insert(2,'night_id', cort_samples['subject'] + '_' + cort_samples['night'].astype(str))
# Drop the 'time' column before building the index.
cort_samples = cort_samples.drop(columns='time')
# Every remaining column except 'cortisol' becomes an index level; 'cortisol' is the only data column.
cort_samples = cort_samples.set_index(list(cort_samples.columns.drop('cortisol')))

# Preview the first rows.
cort_samples.head()

In [None]:
# Load the cleaned cortisol features from the export folder.
cort_features = pd.read_csv(export_path.joinpath("cortisol_features_cleaned.csv"))
# Strip the "cortisol_" prefix from the biomarker names.
cort_features['biomarker'] = cort_features['biomarker'].str.replace("cortisol_", "")
# All columns but the last become index levels.
# NOTE(review): assumes the value column ('cortisol') is the last column of the CSV — confirm.
cort_features = cort_features.set_index(list(cort_features.columns[:-1]))
# Preview the first rows.
cort_features.head()

## Plots and Statistics

In [None]:
# Axis labelling shared by the cortisol line plots below.
xlabel = "Time after Awakening [min]"
ylabel = "Cortisol [nmol/l]"
# Five tick labels in 15-minute steps: 0, 15, 30, 45, 60.
xticklabels = list(range(0, 75, 15))
# Optional fixed y-range, currently unused:
#ylim = [5, 16]

In [None]:
# Display order of the conditions.
order = [
    'Spontaneous',
    'Known Alarm',
    'Unknown Alarm',
]

### Weekend vs. Weekday

Condition per Weekday Type

In [None]:
# Row-wise mean over the columns created by unstacking the innermost index level
# (presumably one column per sample — confirm), then summary statistics per 'weekend' level.
cort_samples.unstack().mean(axis=1).groupby('weekend').describe()

In [None]:
fig, ax = plt.subplots()
# Number of rows of cort_samples per (condition, weekend) combination.
# NOTE(review): this counts rows of cort_samples, not nights, despite the column name; the
# percentages below only equal the share of nights if every night contributes the same
# number of rows — confirm.
df_nights = pd.DataFrame(cort_samples.groupby(["condition", "weekend"]).size(), columns=["nights"])
# Convert the counts to percentages within each condition and reshape for plotting.
# NOTE(review): the transpose/stack chain is intended to yield one row per condition and one
# column per weekend level; its exact result depends on how the installed pandas version
# indexes groupby-apply output — verify after pandas upgrades.
df_nights = df_nights.groupby('condition').apply(lambda x: 100 * (x / x.sum())).T.stack().T
# Remove the outermost column level.
df_nights.columns = df_nights.columns.droplevel(0)

# Arrange the rows in the order given by `order`.
df_nights = df_nights.reindex(order)

# Stacked bar chart with horizontal tick labels and an untitled legend.
ax = df_nights.plot(kind='bar', stacked=True, ax=ax, rot=0)
ax.legend().set_title(None)
ax.set_ylabel("Recorded CARs [%]")
fig.tight_layout()

fig.savefig(plot_path.joinpath("img_bar_chart_weekdays.pdf"), transparent=True)

#### Raw Cortisol

In [None]:
# Column roles for the raw-cortisol plot and statistics.
x, y = 'sample', 'cortisol'
hue = style = 'weekend'

# Aliases under the keyword names used in the pingouin calls.
dv, group = y, hue

In [None]:
# The statistics in this cell are computed on log-transformed cortisol values.
df_stats = np.log(cort_samples)

# Normality and homoscedasticity checks across the `group` levels, run separately for every value of `x`.
df_norm = df_stats.reset_index().groupby(x).apply(lambda df: pg.normality(data=df, dv=dv, group=group))
df_var = df_stats.reset_index().groupby(x).apply(lambda df: pg.homoscedasticity(data=df, dv=dv, group=group))

# Mixed ANOVA: `hue` as between factor, `x` as within factor, 'night_id' as subject identifier.
df_anova = pg.mixed_anova(data=df_stats.reset_index(), dv=y, between=hue, within=x, subject='night_id')

display(df_norm)
display(df_var)
display(df_anova)

In [None]:
# Cortisol line plot over `x`, split by `hue`, saved as PDF.
fig, ax = plt.subplots(figsize=(10, 5))
bp.plotting.lineplot(
    cort_samples,
    x=x,
    y=y,
    hue=hue,
    style=style,
    xlabel=xlabel,
    ylabel=ylabel,
    xticklabels=xticklabels,
    ax=ax,
)

fig.tight_layout()
fig.savefig(plot_path / "img_car_weekend.pdf", transparent=True)

#### Features

In [None]:
# Forwarded as `notch=` to the sns.boxplot calls of this section.
notch = True

Cortisol Features (AUC, Slope, Maximum Increase)

In [None]:
# Column roles for the feature plots and statistics.
x, y, hue = 'biomarker', 'cortisol', 'weekend'

# Aliases under the keyword names used in the pingouin calls.
dv, group = y, hue

In [None]:
# Feature statistics are computed on the untransformed feature values.
df_stats = cort_features

# Normality and homoscedasticity checks across the `group` levels, run separately for every value of `x`.
df_norm = df_stats.reset_index().groupby(x).apply(lambda df: pg.normality(data=df, dv=dv, group=group))
df_var = df_stats.reset_index().groupby(x).apply(lambda df: pg.homoscedasticity(data=df, dv=dv, group=group))

# Welch ANOVA with `hue` as between factor, computed separately for every value of `x`.
df_anova = df_stats.reset_index().groupby(x).apply(lambda df: pg.welch_anova(data=df, dv=y, between=hue))

display(df_norm)
display(df_var)
display(df_anova)

In [None]:
# One boxplot panel per feature category, with a single figure-level legend.
cats = ["auc", "slope", "max_inc"]

fig, axs = plt.subplots(ncols=len(cats), figsize=(12, 5))
for ax, cat in zip(axs, cats):
    # Keep only the columns whose label contains the category string.
    data_plot = cort_features.unstack().filter(like=cat).stack()
    sns.boxplot(x=x, y=y, hue=hue, data=data_plot.reset_index(), notch=notch, ax=ax)
    # Collect the legend entries before the per-axes legend is removed.
    h, l = ax.get_legend_handles_labels()
    ax.legend().remove()

# Shared legend in the upper right; the layout leaves the right 15 % of the figure free.
fig.legend(h, l, bbox_to_anchor=(1.0, 1.0), loc='upper right')
fig.tight_layout(rect=(0, 0, 0.85, 1.0))

### Chronotype

In [None]:
# Display order of the chronotype groups.
order = [
    'Morning',
    'Intermediate',
    'Evening',
]

#### Raw Cortisol

In [None]:
# Column roles for the raw-cortisol plot and statistics.
x, y = 'sample', 'cortisol'
hue = style = 'chronotype'

# Aliases under the keyword names used in the pingouin calls.
dv, group = y, hue

In [None]:
# Row-wise mean over the columns created by unstacking the innermost index level
# (presumably one column per sample — confirm), then summary statistics per level of `group`.
cort_samples.unstack().mean(axis=1).groupby(group).describe()

In [None]:
# Cortisol line plot over `x`, split by `hue` in the order given by `order`, saved as PDF.
fig, ax = plt.subplots()
bp.plotting.lineplot(
    cort_samples,
    x=x,
    y=y,
    hue=hue,
    style=style,
    order=order,
    xlabel=xlabel,
    ylabel=ylabel,
    xticklabels=xticklabels,
    ax=ax,
)
fig.tight_layout()
fig.savefig(plot_path / "img_car_chronotype.pdf", transparent=True)

In [None]:
# Statistics in this cell use the untransformed cortisol values.
df_stats = cort_samples

# Normality and homoscedasticity checks across the `group` levels, run separately for every value of `x`.
df_norm = df_stats.reset_index().groupby(x).apply(lambda df: pg.normality(data=df, dv=dv, group=group))
df_var = df_stats.reset_index().groupby(x).apply(lambda df: pg.homoscedasticity(data=df, dv=dv, group=group))

# Mixed ANOVA: `hue` as between factor, `x` as within factor, 'night_id' as subject identifier.
df_anova = pg.mixed_anova(data=df_stats.reset_index(), dv=y, between=hue, within=x, subject='night_id')
# Pairwise tests with 'fdr_bh' p-value adjustment; computed here but only shown if the lines at the end are re-enabled.
df_ttests = pg.pairwise_ttests(data=df_stats.reset_index(), dv=y, between=hue, within=x, subject='night_id', padjust='fdr_bh')

# The check tables are displayed transposed.
display(df_norm.T)
display(df_var.T)
display(df_anova)
# Disabled: show the pairwise tests only if the p-value in ANOVA row 2 is below 0.05.
#if df_anova.loc[2]['p-unc'] < 0.05:
#    display(df_ttests)

#### Features

In [None]:
# Forwarded as `notch=` to the sns.boxplot calls of this section.
notch = True

In [None]:
# Display order of the chronotype groups.
order = [
    'Morning',
    'Intermediate',
    'Evening',
]

In [None]:
# Column roles for the feature plots and statistics.
x, y, hue = 'biomarker', 'cortisol', 'chronotype'

# Aliases under the keyword names used in the pingouin calls.
dv, group = y, hue

In [None]:
# Feature statistics are computed on the untransformed feature values.
df_stats = cort_features

# Normality and homoscedasticity checks across the `group` levels, run separately for every value of `x`.
df_norm = df_stats.reset_index().groupby(x).apply(lambda df: pg.normality(data=df, dv=dv, group=group))
df_var = df_stats.reset_index().groupby(x).apply(lambda df: pg.homoscedasticity(data=df, dv=dv, group=group))

# Welch ANOVA and Tukey post-hoc tests with `hue` as between factor, each computed separately for every value of `x`.
df_anova = df_stats.reset_index().groupby(x).apply(lambda df: pg.welch_anova(data=df, dv=y, between=hue))
df_ph = df_stats.reset_index().groupby(x).apply(lambda df: pg.pairwise_tukey(data=df, dv=y, between=hue))

# The check tables are displayed transposed; the post-hoc table is computed but not shown.
display(df_norm.T)
display(df_var.T)
display(df_anova)
#display(df_ph)

In [None]:
# One boxplot panel per feature category, with a single figure-level legend.
cats = ["auc", "slope", "max_inc"]

fig, axs = plt.subplots(ncols=len(cats), figsize=(12, 5))
for ax, cat in zip(axs, cats):
    # Keep only the columns whose label contains the category string.
    data_plot = cort_features.unstack().filter(like=cat).stack()
    sns.boxplot(
        x=x, y=y, hue=hue, hue_order=order,
        data=data_plot.reset_index(), notch=notch, ax=ax,
    )
    # Collect the legend entries before the per-axes legend is removed.
    h, l = ax.get_legend_handles_labels()
    ax.legend().remove()

# Shared legend in the upper right; the layout leaves the right 15 % of the figure free.
fig.legend(h, l, bbox_to_anchor=(1.0, 1.0), loc='upper right')
fig.tight_layout(rect=(0, 0, 0.85, 1.0))

### Within Ideal Bedtime

#### Raw Cortisol

In [None]:
# Column roles for the raw-cortisol plot and statistics.
x, y = 'sample', 'cortisol'
hue = style = 'within_ideal_bed_time'

# Aliases under the keyword names used in the pingouin calls.
dv, group = y, hue

In [None]:
# Row-wise mean over the columns created by unstacking the innermost index level
# (presumably one column per sample — confirm), then summary statistics per level of `group`.
cort_samples.unstack().mean(axis=1).groupby(group).describe()

In [None]:
# Cortisol line plot over `x`, split by `hue`, saved as PDF.
fig, ax = plt.subplots()

bp.plotting.lineplot(cort_samples, x=x, y=y, hue=hue, style=style, xlabel=xlabel, xticklabels=xticklabels, ylabel=ylabel, ax=ax)

fig.tight_layout()
# Build the target from `plot_path` like every other figure in this notebook; this resolves
# to the same file as the former export_path.joinpath("plots/...") spelling.
fig.savefig(plot_path.joinpath("img_car_within_bedtime.pdf"), transparent=True)

In [None]:
# Statistics in this cell use the untransformed cortisol values.
df_stats = cort_samples

# Normality and homoscedasticity checks across the `group` levels, run separately for every value of `x`.
df_norm = df_stats.reset_index().groupby(x).apply(lambda df: pg.normality(data=df, dv=dv, group=group))
df_var = df_stats.reset_index().groupby(x).apply(lambda df: pg.homoscedasticity(data=df, dv=dv, group=group))

# Mixed ANOVA: `hue` as between factor, `x` as within factor, 'night_id' as subject identifier.
df_anova = pg.mixed_anova(data=df_stats.reset_index(), dv=y, between=hue, within=x, subject='night_id')
# Pairwise tests with 'fdr_bh' p-value adjustment; computed here but only shown if the lines at the end are re-enabled.
df_ttests = pg.pairwise_ttests(data=df_stats.reset_index(), dv=y, between=hue, within=x, subject='night_id', padjust='fdr_bh')

# The check tables are displayed transposed.
display(df_norm.T)
display(df_var.T)
display(df_anova)
# Disabled: show the pairwise tests only if the p-value in ANOVA row 2 is below 0.05.
#if df_anova.loc[2]['p-unc'] < 0.05:
#    display(df_ttests)

#### Features

In [None]:
# Forwarded as `notch=` to the sns.boxplot calls of this section.
notch = True

Cortisol Features (AUC, Slope, Maximum Increase)

In [None]:
# Column roles for the feature plots and statistics.
x, y, hue = 'biomarker', 'cortisol', 'within_ideal_bed_time'

# Aliases under the keyword names used in the pingouin calls.
dv, group = y, hue

In [None]:
# Feature statistics are computed on the untransformed feature values.
df_stats = cort_features

# Normality and homoscedasticity checks across the `group` levels, run separately for every value of `x`.
df_norm = df_stats.reset_index().groupby(x).apply(lambda df: pg.normality(data=df, dv=dv, group=group))
df_var = df_stats.reset_index().groupby(x).apply(lambda df: pg.homoscedasticity(data=df, dv=dv, group=group))

# Welch ANOVA and Tukey post-hoc tests with `hue` as between factor, each computed separately for every value of `x`.
df_anova = df_stats.reset_index().groupby(x).apply(lambda df: pg.welch_anova(data=df, dv=y, between=hue))
df_ph = df_stats.reset_index().groupby(x).apply(lambda df: pg.pairwise_tukey(data=df, dv=y, between=hue))

# The check tables are displayed transposed; the post-hoc table is computed but not shown.
display(df_norm.T)
display(df_var.T)
display(df_anova)
#display(df_ph)

In [None]:
# One boxplot panel per feature category, with a single figure-level legend.
cats = ["auc", "slope", "max_inc"]

fig, axs = plt.subplots(ncols=len(cats), figsize=(12, 5))
for ax, cat in zip(axs, cats):
    # Keep only the columns whose label contains the category string.
    data_plot = cort_features.unstack().filter(like=cat).stack()
    sns.boxplot(x=x, y=y, hue=hue, data=data_plot.reset_index(), notch=notch, ax=ax)
    # Collect the legend entries before the per-axes legend is removed.
    h, l = ax.get_legend_handles_labels()
    ax.legend().remove()

# Shared legend in the upper right; the layout leaves the right 15 % of the figure free.
fig.legend(h, l, bbox_to_anchor=(1.0, 1.0), loc='upper right')
fig.tight_layout(rect=(0, 0, 0.85, 1.0))

### Wakeup Source

In [None]:
# Display order of the wakeup sources.
order = [
    'Spontaneous',
    'Alarm',
    'Not Specified',
]

#### Raw Cortisol

In [None]:
# Column roles for the raw-cortisol plot and statistics.
x, y = 'sample', 'cortisol'
hue = style = 'wakeup_source'

# Aliases under the keyword names used in the pingouin calls.
dv, group = y, hue

In [None]:
# Cortisol line plot over `x`, split by `hue` in the order given by `order`.
fig, ax = plt.subplots()
# Pass the shared axis labels and tick labels like the other line plots in this notebook,
# so this figure is labelled the same way as the rest.
bp.plotting.lineplot(cort_samples, x=x, y=y, hue=hue, order=order, style=style, xlabel=xlabel, xticklabels=xticklabels, ylabel=ylabel, ax=ax)
fig.tight_layout()

In [None]:
# Statistics in this cell use the untransformed cortisol values.
df_stats = cort_samples

# Normality and homoscedasticity checks across the `group` levels, run separately for every value of `x`.
df_norm = df_stats.reset_index().groupby(x).apply(lambda df: pg.normality(data=df, dv=dv, group=group))
df_var = df_stats.reset_index().groupby(x).apply(lambda df: pg.homoscedasticity(data=df, dv=dv, group=group))

# Mixed ANOVA: `hue` as between factor, `x` as within factor, 'night_id' as subject identifier.
df_anova = pg.mixed_anova(data=df_stats.reset_index(), dv=y, between=hue, within=x, subject='night_id')
# Pairwise tests with 'fdr_bh' p-value adjustment; computed here but only shown if the lines at the end are re-enabled.
df_ttests = pg.pairwise_ttests(data=df_stats.reset_index(), dv=y, between=hue, within=x, subject='night_id', padjust='fdr_bh')

# The check tables are displayed transposed.
display(df_norm.T)
display(df_var.T)
display(df_anova)
# Disabled: show the pairwise tests only if the p-value in ANOVA row 2 is below 0.05.
#if df_anova.loc[2]['p-unc'] < 0.05:
#    display(df_ttests)

#### Features

In [None]:
# Forwarded as `notch=` to the sns.boxplot calls of this section.
notch = True

Cortisol Features (AUC, Slope, Maximum Increase)

In [None]:
# Column roles for the feature plots and statistics.
x, y, hue = 'biomarker', 'cortisol', 'wakeup_source'

# Aliases under the keyword names used in the pingouin calls.
dv, group = y, hue

In [None]:
# Feature statistics are computed on the untransformed feature values.
df_stats = cort_features

# Normality and homoscedasticity checks across the `group` levels, run separately for every value of `x`.
df_norm = df_stats.reset_index().groupby(x).apply(lambda df: pg.normality(data=df, dv=dv, group=group))
df_var = df_stats.reset_index().groupby(x).apply(lambda df: pg.homoscedasticity(data=df, dv=dv, group=group))

# One-way ANOVA and pairwise tests ('fdr_bh' p-value adjustment) with `hue` as between factor,
# each computed separately for every value of `x`.
# NOTE(review): the weekend, chronotype and bedtime feature cells use pg.welch_anova here — confirm the plain ANOVA is intended.
df_anova = df_stats.reset_index().groupby(x).apply(lambda df: pg.anova(data=df, dv=y, between=hue))
df_ph = df_stats.reset_index().groupby(x).apply(lambda df: pg.pairwise_ttests(data=df, dv=y, between=hue, padjust='fdr_bh'))

display(df_norm)
display(df_var)
display(df_anova)
display(df_ph)

In [None]:
# One boxplot panel per feature category, with a single figure-level legend.
cats = ["auc", "slope", "max_inc"]

fig, axs = plt.subplots(ncols=len(cats), figsize=(12, 5))
for ax, cat in zip(axs, cats):
    # Keep only the columns whose label contains the category string.
    data_plot = cort_features.unstack().filter(like=cat).stack()
    sns.boxplot(
        x=x, y=y, hue=hue, hue_order=order,
        data=data_plot.reset_index(), notch=notch, ax=ax,
    )
    # Collect the legend entries before the per-axes legend is removed.
    h, l = ax.get_legend_handles_labels()
    ax.legend().remove()

# Shared legend in the upper right; the layout leaves the right 15 % of the figure free.
fig.legend(h, l, bbox_to_anchor=(1.0, 1.0), loc='upper right')
fig.tight_layout(rect=(0, 0, 0.85, 1.0))

### Condition

In [None]:
# Display order of the conditions.
order = [
    'Spontaneous',
    'Known Alarm',
    'Unknown Alarm',
]

#### Raw Cortisol

In [None]:
# Column roles for the raw-cortisol plot and statistics.
x, y = 'sample', 'cortisol'
hue = style = 'condition'

# Aliases under the keyword names used in the pingouin calls.
dv, group = y, hue

In [None]:
# Row-wise mean over the columns created by unstacking the innermost index level
# (presumably one column per sample — confirm), then summary statistics per level of `group`.
cort_samples.unstack().mean(axis=1).groupby(group).describe()

In [None]:
# The statistics in this cell are computed on log-transformed cortisol values.
df_stats = np.log(cort_samples)

# Normality and homoscedasticity checks across the `group` levels, run separately for every value of `x`.
df_norm = df_stats.reset_index().groupby(x).apply(lambda df: pg.normality(data=df, dv=dv, group=group))
df_var = df_stats.reset_index().groupby(x).apply(lambda df: pg.homoscedasticity(data=df, dv=dv, group=group))

# Mixed ANOVA: `hue` as between factor, `x` as within factor, 'night_id' as subject identifier.
df_anova = pg.mixed_anova(data=df_stats.reset_index(), dv=y, between=hue, within=x, subject='night_id')
# Pairwise tests with 'fdr_bh' p-value adjustment.
df_ttests = pg.pairwise_ttests(data=df_stats.reset_index(), dv=y, between=hue, within=x, subject='night_id', padjust='fdr_bh')

# The check tables are displayed transposed.
display(df_norm.T)
display(df_var.T)
display(df_anova)
# Show the pairwise tests only if the p-value in ANOVA row 2 is below 0.05.
# NOTE(review): row label 2 is presumably the interaction row of pingouin's mixed-ANOVA table — confirm against the pingouin version in use.
if df_anova.loc[2]['p-unc'] < 0.05:
    display(df_ttests)

In [None]:
# Cortisol line plot over `x`, split by `hue` in the order given by `order`, saved as PDF.
fig, ax = plt.subplots()
bp.plotting.lineplot(
    cort_samples,
    x=x,
    y=y,
    hue=hue,
    style=style,
    order=order,
    xlabel=xlabel,
    ylabel=ylabel,
    xticklabels=xticklabels,
    ax=ax,
)

fig.tight_layout()
fig.savefig(plot_path / "img_car_condition.pdf", transparent=True)

#### Features

In [None]:
# Forwarded as `notch=` to the sns.boxplot call of this section (notches disabled here).
notch = False

Cortisol Features (AUC, Slope, Maximum Increase)

In [None]:
# Column roles for the feature plots and statistics.
x, y, hue = 'biomarker', 'cortisol', 'condition'

# Aliases under the keyword names used in the pingouin calls.
dv, group = y, hue

In [None]:
# Feature statistics are computed on the untransformed feature values.
df_stats = cort_features

# Normality and homoscedasticity checks across the `group` levels, run separately for every value of `x`.
df_norm = df_stats.reset_index().groupby(x).apply(lambda df: pg.normality(data=df, dv=dv, group=group))
df_var = df_stats.reset_index().groupby(x).apply(lambda df: pg.homoscedasticity(data=df, dv=dv, group=group))

# One-way ANOVA and Tukey post-hoc tests with `hue` as between factor, each computed separately for every value of `x`.
df_anova = df_stats.reset_index().groupby(x).apply(lambda df: pg.anova(data=df, dv=y, between=hue))
df_ph = df_stats.reset_index().groupby(x).apply(lambda df: pg.pairwise_tukey(data=df, dv=y, between=hue))

# The check tables are displayed transposed.
display(df_norm.T)
display(df_var.T)
display(df_anova)
display(df_ph)

In [None]:
from statannot import add_stat_annotation

In [None]:
cats = ["auc_i", "slope", "max_inc"]

# Box pairs to annotate per feature category; each box is identified as (biomarker, condition).
boxplot_pairs = {
    'auc_i': [(('auc_i', 'Spontaneous'), ('auc_i', 'Unknown Alarm'))],
    'slope': [
        (('slope03', 'Known Alarm'), ('slope03', 'Unknown Alarm')),
        (('slope03', 'Spontaneous'), ('slope03', 'Unknown Alarm')),
        (('slope04', 'Known Alarm'), ('slope04', 'Unknown Alarm')),
        (('slope04', 'Spontaneous'), ('slope04', 'Unknown Alarm')),
    ],
    'max_inc': [
        (('max_inc', 'Known Alarm'), ('max_inc', 'Unknown Alarm')),
        (('max_inc', 'Spontaneous'), ('max_inc', 'Unknown Alarm')),
    ],
}

# Tick labels per feature category.
# Deliberately NOT named `xticklabels`: that global holds the list of sample times passed to
# the line-plot cells, and rebinding it to a dict would break those cells when they are
# re-run after this one.
feature_xticklabels = {
    'auc_i': ["$AUC_I$"],
    'slope': ["$a_{S0S3}$", "$a_{S0S4}$"],
    'max_inc': ["$inc_{max}$"],
}

# y-axis label per feature category.
ylabels = {
    'auc_i': r"Cortisol AUC $\left[\frac{nmol \cdot min}{l} \right]$",
    'slope': r"Cortisol Slope $\left[\frac{nmol}{l \cdot min} \right]$",
    'max_inc': "Cortisol [nmol/l]",
}

# One annotated boxplot panel per feature category, with a single figure-level legend.
fig, axs = plt.subplots(figsize=(15,5), ncols=len(cats))
for ax, cat in zip(axs, cats):
    # Keep only the columns whose label contains the category string.
    data_plot = cort_features.unstack().filter(like=cat).stack()
    sns.boxplot(data=data_plot.reset_index(), x=x, y=y, hue=hue, hue_order=order, ax=ax, notch=notch)
    # Collect the legend entries before the per-axes legend is removed.
    handles, labels = ax.get_legend_handles_labels()
    # NOTE(review): the annotations use uncorrected independent t-tests, whereas the statistics
    # cell for these features uses Tukey post-hoc tests — confirm the markers match the reported results.
    add_stat_annotation(ax=ax, data=data_plot.reset_index(), box_pairs=boxplot_pairs[cat], x=x, y=y, hue=hue, hue_order=order, test='t-test_ind', comparisons_correction=None, pvalue_thresholds=[[1e-3, "***"], [1e-2, "**"], [0.05, "*"]])
    ax.set_ylabel(ylabels[cat])
    ax.set_xlabel(None)
    ax.set_xticklabels(feature_xticklabels[cat])
    ax.legend().remove()

# Shared legend in the upper right; the layout leaves the right 16 % of the figure free.
fig.legend(handles, labels, loc='upper right', bbox_to_anchor=(1.0, 1.0))
fig.tight_layout(rect=(0, 0, 0.84, 1.0))
fig.savefig(plot_path.joinpath("img_boxplots_car.pdf"), transparent=True)

### Pairplots

In [None]:
# Pair plot of the cortisol values with one column per 'sample' level, colored by condition.
pairplot_data = cort_samples['cortisol'].unstack('sample').reset_index('condition')
g = sns.pairplot(data=pairplot_data, hue='condition', corner=True)

In [None]:
# Same pair plot on log-transformed values, with sns.kdeplot (3 levels) mapped onto the lower-triangle panels.
log_wide = np.log(cort_samples['cortisol'].unstack('sample'))
g = sns.pairplot(data=log_wide.reset_index('condition'), hue='condition', corner=True)
g = g.map_lower(sns.kdeplot, levels=3, color=".2", alpha=0.5)

In [None]:
# Pair plot of the cortisol features with one column per 'biomarker' level, colored by condition.
feature_wide = cort_features['cortisol'].unstack('biomarker').reset_index('condition')
g = sns.pairplot(data=feature_wide, hue='condition', corner=True)
# Optional KDE overlay, currently disabled:
#g = g.map_lower(sns.kdeplot, levels=3, color=".2", alpha=0.5)