# Sleep Analysis

In [None]:
from pathlib import Path

import pandas as pd
import numpy as np

import pingouin as pg

from carwatch_analysis.general_helper import describe_groups_df
import biopsykit as bp
from biopsykit.stats import StatsPipeline
from biopsykit.utils.dataframe_handling import multi_xs

from statannot import add_stat_annotation

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib widget
#%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
# Start from a clean slate: close any figures that are still open.
plt.close('all')

# Seaborn theme, followed by the default figure size.
sns.set(context='notebook', style='ticks')
#plt.rcParams['figure.figsize'] = (15,5)
plt.rcParams.update({'figure.figsize': (10, 5)})

# Flag named `save_fig`; presumably read by later cells to decide whether figures are written to disk.
save_fig = False

# Use the palette provided by biopsykit as the default seaborn palette.
palette = bp.colors.fau_palette
sns.set_palette(palette)
# Bare expression: the notebook renders the palette as the cell output.
palette

In [None]:
# Set pingouin's global 'round' option to 3 (presumably the number of decimals in its result tables - confirm in the pingouin docs).
pg.options['round'] = 3

In [None]:
# Base directories, given relative to the current working directory.
export_path = Path("../../exports")
data_path = Path("../../../../Data/Questionnaire_Data")

# Sub-folders of the export directory for figures and statistics tables.
plot_path = export_path / "plots"
stats_path = export_path / "statistics"

# Hand both output folders to biopsykit's mkdirs helper (presumably creates them if missing).
bp.utils.file_handling.mkdirs([plot_path, stats_path])

## Import

In [None]:
# Read the exported IMU feature table.
imu_data = pd.read_csv(export_path / "imu_features_complete.csv")
# Every column except the last one becomes an index level; the last column holds the values.
index_cols = imu_data.columns[:-1].tolist()
imu_data = imu_data.set_index(index_cols)
imu_data.head()

Load the cortisol data and use it as the index to join the IMU data on (*currently not used*).

In [None]:
#cort_samples = pd.read_csv(export_path.joinpath("cortisol_samples_cleaned.csv"))
# assign each night a unique ID to allow repeated-measures analyses and insert it into the dataframe
#cort_samples.insert(2, 'night_id', cort_samples['subject'] + '_' + cort_samples['night'].astype(str))
#cort_samples = cort_samples.drop(columns=['time'])
#cort_samples = cort_samples.set_index(list(cort_samples.columns.drop('cortisol')))

In [None]:
#data_index = cort_samples.drop(columns='cortisol').unstack()
#data_index.columns = []
#data_index.reset_index()[]#index.get_level_values(["condition", "subject", "night_id"])
#imu_data = data_index.join(imu_data, how='inner')
#imu_data.head()

In [None]:
# Preview the data in wide format: the 'type', 'time_span' and 'feature' index levels are moved to the columns.
imu_data.unstack(['type', 'time_span', 'feature']).head()

## Data Preparation

### Feature Selection

In [None]:
# List the distinct values of the 'feature' index level.
imu_data.index.get_level_values("feature").unique()

### Outlier Removal

In [None]:
# Select the rows with type 'imu' and time span 'last_30min'; both index levels are dropped by xs.
imu_features = imu_data.xs(('imu', 'last_30min'), level=['type', 'time_span'])
# Substrings used to pick the feature columns that are kept.
filter_cols = ['max', 'mean', 'std']

# Go to wide format (last index level -> columns; presumably 'feature'), keep only the
# columns whose label contains one of the substrings, then go back to long format.
imu_features = imu_features.unstack()
list_dfs = [imu_features.filter(like=col) for col in filter_cols]
imu_features = pd.concat(list_dfs, axis=1).stack()

# Flag values that lie more than 3 standard deviations from the mean of their feature.
# The z-scores are built from `transform` results, which are always aligned to the input
# index. A like-indexed function passed to `groupby(...).apply(...)` yields a different
# result index depending on how the installed pandas version handles `group_keys`, which
# can make the boolean mask unalignable in the `.loc` call below.
imu_values = imu_features['data']
per_feature = imu_values.groupby(level='feature')
z_scores = (imu_values - per_feature.transform('mean')) / per_feature.transform('std')
outlier_mask = z_scores.abs() > 3

print(f"Number of nights before outlier removal: {len(imu_features.unstack('feature'))}")

imu_features = imu_features.loc[~outlier_mask]
# A row of the wide table is kept only if none of its feature values was removed above
# (rows with a missing feature are dropped), then the data goes back to long format.
imu_features = imu_features.unstack('feature').dropna().stack()

print(f"Number of nights after outlier removal: {len(imu_features.unstack())}")

## Plots and Statistics

### Condition

#### Class Distribution

In [None]:
# Summarize imu_features per "condition" group (describe_groups_df is a project helper; its exact output is not visible here).
describe_groups_df(imu_features, "condition")

#### Statistics

In [None]:
# Analysis steps, run per feature: assumption checks, omnibus test, post-hoc comparison.
# NOTE(review): Tukey's post-hoc test assumes equal variances, while the omnibus test is
# Welch's ANOVA - confirm that this combination is intended.
stats_steps = [
    ('prep', 'normality'),
    ('prep', 'equal_var'),
    ('test', 'welch_anova'),
    ('posthoc', 'pairwise_tukey'),
]
# Parameters shared by the steps: 'data' is the dependent variable, 'condition' the grouping factor.
stats_params = {
    'groupby': ['feature'],
    'group': 'condition',
    'dv': 'data',
    'between': "condition",
    'padjust': 'fdr_bh',
}

pipeline = StatsPipeline(steps=stats_steps, params=stats_params)
pipeline.apply(imu_features)

# Write the results to an Excel file, then show them in the notebook.
results_file = stats_path / "analysis_imu_features_condition.xlsx"
pipeline.export_statistics(results_file)
pipeline.display_results(prep=True, sig_only={'test': True, 'posthoc': True})

In [None]:
# Derive the significance-bracket pairs and their p-values from the post-hoc results (presumably for annotating plots).
sig_pairs, pvals = pipeline.sig_brackets(pipeline.results_cat('posthoc'), stats_type='between', plot_type='multi')

In [None]:
# Bare expression: display the p-values as the cell output.
pvals

#### Plots

### Pairplots

In [None]:
# Wide table for the pair plot: one column per feature, with the "condition"
# index level moved into a regular column.
df_pairplot = (
    imu_features['data']
    .unstack('feature')
    .reset_index(level="condition")
)

#g = sns.pairplot(data=df_pairplot, hue='condition', corner=True)
#g = g.map_lower(sns.kdeplot, levels=3, color=".2", alpha=0.5)