# Saliva Processing – All Log Types (Naive, Selfreport, App, Sensor)

In [None]:
from pathlib import Path
import json

import numpy as np
import pandas as pd
import pingouin as pg

import matplotlib.pyplot as plt
import seaborn as sns

from tqdm.auto import tqdm

import biopsykit as bp
from biopsykit.utils.dataframe_handling import multi_xs
from biopsykit.carwatch_logs import LogData
from biopsykit.carwatch_logs.log_data import get_logs_for_action
import biopsykit.carwatch_logs.log_actions as log_actions


from carwatch_analysis.io import load_sensor_awakening_times
from carwatch_analysis.datasets import CarWatchDatasetProcessed
from carwatch_analysis.exceptions import AppLogDataNotFoundException
from carwatch_analysis.stats import create_unique_night_id

from carwatch_analysis.data_processing.app_logs import (
    process_app_log_single_subject,
    restructure_sample_times_dataframe_app,
)
from carwatch_analysis.data_processing.sample_times import (
    add_naive_sample_times,
    sample_times_long_format,
    compute_sample_times_parameter,
    restructure_sample_times_dataframe,
    compute_time_diff_to_naive,
    add_delay_group_index,
)
from carwatch_analysis.data_processing.saliva import compute_saliva_features

import datetime

%load_ext autoreload
%autoreload 2
%matplotlib widget

## Setup Paths

In [None]:
# key used to select the matching section of config.json (see the lookup of "base_path" below)
deploy_type = "develop"

In [None]:
# build path to data folder
# Read the config inside a context manager so the file handle is closed deterministically
# instead of being left open until garbage collection.
with Path("../../../config.json").open(encoding="utf-8") as config_file:
    config_dict = json.load(config_file)
# the config is keyed by deployment type; each entry provides the "base_path" of the data folder
base_path = Path(config_dict[deploy_type]["base_path"])

base_path

In [None]:
# folder for the exported files of this analysis, given relative to the working directory
export_path = Path("../../exports")
# display the absolute location so the relative path can be checked
export_path.resolve()

In [None]:
# dataset object rooted at the data folder
# NOTE(review): use_cache=True presumably reuses previously computed results - confirm in CarWatchDatasetProcessed
dataset = CarWatchDatasetProcessed(base_path, use_cache=True)
dataset

## Load Data

### Awakening Times

#### Selfreport

In [None]:
# self-reported wake onset: keep only that column, discard missing entries,
# convert the values to timedelta and sort by index
bedtimes = (
    dataset.endpoints_selfreport[["wake_onset_selfreport"]]
    .dropna()
    .apply(pd.to_timedelta)
    .sort_index()
)

bedtimes.head()

#### App

In [None]:
# spreadsheet with the wake-up times taken from the app logs
app_wakeup_path = base_path.joinpath("app_logs/app_data_wakeup.xlsx")

# index the table by subject and label the remaining column axis as "night"
app_wakeup = pd.read_excel(app_wakeup_path).set_index("subject")
app_wakeup.columns.name = "night"
# move the night columns into the index, convert the values to timedelta
# and store them in a single "wake_onset_app" column
app_wakeup = pd.DataFrame(
    pd.to_timedelta(app_wakeup.stack()),
    columns=["wake_onset_app"],
).sort_index()

app_wakeup.head()

#### Sensor

In [None]:
# the sensor awakening times are read from the export folder of the general analysis
export_path_general = export_path / "../../00_general/exports"
file_path = export_path_general / "imu_sleep_endpoints_cleaned.csv"

sensor_wakeup = load_sensor_awakening_times(file_path)
sensor_wakeup.head()

### Cortisol Samples

In [None]:
cortisol_samples = pd.read_csv(export_path / "cortisol_samples_cleaned.csv")

index_cols = ["subject", "night", "condition", "sample"]
cortisol_samples = (
    cortisol_samples
    # restructure dataframe: set the index, rename the absolute sample time, drop unused columns
    .set_index(index_cols)
    .rename(columns={"time_abs": "sample_time_selfreport"})
    .drop(columns=["time", "wake_onset_time"])
    # add time information from selfreport, app, and sensor
    .join(bedtimes)
    .join(app_wakeup)
    .join(sensor_wakeup)
)

# convert the sample time and every "wake_onset" column to timedelta
td_cols = ["sample_time_selfreport", *cortisol_samples.filter(like="wake_onset").columns]
cortisol_samples[td_cols] = cortisol_samples[td_cols].apply(pd.to_timedelta)

cortisol_samples.head()

## Data Processing

In [None]:
# results of process_app_log_single_subject, keyed by subject id
dict_barcode_day = {}

for subset in tqdm(dataset.groupby("subject")):
    subject_id = subset.index["subject"][0]
    try:
        df_barcode_scanned = process_app_log_single_subject(subset)
    except AppLogDataNotFoundException:
        # subjects without app log data are skipped
        continue
    if df_barcode_scanned is None:
        # nothing was returned for this subject, so there is nothing to store
        continue
    dict_barcode_day[subject_id] = df_barcode_scanned

In [None]:
# combine the per-subject results into one dataframe (subject becomes the outer index level)
sample_times = restructure_sample_times_dataframe_app(pd.concat(dict_barcode_day, names=["subject"]))

# add sampling times from app to dataframe
cortisol_samples_time = cortisol_samples.join(sample_times).sort_index()

# apply the processing steps one after another; each step receives the output of the previous one
cortisol_samples_time = (
    cortisol_samples_time.pipe(add_naive_sample_times)
    .pipe(sample_times_long_format)
    .pipe(compute_sample_times_parameter)
    .pipe(restructure_sample_times_dataframe)
    .pipe(compute_time_diff_to_naive)
)

# show combined log types as "A + B" instead of "A_B"
cortisol_samples_time = cortisol_samples_time.rename(lambda s: s.replace("_", " + "), level="log_type")

cortisol_samples_time.head()

## Data Cleaning

### Sensor Awakening Times

In [None]:
# consider only delay to S0
cort_tmp = cortisol_samples_time.xs("S0", level="sample")
# consider only Sensor wake onsets
cort_tmp = multi_xs(cort_tmp, keys=["Sensor + Selfreport", "Sensor + App"], level="log_type")
# select nights where Sensor wake onset is more than 1 min *later* (difference < -1) or more than
# 15 min *earlier* (difference > 15) than the first reported sampling time
# NOTE: the upper bound used to be 30, which contradicted the documented 15 min limit and had no
# effect of its own because the subsequent cleaning step removes all differences of 30 min or more.
imu_mask = (cort_tmp["time_diff_to_naive_min"] < -1) | (cort_tmp["time_diff_to_naive_min"] > 15)

# drop selected nights
# (unstack() pivots the innermost index level to columns so the mask index can be dropped as whole rows;
# presumably that level is "sample" - TODO confirm against the index order of cortisol_samples_time)
cortisol_samples_time_cleaned = cortisol_samples_time.unstack().drop(index=imu_mask.loc[imu_mask].index).stack()
cortisol_samples_time_cleaned.head()

### Cortisol Samples

In [None]:
col = "time_diff_to_naive_min"

# flag data that have sampling time differences of 30 min or more
cort_mask = cortisol_samples_time_cleaned[col].abs() >= 30
cortisol_samples_time_cleaned = (
    # remove the flagged entries
    cortisol_samples_time_cleaned.loc[~cort_mask]
    # pivot the samples to columns and drop every row that now has a missing value
    .unstack("sample")
    .dropna()
    # restore the long format
    .stack()
)
cortisol_samples_time_cleaned.head()

### Further Split *Selfreport* into *Selfreport with App* and *Selfreport without App*

In [None]:
# subjects that have entries for the "App" log type
subjects_with_app = cortisol_samples_time_cleaned.xs("App", level="log_type").index.get_level_values("subject").unique()

# split the cleaned data by whether the subject appears in the list above
cortisol_samples_with_app = cortisol_samples_time_cleaned.loc[subjects_with_app]
cortisol_samples_without_app = cortisol_samples_time_cleaned.drop(subjects_with_app)

# for both selfreport-based log types, build a "without App" and a "with App" variant
cortisol_selfreport = {
    f"{base_log_type} {app_suffix}": samples.xs(base_log_type, level="log_type")
    for base_log_type in ("Selfreport", "Sensor + Selfreport")
    for app_suffix, samples in (
        ("without App", cortisol_samples_without_app),
        ("with App", cortisol_samples_with_app),
    )
}
# the dict keys become the new "log_type" level; afterwards restore the original level order
cortisol_selfreport = pd.concat(cortisol_selfreport, names=["log_type"]).reorder_levels(
    cortisol_samples_time_cleaned.index.names
)

# append the split selfreport variants to the cleaned data
cortisol_samples_time_cleaned_all = pd.concat([cortisol_samples_time_cleaned, cortisol_selfreport])
cortisol_samples_time_cleaned_all.head()

## Feature Computation

In [None]:
# Work on a copy for the feature computation and expose the "time_diff_min" column under the name "time"
# (presumably the column name expected by the feature computation - verify against compute_saliva_features)
cort_samples_compute = cortisol_samples_time_cleaned_all.copy().rename(columns={"time_diff_min": "time"})

cort_samples_compute.head()

In [None]:
# compute the saliva features from the prepared copy of the cleaned samples
cortisol_features = compute_saliva_features(cort_samples_compute)
cortisol_features.head()

## Export

In [None]:
# make sure the export folder exists before writing; exist_ok=True avoids an error if it is already there
export_path = Path("../../exports")
export_path.mkdir(exist_ok=True)

In [None]:
# write the cleaned samples (all log types) and the computed features to the export folder
cortisol_samples_time_cleaned_all.to_csv(export_path / "cortisol_samples_processed_all_log_types.csv")
cortisol_features.to_csv(export_path / "cortisol_features_processed_all_log_types.csv")