# Saliva Processing – All Reporting Types (Naive, Selfreport, App, Sensor)

In [1]:
from pathlib import Path
import json

import numpy as np
import pandas as pd
import pingouin as pg

import matplotlib.pyplot as plt
import seaborn as sns

from tqdm.auto import tqdm

import biopsykit as bp
from biopsykit.utils.dataframe_handling import multi_xs
from biopsykit.carwatch_logs import LogData
from biopsykit.carwatch_logs.log_data import get_logs_for_action
import biopsykit.carwatch_logs.log_actions as log_actions


from carwatch_analysis.io import load_sensor_awakening_times, convert_cortisol_sample_ids
from carwatch_analysis.datasets import CarWatchDatasetProcessed
from carwatch_analysis.exceptions import AppLogDataNotFoundException
from carwatch_analysis.stats import create_unique_night_id

from carwatch_analysis.data_processing.app_logs import (
    process_app_log_single_subject,
    restructure_sample_times_dataframe_app,
)
from carwatch_analysis.data_processing.sample_times import (
    add_naive_sample_times,
    sample_times_long_format,
    compute_sample_times_parameter,
    restructure_sample_times_dataframe,
    compute_time_diff_to_naive,
    add_delay_group_index,
)
from carwatch_analysis.data_processing.saliva import compute_saliva_features

import datetime

%load_ext autoreload
%autoreload 2
%matplotlib widget

## Setup Paths

In [2]:
# name of the section in config.json whose "base_path" entry is used as data folder
deploy_type = "develop"

In [3]:
# build path to data folder
# read the config via `read_text` so the file handle is closed again right away
# (passing `Path.open()` directly to `json.load` leaves the handle open)
config_dict = json.loads(Path("../../../config.json").read_text(encoding="utf-8"))
# the config holds one section per deploy type; each section provides a "base_path" entry
base_path = Path(config_dict[deploy_type]["base_path"])

base_path

PosixPath('/Users/Richer/Documents/PhD/Projects/HealthPsychology/CARWatch/Data')

In [4]:
# export folder of this analysis (relative to the working directory of the notebook);
# cleaned inputs are read from it and the processed results are written to it further below
export_path = Path("../../exports")
# create the folder if it is missing; an already existing folder is not an error
export_path.mkdir(exist_ok=True)
# show the absolute location as cell output
export_path.resolve()

PosixPath('/Users/Richer/Documents/PhD/Projects/HealthPsychology/CARWatch/Code/carwatch_analysis/experiments/2022_car_sampling_pnec/exports')

In [5]:
# create the dataset object on top of the data folder
# NOTE(review): `use_cache=True` presumably re-uses previously loaded data – confirm in CarWatchDatasetProcessed
dataset = CarWatchDatasetProcessed(base_path, use_cache=True)
# display the dataset index (one row per subject and night)
dataset

Unnamed: 0,subject,night
0,AB19E,0
1,AB19E,1
2,AB31R,0
3,AB31R,1
4,AC12E,0
...,...,...
229,VE19A,1
230,VS09S,0
231,VS09S,1
232,WM13K,0


## Load Data

### Awakening Times

#### Selfreport

In [6]:
# self-reported wake onset per subject and night: keep only this column, discard
# nights without a value, convert to timedelta and sort by index
bedtimes = (
    dataset.endpoints_selfreport[["wake_onset_selfreport"]]
    .dropna()
    .apply(pd.to_timedelta)
    .sort_index()
)

bedtimes.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,wake_onset_selfreport
subject,night,Unnamed: 2_level_1
AB19E,0,0 days 05:45:00
AB31R,0,0 days 05:40:00
AB31R,1,0 days 05:50:00
AC12E,0,0 days 07:00:00
AC12E,1,0 days 07:30:00


#### App

In [7]:
app_wakeup_path = base_path / "app_logs/app_data_wakeup.xlsx"

# wide table: one row per subject, one column per night
wake_onset_app_wide = pd.read_excel(app_wakeup_path).set_index("subject").rename_axis(columns="night")
# long format with a (subject, night) index and a single timedelta column
app_wakeup = pd.to_timedelta(wake_onset_app_wide.stack()).to_frame("wake_onset_app").sort_index()

app_wakeup.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,wake_onset_app
subject,night,Unnamed: 2_level_1
BC05R,0,0 days 07:31:16
BC05R,1,0 days 08:46:43
BU07E,0,0 days 06:01:24
BU07E,1,0 days 09:13:07
CC09K,0,0 days 06:09:00


#### Sensor

In [8]:
# the general analysis keeps its exports in a sibling folder, addressed relative to this export folder
export_path_general = export_path / "../../00_general/exports"
file_path = export_path_general / "imu_sleep_endpoints_cleaned.csv"

# load the sensor-derived awakening times from the cleaned sleep endpoints file
sensor_wakeup = load_sensor_awakening_times(file_path)
sensor_wakeup.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,wake_onset_sensor
subject,night,Unnamed: 2_level_1
AB19E,0,0 days 05:56:52
AB19E,1,0 days 08:57:44
AB31R,0,0 days 07:47:26
AB31R,1,0 days 08:08:21
AC12E,0,0 days 07:38:46


### Cortisol Samples

In [9]:
index_cols = ["subject", "night", "condition", "sample"]

# load the cleaned samples, restructure the dataframe, and attach the wake onsets
# from selfreport, app, and sensor
cortisol_samples = (
    pd.read_csv(export_path / "cortisol_samples_cleaned.csv")
    .set_index(index_cols)
    .rename(columns={"time_abs": "sample_time_selfreport"})
    .drop(columns=["time", "wake_onset_time"])
    .join(bedtimes)
    .join(app_wakeup)
    .join(sensor_wakeup)
)

# count sampling days (one row each after unstacking the samples) with and without
# the rows that lack a selfreport wake onset or a date
num_participants_before = len(cortisol_samples.unstack("sample"))
num_participants_after = len(
    cortisol_samples.dropna(subset=["wake_onset_selfreport", "date"]).unstack("sample")
)

print(f"Number of sampling days before: {num_participants_before}")
print(f"Number of sampling days after: {num_participants_after}")

# all time columns (selfreport sampling time + every wake onset column) become timedelta
td_cols = ["sample_time_selfreport", *cortisol_samples.filter(like="wake_onset").columns]
cortisol_samples[td_cols] = cortisol_samples[td_cols].apply(pd.to_timedelta)

cortisol_samples.head()

Number of sampling days before: 216
Number of sampling days after: 204


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,sample_time_selfreport,date,cortisol,wake_onset_selfreport,wake_onset_app,wake_onset_sensor
subject,night,condition,sample,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AB19E,0,Known Alarm,S0,0 days 06:00:00,2019-11-18,2.8,0 days 05:45:00,NaT,0 days 05:56:52
AB19E,0,Known Alarm,S1,0 days 06:15:00,2019-11-18,5.59,0 days 05:45:00,NaT,0 days 05:56:52
AB19E,0,Known Alarm,S2,0 days 06:30:00,2019-11-18,13.29,0 days 05:45:00,NaT,0 days 05:56:52
AB19E,0,Known Alarm,S3,0 days 06:45:00,2019-11-18,13.46,0 days 05:45:00,NaT,0 days 05:56:52
AB19E,0,Known Alarm,S4,0 days 07:01:00,2019-11-18,12.65,0 days 05:45:00,NaT,0 days 05:56:52


## Data Processing

In [10]:
# app log results per subject (only subjects for which the processing returned data)
dict_barcode_day = {}
# subjects whose app logs could not be found; kept for inspection instead of dropping them silently
subjects_without_app_logs = []

for subset in tqdm(dataset.groupby("subject")):
    # positional access: independent of the row labels of the subset index
    subject_id = subset.index["subject"].iloc[0]
    try:
        df_barcode_scanned = process_app_log_single_subject(subset)
    except AppLogDataNotFoundException:
        # not every subject has app logs – remember the subject and move on
        subjects_without_app_logs.append(subject_id)
        continue
    if df_barcode_scanned is not None:
        dict_barcode_day[subject_id] = df_barcode_scanned

  0%|          | 0/117 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

In [15]:
# combine the per-subject app results (dict keys become the "subject" index level)
# and restructure them with the project helper
sample_times = restructure_sample_times_dataframe_app(pd.concat(dict_barcode_day, names=["subject"]))

# add sampling times from app to dataframe, then apply the processing steps one after another
cortisol_samples_time = (
    cortisol_samples.join(sample_times)
    .sort_index()
    .pipe(add_naive_sample_times)
    .pipe(sample_times_long_format)
    .pipe(compute_sample_times_parameter)
    .pipe(restructure_sample_times_dataframe)
    .pipe(compute_time_diff_to_naive)
)

cortisol_samples_time.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,date,wake_onset,sample_time,cortisol,time_diff_to_wake_onset,time_diff_min,time_diff_to_naive_min
subject,night,condition,reporting_type,sample,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
AB19E,0,Known Alarm,Naive,S0,2019-11-18,0 days 05:45:00,0 days 05:45:00,2.8,0 days 00:00:00,0.0,0.0
AB19E,0,Known Alarm,Naive,S1,2019-11-18,0 days 05:45:00,0 days 06:00:00,5.59,0 days 00:15:00,15.0,0.0
AB19E,0,Known Alarm,Naive,S2,2019-11-18,0 days 05:45:00,0 days 06:15:00,13.29,0 days 00:30:00,30.0,0.0
AB19E,0,Known Alarm,Naive,S3,2019-11-18,0 days 05:45:00,0 days 06:30:00,13.46,0 days 00:45:00,45.0,0.0
AB19E,0,Known Alarm,Naive,S4,2019-11-18,0 days 05:45:00,0 days 06:45:00,12.65,0 days 01:00:00,60.0,0.0


## Data Cleaning

### Sensor Awakening Times

In [17]:
# plausibility limits (in minutes) for Sensor wake onsets relative to the first reported sampling time
# NOTE(review): the comment in this cell used to state "15 min earlier" while the code compared
# against 30; the value that was actually executed (30) is kept – confirm the intended limit.
max_min_wake_onset_later = 1
max_min_wake_onset_earlier = 30

# consider only delay to S0
cort_tmp = cortisol_samples_time.xs("S0", level="sample")
# consider only Sensor wake onsets
cort_tmp = cort_tmp.reindex(
    ["AW: Sensor, ST: Naive", "AW: Sensor, ST: Selfreport", "AW: Sensor, ST: App"], level="reporting_type"
)
# select nights where Sensor wake onset is more than 1 min *later* or more than 30 min *earlier*
# than the first reported sampling time
s0_time_diff = cort_tmp["time_diff_to_naive_min"]
imu_mask = (s0_time_diff < -max_min_wake_onset_later) | (s0_time_diff > max_min_wake_onset_earlier)

# drop selected nights: unstack the samples so that the row index matches the index of the mask,
# remove the flagged rows, and restore the long format
cortisol_samples_time_cleaned = cortisol_samples_time.unstack().drop(index=imu_mask.loc[imu_mask].index).stack()
cortisol_samples_time_cleaned.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,date,wake_onset,sample_time,cortisol,time_diff_to_wake_onset,time_diff_min,time_diff_to_naive_min
subject,night,condition,reporting_type,sample,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
AB19E,0,Known Alarm,Naive,S0,2019-11-18,0 days 05:45:00,0 days 05:45:00,2.8,0 days 00:00:00,0.0,0.0
AB19E,0,Known Alarm,Naive,S1,2019-11-18,0 days 05:45:00,0 days 06:00:00,5.59,0 days 00:15:00,15.0,0.0
AB19E,0,Known Alarm,Naive,S2,2019-11-18,0 days 05:45:00,0 days 06:15:00,13.29,0 days 00:30:00,30.0,0.0
AB19E,0,Known Alarm,Naive,S3,2019-11-18,0 days 05:45:00,0 days 06:30:00,13.46,0 days 00:45:00,45.0,0.0
AB19E,0,Known Alarm,Naive,S4,2019-11-18,0 days 05:45:00,0 days 06:45:00,12.65,0 days 01:00:00,60.0,0.0


### Cortisol Samples

In [18]:
col = "time_diff_to_naive_min"

# flag samples whose sampling time differs by 30 min or more (in either direction)
cort_mask = cortisol_samples_time_cleaned[col].abs().ge(30)
# number of flagged samples
display(cort_mask.sum())

# remove the flagged samples; afterwards keep only rows that are complete across all samples
cortisol_samples_time_cleaned = (
    cortisol_samples_time_cleaned.loc[~cort_mask]
    .unstack("sample")
    .dropna()
    .stack()
)
cortisol_samples_time_cleaned.head()

54

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,date,wake_onset,sample_time,cortisol,time_diff_to_wake_onset,time_diff_min,time_diff_to_naive_min
subject,night,condition,reporting_type,sample,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
AB19E,0,Known Alarm,Naive,S0,2019-11-18,0 days 05:45:00,0 days 05:45:00,2.8,0 days 00:00:00,0.0,0.0
AB19E,0,Known Alarm,Naive,S1,2019-11-18,0 days 05:45:00,0 days 06:00:00,5.59,0 days 00:15:00,15.0,0.0
AB19E,0,Known Alarm,Naive,S2,2019-11-18,0 days 05:45:00,0 days 06:15:00,13.29,0 days 00:30:00,30.0,0.0
AB19E,0,Known Alarm,Naive,S3,2019-11-18,0 days 05:45:00,0 days 06:30:00,13.46,0 days 00:45:00,45.0,0.0
AB19E,0,Known Alarm,Naive,S4,2019-11-18,0 days 05:45:00,0 days 06:45:00,12.65,0 days 01:00:00,60.0,0.0


### Further Split *Selfreport* into *Selfreport with App* and *Selfreport without App*

In [19]:
# subjects that have at least one entry for the App reporting type
subjects_with_app = (
    cortisol_samples_time_cleaned.xs("AW & ST: App", level="reporting_type")
    .index.get_level_values("subject")
    .unique()
)

cortisol_samples_with_app = cortisol_samples_time_cleaned.loc[subjects_with_app]
cortisol_samples_without_app = cortisol_samples_time_cleaned.drop(subjects_with_app)

# new reporting type label -> (subject subset, original reporting type to select from it)
selfreport_splits = {
    "AW & ST: Selfreport (without App)": (cortisol_samples_without_app, "AW & ST: Selfreport"),
    "AW & ST: Selfreport (with App)": (cortisol_samples_with_app, "AW & ST: Selfreport"),
    "AW: Sensor, ST: Selfreport (without App)": (cortisol_samples_without_app, "AW: Sensor, ST: Selfreport"),
    "AW: Sensor, ST: Selfreport (with App)": (cortisol_samples_with_app, "AW: Sensor, ST: Selfreport"),
}
cortisol_selfreport = pd.concat(
    {
        label: samples.xs(reporting_type, level="reporting_type")
        for label, (samples, reporting_type) in selfreport_splits.items()
    },
    names=["reporting_type"],
)
# bring the index levels into the same order as in the cleaned dataframe
cortisol_selfreport = cortisol_selfreport.reorder_levels(cortisol_samples_time_cleaned.index.names)

# append the split selfreport data to the existing reporting types and convert the sample ids
cortisol_samples_time_cleaned_all = convert_cortisol_sample_ids(
    pd.concat([cortisol_samples_time_cleaned, cortisol_selfreport])
)

cortisol_samples_time_cleaned_all.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,date,wake_onset,sample_time,cortisol,time_diff_to_wake_onset,time_diff_min,time_diff_to_naive_min
subject,night,condition,reporting_type,sample,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
AB19E,0,Known Alarm,Naive,S1,2019-11-18,0 days 05:45:00,0 days 05:45:00,2.8,0 days 00:00:00,0.0,0.0
AB19E,0,Known Alarm,Naive,S2,2019-11-18,0 days 05:45:00,0 days 06:00:00,5.59,0 days 00:15:00,15.0,0.0
AB19E,0,Known Alarm,Naive,S3,2019-11-18,0 days 05:45:00,0 days 06:15:00,13.29,0 days 00:30:00,30.0,0.0
AB19E,0,Known Alarm,Naive,S4,2019-11-18,0 days 05:45:00,0 days 06:30:00,13.46,0 days 00:45:00,45.0,0.0
AB19E,0,Known Alarm,Naive,S5,2019-11-18,0 days 05:45:00,0 days 06:45:00,12.65,0 days 01:00:00,60.0,0.0


## Feature Computation

In [24]:
# Work on a copy for the feature computation; the minutes relative to wake onset
# are provided under the column name "time"
cort_samples_compute = cortisol_samples_time_cleaned_all.copy().rename(columns={"time_diff_min": "time"})

cort_samples_compute.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,date,wake_onset,sample_time,cortisol,time_diff_to_wake_onset,time,time_diff_to_naive_min
subject,night,condition,reporting_type,sample,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
AB19E,0,Known Alarm,Naive,S1,2019-11-18,0 days 05:45:00,0 days 05:45:00,2.8,0 days 00:00:00,0.0,0.0
AB19E,0,Known Alarm,Naive,S2,2019-11-18,0 days 05:45:00,0 days 06:00:00,5.59,0 days 00:15:00,15.0,0.0
AB19E,0,Known Alarm,Naive,S3,2019-11-18,0 days 05:45:00,0 days 06:15:00,13.29,0 days 00:30:00,30.0,0.0
AB19E,0,Known Alarm,Naive,S4,2019-11-18,0 days 05:45:00,0 days 06:30:00,13.46,0 days 00:45:00,45.0,0.0
AB19E,0,Known Alarm,Naive,S5,2019-11-18,0 days 05:45:00,0 days 06:45:00,12.65,0 days 01:00:00,60.0,0.0


In [25]:
# compute the saliva features from the prepared samples using the project helper
# (see carwatch_analysis.data_processing.saliva for the exact set of features)
cortisol_features = compute_saliva_features(cort_samples_compute)
cortisol_features.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,cortisol
subject,night,condition,reporting_type,saliva_feature,Unnamed: 5_level_1
AB19E,0,Known Alarm,AW & ST: Selfreport,auc_g,614.03
AB19E,0,Known Alarm,AW & ST: Selfreport,auc_i,443.23
AB19E,0,Known Alarm,AW & ST: Selfreport,ini_val,2.8
AB19E,0,Known Alarm,AW & ST: Selfreport,max_inc,10.66
AB19E,0,Known Alarm,AW & ST: Selfreport,max_val,13.46


## Export

In [26]:
# write the processed samples and the derived features into the export folder
cortisol_samples_time_cleaned_all.to_csv(export_path / "cortisol_samples_processed_all_reporting_types.csv")
cortisol_features.to_csv(export_path / "cortisol_features_processed_all_reporting_types.csv")