# Sleep Information Merge

This notebook merges sleep information obtained from questionnaires (self-reports) with sleep information derived from IMU data.

In [None]:
import json
from pathlib import Path

import pandas as pd
import numpy as np

import biopsykit as bp
from biopsykit.utils.dataframe_handling import replace_missing_data
from biopsykit.utils.time import get_time_from_date

import pingouin as pg

from tqdm.auto import tqdm
from fau_colors import cmaps

import matplotlib.pyplot as plt
import seaborn as sns

from carwatch_analysis.datasets import CarWatchDatasetRaw
from carwatch_analysis.exceptions import ImuDataNotFoundException

%matplotlib widget
%load_ext autoreload
%autoreload 2

In [None]:
# Close any figures that are still open from a previous run of the notebook.
plt.close("all")

# Use the `faculties` colormap from fau_colors as the default palette for all plots.
palette = sns.color_palette(cmaps.faculties)
sns.set_theme(context="notebook", style="ticks", palette=palette)

# Matplotlib defaults, set after `sns.set_theme` in the same order as before.
plt.rcParams.update(
    {
        "figure.figsize": (8, 4),
        "pdf.fonttype": 42,
        "mathtext.default": "regular",
    }
)

# Rounding setting for pingouin output.
pg.options["round"] = 4

# Last expression of the cell: displays the palette as cell output.
palette

## Setup Paths

In [None]:
# Key used to select the matching section of the config file loaded below.
deploy_type = "develop"

In [None]:
# Build the path to the data folder.
# The config is read via `Path.read_text`, which opens *and closes* the file; passing
# `Path.open()` straight into `json.load` would leave the file handle open.
config_dict = json.loads(Path("../../../config.json").read_text(encoding="utf-8"))
# The section selected by `deploy_type` provides the base path; it is joined onto "..".
base_path = Path("..").joinpath(config_dict[deploy_type]["base_path"])
# Last expression of the cell: displays the resolved base path as cell output.
base_path

In [None]:
# Create the raw CarWatch dataset object from the data folder at `base_path`.
dataset = CarWatchDatasetRaw(base_path)
# Last expression of the cell: displays the dataset as cell output.
dataset

## Load Data

### Self-report Information
* Sleep and wake onsets from self-reports
* Chronotype and ideal bed interval based on the chronotype

In [None]:
# Self-reported bedtime/chronotype information, extended by each row's condition,
# which is appended to the existing index as an additional level.
bedtime_selfreport = (
    dataset.chronotype_bedtime
    .join(dataset.condition_map)
    .set_index("condition", append=True)
)
bedtime_selfreport.head()

### IMU-based Information
* Sleep and wake onsets from IMU data
* Further sleep endpoints

In [None]:
# Collect the IMU-based sleep endpoints per (subject, night) combination.
dict_endpoints = {}
for subset in tqdm(dataset.groupby(["subject", "night"])):
    subject_id = subset.index["subject"][0]
    night = subset.index["night"][0]
    try:
        endpoints = subset.imu_sleep_endpoints
    except ImuDataNotFoundException:
        # No IMU data for this night: leave it out of the result.
        continue
    dict_endpoints[(subject_id, night)] = endpoints

In [None]:
# Stack the per-night results; the dict keys become the "subject" and "night" index levels.
bedtime_imu = pd.concat(dict_endpoints, names=["subject", "night"])
# Turn the "date" index level into a regular column.
bedtime_imu = bedtime_imu.reset_index("date")
bedtime_imu.head()

### Merge Self-Report and IMU Data

In [None]:
# Join the IMU-based information onto the self-report data via the index.
data = bedtime_selfreport.join(bedtime_imu)
# Expose the start of the bed interval under the shorter name "bed".
data = data.assign(bed=data["bed_interval_start"])

data.head()

### Convert datetime information from `datetime.datetime` into `datetime.time`

In [None]:
# Display the available columns before the time conversion below.
data.columns

In [None]:
# IMU-based columns: converted with the UTC / timezone-conversion flags enabled.
imu_columns = ("sleep_onset", "bed", "wake_onset")
# Self-report and chronotype columns: converted with the helper's default settings.
selfreport_columns = (
    "sleep_onset_selfreport",
    "wake_onset_selfreport",
    "bed_selfreport",
    "ideal_bed_start",
    "ideal_bed_end",
)

# Each source column gets a new "<column>_time" column.
time_columns = {
    f"{name}_time": get_time_from_date(data[name], is_utc=True, tz_convert=True) for name in imu_columns
}
time_columns.update({f"{name}_time": get_time_from_date(data[name]) for name in selfreport_columns})

data = data.assign(**time_columns)

In [None]:
# Display IMU-based and self-reported sleep onset side by side.
data[["sleep_onset_time", "sleep_onset_selfreport_time"]].head()

In [None]:
# Display IMU-based and self-reported wake onset side by side.
data[["wake_onset_time", "wake_onset_selfreport_time"]].head()

### Replace missing time information with self-report

In [None]:
# Sleep onset: fill missing values in the IMU-based column with the self-reported value.
# NOTE(review): `dropna=False` presumably keeps rows that are still missing afterwards; confirm against biopsykit.
data = replace_missing_data(data, "sleep_onset_time", "sleep_onset_selfreport_time", dropna=False)
# Wake onset is handled the other way round because the IMU-based wake onset was not computed reliably:
# the self-report is the primary source, and sensor data is only used where the self-report is missing.
data = replace_missing_data(data, "wake_onset_selfreport_time", "wake_onset_time", dropna=False)
# Copy the merged result back so that "wake_onset_time" holds the final wake onset.
data["wake_onset_time"] = data["wake_onset_selfreport_time"]
# Bed time: fill missing values in the IMU-based column with the self-reported value.
data = replace_missing_data(data, "bed_time", "bed_selfreport_time", dropna=False)

### Compute whether participants went to bed within their ideal bed time

In [None]:
# Times earlier than 18:00 are treated as "after midnight" and moved to the next day.
evening_cutoff = pd.Timedelta("18:00:00")
one_day = pd.Timedelta("1d")

for column in ["ideal_bed_start_time", "ideal_bed_end_time", "bed_time"]:
    times = data[column]
    # add 1 day to all times after midnight (i.e., before 6pm); all other values stay unchanged
    shifted = times.mask(times < evening_cutoff, times + one_day)
    data = data.assign(**{column: shifted})

In [None]:
# A bed time counts as "ideal" if it lies inside the ideal bed interval (both bounds inclusive).
ideal_bedtime = data["bed_time"].between(data["ideal_bed_start_time"], data["ideal_bed_end_time"])
# Store the flag as 0/1 instead of False/True.
data["within_ideal_bed_time"] = ideal_bedtime.astype(int)
data["within_ideal_bed_time"].value_counts()

In [None]:
# Display the first rows of the merged data, including the new "within_ideal_bed_time" column.
data.head()

In [None]:
# Display all columns to decide which ones are dropped in the next cell.
data.columns

In [None]:
# Intermediate and raw columns that are not part of the exported result.
columns_to_drop = [
    "ideal_bed_end",
    "ideal_bed_start",
    "bed_interval_start",
    "bed_interval_end",
    "number_wake_bouts",
    "sleep_bouts",
    "wake_bouts",
    "bed",
    "sleep_onset",
    "wake_onset",
    "sleep_onset_selfreport",
    "sleep_onset_selfreport_time",
    "wake_onset_selfreport",
    "wake_onset_selfreport_time",
    "bed_selfreport",
    "bed_selfreport_time",
    "ideal_bed_start_time",
    "ideal_bed_end_time",
]
# errors="ignore": listed columns that are not present are skipped instead of raising.
data = data.drop(columns=columns_to_drop, errors="ignore")

In [None]:
# Display the final merged dataframe before exporting it.
data

## Export

In [None]:
# Target folder for the merged sleep information (relative to the data folder).
export_path = base_path.joinpath("questionnaire/processed")
# `to_csv` does not create missing directories, so make sure the target folder exists;
# this is a no-op when it is already there.
export_path.mkdir(parents=True, exist_ok=True)

data.to_csv(export_path.joinpath("sleep_information_merged.csv"))