# IMU Data Cleaning

In [None]:
from pathlib import Path
import json

import pandas as pd
import numpy as np
import pingouin as pg

import biopsykit as bp
from biopsykit.utils.time import time_to_timedelta
from biopsykit.utils.dataframe_handling import multi_xs, int_from_str_idx, camel_to_snake

from fau_colors import cmaps

import matplotlib.pyplot as plt
import seaborn as sns

from tqdm.auto import tqdm

from carwatch_analysis.datasets import CarWatchDatasetProcessed
from carwatch_analysis.data_cleaning.imu import clean_statistical_outlier
from carwatch_analysis.exceptions import ImuDataNotFoundException

%matplotlib widget
%load_ext autoreload
%autoreload 2

In [None]:
# Start from a clean slate: close every figure that is still open.
plt.close("all")

# Build the seaborn palette from the `faculties` colormap and make it the default theme palette.
palette = sns.color_palette(cmaps.faculties)
sns.set_theme(context="notebook", style="ticks", palette=palette)

# Global matplotlib defaults used by all figures of this notebook.
plt.rcParams.update(
    {
        "figure.figsize": (8, 4),
        "pdf.fonttype": 42,
        "mathtext.default": "regular",
    }
)

# Round pingouin result tables to four decimals.
pg.options["round"] = 4

# Last expression of the cell: display the palette.
palette

## Setup Paths

In [None]:
# Key into config.json that selects which deployment's "base_path" entry is used below.
deploy_type = "local"

In [None]:
# build path to data folder
# Read the config via `read_text` so the file handle is closed right after reading
# (passing an un-managed `open()` result to `json.load` leaves the handle open).
config_dict = json.loads(Path("../../../config.json").read_text(encoding="utf-8"))
# The configured "base_path" of the selected deployment is joined onto the parent directory.
base_path = Path("..").joinpath(config_dict[deploy_type]["base_path"])
base_path

In [None]:
# Dataset object rooted at `base_path`; all questionnaire and IMU data below are accessed through it.
dataset = CarWatchDatasetProcessed(base_path)
dataset

## Load Data

### Questionnaire

In [None]:
# Merged sleep information table; the chronotype-related columns are taken from it further below.
sleep_info = dataset.sleep_information_merged
sleep_info.head()

### IMU

#### Load Static Moment Features and concatenate into one DataFrame

In [None]:
# Gather the static moment features per (subject, night); nights without IMU data are left out.
dict_features = {}
for subset in tqdm(dataset.groupby(["subject", "night"])):
    subject_id = subset.index["subject"][0]
    night = subset.index["night"][0]
    try:
        features = subset.imu_static_moment_features
    except ImuDataNotFoundException:
        # No IMU data available for this night -> nothing to add.
        continue
    dict_features[(subject_id, night)] = features

# The dictionary keys become the two outer index levels of the concatenated frame.
static_features = pd.concat(dict_features, names=["subject", "night"])

In [None]:
static_features.head()

#### Load Sleep Endpoints and concatenate into one DataFrame

In [None]:
# Gather the sleep endpoints per (subject, night); nights without IMU data are left out.
dict_endpoints = {}
for subset in tqdm(dataset.groupby(["subject", "night"])):
    subject_id = subset.index["subject"][0]
    night = subset.index["night"][0]
    try:
        endpoints = subset.imu_sleep_endpoints
    except ImuDataNotFoundException:
        # No IMU data available for this night -> nothing to add.
        continue
    dict_endpoints[(subject_id, night)] = endpoints

# The dictionary keys become the two outer index levels of the concatenated frame.
imu_endpoints = pd.concat(dict_endpoints, names=["subject", "night"])

In [None]:
imu_endpoints.head()

## Data Cleaning

### Drop Nights with Incomplete IMU Data

In [None]:
# Flag nights whose total sleep duration is below 4 * 60.
# NOTE(review): presumably `total_sleep_duration` is given in minutes, i.e. the threshold is 4 hours - confirm.
# NOTE(review): rows with a missing duration compare as False and are therefore kept - confirm this is intended.
sleep_duration_mask = imu_endpoints.reset_index("date")["total_sleep_duration"] < 4 * 60

# Keep only the nights that are NOT flagged, in both tables.
# NOTE(review): the mask no longer carries the "date" index level; `.loc` relies on the mask being
# alignable with the index of each frame - verify this for the pandas version in use.
imu_endpoints = imu_endpoints.loc[~sleep_duration_mask]
static_features = static_features.loc[~sleep_duration_mask]

## Adding Categorical Variables

### Wakeup Sources

In [None]:
# Missing wakeup sources are filled with 0 so that the column can be cast to integer.
wakeup_source = dataset.questionnaire["wakeup_source"].fillna(0).astype(int)

### Weekend

In [None]:
# NOTE: 4 = Friday, 5 = Saturday; this is chosen because "date" corresponds to the day when the night *started*,
# i.e., the nights from Friday to Saturday and from Saturday to Sunday are considered weekend
night_start_weekday = dataset.date["date"].dt.weekday
# 1 = weekend night, 0 = any other night
weekend = night_start_weekday.isin([4, 5]).astype(int).rename("weekend")

### Wakeup Hour

In [None]:
# Express the wake onset time in (fractional) hours and truncate it to the full hour.
one_hour = pd.Timedelta(hours=1)
wake_onset_in_hours = dataset.sleep_information_merged["wake_onset_time"] / one_hour
wakeup_hour = np.floor(wake_onset_in_hours).rename("wakeup_hour")

### Chronotype

In [None]:
# Chronotype-related columns taken from `sleep_info`.
meq = sleep_info["MEQ"]
# Use `rename` (returns a new Series) instead of assigning `.name` in place: the object returned by
# `sleep_info[...]` may be cached by pandas, so mutating its name could leak back into `sleep_info`.
chronotype = sleep_info["chronotype_coarse"].rename("chronotype")
within_ideal_bed_time = sleep_info["within_ideal_bed_time"]

In [None]:
# Attach every additional variable to the feature table exactly once; a variable whose name is
# already present (as index level or column) is skipped, so re-running the cell is harmless.
for variable in (wakeup_source, wakeup_hour, weekend, meq, chronotype, within_ideal_bed_time):
    present_names = static_features.reset_index().columns
    if variable.name in present_names:
        continue
    static_features = static_features.join(variable)

## Set Index Levels

In [None]:
# Every index level / column except the "data" column and "imu_feature" is collected
# as an index level for the final table.
flat_columns = static_features.reset_index().columns
index_cols = list(flat_columns.drop(["data", "imu_feature"]))

In [None]:
# Rebuild the index from the collected levels, with "imu_feature" as the innermost level.
flat_features = static_features.reset_index()
static_features = flat_features.set_index([*index_cols, "imu_feature"])
static_features.head()

## Export

In [None]:
# Folder (relative to the notebook's working directory) that receives the cleaned CSV files.
export_path = Path("../../exports")
# Create the folder if it is missing; an already existing folder is not an error.
export_path.mkdir(exist_ok=True)

In [None]:
# Write both cleaned tables into the export folder.
static_features_file = export_path / "imu_static_moment_features_cleaned.csv"
imu_endpoints_file = export_path / "imu_sleep_endpoints_cleaned.csv"

static_features.to_csv(static_features_file)
imu_endpoints.to_csv(imu_endpoints_file)