# IMU Feature Extraction Pipeline

## Setup

In [None]:
from pathlib import Path
import json
import re

import pandas as pd
import numpy as np
import pingouin as pg

import biopsykit as bp
from biopsykit.utils.time import timedelta_to_time
from biopsykit.signals.imu.feature_extraction.static_moments import compute_features

from fau_colors import cmaps

import matplotlib.pyplot as plt
import seaborn as sns

from joblib import Parallel
from tqdm.notebook import tqdm

from carwatch_analysis.datasets import CarWatchDatasetRaw
from carwatch_analysis.data_processing.imu import process_night
from carwatch_analysis.exceptions import ImuDataNotFoundException, NoSuitableImuDataFoundException, DateNotAvailableException

%matplotlib widget
%load_ext autoreload
%autoreload 2

In [None]:
# Start from a clean slate: close every figure that is still open.
plt.close("all")

# Use the FAU "faculties" colormap as the default seaborn palette.
palette = sns.color_palette(cmaps.faculties)
sns.set_theme(context="notebook", style="ticks", palette=palette)

# Matplotlib defaults, applied after set_theme() so they are not overwritten by it.
plt.rcParams.update(
    {
        "figure.figsize": (8, 4),
        "pdf.fonttype": 42,
        "mathtext.default": "regular",
    }
)

# Round pingouin result tables to four decimals.
pg.options["round"] = 4

# Bare expression: displays the palette as the cell output.
palette

## Setup Paths

In [None]:
# Key into config.json: selects which entry's "base_path" is used when the data path is built.
deploy_type = "local"

In [None]:
# build path to data folder
# Parse the config inside a context manager so the file handle is closed as soon as the
# JSON has been read (a bare `.open()` passed to json.load() is never closed explicitly).
with Path("../../../config.json").open(encoding="utf-8") as config_file:
    config_dict = json.load(config_file)

# The "base_path" entry of the selected deployment is joined onto "..".
base_path = Path("..").joinpath(config_dict[deploy_type]["base_path"])

# Bare expression: displays the resolved path as the cell output.
base_path

In [None]:
# Create the raw CarWatch dataset object from the base path built above.
dataset = CarWatchDatasetRaw(base_path)
# Bare expression: displays the dataset's notebook representation as the cell output.
dataset

## IMU Data Processing

### Processing parameters

In [None]:
# Processing parameters, passed unchanged to process_night() in the next cell.
thres = 100
window_sec = 5
# NOTE(review): named "percent" but holds 0.9 -- presumably a fraction (90 % overlap);
# confirm against process_night().
overlap_percent = 0.9

# Switches forwarded to process_night(); both are turned off here.
# NOTE: this rebinds the name `compute_features`, which the setup cell imports as a function
# from biopsykit.signals.imu.feature_extraction.static_moments. After this cell has run, that
# function is no longer reachable under this name.
compute_endpoints = compute_features = False

In [None]:
# Keyword arguments shared by every process_night() call, collected once.
night_kwargs = dict(
    compute_endpoints=compute_endpoints,
    compute_features=compute_features,
    thres=thres,
    window_sec=window_sec,
    overlap_percent=overlap_percent,
    compare_endpoints=True,
)

# Process each (subject, night) subset of the dataset, with a progress bar.
# The return value of process_night() is not captured here.
for subset in tqdm(dataset.groupby(["subject", "night"])):
    process_night(subset, **night_kwargs)

# REMOVE LATER

## Data Cleaning

### Add Condition to DataFrame

In [None]:
# Concatenate the per-night endpoint frames; the outer index levels produced from the
# mapping keys are named "subject" and "night".
# NOTE(review): `dict_endpoints` is not created in the visible cells -- presumably a dict
# keyed by (subject, night); confirm where it is built.
endpoints_stacked = pd.concat(dict_endpoints, names=["subject", "night"])
# Drop the innermost index level of the concatenated frame.
df_endpoints = endpoints_stacked.droplevel(-1)
df_endpoints.head()

In [None]:
# Attach the condition data, discard entries whose "condition" label is NaN, and bring the
# index levels into a fixed order.
# NOTE(review): `cond_data` is not created in the visible cells; dropping on level
# "condition" also presumes that "condition" is an index level after the join -- confirm.
df_features = (
    df_features.join(cond_data)
    .drop(np.nan, level="condition")
    .reorder_levels(
        ["subject", "night", "condition", "imu_feature", "time_span", "wakeup_type"]
    )
)
df_features

### Drop Night 2

In [None]:
# Remove all entries whose "night" index level equals 2; errors="ignore" makes this a
# no-op instead of an error when that label is not present.
df_features = df_features.drop(2, level="night", errors="ignore")
# Bare expression: displays the remaining features as the cell output.
df_features

### Drop Nights with Incomplete IMU Data

Nights with less than $4\,\mathrm{h}$ of IMU data are removed.

In [None]:
# Visual check of the sleep duration before filtering.
# NOTE(review): this cell reads column "total_sleep_duration" from `df_endpoints`, while the
# filter cell below uses "total_sleep_time" from `df_sleep_endpoints` -- confirm which
# frame/column pair is the intended one.
fig, ax = plt.subplots(figsize=(10, 3))
df_endpoints[["total_sleep_duration"]].plot(ax=ax)
# Green horizontal line at 4*60 = 240 (presumably minutes, i.e. the 4 h cut-off -- confirm units).
ax.axhline(y=4*60, color='g')

In [None]:
# Boolean mask that is True where "total_sleep_time" is at least 4 * 60
# (presumably minutes, i.e. 4 h -- confirm units).
# NOTE(review): `df_sleep_endpoints` is not created in the visible cells.
sleep_dur_mask = df_sleep_endpoints["total_sleep_time"].ge(4 * 60)

# Keep only the feature rows selected by the mask.
df_features = df_features.loc[sleep_dur_mask]
df_features.head()

In [None]:
# Export the cleaned results as CSV files into `export_path`; a frame that is None is skipped.
# NOTE(review): `export_path` is not defined in the visible cells -- confirm where it is set.
if df_features is not None:
    df_features.to_csv(export_path.joinpath("imu_features_complete.csv"))
if df_sleep_endpoints is not None:
    df_sleep_endpoints.to_csv(export_path.joinpath("imu_sleep_endpoints_complete.csv"))