# IMU Feature Extraction Pipeline

## Setup

In [None]:
from pathlib import Path
import re

import pandas as pd
import numpy as np

import biopsykit as bp
from biopsykit.utils.time import timedelta_to_time
from biopsykit.signals.imu.feature_extraction.static_moments import compute_features

import carwatch_analysis.imu_helper as helper

import matplotlib.pyplot as plt
import seaborn as sns

from joblib import Parallel
from tqdm.notebook import tqdm

%matplotlib widget
%load_ext autoreload
%autoreload 2

In [None]:
# Processing switches; each one is passed on to ``helper.process_subject``
# in the per-subject loop. All of them are switched off here.
# NOTE(review): ``compute_features`` rebinds the name imported from
# ``biopsykit.signals.imu.feature_extraction.static_moments`` in the setup
# cell, so that function is no longer reachable under this name afterwards.
compute_endpoints = False
compute_features = False
export_figures = False

# Reset the plotting state: close every open figure, then apply the
# seaborn "ticks" theme.
plt.close("all")
sns.set_theme(style="ticks")

In [None]:
# Input locations (relative paths).
base_path = Path("../../../../Data")
sleep_path = base_path / "Sleep_Data"

# Output locations: one sub-folder per kind of export below ``exports``.
export_path = Path("../../exports")
feature_export_path = export_path / "features"
sleep_endpoints_export_path = export_path / "sleep_endpoints"
plot_export_path = export_path / "sleep_plots"

# create new export folders
bp.utils.file_handling.mkdirs(
    [
        export_path,
        plot_export_path,
        feature_export_path,
        sleep_endpoints_export_path,
    ]
)

## Import Index

In [None]:
# Bedtime questionnaire export, indexed by (subject, night); the per-subject
# loop looks up rows by subject id in this table.
bedtime_data = pd.read_csv(
    export_path / "questionnaire_chronotype_bedtimes.csv",
    index_col=["subject", "night"],
)
bedtime_data.head()

In [None]:
# Condition questionnaire export, indexed by (subject, night, condition);
# joined onto the feature table during data cleaning.
cond_data = pd.read_csv(
    export_path / "questionnaire_condition.csv",
    index_col=["subject", "night", "condition"],
)
cond_data.head()

## IMU Data Processing

In [None]:
# Windowing parameters that are handed to ``helper.process_subject``.
fs = 102.4  # presumably the IMU sampling rate in Hz -- TODO confirm
window_size = fs * 5  # five times ``fs``; intentionally left as a float (512.0)
overlap = int(window_size * 0.9)  # 90 % of the window, truncated to an int (460)
thres = 100  # forwarded as ``thres``; its unit/meaning is not visible in this file

In [None]:
# Get all subject directories below ``sleep_path``.
# NOTE(review): how ``pattern`` is interpreted (glob vs. regex) is defined by
# biopsykit and not visible here -- confirm that "*" selects every folder.
subject_dirs = bp.utils.file_handling.get_subject_dirs(
    sleep_path,
    pattern="*",
)

In [None]:
# Per-subject result containers (subject id -> DataFrame), filled by the
# processing loop and concatenated afterwards.
feature_results, endpoint_results = {}, {}

In [None]:
# Process every subject directory and collect whatever results come back.
for subject_dir in tqdm(subject_dirs):
    subject_id = helper.subject_id_from_path(subject_dir)

    # Self-reported bedtimes are optional: subjects that do not appear in the
    # questionnaire index are processed with ``None`` instead.
    selfreport_endpoints = (
        bedtime_data.loc[subject_id] if subject_id in bedtime_data.index else None
    )

    df_endpoints_subject, df_features_subject = helper.process_subject(
        subject_dir=subject_dir,
        # what to (re)compute / export
        compute_endpoints=compute_endpoints,
        compute_features=compute_features,
        export_figures=export_figures,
        # where exports are written
        feature_export_path=feature_export_path,
        sleep_endpoints_export_path=sleep_endpoints_export_path,
        plot_export_path=plot_export_path,
        # windowing parameters
        thres=thres,
        window_size=window_size,
        overlap=overlap,
        # self-report comparison
        selfreport_endpoints=selfreport_endpoints,
        compare_endpoints=True,
    )

    # Either result may be ``None``; only store the ones that exist.
    if df_endpoints_subject is not None:
        endpoint_results[subject_id] = df_endpoints_subject
    if df_features_subject is not None:
        feature_results[subject_id] = df_features_subject

In [None]:
# Stack the per-subject frames into one frame each, with the dict keys
# becoming the outermost index level "subject". An empty result dict yields
# ``None`` instead of a DataFrame.
df_features = (
    pd.concat(feature_results, names=["subject"]) if feature_results else None
)
df_sleep_endpoints = (
    pd.concat(endpoint_results, names=["subject"]) if endpoint_results else None
)

## Data Cleaning

### Add Condition to DataFrame

In [None]:
# ``df_features`` is ``None`` when no subject produced any features; the
# export cell at the end of the notebook already tolerates that, so skip the
# cleaning step instead of failing with an AttributeError.
if df_features is not None:
    # Attach the condition table; afterwards ``condition`` is available as
    # an index level of the feature table.
    df_features = df_features.join(cond_data)
    # Discard rows whose ``condition`` level is NaN (presumably nights that
    # have no entry in the condition table -- confirm).
    df_features = df_features.drop(np.nan, level="condition")
    # Bring the index levels into a fixed, documented order.
    df_features = df_features.reorder_levels(
        ["subject", "night", "condition", "imu_feature", "time_span", "wakeup_type"]
    )
df_features

### Drop Night 2

In [None]:
# ``df_features`` may be ``None`` (no features were produced); only filter
# when there is a table to filter.
if df_features is not None:
    # Remove every row of night 2; ``errors="ignore"`` keeps this a no-op
    # when night 2 is not present in the index.
    df_features = df_features.drop(2, level="night", errors="ignore")
df_features

### Drop Nights with Incomplete IMU Data

Nights with less than $4\,\mathrm{h}$ of IMU data are removed, i.e. nights whose `total_sleep_time` is below $4 \cdot 60$.

In [None]:
# ``df_sleep_endpoints`` is ``None`` when no subject produced sleep
# endpoints; there is nothing to plot in that case.
if df_sleep_endpoints is not None:
    fig, ax = plt.subplots(figsize=(10, 3))
    df_sleep_endpoints[["total_sleep_time"]].plot(ax=ax)
    # Green reference line at the 4 * 60 cut-off used by the filter below.
    ax.axhline(y=4 * 60, color="g")

In [None]:
# Both tables can be ``None`` (nothing was produced for any subject); the
# export cell already tolerates that, so only filter when both exist.
if df_features is not None and df_sleep_endpoints is not None:
    # Keep nights whose total sleep time reaches the 4 * 60 cut-off.
    sleep_dur_mask = df_sleep_endpoints["total_sleep_time"] >= 4 * 60
    # NOTE(review): boolean ``.loc`` indexing needs the mask to be alignable
    # with ``df_features.index``; the index layout of ``df_sleep_endpoints``
    # is not visible here -- confirm both tables share the same index.
    df_features = df_features.loc[sleep_dur_mask]
df_features.head() if df_features is not None else None

In [None]:
# Write each table that exists to its CSV file below ``export_path``;
# tables that are ``None`` are skipped.
for _frame, _filename in (
    (df_features, "imu_features_complete.csv"),
    (df_sleep_endpoints, "imu_sleep_endpoints_complete.csv"),
):
    if _frame is None:
        continue
    _frame.to_csv(export_path / _filename)