In [None]:
from pathlib import Path

import pandas as pd
import numpy as np

import biopsykit as bp
from biopsykit.questionnaires.utils import replace_missing_data
from biopsykit.utils.time import get_time_from_date

import pingouin as pg

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib widget
%load_ext autoreload
%autoreload 2

In [None]:
# Close every figure that is currently open, then apply seaborn's "ticks"
# style to the plots created further down.
plt.close("all")
sns.set(style="ticks")

## Load Data

In [None]:
# Directory (relative to this notebook) from which the feature tables are read.
data_path = Path("..") / "exports"

In [None]:
# Feature tables to merge; each file is read with (subject, night) as its index.
feature_files = [
    "imu_features_complete.csv",
    "questionnaire_chronotype_bedtimes.csv",
    "sleep_endpoints_complete.csv",
]

feature_tables = []
for file_name in feature_files:
    table = pd.read_csv(data_path / file_name, index_col=["subject", "night"])
    feature_tables.append(table)

# Put the tables side by side, aligned on their shared index.
data = pd.concat(feature_tables, axis=1)

# The start of the major rest period is used as the sensor-based bed time.
data["bed"] = data["major_rest_period_start"]

### Remove corrupted sensor data

Sensor-derived sleep onset and bed times whose reported sleep onset date lies outside the data collection interval (2019-11-01 to 2020-02-01) are set to `None`.

In [None]:
# Bounds of the data collection interval that 'sleep_onset' is checked against.
collection_start = "2019-11-01"
collection_end = "2020-02-01"

sleep_onset = data["sleep_onset"]
date_mask = sleep_onset.lt(collection_start) | sleep_onset.gt(collection_end)

# Blank out sleep onset and bed time of the rows flagged above.
# NOTE(review): 'wake_onset' of these rows is left untouched - confirm this is intended.
data.loc[date_mask, ["sleep_onset", "bed"]] = None
data.head()

### Convert datetime information

In [None]:
# Display the column names of the merged table.
data.columns

In [None]:
# Sensor-based timestamps are passed with is_utc=True and tz_convert=True.
# Each result is stored in a new '<column>_time' column.
for column in ["sleep_onset", "bed", "wake_onset"]:
    data[f"{column}_time"] = get_time_from_date(data[column], is_utc=True, tz_convert=True)

# Self-reported and ideal times go through the helper with its default settings.
for column in [
    "sleep_onset_selfreport",
    "wake_onset_selfreport",
    "bed_selfreport",
    "ideal_bed_start",
    "ideal_bed_end",
]:
    data[f"{column}_time"] = get_time_from_date(data[column])

### Replace missing time information with self-report

In [None]:
# (sensor-based column, self-report column used as replacement) pairs.
fallback_columns = [
    ("sleep_onset_time", "sleep_onset_selfreport_time"),
    ("wake_onset_time", "wake_onset_selfreport_time"),
    ("bed_time", "bed_selfreport_time"),
]

# NOTE(review): dropna=False presumably keeps rows that are still missing after
# the replacement - confirm against the biopsykit documentation.
for target_column, selfreport_column in fallback_columns:
    data = replace_missing_data(data, target_column, selfreport_column, dropna=False)

### Compute whether subjects went to bed within their ideal bedtime window

TODO:
* weekend or not
* ideal bedtime or not
* alarm or not
* IMU, Cortisol
* split in pre and post sleep midpoint

In [None]:
bed_time = data['bed_time']
window_start = data['ideal_bed_start_time']
window_end = data['ideal_bed_end_time']

# NOTE(review): assumes the '*_time' columns hold the time of day as an offset
# from midnight, so a window such as 23:00-01:00 has an end *smaller* than its
# start - confirm against get_time_from_date. For such windows the bed time is
# inside if it lies after the start OR before the end; a plain
# `start <= bed <= end` test can never be true for them.
crosses_midnight = window_start > window_end
within_same_day = (bed_time >= window_start) & (bed_time <= window_end)
within_overnight = (bed_time >= window_start) | (bed_time <= window_end)
ideal_bedtime = pd.Series(
    np.where(crosses_midnight, within_overnight, within_same_day),
    index=data.index,
)

# Comparisons against missing values evaluate to False, which would silently
# label nights without a bed time or ideal window as "outside the ideal bed time".
# Keep those nights as missing instead; the nullable integer dtype stores 0/1
# together with <NA>.
is_known = bed_time.notna() & window_start.notna() & window_end.notna()
data['within_ideal_bed_time'] = ideal_bedtime.astype(int).astype('Int64').where(is_known)

# dropna=False also shows how many nights could not be classified.
data['within_ideal_bed_time'].value_counts(dropna=False)

In [None]:
# Preview only the first rows instead of rendering the whole table.
data.head()

In [None]:
# Display the column names before columns are dropped in the next cell.
data.columns

In [None]:
# Columns that are removed before the export.
columns_to_drop = [
    "sleep_bouts_number",
    "ideal_bed_end",
    "Chronotype_Coarse",
    "ideal_bed_start",
    "MEQ",
    "Chronotype_Fine",
    "sleep_onset_selfreport",
    "bed_selfreport",
    "sleep_onset",
    "wake_onset",
    "total_sleep_time",
    "major_rest_period_start",
    "major_rest_period_end",
    "number_wake_bouts",
    "sleep_bouts",
    "wake_bouts",
    "bed",
    "sleep_onset_time",
    "bed_time",
    "sleep_onset_selfreport_time",
    "bed_selfreport_time",
    "ideal_bed_start_time",
    "ideal_bed_end_time",
]

# errors="ignore" skips listed names that are not present in the table.
data = data.drop(columns=columns_to_drop, errors="ignore")

In [None]:
# Keep only rows without any missing value in the remaining columns.
data = data.dropna(axis=0, how="any")

In [None]:
# Display the column names that remain after dropping columns and incomplete rows.
data.columns

## Export

In [None]:
# Directory (relative to this notebook) that receives the exported table.
export_path = Path("..") / "exports"

In [None]:
# Write the final table as CSV into the export directory.
output_file = export_path / "chronotype_features_complete.csv"
data.to_csv(output_file)