In [31]:
# file handling
from pathlib import Path
from os import listdir
from os.path import isfile, join
import re
import json

# data handling
import numpy as np
import pandas as pd
 
# plotting
from pandas.plotting import register_matplotlib_converters
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

import biopsykit.carwatch_logs.log_actions as la
import biopsykit.carwatch_logs.log_extras as le
from biopsykit import carwatch_logs as cl
from biopsykit.io import carwatch_logs

from carwatch_analysis.saliva_helper import import_cortisol_raw, import_cortisol_features, analysis_saliva_raw, analysis_saliva_features

import biopsykit as bp

# datetime handling
import pytz
import time
#from datetime import datetime
import datetime

# interaction
from ipywidgets import *
from IPython.display import Markdown

pd.options.mode.chained_assignment = None

%load_ext autoreload
%autoreload 2

%matplotlib widget
sns.set(style="whitegrid")
plt.close('all')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
export_path = ""

In [2]:
data_raw = pd.read_csv("cortisol_samples_cleaned.csv")

In [3]:
data_raw.head()

Unnamed: 0,condition,subject,chronotype,MEQ,night,within_ideal_bed_time,wakeup_source,date,weekend,wakeup_hour,sample,cortisol,time
0,Spontaneous,AB31R,Intermediate,56.0,0,False,Spontaneous,2020-01-08,Weekday,5,S0,10.28,0.0
1,Spontaneous,AB31R,Intermediate,56.0,0,False,Spontaneous,2020-01-08,Weekday,5,S1,12.37,17.0
2,Spontaneous,AB31R,Intermediate,56.0,0,False,Spontaneous,2020-01-08,Weekday,5,S2,10.41,32.0
3,Spontaneous,AB31R,Intermediate,56.0,0,False,Spontaneous,2020-01-08,Weekday,5,S3,10.22,47.0
4,Spontaneous,AB31R,Intermediate,56.0,0,False,Spontaneous,2020-01-08,Weekday,5,S4,8.21,62.0


In [39]:
logs_path = Path("../../../carwatch-data/App_Logs/cleaned_manual")
logs = carwatch_logs.load_logs_all_subjects(logs_path, has_subject_folders=False, return_df=False)

  0%|          | 0/65 [00:00<?, ?it/s]

In [43]:
app_wakeup_path = Path("../../../carwatch-data/App_Logs/app_data_wakeup.xlsx")

app_data = pd.read_excel(app_wakeup_path)
app_data.index = app_data['Code']
#app_data.drop(['Code'], axis=1, inplace=True)
app_data.head()

Unnamed: 0_level_0,Code,T1_wakeup_onset_app,T2_wakeup_onset_app
Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CE10B,CE10B,04:48:53,05:40:54
EA09H,EA09H,09:17:47,08:11:54
EM16N,EM16N,08:51:12,
KA17A,KA17A,06:39:00,05:41:00
KA19E,KA19E,,


In [None]:
prefix_wakeup = "_wakeup_onset_app"

In [46]:
def get_timestamp(df, saliva_id):
    return datetime.datetime.strptime(df.loc[df['saliva_id']==saliva_id].iloc[0].name.strftime("%H:%M:%S"), "%H:%M:%S").time()

def get_datetime(timestring):
    return datetime.datetime.strptime(timestring, "%H:%M:%S").time()

def get_from_json(row):
    json_extra = json.loads(row.extras)
    return json_extra.get('saliva_id')

for subject_id, subject_log in logs.items():
    log_subject = carwatch_logs.LogData(subject_log)
    
    finished_day = log_subject.finished_days

    df = cl.log_data.get_logs_for_action(log_subject, la.barcode_scanned)
    
    #df_alarm = log_subject.get_action(la.alarm_ring)
    #df_spontaneous = log_subject.get_action(la.spontaneous_awakening)

    for day in range(2):
        try:
            df_day = df[df.index.date == finished_day[day]]
            df_day['saliva_id'] = df_day.apply(lambda row: get_from_json(row), axis=1)
            if not app_data.loc[subject, 'T'+str(day+1)+prefix].isnull():
                T0 = get_datetime(app_data.loc[subject, 'T'+str(day+1)+'_wakeup_app'])
                T0_delta = datetime.timedelta(hours=T0.hour, minutes=T0.minute, seconds=T0.second)
            
            wake_onset_app_delta = datetime.timedelta(hours=wake_up_app.hour, minutes=wake_up_app.minute, seconds=wake_up_app.second)
            
            for saliva_id in range(5):
                T = get_timestamp(df_day, saliva_id)
                T_delta = datetime.timedelta(hours=T.hour, minutes=T.minute, seconds=T.second)
                data_raw.loc[(data_raw['subject']==subject) & (data_raw['night']==day) & (data_raw['sample']=='S'+str(saliva_id)), 'time_app'] = (T_delta-T0_delta).total_seconds() / 60

        except IndexError:
            pass

        
data = data_raw.rename(columns={'time':'self_report','time_app':'app'})

KeyError: 'T1_wakeup_app'

In [102]:
id_vars = ['subject', 'condition', 'chronotype', 'MEQ', 'night', 'wakeup_source', 'weekend', 'date', 'wakeup_hour', 'sample', 'cortisol']
melted = data.melt(id_vars=id_vars, value_vars=['self_report','app'], var_name='log_mode', value_name='time')

melted.dropna(inplace=True)

cort_long = melted
cort_long.head()

Unnamed: 0,subject,condition,chronotype,MEQ,night,wakeup_source,weekend,date,wakeup_hour,sample,cortisol,log_mode,time
0,AB31R,Spontaneous,Intermediate,56.0,0,Spontaneous,Weekday,2020-01-08,5,S0,10.28,self_report,0.0
1,AB31R,Spontaneous,Intermediate,56.0,0,Spontaneous,Weekday,2020-01-08,5,S1,12.37,self_report,17.0
2,AB31R,Spontaneous,Intermediate,56.0,0,Spontaneous,Weekday,2020-01-08,5,S2,10.41,self_report,32.0
3,AB31R,Spontaneous,Intermediate,56.0,0,Spontaneous,Weekday,2020-01-08,5,S3,10.22,self_report,47.0
4,AB31R,Spontaneous,Intermediate,56.0,0,Spontaneous,Weekday,2020-01-08,5,S4,8.21,self_report,62.0


In [None]:
# export
melted.to_csv('cortisol_samples_app_cleaned.csv', index=False)

In [103]:
idx = ['subject', 'condition', 'chronotype', 'MEQ', 'night', 'wakeup_source', 'weekend', 'date', 'wakeup_hour', 'log_mode', 'sample']
cort_long.set_index(idx, inplace = True)

In [104]:
cort_long.head()

# 56 complete samples

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,cortisol,time
subject,condition,chronotype,MEQ,night,wakeup_source,weekend,date,wakeup_hour,log_mode,sample,Unnamed: 11_level_1,Unnamed: 12_level_1
AB31R,Spontaneous,Intermediate,56.0,0,Spontaneous,Weekday,2020-01-08,5,self_report,S0,10.28,0.0
AB31R,Spontaneous,Intermediate,56.0,0,Spontaneous,Weekday,2020-01-08,5,self_report,S1,12.37,17.0
AB31R,Spontaneous,Intermediate,56.0,0,Spontaneous,Weekday,2020-01-08,5,self_report,S2,10.41,32.0
AB31R,Spontaneous,Intermediate,56.0,0,Spontaneous,Weekday,2020-01-08,5,self_report,S3,10.22,47.0
AB31R,Spontaneous,Intermediate,56.0,0,Spontaneous,Weekday,2020-01-08,5,self_report,S4,8.21,62.0


In [105]:
cort_long_app = cort_long.query("log_mode=='app'")

In [106]:
cort_auc = bp.saliva.auc(cort_long_app, remove_s0=False)
cort_auc.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,saliva_feature,cortisol_auc_g,cortisol_auc_i
subject,condition,chronotype,MEQ,night,wakeup_source,weekend,date,wakeup_hour,log_mode,Unnamed: 10_level_1,Unnamed: 11_level_1
BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,app,697.398333,339.698333
BC05R,Spontaneous,Intermediate,47.0,1,Spontaneous,Weekend,2019-12-08,8,app,945.14425,630.80325
BU07E,Known Alarm,Evening,61.0,0,Alarm,Weekday,2019-12-06,6,app,130.637,-124.3585
CC09K,Unknown Alarm,Intermediate,58.0,0,Alarm,Weekday,2019-12-02,6,app,1466.070083,1153.070083
CE10B,Known Alarm,Evening,64.0,0,Alarm,Weekday,2019-11-12,4,app,763.474263,283.318817


In [109]:
cort_inc = bp.saliva.max_increase(cort_long_app, remove_s0=False)
cort_inc.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,saliva_feature,cortisol_max_inc
subject,condition,chronotype,MEQ,night,wakeup_source,weekend,date,wakeup_hour,log_mode,Unnamed: 10_level_1
BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,app,7.03
BC05R,Spontaneous,Intermediate,47.0,1,Spontaneous,Weekend,2019-12-08,8,app,14.63
BU07E,Known Alarm,Evening,61.0,0,Alarm,Weekday,2019-12-06,6,app,-1.84
CC09K,Unknown Alarm,Intermediate,58.0,0,Alarm,Weekday,2019-12-02,6,app,24.74
CE10B,Known Alarm,Evening,64.0,0,Alarm,Weekday,2019-11-12,4,app,7.81985


In [112]:
cort_slope = bp.saliva.slope(cort_long_app, sample_idx=[0, 3])
cort_slope = cort_slope.join(bp.saliva.slope(cort_long_app, sample_idx=[0, 4]))
#cort_slope = cort_slope.join(bp.saliva.slope(cort_long, sample_idx=[2, 4], biomarker_type='cortisol'))
cort_slope.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,saliva_feature,cortisol_slopeS0S3,cortisol_slopeS0S4
subject,condition,chronotype,MEQ,night,wakeup_source,weekend,date,wakeup_hour,log_mode,Unnamed: 10_level_1,Unnamed: 11_level_1
BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,app,0.14043,0.073973
BC05R,Spontaneous,Intermediate,47.0,1,Spontaneous,Weekend,2019-12-08,8,app,0.321892,0.151238
BU07E,Known Alarm,Evening,61.0,0,Alarm,Weekday,2019-12-06,6,app,-0.053754,-0.043663
CC09K,Unknown Alarm,Intermediate,58.0,0,Alarm,Weekday,2019-12-02,6,app,0.40337,0.205367
CE10B,Known Alarm,Evening,64.0,0,Alarm,Weekday,2019-11-12,4,app,0.036,0.049051


In [114]:
cort_max = pd.DataFrame(cort_long_app['cortisol'].unstack('sample').max(axis=1), columns=['cortisol_cmax'])
cort_max.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,cortisol_cmax
subject,condition,chronotype,MEQ,night,wakeup_source,weekend,date,wakeup_hour,log_mode,Unnamed: 10_level_1
BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,app,12.91
BC05R,Spontaneous,Intermediate,47.0,1,Spontaneous,Weekend,2019-12-08,8,app,19.82
BU07E,Known Alarm,Evening,61.0,0,Alarm,Weekday,2019-12-06,6,app,4.17
CC09K,Unknown Alarm,Intermediate,58.0,0,Alarm,Weekday,2019-12-02,6,app,28.74
CE10B,Known Alarm,Evening,64.0,0,Alarm,Weekday,2019-11-12,4,app,15.527


In [116]:
cort_cini = cort_long_app.xs('S0', level='sample')[['cortisol']]
cort_cini.columns = ["cortisol_cini"]
cort_cini.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,cortisol_cini
subject,condition,chronotype,MEQ,night,wakeup_source,weekend,date,wakeup_hour,log_mode,Unnamed: 10_level_1
BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,app,5.88
BC05R,Spontaneous,Intermediate,47.0,1,Spontaneous,Weekend,2019-12-08,8,app,5.19
BU07E,Known Alarm,Evening,61.0,0,Alarm,Weekday,2019-12-06,6,app,4.17
CC09K,Unknown Alarm,Intermediate,58.0,0,Alarm,Weekday,2019-12-02,6,app,4.0
CE10B,Known Alarm,Evening,64.0,0,Alarm,Weekday,2019-11-12,4,app,7.70715


In [119]:
cort_feat = pd.concat([cort_auc, cort_inc, cort_slope, cort_cini, cort_max], axis=1)
cort_feat = pd.DataFrame(cort_feat.stack(), columns=['cortisol'])
cort_feat.index = cort_feat.index.set_names(cort_feat.index.names[:-1] + ['biomarker'])
cort_feat.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,cortisol
subject,condition,chronotype,MEQ,night,wakeup_source,weekend,date,wakeup_hour,log_mode,biomarker,Unnamed: 11_level_1
BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,app,cortisol_auc_g,697.398333
BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,app,cortisol_auc_i,339.698333
BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,app,cortisol_max_inc,7.03
BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,app,cortisol_slopeS0S3,0.14043
BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,app,cortisol_slopeS0S4,0.073973


In [None]:
cort_feat.to_csv('cortisol_features_app_cleaned.csv')