In [1]:
%matplotlib widget

from pathlib import Path
import json

import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None

import matplotlib.pyplot as plt
import seaborn as sns

import biopsykit.carwatch_logs.log_actions as la
import biopsykit.carwatch_logs.log_extras as le
from biopsykit import carwatch_logs as cl
from biopsykit.io import carwatch_logs

from carwatch_analysis.saliva_helper import import_cortisol_raw, import_cortisol_features, analysis_saliva_raw, analysis_saliva_features

import biopsykit as bp

import datetime

from IPython.display import display

  return warn(


## Load Self-Report Data

In [2]:
export_path = Path("../..").joinpath("exports")

In [3]:
data_raw = pd.read_csv(export_path.joinpath("cortisol_samples_cleaned.csv"))

In [4]:
data_raw.head()

Unnamed: 0,condition,subject,chronotype,MEQ,night,within_ideal_bed_time,wakeup_source,date,weekend,wakeup_hour,sample,cortisol,time
0,Spontaneous,AB31R,Intermediate,56.0,0,False,Spontaneous,2020-01-08,Weekday,5,S0,10.28,0.0
1,Spontaneous,AB31R,Intermediate,56.0,0,False,Spontaneous,2020-01-08,Weekday,5,S1,12.37,17.0
2,Spontaneous,AB31R,Intermediate,56.0,0,False,Spontaneous,2020-01-08,Weekday,5,S2,10.41,32.0
3,Spontaneous,AB31R,Intermediate,56.0,0,False,Spontaneous,2020-01-08,Weekday,5,S3,10.22,47.0
4,Spontaneous,AB31R,Intermediate,56.0,0,False,Spontaneous,2020-01-08,Weekday,5,S4,8.21,62.0


## Load App Logs

In [5]:
logs_path = Path("../../../carwatch-data/App_Logs/cleaned_manual")
logs = carwatch_logs.load_logs_all_subjects(logs_path, has_subject_folders=False, return_df=False)

  0%|          | 0/65 [00:00<?, ?it/s]

## Load Cleaned App Wakeup Onsets

In [6]:
app_wakeup_path = Path("../../../carwatch-data/App_Logs/app_data_wakeup.xlsx")

app_data = pd.read_excel(app_wakeup_path)
app_data.index = app_data['Code']
#app_data.drop(['Code'], axis=1, inplace=True)
app_data.head()

Unnamed: 0_level_0,Code,0,1
Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CE10B,CE10B,04:48:53,05:40:54
EA09H,EA09H,09:17:47,08:11:54
EM16N,EM16N,08:51:12,
KA17A,KA17A,06:39:00,05:41:00
KA19E,KA19E,,


In [18]:
app_data_long = app_data.melt(['Code'], [0,1], 'night', 'time')
app_data_long.set_index(['Code', 'night'], inplace=True)
app_data_long.dropna(inplace=True)
app_data_long = pd.to_timedelta(app_data_long['time'])
app_data_long.head()

Code   night
CE10B  0       0 days 04:48:53
EA09H  0       0 days 09:17:47
EM16N  0       0 days 08:51:12
KA17A  0       0 days 06:39:00
LB21E  0       0 days 08:57:41
Name: time, dtype: timedelta64[ns]

## Load Self-Report Wakeup Onsets

In [8]:
self_report_wake_onset_path = export_path.joinpath("imu_questionnaire_merged.csv")

In [9]:
sr_wake_onset = pd.read_csv(self_report_wake_onset_path)

In [10]:
# Rename and set index

sr_wake_onset.rename({"subject": "Code"}, inplace=True, axis=1)
sr_wake_onset.set_index(["Code", "night"], inplace=True)

In [11]:
sr_wake_onset.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,MEQ,chronotype_coarse,sleep_onset_latency,getup_latency,wake_after_sleep_onset,sleep_onset_time,bed_time,wake_onset_time,within_ideal_bed_time
Code,night,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AB19E,0,45.0,1.0,1.0,2.0,18.0,0 days 00:15:22,0 days 00:14:54.052733952,0 days 05:45:00,0
AB19E,1,45.0,1.0,,,,,,,0
AB31R,0,56.0,1.0,27.0,4.0,7.0,0 days 00:46:56,0 days 00:20:05.966797056,0 days 05:40:00,0
AB31R,1,56.0,1.0,13.0,1.0,10.0,0 days 00:41:21,0 days 00:28:30.966797056,0 days 05:50:00,0
AC12E,0,46.0,1.0,34.0,40.0,37.0,0 days 23:51:16,0 days 23:17:25.966797056,0 days 07:00:00,0


In [29]:
wake_onset_time = sr_wake_onset["wake_onset_time"]

wake_onset_time = pd.to_timedelta(wake_onset_time)
wake_onset_time.dropna(inplace=True)

wake_onset_time = wake_onset_time.apply(lambda x: x.seconds / 60 % 10)

freq_app = app_data_long.apply(lambda x: x.seconds / 60 % 10)

In [36]:
fig, (ax1, ax2) = plt.subplots(1, 2)
ax1.sharey(ax2)
sns.histplot(wake_onset_time,stat="probability", bins=10, discrete=True, ax=ax1).set_title('self_report')
sns.histplot(freq_app,stat="probability", bins=10, discrete=True, ax=ax2).set_title('app')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 1.0, 'app')

## Extract Saliva Sample Times From App Logs

In [21]:
#wake_onset = "app"
wake_onset = "self_report"

#sampling_times = "app"
sampling_times = "self_report"

In [22]:
def get_timestamp(df, saliva_id):
    return datetime.datetime.strptime(df.loc[df['saliva_id']==saliva_id].iloc[0].name.strftime("%H:%M:%S"), "%H:%M:%S").time()

def get_datetime(timestring):
    return datetime.datetime.strptime(timestring, "%H:%M:%S").time()

def get_from_json(row):
    json_extra = json.loads(row.extras)
    return json_extra.get('saliva_id')

for subject, subject_log in logs.items():
    log_subject = carwatch_logs.LogData(subject_log)
    
    finished_day = log_subject.finished_days

    df = cl.log_data.get_logs_for_action(log_subject, la.barcode_scanned)

    for day in range(2):
        try:
            df_day = df[df.index.date == finished_day[day]]
            df_day['saliva_id'] = df_day.apply(lambda row: get_from_json(row), axis=1)
            
            if pd.isna(app_data.loc[subject, day]):
                continue
            
            T0 = get_datetime(app_data.loc[subject, day])
            T0_delta = datetime.timedelta(hours=T0.hour, minutes=T0.minute, seconds=T0.second)
            
            T0_delta_sr = pd.to_timedelta(sr_wake_onset.query("Code == @subject & night == @day")["wake_onset_time"].item())
            #print(T0_delta)
            
            for saliva_id in range(5):
                T = get_timestamp(df_day, saliva_id)
                T_delta = datetime.timedelta(hours=T.hour, minutes=T.minute, seconds=T.second)
                #data_raw.loc[(data_raw['subject']==subject) & (data_raw['night']==day) & (data_raw['sample']=='S'+str(saliva_id)), 'onset_sr_samples_app'] = (T_delta-T0_delta_sr).total_seconds() / 60
                #data_raw.loc[(data_raw['subject']==subject) & (data_raw['night']==day) & (data_raw['sample']=='S'+str(saliva_id)), 'onset_app_samples_sr'] = (T_delta-T0_delta).total_seconds() / 60
                data_raw.loc[(data_raw['subject']==subject) & (data_raw['night']==day) & (data_raw['sample']=='S'+str(saliva_id)), 'time_app'] = (T_delta-T0_delta).total_seconds() / 60
                data_raw.loc[(data_raw['subject']==subject) & (data_raw['night']==day) & (data_raw['sample']=='S'+str(saliva_id)), 'naive'] = saliva_id*15.0
                

                
        except IndexError:
            pass

data = data_raw.rename(columns={'time':'self_report','time_app':'app'})

In [23]:
data.head()

Unnamed: 0,condition,subject,chronotype,MEQ,night,within_ideal_bed_time,wakeup_source,date,weekend,wakeup_hour,sample,cortisol,self_report,app,naive
0,Spontaneous,AB31R,Intermediate,56.0,0,False,Spontaneous,2020-01-08,Weekday,5,S0,10.28,0.0,,
1,Spontaneous,AB31R,Intermediate,56.0,0,False,Spontaneous,2020-01-08,Weekday,5,S1,12.37,17.0,,
2,Spontaneous,AB31R,Intermediate,56.0,0,False,Spontaneous,2020-01-08,Weekday,5,S2,10.41,32.0,,
3,Spontaneous,AB31R,Intermediate,56.0,0,False,Spontaneous,2020-01-08,Weekday,5,S3,10.22,47.0,,
4,Spontaneous,AB31R,Intermediate,56.0,0,False,Spontaneous,2020-01-08,Weekday,5,S4,8.21,62.0,,


In [24]:
# Optional: drop all subjects with self report only
self_report_only = data.dropna(inplace=True)

self_report_only.head()

AttributeError: 'NoneType' object has no attribute 'head'

In [56]:
id_vars = ['subject', 'condition', 'chronotype', 'MEQ', 'night', 'wakeup_source', 'weekend', 'date', 'wakeup_hour', 'sample', 'cortisol']
melted = data.melt(id_vars=id_vars, value_vars=['self_report','app','naive'], var_name='log_mode', value_name='time')

melted.dropna(inplace=True)

# Something went wrong there, drop manually
melted = melted.loc[~((melted["subject"]=="MD26R")&( melted["log_mode"]=="app")),:]
       
cort_long = melted
cort_long.head()

Unnamed: 0,subject,condition,chronotype,MEQ,night,wakeup_source,weekend,date,wakeup_hour,sample,cortisol,log_mode,time
0,BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,S0,5.88,self_report,0.0
1,BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,S1,12.54,self_report,15.0
2,BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,S2,12.91,self_report,30.0
3,BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,S3,12.3,self_report,45.0
4,BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,S4,10.38,self_report,60.0


In [57]:
# export
melted.to_csv(export_path.joinpath('cortisol_samples_app_cleaned.csv'), index=False)

In [58]:
idx = ['subject', 'condition', 'chronotype', 'MEQ', 'night', 'wakeup_source', 'weekend', 'date', 'wakeup_hour', 'log_mode', 'sample']
cort_long.set_index(idx, inplace = True)

In [59]:
cort_long.head()

# 56 complete samples

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,cortisol,time
subject,condition,chronotype,MEQ,night,wakeup_source,weekend,date,wakeup_hour,log_mode,sample,Unnamed: 11_level_1,Unnamed: 12_level_1
BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,self_report,S0,5.88,0.0
BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,self_report,S1,12.54,15.0
BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,self_report,S2,12.91,30.0
BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,self_report,S3,12.3,45.0
BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,self_report,S4,10.38,60.0


## Compute Cortisol Features For App Times

In [31]:
# cort_long_app = cort_long.query("log_mode=='app'")

In [67]:
cort_auc = bp.saliva.auc(cort_long, remove_s0=False)
cort_auc.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,saliva_feature,cortisol_auc_g,cortisol_auc_i
subject,condition,chronotype,MEQ,night,wakeup_source,weekend,date,wakeup_hour,log_mode,Unnamed: 10_level_1,Unnamed: 11_level_1
BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,app,697.398333,339.698333
BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,naive,688.2,335.4
BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,self_report,688.2,335.4
BC05R,Spontaneous,Intermediate,47.0,1,Spontaneous,Weekend,2019-12-08,8,app,945.14425,630.80325
BC05R,Spontaneous,Intermediate,47.0,1,Spontaneous,Weekend,2019-12-08,8,naive,936.9,625.5


In [68]:
#cort_inc = bp.saliva.max_increase(cort_long, remove_s0=False)
#cort_inc.head()

In [69]:
cort_slope = bp.saliva.slope(cort_long, sample_idx=[0, 3])
cort_slope = cort_slope.join(bp.saliva.slope(cort_long, sample_idx=[0, 4]))
#cort_slope = cort_slope.join(bp.saliva.slope(cort_long, sample_idx=[2, 4], biomarker_type='cortisol'))
cort_slope.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,saliva_feature,cortisol_slopeS0S3,cortisol_slopeS0S4
subject,condition,chronotype,MEQ,night,wakeup_source,weekend,date,wakeup_hour,log_mode,Unnamed: 10_level_1,Unnamed: 11_level_1
BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,app,0.14043,0.073973
BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,naive,0.142667,0.075
BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,self_report,0.142667,0.075
BC05R,Spontaneous,Intermediate,47.0,1,Spontaneous,Weekend,2019-12-08,8,app,0.321892,0.151238
BC05R,Spontaneous,Intermediate,47.0,1,Spontaneous,Weekend,2019-12-08,8,naive,0.325111,0.152667


In [70]:
#cort_max = pd.DataFrame(cort_long['cortisol'].unstack('sample').max(axis=1), columns=['cortisol_cmax'])
#cort_max.head()

In [71]:
#cort_cini = cort_long.xs('S0', level='sample')[['cortisol']]
#cort_cini.columns = ["cortisol_cini"]
#cort_cini.head()

In [72]:
cort_feat = pd.concat([cort_auc, cort_inc, cort_slope, cort_cini, cort_max], axis=1)
cort_feat = pd.DataFrame(cort_feat.stack(), columns=['cortisol'])
cort_feat.index = cort_feat.index.set_names(cort_feat.index.names[:-1] + ['saliva_feature'])
cort_feat.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,cortisol
subject,condition,chronotype,MEQ,night,wakeup_source,weekend,date,wakeup_hour,log_mode,saliva_feature,Unnamed: 11_level_1
BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,app,cortisol_auc_g,697.398333
BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,app,cortisol_auc_i,339.698333
BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,app,cortisol_max_inc,7.03
BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,app,cortisol_slopeS0S3,0.14043
BC05R,Spontaneous,Intermediate,47.0,0,Spontaneous,Weekend,2019-12-07,7,app,cortisol_slopeS0S4,0.073973


In [74]:
# export
cort_feat.to_csv(export_path.joinpath('cortisol_features_app_cleaned.csv'))