In [292]:
from pathlib import Path
import json

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from tqdm.auto import tqdm

import biopsykit as bp
from biopsykit.utils.dataframe_handling import multi_xs
from biopsykit.carwatch_logs import LogData
from biopsykit.carwatch_logs.log_data import get_logs_for_action
import biopsykit.carwatch_logs.log_actions as log_actions


from carwatch_analysis.datasets import CarWatchDatasetProcessed
from carwatch_analysis.exceptions import AppLogDataNotFoundException

import datetime

%load_ext autoreload
%autoreload 2
%matplotlib widget

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Setup Paths

In [365]:
deploy_type = "local"

In [366]:
# build path to data folder
config_dict = json.load(Path("../config.json").open(encoding="utf-8"))
base_path = Path("..").joinpath(config_dict[deploy_type]["base_path"])
base_path

PosixPath('/Volumes/MAD_DATA/StudyData/HealthPsychology/CARWatch/Data')

In [367]:
dataset = CarWatchDatasetProcessed(base_path)

## Load Data

### Self-Report Wakeup Onsets

In [368]:
bedtimes = dataset.endpoints_selfreport
bedtimes = bedtimes.dropna().sort_index()
bedtimes = bedtimes[["wake_onset_selfreport"]].apply(pd.to_timedelta)
bedtimes = bedtimes.rename(columns={"wake_onset_selfreport": "wake_onset_time_selfreport"})

bedtimes

Unnamed: 0_level_0,Unnamed: 1_level_0,wake_onset_time_selfreport
subject,night,Unnamed: 2_level_1
AB19E,0,0 days 05:45:00
AB31R,0,0 days 05:40:00
AB31R,1,0 days 05:50:00
AC12E,0,0 days 07:00:00
AC12E,1,0 days 07:30:00
...,...,...
VA30T,1,0 days 08:40:00
VE19A,0,0 days 07:11:00
VE19A,1,0 days 06:53:00
VS09S,0,0 days 08:15:00


### App Wakeup Onsets

In [370]:
app_wakeup_path = base_path.joinpath("app_logs/app_data_wakeup.xlsx")

app_wakeup = pd.read_excel(app_wakeup_path)
app_wakeup = app_wakeup.set_index("Code")
app_wakeup.index.name = "subject"
app_wakeup.columns.name = "night"
app_wakeup = pd.DataFrame(pd.to_timedelta(app_wakeup.stack()), columns=["wake_onset_time_app"]).sort_index()

app_wakeup

Unnamed: 0_level_0,Unnamed: 1_level_0,wake_onset_time_app
subject,night,Unnamed: 2_level_1
BC05R,0,0 days 07:31:16
BC05R,1,0 days 08:46:43
BU07E,0,0 days 06:01:24
CC09K,0,0 days 06:09:00
CC09K,1,0 days 07:21:01
...,...,...
UH09L,0,0 days 06:46:43
UH09L,1,0 days 09:26:27
VE19A,0,0 days 07:11:37
WM13K,0,0 days 06:39:00


### Cortisol Samples

In [371]:
cortisol_samples = dataset.cortisol_samples

cortisol_samples = cortisol_samples.rename(
    columns={
        "time_abs": "time_abs_selfreport",
        "time": "time_selfreport",
        "wake_onset_time": "wake_onset_time_selfreport"
    }
)
cortisol_samples

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,time_abs_selfreport,date,wake_onset_time_selfreport,time_selfreport,cortisol
subject,night,condition,sample,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AB19E,0,Known Alarm,S0,0 days 06:00:00,2019-11-18,0 days 05:45:00,0.0,2.80
AB19E,0,Known Alarm,S1,0 days 06:15:00,2019-11-18,0 days 05:45:00,15.0,5.59
AB19E,0,Known Alarm,S2,0 days 06:30:00,2019-11-18,0 days 05:45:00,30.0,13.29
AB19E,0,Known Alarm,S3,0 days 06:45:00,2019-11-18,0 days 05:45:00,45.0,13.46
AB19E,0,Known Alarm,S4,0 days 07:01:00,2019-11-18,0 days 05:45:00,61.0,12.65
...,...,...,...,...,...,...,...,...
WM13K,1,Unknown Alarm,S0,0 days 06:30:00,2019-12-03,0 days 04:02:35,0.0,13.94
WM13K,1,Unknown Alarm,S1,0 days 06:45:00,2019-12-03,0 days 04:02:35,15.0,15.72
WM13K,1,Unknown Alarm,S2,0 days 07:00:00,2019-12-03,0 days 04:02:35,30.0,17.95
WM13K,1,Unknown Alarm,S3,0 days 07:15:00,2019-12-03,0 days 04:02:35,45.0,16.81


In [372]:
dataset.date.xs("BC05R", level="subject")

Unnamed: 0_level_0,Unnamed: 1_level_0,date
night,condition,Unnamed: 2_level_1
0,Spontaneous,2019-12-07
1,Spontaneous,2019-12-08


In [234]:
def get_saliva_id_from_json(col):
    json_extra = json.loads(col)
    return f"S{json_extra.get('saliva_id')}"

In [364]:
dict_barcode_day = {}

#for subset in dataset.get_subset(subject=["BC05R", "CC09K"]).groupby("subject"):
for subset in tqdm(dataset.groupby("subject")):
    subject_id = subset.index["subject"][0]
    try:
        app_logs = subset.app_logs
        log_data = LogData(app_logs)
        # add 1 day because it's the next morning
        recording_days = subset.date + pd.Timedelta("1 day")
        print(log_data.log_dates)
        recording_days["date"] = recording_days["date"].dt.tz_localize("Europe/Berlin")
        finished_days = [day.normalize() for day in log_data.finished_days]
        
        df_barcode = get_logs_for_action(log_data, log_actions.barcode_scanned)
        
        for night_id, day in enumerate(recording_days["date"]):
            if day not in finished_days:
                continue
            
            day_mask = df_barcode.index.normalize().isin([day])
            df_barcode_day = df_barcode.loc[day_mask]
            
            df_barcode_day = df_barcode_day.assign(
                **{"sample": df_barcode_day["extras"].apply(get_saliva_id_from_json)}
            )
            df_barcode_day = df_barcode_day.set_index("sample", append=True)
            df_barcode_day = df_barcode_day.drop("S5", level="sample", errors="ignore")
            
            if df_barcode_day.empty:
                continue
            
            dict_barcode_day[(subject_id, night_id)] = df_barcode_day
            
    except AppLogDataNotFoundException as e:
        continue

  0%|          | 0/117 [00:00<?, ?it/s]

[datetime.date(2019, 11, 18) datetime.date(2019, 11, 19)
 datetime.date(2019, 11, 20) datetime.date(2019, 11, 21)]
[datetime.date(2019, 11, 18) datetime.date(2019, 11, 19)
 datetime.date(2019, 11, 20) datetime.date(2019, 11, 21)]
[datetime.date(2019, 12, 5) datetime.date(2019, 12, 6)
 datetime.date(2019, 12, 7) datetime.date(2019, 12, 8)]
[datetime.date(2019, 12, 5) datetime.date(2019, 12, 6)
 datetime.date(2019, 12, 7) datetime.date(2019, 12, 8)]
[datetime.date(2019, 12, 2) datetime.date(2019, 12, 3)
 datetime.date(2019, 12, 4) datetime.date(2019, 12, 5)]
[datetime.date(2019, 11, 11) datetime.date(2019, 11, 12)
 datetime.date(2019, 11, 13) datetime.date(2019, 11, 14)]
[datetime.date(2019, 12, 16) datetime.date(2019, 12, 17)
 datetime.date(2019, 12, 18)]
[datetime.date(2019, 11, 22) datetime.date(2019, 11, 23)
 datetime.date(2019, 11, 24)]
[datetime.date(2019, 11, 14) datetime.date(2019, 11, 15)
 datetime.date(2019, 11, 16) datetime.date(2019, 11, 17)]
[datetime.date(2019, 12, 19) date

In [294]:
multi_xs(app_wakeup, ["BC05R", "CC09K"], level="subject")

Unnamed: 0_level_0,Unnamed: 1_level_0,wake_onset_time_app
subject,night,Unnamed: 2_level_1
BC05R,0,0 days 07:31:16
BC05R,1,0 days 08:46:43
CC09K,0,0 days 06:09:00
CC09K,1,0 days 07:21:01


In [295]:
multi_xs(bedtimes, ["BC05R", "CC09K"], level="subject")

Unnamed: 0_level_0,Unnamed: 1_level_0,wake_onset_time_selfreport
subject,night,Unnamed: 2_level_1
BC05R,0,0 days 07:31:00
BC05R,1,0 days 08:46:00
CC09K,0,0 days 06:10:00
CC09K,1,0 days 07:40:00


In [296]:
multi_xs(cortisol_samples, ["BC05R", "CC09K"], level="subject")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,time_abs_selfreport,date,wake_onset_time_selfreport,time_selfreport,cortisol
subject,night,condition,sample,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
BC05R,0,Spontaneous,S0,0 days 07:32:00,2019-12-07,0 days 07:31:00,0.0,5.88
BC05R,0,Spontaneous,S1,0 days 07:47:00,2019-12-07,0 days 07:31:00,15.0,12.54
BC05R,0,Spontaneous,S2,0 days 08:02:00,2019-12-07,0 days 07:31:00,30.0,12.91
BC05R,0,Spontaneous,S3,0 days 08:17:00,2019-12-07,0 days 07:31:00,45.0,12.3
BC05R,0,Spontaneous,S4,0 days 08:32:00,2019-12-07,0 days 07:31:00,60.0,10.38
BC05R,1,Spontaneous,S0,0 days 08:47:00,2019-12-08,0 days 08:46:00,0.0,5.19
BC05R,1,Spontaneous,S1,0 days 09:02:00,2019-12-08,0 days 08:46:00,15.0,13.67
BC05R,1,Spontaneous,S2,0 days 09:17:00,2019-12-08,0 days 08:46:00,30.0,19.2
BC05R,1,Spontaneous,S3,0 days 09:32:00,2019-12-08,0 days 08:46:00,45.0,19.82
BC05R,1,Spontaneous,S4,0 days 09:47:00,2019-12-08,0 days 08:46:00,60.0,14.35


In [307]:
multi_xs(df_concat, ["BC05R", "CC09K"], level="subject")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,time,wake_onset_time_app,time_abs_selfreport,date,wake_onset_time_selfreport,time_selfreport,cortisol
subject,night,sample,condition,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
BC05R,0,S0,Spontaneous,2019-12-08 08:47:31.431000+01:00,0 days 07:31:16,0 days 07:32:00,2019-12-07,0 days 07:31:00,0.0,5.88
BC05R,0,S1,Spontaneous,2019-12-08 09:02:43.154000+01:00,0 days 07:31:16,0 days 07:47:00,2019-12-07,0 days 07:31:00,15.0,12.54
BC05R,0,S2,Spontaneous,2019-12-08 09:17:53.931000+01:00,0 days 07:31:16,0 days 08:02:00,2019-12-07,0 days 07:31:00,30.0,12.91
BC05R,0,S3,Spontaneous,2019-12-08 09:32:58.969000+01:00,0 days 07:31:16,0 days 08:17:00,2019-12-07,0 days 07:31:00,45.0,12.3
BC05R,0,S4,Spontaneous,2019-12-08 09:48:05.250000+01:00,0 days 07:31:16,0 days 08:32:00,2019-12-07,0 days 07:31:00,60.0,10.38
CC09K,0,S0,Unknown Alarm,2019-12-03 06:09:44.954000+01:00,0 days 06:09:00,0 days 06:11:00,2019-12-02,0 days 06:10:00,0.0,4.0
CC09K,0,S1,Unknown Alarm,2019-12-03 06:25:10.583000+01:00,0 days 06:09:00,0 days 06:26:00,2019-12-02,0 days 06:10:00,15.0,10.42
CC09K,0,S2,Unknown Alarm,2019-12-03 06:40:28.699000+01:00,0 days 06:09:00,0 days 06:41:00,2019-12-02,0 days 06:10:00,30.0,18.41
CC09K,0,S2,Unknown Alarm,2019-12-03 06:55:46.541000+01:00,0 days 06:09:00,0 days 06:41:00,2019-12-02,0 days 06:10:00,30.0,18.41
CC09K,0,S3,Unknown Alarm,2019-12-03 07:11:04.163000+01:00,0 days 06:09:00,0 days 06:56:00,2019-12-02,0 days 06:10:00,45.0,28.74


In [301]:
dict_barcode_day[("BC05R", 0)]

Unnamed: 0_level_0,Unnamed: 1_level_0,action,extras
time,sample,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-12-08 08:47:31.431000+01:00,S0,barcode_scanned,"{""id"": 4711, ""saliva_id"": 0, ""barcode_value"": ..."
2019-12-08 09:02:43.154000+01:00,S1,barcode_scanned,"{""id"": 37478, ""saliva_id"": 1, ""barcode_value"":..."
2019-12-08 09:17:53.931000+01:00,S2,barcode_scanned,"{""id"": 70245, ""saliva_id"": 2, ""barcode_value"":..."
2019-12-08 09:32:58.969000+01:00,S3,barcode_scanned,"{""id"": 103012, ""saliva_id"": 3, ""barcode_value""..."
2019-12-08 09:48:05.250000+01:00,S4,barcode_scanned,"{""id"": 135779, ""saliva_id"": 4, ""barcode_value""..."


In [356]:
df_concat = pd.concat(dict_barcode_day, names=["subject", "night"])
df_concat = df_concat.drop(columns=["action", "extras"])
df_concat = df_concat.join(app_wakeup).reset_index("time")

#df_concat["wake_onset_time_app"] = df_concat["wake_onset_time_app"] + df_concat["time"].dt.normalize()
#df_concat = df_concat.rename(columns={"time": "time_abs_app"})
#
#df_concat = df_concat.assign(**{"time_diff_to_wakeup_app": (df_concat["time_abs_app"] - df_concat["wake_onset_time_app"])})
#df_concat["time_abs_app"] = df_concat["time_abs_app"] - df_concat["time_abs_app"].dt.normalize()
#df_concat = df_concat.loc[~df_concat.index.duplicated(keep="last")]
##df_concat.xs("CC09K", level="subject")
#
df_concat = df_concat.join(cortisol_samples)

In [358]:
df_concat

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,time,wake_onset_time_app,time_abs_selfreport,date,wake_onset_time_selfreport,time_selfreport,cortisol
subject,night,sample,condition,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
BC05R,0,S0,Spontaneous,2019-12-08 08:47:31.431000+01:00,0 days 07:31:16,0 days 07:32:00,2019-12-07,0 days 07:31:00,0.0,5.88
BC05R,0,S1,Spontaneous,2019-12-08 09:02:43.154000+01:00,0 days 07:31:16,0 days 07:47:00,2019-12-07,0 days 07:31:00,15.0,12.54
BC05R,0,S2,Spontaneous,2019-12-08 09:17:53.931000+01:00,0 days 07:31:16,0 days 08:02:00,2019-12-07,0 days 07:31:00,30.0,12.91
BC05R,0,S3,Spontaneous,2019-12-08 09:32:58.969000+01:00,0 days 07:31:16,0 days 08:17:00,2019-12-07,0 days 07:31:00,45.0,12.3
BC05R,0,S4,Spontaneous,2019-12-08 09:48:05.250000+01:00,0 days 07:31:16,0 days 08:32:00,2019-12-07,0 days 07:31:00,60.0,10.38
CC09K,0,S0,Unknown Alarm,2019-12-03 06:09:44.954000+01:00,0 days 06:09:00,0 days 06:11:00,2019-12-02,0 days 06:10:00,0.0,4.0
CC09K,0,S1,Unknown Alarm,2019-12-03 06:25:10.583000+01:00,0 days 06:09:00,0 days 06:26:00,2019-12-02,0 days 06:10:00,15.0,10.42
CC09K,0,S2,Unknown Alarm,2019-12-03 06:40:28.699000+01:00,0 days 06:09:00,0 days 06:41:00,2019-12-02,0 days 06:10:00,30.0,18.41
CC09K,0,S2,Unknown Alarm,2019-12-03 06:55:46.541000+01:00,0 days 06:09:00,0 days 06:41:00,2019-12-02,0 days 06:10:00,30.0,18.41
CC09K,0,S3,Unknown Alarm,2019-12-03 07:11:04.163000+01:00,0 days 06:09:00,0 days 06:56:00,2019-12-02,0 days 06:10:00,45.0,28.74


## Extract Saliva Sample Times From App Logs

In [None]:
def get_timestamp(df, saliva_id):
    return datetime.datetime.strptime(df.loc[df['saliva_id']==saliva_id].iloc[0].name.strftime("%H:%M:%S"), "%H:%M:%S").time()

def get_datetime(timestring):
    return datetime.datetime.strptime(timestring, "%H:%M:%S").time()

def get_from_json(row):
    json_extra = json.loads(row.extras)
    return json_extra.get('saliva_id')

for subject, subject_log in logs.items():
    log_subject = carwatch_logs.LogData(subject_log)
    
    finished_day = log_subject.finished_days

    df = cl.log_data.get_logs_for_action(log_subject, la.barcode_scanned)

    for day in range(2):
        try:
            df_day = df[df.index.date == finished_day[day].date()]
            df_day['saliva_id'] = df_day.apply(lambda row: get_from_json(row), axis=1)
                
            if pd.isna(app_data.loc[subject, day]):
                continue
            
            T0 = get_datetime(app_data.loc[subject, day])
            T0_delta = datetime.timedelta(hours=T0.hour, minutes=T0.minute, seconds=T0.second)
            
            for saliva_id in range(5):
                T = get_timestamp(df_day, saliva_id)
                T_delta = datetime.timedelta(hours=T.hour, minutes=T.minute, seconds=T.second)

                data_raw.loc[(data_raw['subject']==subject) & (data_raw['night']==day) & (data_raw['sample']=='S'+str(saliva_id)), 'time_app'] = (T_delta-T0_delta).total_seconds() / 60
                #data_raw.loc[(data_raw['subject']==subject) & (data_raw['night']==day) & (data_raw['sample']=='S'+str(saliva_id)), 'wake_onset_app'] = app_data_long.xs((subject, day))                

        except IndexError:
            pass
            
            

data = data_raw.rename(columns={'time':'self_report','time_app':'app'})

In [None]:
data['naive'] = np.empty(data.shape[0])

for sample in range(5):
    data["naive"].loc[data["sample"]=="S"+str(sample)] = sample*15
    
data.head()

In [None]:
# Optional: drop all subjects with self report only

data_clean = data.dropna()
self_report_only = data[~data.index.isin(data_clean.index)]

self_report_only.head()

In [None]:
id_vars = ['subject', 'condition', 'chronotype', 'MEQ', 'night', 'wakeup_source', 'weekend', 'date', 'wakeup_hour', 'sample', 'cortisol']
value_vars = ['self_report','app','naive']
cort_long = data_clean.melt(id_vars=id_vars, value_vars=value_vars, var_name='log_mode', value_name='time')

cort_long_self_report_only = self_report_only.melt(id_vars =id_vars, value_vars=value_vars, var_name='log_mode', value_name='time')

cort_long_self_report_only = cort_long_self_report_only.replace({'self_report': 'self_report_only', 'naive': 'naive_sr_only'})
                                    
cort_long = pd.concat([cort_long, cort_long_self_report_only])
cort_long.dropna(inplace=True)
       
cort_long.head()

In [None]:
cort_long['time_diff'] = np.empty(cort_long.shape[0])

for sample in range(5):
    cort_long["time_diff"].loc[cort_long["sample"]=="S"+str(sample)] = cort_long.loc[cort_long["sample"]=="S"+str(sample)].apply(lambda row: row["time"]-15*sample, axis=1)
    
cort_long.head()

In [None]:
# reindex to match all subjects

app_data_long = app_data_long.reindex_like(wake_onset_long)

In [None]:
cort_long["wakeup_self_report"] = cort_long.apply(lambda row: wake_onset_long.xs((row['subject'], row['night'])), axis=1)

cort_long["wakeup_app"] = cort_long.apply(lambda row: app_data_long.xs((row['subject'], row['night'])), axis=1)

cort_long.head()

In [None]:
count = pd.DataFrame(cort_long.groupby('log_mode').size()/5)
count.rename(columns={0:'number of samples'}, inplace=True)
count

In [None]:
idx = ['subject', 'condition', 'chronotype', 'MEQ', 'night', 'wakeup_source', 'weekend', 'date', 'wakeup_hour', 'log_mode', 'wakeup_self_report', 'wakeup_app', 'sample']
cort_long.set_index(idx, inplace = True)

cort_long.head()

In [None]:
# export
cort_long.to_csv(export_path.joinpath('cortisol_samples_app_cleaned.csv'), index=True)

## Compute Cortisol Features For App Times

In [None]:
cort_auc = bp.saliva.auc(cort_long, remove_s0=False)
cort_auc.head()

In [None]:
cort_inc = bp.saliva.max_increase(cort_long, remove_s0=False)
cort_inc.head()

In [None]:
cort_slope = bp.saliva.slope(cort_long, sample_idx=[0, 3])
cort_slope = cort_slope.join(bp.saliva.slope(cort_long, sample_idx=[0, 4]))
#cort_slope = cort_slope.join(bp.saliva.slope(cort_long, sample_idx=[2, 4], biomarker_type='cortisol'))
cort_slope.head()

In [None]:
cort_max = pd.DataFrame(cort_long['cortisol'].unstack('sample').max(axis=1), columns=['cortisol_cmax'])
cort_max.head()

In [None]:
cort_cini = cort_long.xs('S0', level='sample')[['cortisol']]
cort_cini.columns = ["cortisol_cini"]
cort_cini.head()

In [None]:
cort_feat = pd.concat([cort_auc, cort_inc, cort_slope, cort_cini, cort_max], axis=1)
cort_feat = pd.DataFrame(cort_feat.stack(), columns=['cortisol'])
cort_feat.index = cort_feat.index.set_names(cort_feat.index.names[:-1] + ['saliva_feature'])
cort_feat.head()

In [None]:
# export
cort_feat.to_csv(export_path.joinpath('cortisol_features_app_cleaned.csv'))
