In [None]:
#TODO: Deal with Apple users!

In [None]:
import pandas as pd
from matplotlib import pyplot as plt
from datetime import datetime, date
import numpy as np
from datenspende_who5 import styling
from textwrap import wrap

In [None]:
def load_df():
    """Load the raw WHO-5 survey responses.

    Reads the responses from the feather file and adds a ``date`` column
    holding the calendar date (``datetime.date``) derived from the
    millisecond timestamp in ``created_at``.

    Returns
    -------
    pandas.DataFrame
        The responses with the additional ``date`` column.
    """
    responses = pd.read_feather('../data/01_raw/who5_responses.feather')

    # `created_at` is interpreted as milliseconds since the Unix epoch
    return responses.assign(
        date=lambda frame: pd.to_datetime(frame['created_at'], unit='ms').dt.date
    )

def load_vitals():
    """Load the raw vital data from its feather file.

    Returns
    -------
    pandas.DataFrame
        The file contents, unmodified.
    """
    return pd.read_feather('../data/01_raw/vitals.feather')

def preprocess_vitals(df):
    """Convert sleep timestamps to hours relative to midnight and clean them.

    For the sleep vitals 52 (onset) and 53 (offset) the ``value`` column is
    read as a Unix timestamp in seconds and replaced by the signed number of
    hours between that timestamp and the row's ``date``. Rows with
    implausible hours are dropped, and one hour is added to rows that fall
    into the hard-coded daylight-saving windows. All other vitals pass
    through unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``vitalid``, ``value`` and ``date``; ``date`` must
        be datetime-like because it is subtracted from timestamps and
        compared against ``datetime`` objects.

    Returns
    -------
    pandas.DataFrame
        A new frame with a float ``value`` column; the input frame is not
        modified.
    """
    sleep_vitals = [52, 53]

    # Work on a private copy: writing the converted hours into the caller's
    # frame would make a second call reinterpret hours as Unix seconds.
    df = df.copy()

    # The converted values are fractional hours, so the column must be float
    # before they are written back (integer columns cannot hold them).
    df['value'] = df['value'].astype(float)

    # Compute sleep onset and offset in hours relative to midnight of `date`
    mask = df.vitalid.isin(sleep_vitals)
    timestamps = pd.to_datetime(df.loc[mask, 'value'], unit='s')
    df.loc[mask, 'value'] = (timestamps - df.loc[mask, 'date']) / pd.Timedelta(hours=1)

    # Remove implausible values for onset and offset
    bad_onset = (df.vitalid == 52) & (~df.value.between(-12, 12))
    bad_offset = (df.vitalid == 53) & (~df.value.between(0, 18))
    # Copy after filtering so the in-place DST shift below acts on an
    # independent frame rather than on a slice of the previous one.
    df = df[~(bad_onset | bad_offset)].copy()

    # Correct for DST
    # NOTE(review): the 2021 window has no lower bound, so every sleep row
    # dated before 2021-10-31 is shifted - confirm that no data from before
    # the start of DST 2021 is present.
    is_sleep = df.vitalid.isin(sleep_vitals)
    dst_2021 = is_sleep & (df.date < datetime(2021, 10, 31))
    dst_2022 = is_sleep & (df.date > datetime(2022, 3, 27)) & (df.date < datetime(2022, 10, 30))

    df.loc[dst_2021 | dst_2022, 'value'] += 1

    return df


def compute_rolling_vitals(df):
    """Compute a 28-day rolling mean per user and vital.

    The frame is grouped by ``userid`` and ``vitalid`` and a time-based
    rolling window of 28 days over ``date`` is averaged. Windows with fewer
    than 7 observations yield NaN and are dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``userid``, ``vitalid`` and a datetime-like
        ``date``; every remaining column is averaged.

    Returns
    -------
    pandas.DataFrame
        One row per retained (userid, vitalid, date) with the group keys and
        ``date`` restored as regular columns.
    """
    # Time-offset windows need monotonic dates inside every group, so order
    # the rows first. A stable sort leaves already-sorted input (and the
    # relative order of equal dates) unchanged.
    ordered = df.sort_values('date', kind='stable')

    rolled = (
        ordered.set_index('date')
        .groupby(['userid', 'vitalid'])
        .rolling('28D', min_periods=7)
        .mean()
        .dropna()
    )

    return rolled.reset_index()

def get_data_for_analysis():
    """Build the table that joins survey responses with rolling vitals.

    Loads the survey responses, loads and preprocesses the vital data,
    averages the vitals with ``compute_rolling_vitals`` and inner-joins both
    tables on user and calendar date.

    Returns
    -------
    pandas.DataFrame
        One row per matching (response, vital) pair.
    """
    survey_answers = load_df()

    rolling_vitals = compute_rolling_vitals(preprocess_vitals(load_vitals()))

    # The survey table stores plain `date` objects, so bring the vitals to
    # the same type before using the column as a join key.
    rolling_vitals['date'] = pd.to_datetime(rolling_vitals['date']).dt.date

    return survey_answers.merge(
        rolling_vitals,
        left_on=['user_id', 'date'],
        right_on=['userid', 'date'],
    )

In [None]:
# Survey responses merged with the vital data of the same user and date;
# every analysis cell below starts from this frame.
data = get_data_for_analysis()

# Average vital data for each survey response

In [None]:
# Mean, standard deviation and sample size of the vital value for every
# (question, vital, answer choice) combination, plus the standard error of
# the mean (std / sqrt(count)).
df = (
    data.groupby(['question', 'vitalid', 'choice_id'])['value']
    .agg(['mean', 'std', 'count'])
    .assign(err=lambda stats: stats['std'] / np.sqrt(stats['count']))
)

In [None]:
ylabel = {65: 'Ruhepuls', 9: 'Aktivität', 43: 'Schlafdauer', 52: 'Einschlafzeit', 53: 'Aufwachzeit'}
question_ids = [49, 50, 54, 55, 56]
vital_ids = [65, 9, 43, 52, 53]

f, axarr = plt.subplots(nrows=5, ncols=5, figsize=(10, 7), sharey='row', sharex=True)

# Grid layout: one column of panels per survey question, one row per vital
for column, question in enumerate(question_ids):

    # Use the question text as the column header, wrapped to fit the panel width
    description = data.loc[data.question == question, 'description'].values[0]
    axarr[0, column].set_title('\n'.join(wrap(description, 30)), size=8)

    for row, vital in enumerate(vital_ids):
        stats = df.loc[question, vital]
        axarr[row, column].errorbar(stats.index, stats['mean'], yerr=stats['err'], fmt='o', markersize=4)

# Label each row once, on its left-most panel
for row, vital in enumerate(vital_ids):
    axarr[row, 0].set_ylabel(ylabel[vital])

for panel in axarr.ravel():
    styling.hide_and_move_axis(panel)
    panel.set_xticks([1, 2, 3, 4, 5])

plt.tight_layout()
plt.savefig('../output/1.02a-average_vital_per_survey_response.pdf')

# Average survey response for given vital data

In [None]:
df = data.copy()

# Bin the vital values so that responses can be averaged per bin. Each vital
# gets its own resolution:
#   9  -> nearest 1000
#   65 -> nearest integer
#   43 -> nearest multiple of 15
#   52, 53 -> nearest multiple of 0.2
binning = {
    9: lambda values: np.round(values, -3),
    65: lambda values: np.round(values, 0),
    43: lambda values: np.round(values / 15, 0) * 15,
    52: lambda values: np.round(values / 2, 1) * 2,
    53: lambda values: np.round(values / 2, 1) * 2,
}

for vital, to_bin in binning.items():
    selected = data.vitalid == vital
    df.loc[selected, 'value'] = to_bin(data.loc[selected, 'value'])

In [None]:
# Keep only binned values inside a per-vital range (lower, upper), both ends
# inclusive; everything outside is blanked out and then removed.
accepted_range = {
    65: (45, 90),
    9: (1000, 20000),
    43: (120, 600),
    52: (-3, 2),
    53: (4.5, 10),
}

for vital, (lower, upper) in accepted_range.items():
    out_of_range = (df.value > upper) | (df.value < lower)
    df.loc[(df.vitalid == vital) & out_of_range, 'value'] = np.nan

# Drops every row that has a missing entry in any column, not only in `value`
df = df.dropna()

In [None]:
# Mean, standard deviation and sample size of the chosen answer for every
# (question, vital, binned value) combination, plus the standard error of the
# mean. Bins whose statistics contain NaN are discarded.
df = (
    df.groupby(['question', 'vitalid', 'value'])['choice_id']
    .agg(['mean', 'std', 'count'])
    .assign(err=lambda stats: stats['std'] / np.sqrt(stats['count']))
    .dropna()
)

In [None]:
xlabel = {65: 'Ruhepuls', 9: 'Aktivität', 43: 'Schlafdauer', 52: 'Einschlafzeit', 53: 'Aufwachzeit'}
question_ids = [49, 50, 54, 55, 56]
vital_ids = [65, 9, 43, 52, 53]

f, axarr = plt.subplots(nrows=5, ncols=5, figsize=(10, 7), sharex='col')

# Grid layout: one row of panels per survey question, one column per vital
for row, question in enumerate(question_ids):

    # Use the question text as the row label, wrapped to fit next to the panel
    description = data.loc[data.question == question, 'description'].values[0]
    axarr[row, 0].set_ylabel('\n'.join(wrap(description, 22)), size=8)

    for column, vital in enumerate(vital_ids):
        stats = df.loc[question, vital]
        axarr[row, column].errorbar(stats.index, stats['mean'], yerr=stats['err'], fmt='o', markersize=4)

# Label each column once, on its bottom panel
for column, vital in enumerate(vital_ids):
    axarr[-1, column].set_xlabel(xlabel[vital])

for panel in axarr.ravel():
    styling.hide_and_move_axis(panel)

plt.tight_layout()
plt.savefig('../output/1.02b-average_survey_response_per_vital.pdf')