In [None]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
from datenspende_who5.styling import hide_and_move_axis
from textwrap import wrap

In [None]:
# Read the derived feather file into `data`. Later cells group this frame by
# ('userid', 'deviceid') and read the question and vital columns from it.
data = pd.read_feather(
    '../data/03_derived/input_data_users_surveys_rolling_vitals.feather'
)

In [None]:
# Survey column name -> statement text used as the subplot title in the
# plotting functions. Presumably the WHO-5 well-being items (going by the
# `datenspende_who5` package name) -- confirm against the survey definition.
wording = {
    'q49': 'For the last four weeks I have been happy and in good spirits.',
    'q50': 'In the last four weeks I have felt calm and relaxed.',
    'q54': 'For the past four weeks, I have felt energetic and active.',
    'q55': 'For the past four weeks, I have felt fresh and rested when I wake up.',
    'q56': 'For the past four weeks, my daily life has been full of things that interest me.',
    'total_wellbeing': 'Average wellbeing',
}

In [None]:
# Names of the vital-sign columns, listed in groups by name suffix.
vitals_keys = [
    # no suffix
    'v9', 'v43', 'v65', 'v52', 'v53', 'midsleep',
    # "...weekend"
    'v9weekend', 'v43weekend', 'v65weekend', 'v52weekend', 'v53weekend', 'midsleepweekend',
    # "...weekday"
    'v9weekday', 'v43weekday', 'v65weekday', 'v52weekday', 'v53weekday', 'midsleepweekday',
    # "...difference" (note the v9, v65, v43 order) plus social_jetlag
    'v9difference', 'v65difference', 'v43difference', 'v52difference', 'v53difference',
    'social_jetlag',
]

# Survey question columns plus the aggregate well-being score column.
question_keys = [
    'q49',
    'q50',
    'q54',
    'q55',
    'q56',
    'total_wellbeing',
]

In [None]:
def corrcoef(group, question_key, vital_key):
    """Correlate one vital column with one question column within `group`.

    Only rows where both values are finite are used (NaN and +/-inf are
    dropped pairwise).

    Parameters
    ----------
    group : DataFrame-like
        Object supporting ``group[column]`` lookups for both keys.
    question_key : str
        Name of the question column.
    vital_key : str
        Name of the vital column.

    Returns
    -------
    (corr, n) : tuple
        ``corr`` is the off-diagonal entry of ``np.corrcoef`` for the valid
        pairs, or NaN when fewer than two valid pairs exist; ``n`` is the
        number of valid pairs.
    """
    vital = group[vital_key]
    answer = group[question_key]

    # Pairwise validity: a row counts only if both values are finite.
    valid = np.isfinite(vital) & np.isfinite(answer)
    n = valid.sum()

    # A correlation needs at least two paired observations.
    if n >= 2:
        return np.corrcoef(vital[valid], answer[valid])[0, 1], n
    return np.nan, n

In [None]:
# Start with one row per (userid, deviceid) pair; the loop below adds two
# columns per question/vital combination: "<question>_<vital>_corr" and
# "<question>_<vital>_N" (number of valid pairs behind the coefficient).
id_columns = ['userid', 'deviceid']
corr_df = data.groupby(id_columns).size().reset_index().drop(columns=0)

for question_key in ['q49', 'q50', 'q54', 'q55', 'q56', 'total_wellbeing']:
    for vital_key in ['v9', 'v65', 'v43', 'v52', 'v53']:

        # Progress output: one line per question/vital combination.
        print(question_key, vital_key)

        # corrcoef returns a (corr, n) tuple per group; after reset_index the
        # tuples sit in the column labelled 0.
        corr = data.groupby(id_columns).apply(corrcoef, question_key, vital_key).reset_index()
        corr[f'{question_key}_{vital_key}_corr'] = corr[0].map(lambda pair: pair[0])
        corr[f'{question_key}_{vital_key}_N'] = corr[0].map(lambda pair: pair[1])
        corr = corr.drop(columns=0)

        corr_df = corr_df.merge(corr, on=id_columns)

In [None]:
# Vital column name -> display name used in axis labels and output file names.
vitals = {
    'v9': 'Steps',
    'v65': 'RHR',
    'v43': 'Sleep duration',
    'v52': 'Sleep onset',
    'v53': 'Sleep offset',
}

def plot_correlations(vital_key):
    """Plot the distribution of per-(user, device) correlations for one vital.

    Draws a 2x3 grid of density histograms, one panel per entry of `wording`,
    of the ``<question>_<vital>_corr`` columns of `corr_df`. Rows whose
    matching ``_N`` count is below 10 are left out. Each panel gets a vertical
    line at the median, and the figure is saved as a PDF under
    ``../output/correlations/``.

    Parameters
    ----------
    vital_key : str
        Key of `vitals` (e.g. ``'v9'``); selects the columns and the label.
    """
    title = vitals[vital_key]
    # arange excludes its stop value, so stop just above 1 to get an edge at ~1.0.
    bin_edges = np.arange(-1, 1.0001, 0.05)

    fig, axarr = plt.subplots(2, 3, figsize=(10, 6), sharex=True, sharey=True)

    for ax, (question_key, question_text) in zip(axarr.flatten(), wording.items()):
        column = f'{question_key}_{vital_key}'
        # Keep only coefficients backed by at least 10 valid pairs.
        enough_data = corr_df[f'{column}_N'] >= 10
        reliable = corr_df.loc[enough_data, f'{column}_corr']

        ax.hist(reliable, bins=bin_edges, density=True, alpha=0.9)
        ax.axvline(np.nanmedian(reliable), c='k')
        hide_and_move_axis(ax)
        ax.set_title('\n'.join(wrap(question_text, 30)), size=10)

    # Shared axes: label only the bottom row and the left column.
    for ax in axarr[1]:
        ax.set_xlabel(f'Correlation Coefficient\nwith {title}')
    for ax in axarr[:, 0]:
        ax.set_ylabel('Probability density')

    fig.tight_layout()
    fig.savefig(f'../output/correlations/correlations_{title}.pdf')

In [None]:
# Render and save one correlation figure per vital, in the original order.
for vital_key in ('v9', 'v65', 'v43', 'v52', 'v53'):
    plot_correlations(vital_key)

In [None]:
# Per-(userid, deviceid) mean of every column left after dropping 'salutation'
# and 'NUTS3' (presumably dropped because they are non-numeric -- confirm).
# The grouping key must be 'userid', the name every other cell uses: the
# following merge with corr_df joins on ['userid', 'deviceid'], and grouping
# by 'user_id' raises KeyError on a frame keyed by 'userid'.
averages = (
    data
    .drop(columns=['salutation', 'NUTS3'])
    .groupby(['userid', 'deviceid'])
    .mean()
    .reset_index()
)

In [None]:
# Attach the per-(userid, deviceid) correlation columns from corr_df to the
# averaged values.
# NOTE(review): this overwrites `averages`, so re-running this cell merges
# corr_df in a second time; re-run the cell that builds `averages` first.
averages = averages.merge(corr_df, on=['userid', 'deviceid'])

In [None]:
# Median over all rows of the averaged v53 column, shown as the cell output.
averages['v53'].median()

In [None]:
# Vital column name -> display name for labels and file names. Re-declares
# the same mapping as the earlier `vitals` assignment in this notebook.
vitals = {
    'v9': 'Steps',
    'v65': 'RHR',
    'v43': 'Sleep duration',
    'v52': 'Sleep onset',
    'v53': 'Sleep offset',
}

def plot_correlation_discriminated(vital_key, threshold):
    """Compare correlation distributions above and below a vital threshold.

    For each entry of `wording`, rows of `averages` are split by whether the
    `vital_key` column is strictly above or strictly below `threshold`; rows
    exactly equal to it fall in neither group. Rows whose
    ``<question>_<vital>_N`` count is below 10 are left out. The two groups'
    ``<question>_<vital>_corr`` values are drawn as side-by-side density
    histograms in a 2x3 grid, and the figure is saved as a PDF under
    ``../output/correlations/``.

    Parameters
    ----------
    vital_key : str
        Key of `vitals`; selects the columns and the label.
    threshold : number
        Split value, compared against ``averages[vital_key]``.
    """
    title = vitals[vital_key]
    label = [f'{title} > {threshold}', f'{title} < {threshold}']
    # arange excludes its stop value, so stop just above 1 to get an edge at ~1.0.
    bin_edges = np.arange(-1, 1.0001, 0.2)

    fig, axarr = plt.subplots(2, 3, figsize=(10, 6), sharex=True, sharey=True)

    for ax, (question_key, question_text) in zip(axarr.flatten(), wording.items()):
        column = f'{question_key}_{vital_key}'
        coefficients = averages[f'{column}_corr']
        # Keep only coefficients backed by at least 10 valid pairs.
        enough_data = averages[f'{column}_N'] >= 10
        vital_value = averages[vital_key]

        above = coefficients[enough_data & (vital_value > threshold)]
        below = coefficients[enough_data & (vital_value < threshold)]

        # Group order must match `label`: above the threshold first, then below.
        ax.hist([above, below], bins=bin_edges, density=True, alpha=0.9, label=label)
        hide_and_move_axis(ax)
        ax.set_title('\n'.join(wrap(question_text, 30)), size=10)

    # Shared axes: label only the bottom row and the left column.
    for ax in axarr[1]:
        ax.set_xlabel(f'Correlation Coefficient\nwith {title}')
    for ax in axarr[:, 0]:
        ax.set_ylabel('Probability density')

    # One legend in the first panel serves the whole grid.
    axarr[0, 0].legend(loc='upper left')
    fig.tight_layout()

    fig.savefig(f'../output/correlations/correlations_{title}_discriminated.pdf')

In [None]:
# One split figure per (vital, threshold) pair. Each threshold is compared
# directly against that vital's column in `averages`, so it is in that
# column's units.
for vital_key, threshold in [('v43', 420), ('v52', -1), ('v53', 7)]:
    plot_correlation_discriminated(vital_key, threshold)