In [None]:
import pandas as pd
from matplotlib import pyplot as plt
from datenspende_who5.styling import hide_and_move_axis
from textwrap import wrap
import numpy as np
from scipy.stats import pearsonr

In [None]:
# Load the derived correlation table and the rolling-vitals survey input table.
correlations = pd.read_feather('../data/03_derived/correlation_coefficients.feather')
data = pd.read_feather('../data/03_derived/input_data_users_surveys_rolling_vitals.feather')

# Collapse `data` to one row of column means per (user_id, deviceid) pair,
# leaving out the 'salutation' and 'NUTS3' columns before averaging.
averages = (
    data
    .drop(columns=['salutation', 'NUTS3'])
    .groupby(['user_id', 'deviceid'])
    .mean()
    .reset_index()
)
# Attach the correlation columns to the per-pair means.
# NOTE(review): the groupby above keys on 'user_id' while this merge keys on
# 'userid' -- confirm both spellings really exist in the inputs; if not, one of
# the two statements raises a KeyError.
averages = averages.merge(correlations, on=['userid', 'deviceid'])

In [None]:
# Survey item keys mapped to the statement text shown as subplot titles.
# Insertion order matters: the plotting functions pair these keys, in order,
# with the axes of a 2x3 subplot grid.
wording = {
    'q49': 'For the last four weeks I have been happy and in good spirits.',
    'q50': 'In the last four weeks I have felt calm and relaxed.',
    'q54': 'For the past four weeks, I have felt energetic and active.',
    'q55': 'For the past four weeks, I have felt fresh and rested when I wake up.',
    'q56': 'For the past four weeks, my daily life has been full of things that interest me.',
    'total_wellbeing': 'Average wellbeing',
}

# Vital-sign keys mapped to the display names used in axis labels, legend
# entries and output file names.
vitals = {
    'v9': 'Steps',
    'v65': 'RHR',  # presumably resting heart rate -- TODO confirm
    'v43': 'Sleep duration',
    'v52': 'Sleep onset',
    'v53': 'Sleep offset',
}

In [None]:
# Sample sizes of the total_wellbeing / v9 correlations whose p-value is finite.
# The columns are read straight from `correlations` so that this cell runs on a
# fresh kernel: the names `N` and `p` are only assigned in a later cell, and
# using them here raised a NameError under Restart & Run All.
correlations['total_wellbeing_v9_N'][np.isfinite(correlations['total_wellbeing_v9_pvalue'])]

In [None]:
# Correlation of total_wellbeing with v9, together with its p-value and
# sample-size columns.
corrs = correlations['total_wellbeing_v9_corr']
p = correlations['total_wellbeing_v9_pvalue']
N = correlations['total_wellbeing_v9_N']

# Restrict all three series to the rows that have a finite p-value.
mask = np.isfinite(p)
corrs, N, p = corrs[mask], N[mask], p[mask]

# Histogram only the coefficients with p < 0.05 ...
count, bins = np.histogram(corrs[p < 0.05], bins=np.arange(-1, 1.1, 0.2))
# ... then turn the bin edges into bin centres for the bar plot.
bins = (bins[:-1] + bins[1:]) / 2
width = bins[1] - bins[0]
# Normalise by the number of finite-p rows (not by the number of significant
# ones), so the bars are fractions of all rows kept by `mask`.
count = count / len(corrs)

f, ax = plt.subplots()

# Bars are drawn slightly narrower than the bin spacing to leave a visible gap.
ax.bar(bins, count, width=0.95 * width)
ax.set_xticks(bins)

In [None]:
def plot_correlations(vital_key):
    """Plot per-question histograms of correlation coefficients for one vital.

    Draws a 2x3 grid of panels, one per entry of the module-level ``wording``
    dict. Each panel shows a density histogram of the
    ``{question}_{vital_key}_corr`` column of ``correlations``, restricted to
    rows whose ``{question}_{vital_key}_N`` is at least 10, plus a black
    vertical line at the NaN-ignoring median of those values. The figure is
    saved as ``../output/correlations/correlations_{title}.pdf``, where
    ``title`` is the display name looked up in ``vitals``.

    Parameters
    ----------
    vital_key : str
        Key of the vital in the module-level ``vitals`` dict (e.g. ``'v9'``);
        also used to build the column names read from ``correlations``.
    """
    title = vitals[vital_key]
    bin_edges = np.arange(-1, 1.0001, 0.05)

    fig, axes = plt.subplots(2, 3, figsize=(10, 6), sharex=True, sharey=True)

    # Pair every question with one panel, row by row.
    for (question_key, question_text), panel in zip(wording.items(), axes.flat):
        coefficients = correlations[f'{question_key}_{vital_key}_corr']
        sample_sizes = correlations[f'{question_key}_{vital_key}_N']
        # Only keep coefficients that are based on at least 10 samples.
        coefficients = coefficients[sample_sizes >= 10]

        panel.hist(coefficients, bins=bin_edges, density=True, alpha=0.9)
        panel.axvline(np.nanmedian(coefficients), c='k')
        hide_and_move_axis(panel)
        # Wrap long question texts at 30 characters so titles fit the panel.
        panel.set_title('\n'.join(wrap(question_text, 30)), size=10)

    # Axes are shared, so only the bottom row and left column carry labels.
    for panel in axes[1]:
        panel.set_xlabel(f'Correlation Coefficient\nwith {title}')

    for panel in axes[:, 0]:
        panel.set_ylabel('Probability density')

    fig.tight_layout()
    fig.savefig(f'../output/correlations/correlations_{title}.pdf')
    

def plot_correlation_discriminated(vital_key, threshold):
    """Plot per-question correlation histograms split by a threshold on the vital.

    Draws a 2x3 grid of panels, one per entry of the module-level ``wording``
    dict. For each question, the ``{question}_{vital_key}_corr`` column of
    ``averages`` is restricted to rows with ``{question}_{vital_key}_N`` of at
    least 10 and split into two groups: rows whose ``averages[vital_key]`` is
    strictly above ``threshold`` and rows where it is strictly below. Rows
    exactly equal to ``threshold`` fall into neither group. Both groups are
    drawn as density histograms in the same panel. The figure is saved as
    ``../output/correlations/correlations_{title}_discriminated.pdf``.

    Parameters
    ----------
    vital_key : str
        Key of the vital in the module-level ``vitals`` dict; also the name of
        the ``averages`` column compared against ``threshold``.
    threshold : number
        Value separating the two groups; it also appears in the legend labels.
    """
    title = vitals[vital_key]
    group_labels = [f'{title} > {threshold}', f'{title} < {threshold}']
    bin_edges = np.arange(-1, 1.0001, 0.2)

    fig, axes = plt.subplots(2, 3, figsize=(10, 6), sharex=True, sharey=True)

    # Pair every question with one panel, row by row.
    for (question_key, question_text), panel in zip(wording.items(), axes.flat):
        coefficients = averages[f'{question_key}_{vital_key}_corr']
        sample_sizes = averages[f'{question_key}_{vital_key}_N']
        vital_values = averages[vital_key]

        # Both groups require at least 10 samples behind the coefficient.
        enough_samples = sample_sizes >= 10
        above = coefficients[enough_samples & (vital_values > threshold)]
        below = coefficients[enough_samples & (vital_values < threshold)]

        panel.hist([above, below], bins=bin_edges, density=True, alpha=0.9, label=group_labels)
        hide_and_move_axis(panel)
        # Wrap long question texts at 30 characters so titles fit the panel.
        panel.set_title('\n'.join(wrap(question_text, 30)), size=10)

    # Axes are shared, so only the bottom row and left column carry labels.
    for panel in axes[1]:
        panel.set_xlabel(f'Correlation Coefficient\nwith {title}')

    for panel in axes[:, 0]:
        panel.set_ylabel('Probability density')

    # A single legend in the first panel serves the whole grid.
    axes[0, 0].legend(loc='upper left')
    fig.tight_layout()

    fig.savefig(f'../output/correlations/correlations_{title}_discriminated.pdf')

In [None]:
# Produce and save one correlation figure for every vital listed in `vitals`.
for vital_key in vitals:
    plot_correlations(vital_key)

In [None]:
# Threshold-split figures for three of the vitals.
# NOTE(review): the thresholds are bare numbers and their units are not visible
# here (420 for 'v43' looks like minutes, 7 for 'v53' like an hour of day) --
# confirm against the data and consider naming them as constants.
plot_correlation_discriminated(vital_key='v43', threshold=420)
plot_correlation_discriminated(vital_key='v52', threshold=-1)
plot_correlation_discriminated(vital_key='v53', threshold=7)