In [None]:
%load_ext autoreload
%autoreload 2

import hydra
import os
import datetime
from pathlib import Path

# Initialize hydra and move to the root of the repository.
# hydra.initialize() raises ValueError when the global Hydra instance already
# exists, which is what happens when this cell is re-run in the same kernel.
# Only that exception is tolerated: any other failure (for example a missing or
# broken config) now propagates instead of being reported as
# "already initialized" and leaving CONFIG undefined or stale.
try:
    hydra.initialize(version_base=None, config_path="../config/")
except ValueError as err:
    print(f'Hydra already initialized! ({err})')
else:
    CONFIG = hydra.compose(config_name="main.yaml")
    print('Initializing hydra')
    # Change directory only on a fresh initialization so that re-running the
    # cell does not climb one level further up each time.
    os.chdir('..')

# Create an output folder in the root of the repository.
# The clock is read once so the date and time components of the path cannot
# disagree when the cell happens to run across midnight.
_now = datetime.datetime.now()
OUTPUT_FOLDER = Path('output') / _now.date().isoformat() / _now.strftime("%H-%M-%S")
OUTPUT_FOLDER.mkdir(parents=True, exist_ok=True)

In [None]:
import pandas as pd
from matplotlib import pyplot as plt
from src.utils.colors import flatuicolors as colors
from matplotlib import pyplot as plt
from src.utils.styling import hide_and_move_axis
from textwrap import wrap
import numpy as np
from scipy.stats import pearsonr

In [None]:
# Correlation table; the code further down reads its `userid` column and
# `<key>_corr` / `<key>_pvalue` / `<key>_N` columns.
correlations = pd.read_feather(Path(CONFIG.compute.folder) / CONFIG.compute.filenames.correlations)

data = pd.read_feather(Path(CONFIG.data.processed) / CONFIG.data.filenames.merged_data)

# NOTE(review): the original grouped by 'user_id' but merged on 'userid'.
# Unless `data` carries both columns, one of the two statements raises a
# KeyError. The schema of `data` is not visible here, so the grouping key is
# resolved from the columns that actually exist and then aligned with the
# 'userid' name used by `correlations` -- TODO confirm the intended column.
data_user_key = 'user_id' if 'user_id' in data.columns else 'userid'

# Average every remaining column per (user, device) pair.
averages = (
    data
    .drop(columns=['salutation', 'NUTS3', 'zip_3digit', 'zip_5digit'])
    .groupby([data_user_key, 'deviceid'])
    .mean()
    .reset_index()
)

# Rename only when no 'userid' column exists yet, so a frame that already has
# both columns behaves exactly as before.
if 'userid' not in averages.columns:
    averages = averages.rename(columns={data_user_key: 'userid'})

averages = pd.merge(averages, correlations, on=['userid', 'deviceid'])

In [None]:
# Text associated with each wellbeing key. The plotting functions in this
# notebook iterate over these keys (in this order) and use the wrapped text as
# subplot titles.
wording = dict(
    q49='For the last four weeks I have been happy and in good spirits.',
    q50='In the last four weeks I have felt calm and relaxed.',
    q54='For the past four weeks, I have felt energetic and active.',
    q55='For the past four weeks, I have felt fresh and rested when I wake up.',
    q56='For the past four weeks, my daily life has been full of things that interest me.',
    total_wellbeing='Average wellbeing',
)

# Short display label for each vital key. The labels end up in axis labels and
# in the names of the saved figures.
vitals = dict(
    v9='Steps',
    v65='RHR',
    v43='Sleep duration',
    v52='Sleep onset',
    v53='Sleep offset',
)

In [None]:
def plot_correlation_histogram(ax, key, N_min=12, significance_level=0.05, bins=np.arange(-1, 1.1, 0.1)):
    """Draw a stacked histogram of the `{key}_corr` column of `correlations`.

    Rows are kept only when their `{key}_N` value is at least `N_min`
    (presumably the number of paired observations -- TODO confirm). Rows whose
    `{key}_pvalue` is below `significance_level` are drawn opaque; rows at or
    above it are stacked on top with reduced opacity. A vertical line marks
    the mean of the kept coefficients, and that mean is also printed.

    Parameters
    ----------
    ax : matplotlib axes to draw on.
    key : prefix of the `_corr`, `_pvalue` and `_N` columns in `correlations`.
    N_min : minimum `{key}_N` a row needs to be included.
    significance_level : p-value cut-off separating the two stacked groups.
    bins : histogram bin edges.

    Returns
    -------
    The `correlations.userid` entries of the rows that satisfy both the
    `N_min` and the significance criterion.
    """
    sample_size = correlations[f'{key}_N']
    has_enough = sample_size >= N_min

    corrs = correlations[f'{key}_corr'][has_enough]
    p = correlations[f'{key}_pvalue'][has_enough]

    # Coefficients of exactly +/-1 are nudged just inside the interval.
    corrs[corrs == 1] = 0.999
    corrs[corrs == -1] = -0.999

    # `p` only covers the filtered rows; combining it with the full-length
    # mask relies on pandas index alignment, as in the original expression.
    users = correlations.userid[has_enough & (p < significance_level)]

    groups = (
        (corrs[p < significance_level], 1),     # significant: opaque
        (corrs[p >= significance_level], 0.3),  # not significant: faded
    )

    bottom = np.zeros(len(bins) - 1)
    for values, opacity in groups:
        count, edges = np.histogram(values, bins=bins)
        bin_centers = .5 * (edges[1:] + edges[:-1])
        bar_width = np.diff(bin_centers)[0] * .9
        ax.bar(bin_centers, count, width=bar_width, bottom=bottom, color=colors.greensea, alpha=opacity)
        # Accumulate so the next group is stacked on top of this one.
        bottom += count

    mean_corr = corrs.mean()
    ax.axvline(mean_corr, lw=2, c=colors.wetasphalt, zorder=-10)
    print(mean_corr)

    hide_and_move_axis(ax)
    ax.set_xlim(-1, 1)
    ax.set_xticks(edges[::2])

    return users
    
# Two histograms side by side on a shared y-axis: total wellbeing against
# v65 (labelled 'RHR' in `vitals`) on the left and against v9 ('Steps') on the
# right.
f, axarr = plt.subplots(nrows=1, ncols=2, sharey=True, figsize=(10, 3.5))

users1, users2 = [
    plot_correlation_histogram(panel, column_prefix)
    for panel, column_prefix in zip(axarr, ('total_wellbeing_v65', 'total_wellbeing_v9'))
]

plt.savefig(OUTPUT_FOLDER / 'correlations.png', dpi=400)

In [None]:
def plot_correlations(vital_key, N_min=10):
    """Plot the distribution of correlation coefficients for one vital.

    Creates a 2x3 grid with one panel per entry of `wording`. Each panel shows
    a density-normalised histogram of the `{question}_{vital}_corr` column of
    `correlations`, restricted to rows whose `{question}_{vital}_N` is at least
    `N_min`, and a vertical black line at the NaN-ignoring median.

    The figure is saved as `correlations_<label>.pdf` inside `OUTPUT_FOLDER`,
    the run-specific folder this notebook creates, rather than in a hard-coded
    `../output/correlations/` directory that nothing creates and that lies
    outside the working directory.

    Parameters
    ----------
    vital_key : key of `vitals` (e.g. 'v9'); selects the columns and the label.
    N_min : minimum `_N` value a row needs to be included. Defaults to 10,
        the threshold that was previously hard-coded.
    """
    title = vitals[vital_key]

    fig, axarr = plt.subplots(2, 3, figsize=(10, 6), sharex=True, sharey=True)

    for question_key, ax in zip(wording.keys(), axarr.flatten()):

        corrs = correlations[f'{question_key}_{vital_key}_corr']
        N = correlations[f'{question_key}_{vital_key}_N']
        corrs = corrs[N >= N_min]

        ax.hist(corrs, bins=np.arange(-1, 1.0001, 0.05), density=True, alpha=0.9)
        # nanmedian: coefficients may be NaN for some rows.
        ax.axvline(np.nanmedian(corrs), c='k')
        hide_and_move_axis(ax)
        ax.set_title('\n'.join(wrap(wording[question_key], 30)), size=10)

    # x labels only on the bottom row, y labels only on the left column.
    for ax in axarr[1]:
        ax.set_xlabel(f'Correlation Coefficient\nwith {title}')

    for ax in axarr[:, 0]:
        ax.set_ylabel('Probability density')

    plt.tight_layout()
    plt.savefig(OUTPUT_FOLDER / f'correlations_{title}.pdf')
    

def plot_correlation_discriminated(vital_key, threshold, N_min=10):
    """Compare correlation distributions above and below a vital threshold.

    Creates a 2x3 grid with one panel per entry of `wording`. In each panel
    the `{question}_{vital}_corr` values of `averages` are split by whether the
    row's `vital_key` column in `averages` is above or below `threshold`, and
    the two groups are drawn as side-by-side density histograms.

    Rows need a `{question}_{vital}_N` of at least `N_min`. Rows whose vital
    value equals `threshold` exactly fall into neither group.

    The figure is saved as `correlations_<label>_discriminated.pdf` inside
    `OUTPUT_FOLDER`, the run-specific folder this notebook creates, rather
    than in a hard-coded `../output/correlations/` directory that nothing
    creates and that lies outside the working directory.

    Parameters
    ----------
    vital_key : key of `vitals`; selects the columns and the label.
    threshold : value of the `vital_key` column separating the two groups.
    N_min : minimum `_N` value a row needs to be included. Defaults to 10,
        the threshold that was previously hard-coded.
    """
    title = vitals[vital_key]
    label = [f'{title} > {threshold}', f'{title} < {threshold}']

    fig, axarr = plt.subplots(2, 3, figsize=(10, 6), sharex=True, sharey=True)

    for question_key, ax in zip(wording.keys(), axarr.flatten()):

        corrs = averages[f'{question_key}_{vital_key}_corr']
        N = averages[f'{question_key}_{vital_key}_N']
        value = averages[vital_key]

        enough = N >= N_min
        above = corrs[enough & (value > threshold)]
        below = corrs[enough & (value < threshold)]

        # Passing a list of samples makes hist draw the groups next to each
        # other in every bin; the order matches `label`.
        ax.hist([above, below], bins=np.arange(-1, 1.0001, 0.2), density=True, alpha=0.9, label=label)
        hide_and_move_axis(ax)
        ax.set_title('\n'.join(wrap(wording[question_key], 30)), size=10)

    # x labels only on the bottom row, y labels only on the left column.
    for ax in axarr[1]:
        ax.set_xlabel(f'Correlation Coefficient\nwith {title}')

    for ax in axarr[:, 0]:
        ax.set_ylabel('Probability density')

    axarr[0, 0].legend(loc='upper left')
    plt.tight_layout()

    plt.savefig(OUTPUT_FOLDER / f'correlations_{title}_discriminated.pdf')

In [None]:
# One correlation figure per vital listed in `vitals`.
for vital_key in vitals:
    plot_correlations(vital_key)

In [None]:
# Split figures for the three sleep vitals ('Sleep duration', 'Sleep onset',
# 'Sleep offset' in `vitals`).
# NOTE(review): thresholds are in the units of the corresponding `averages`
# columns -- presumably minutes for v43 and clock hours for v52/v53; confirm.
for vital_key, threshold in (('v43', 420), ('v52', -1), ('v53', 7)):
    plot_correlation_discriminated(vital_key, threshold)