In [None]:
import pandas as pd
import numpy as np
from datenspende_who5.colors import flatuicolors as colors
from matplotlib import pyplot as plt
from datenspende_who5.styling import hide_and_move_axis
import string
from textwrap import wrap
import seaborn as sns

In [None]:
# Load data
df = pd.read_feather('../data/03_derived/input_data_users_surveys_rolling_vitals.feather')

# Exclusion criteria:
#  - users with survey responses that are too early (identified by hand)
#  - users with unreasonable birth dates
#  - users with salutation 'D', due to the low sample size
excluded_user_ids = [1143114, 1143193, 1144681, 1147298, 1144157, 1155559]
excluded_birth_dates = [2004, 1984, 2005]

keep = (
    ~df.user_id.isin(excluded_user_ids)
    & ~df.birth_date.isin(excluded_birth_dates)
    & (df.salutation != 'D')
)
# Explicit copy: new columns are added below, which must not happen on a slice of
# the loaded frame (avoids pandas' SettingWithCopyWarning).
df = df[keep].copy()

# Compute age and define age groups
df['age'] = 2020 - df.birth_date + 2.5

age_level1 = 40
age_level2 = 65

# The group boundaries use age_level1 / age_level2. The previously hard-coded 35 / 60
# cut-offs disagreed with the '<40', '40-65', '65+' legend labels and with the shaded
# birth-year bands of Figure 1, both of which are based on these two levels.
df.loc[df['age'].between(0, age_level1, inclusive='left'), 'age_group'] = 0
df.loc[df['age'].between(age_level1, age_level2, inclusive='left'), 'age_group'] = 1
df.loc[df['age'].between(age_level2, 100, inclusive='left'), 'age_group'] = 2

# Compute Z-Scores for each combination of age and gender
demogs = ['salutation', 'birth_date']

z_score_keys = [
    'v9', 'v9std', 'v65', 'v65std', 'v43', 'v43std', 'v52', 'v52std', 'v53', 'v53std',
    'q49', 'q50', 'q54', 'q55', 'q56', 'total_wellbeing',
]

for question_key in z_score_keys:
    mean_col = question_key + '_demographics_mean'
    std_col = question_key + '_demographics_std'

    # Make sure to always compute user averages first!!!
    user_averages = df.groupby('user_id').agg({demogs[0]: 'max', demogs[1]: 'max', question_key: 'mean'})

    # From each user average we compute the mean and std per demographic bucket
    averages = (
        user_averages.groupby(demogs)[question_key]
        .agg(['mean', 'std'])
        .reset_index()
        .rename(columns={'mean': mean_col, 'std': std_col})
    )

    # Add the bucket mean and std to the main data frame and compute Z-scores.
    # NOTE(review): this is an inner merge, so rows whose demographics have no entry
    # in `averages` are dropped from `df` -- confirm this is intended.
    df = pd.merge(df, averages, on=demogs)
    df[question_key + '_Z'] = (df[question_key] - df[mean_col]) / df[std_col]

In [None]:
def plot_wellbeing_per_gender(ax, df, question_key='total_wellbeing', binwidth=0.2, xmin=1, xmax=5):
    """Draw side-by-side relative-frequency histograms of per-user averages by gender.

    Rows of `df` are first reduced to one value per `user_id` (mean of
    `question_key`, max of `salutation`). For salutations 'M' and 'F' the
    per-user averages are histogrammed into bins of width `binwidth` centred
    on `xmin`, `xmin + binwidth`, ..., and normalised by the number of users
    of that salutation. Male bars extend to the right of the bin centre,
    female bars to the left.
    """
    per_user = df.groupby('user_id')[['salutation', question_key]].agg({'salutation': 'max', question_key: 'mean'})

    # The binning is identical for both groups, so it is computed once.
    edges = np.arange(xmin - .5 * binwidth, xmax + .501 * binwidth, binwidth)
    centers = .5 * (edges[1:] + edges[:-1])
    bar_width = .4 * np.diff(centers)[0]

    # (salutation, bar direction, colour, legend label)
    styles = (
        ('M', 1., colors.greensea, 'Male'),
        ('F', -1., colors.pumpkin, 'Female'),
    )

    for salutation, sign, color, label in styles:
        values = per_user[per_user.salutation == salutation][question_key]
        count, _edges = np.histogram(values, bins=edges)
        frequency = count / len(values)

        # align='edge' with a signed width puts the two groups on either side of the centre
        ax.bar(centers, frequency, width=sign * bar_width, align='edge', edgecolor='w', label=label, color=color)

    hide_and_move_axis(ax)

    ax.legend()
    ax.set_xlabel('Average WHO-5 Wellbeing')
    ax.set_ylabel('Relative Frequency')
    
    
def plot_wellbeing_per_gender_violin(ax, df, question_key='total_wellbeing'):
    """Draw violins of the per-user average of `question_key` for 'F' and 'M'.

    Rows of `df` are first reduced to one value per `user_id` (mean of
    `question_key`, max of `salutation`); only salutations 'F' and 'M' are
    shown. A horizontal line marks the mean of each group.

    Parameters
    ----------
    ax : matplotlib axes to draw on.
    df : data frame with columns `user_id`, `salutation` and `question_key`.
    question_key : column whose per-user average is plotted.
    """
    color = {'M': colors.greensea, 'F': colors.pumpkin}

    # The violin order is fixed explicitly: the tick labels and the positions of the
    # mean lines below assume 'F' on the left and 'M' on the right, which must not
    # depend on which salutation happens to appear first in the data.
    order = ['F', 'M']

    per_user = df.groupby('user_id')[['salutation', question_key]].agg({'salutation': 'max', question_key: 'mean'})
    per_user = per_user[per_user.salutation.isin(order)]

    vio = sns.violinplot(per_user, x='salutation', y=question_key, order=order, ax=ax,
                         palette=color, alpha=0.8, cut=0, inner=None)
    plt.setp(vio.collections, alpha=.4)

    # Group means are taken from `question_key` (previously hard-coded to the
    # `total_wellbeing` column, which broke every other question key).
    # xmin / xmax are fractions of the axes width.
    mean_female = per_user.loc[per_user.salutation == 'F', question_key].mean()
    mean_male = per_user.loc[per_user.salutation == 'M', question_key].mean()
    ax.axhline(mean_male, xmin=.6, xmax=.9, c=color['M'], lw=2)
    ax.axhline(mean_female, xmin=.1, xmax=.4, c=color['F'], lw=2)

    hide_and_move_axis(ax)
    ax.set_xticklabels(['Female', 'Male'])
    ax.set_xlabel(None)
    ax.set_ylabel('WHO-5 Wellbeing')

    
def plot_wellbeing_per_age(ax, df, color=colors.wetasphalt):
    """Plot the mean `total_wellbeing` per `birth_date` with a 1.96 * SEM band.

    Rows of `df` are averaged per `user_id` first; the per-user averages are
    then grouped by `birth_date`. The band is mean +/- 1.96 * std / sqrt(count).
    """
    # One row per user
    per_user = df.groupby('user_id')[['birth_date', 'total_wellbeing']].mean()

    # Mean, spread and number of users per birth date
    stats = per_user.groupby('birth_date')['total_wellbeing'].agg(['mean', 'std', 'count'])
    half_width = 1.96 * stats['std'] / np.sqrt(stats['count'])

    lower = stats['mean'] - half_width
    upper = stats['mean'] + half_width
    ax.fill_between(stats.index, lower, upper, alpha=.3, color=color)
    ax.plot(stats['mean'], color=color, marker='o', markersize=4)

    hide_and_move_axis(ax)

    ax.set_xlabel('Birth Year')
    ax.set_ylabel('Average WHO-5 Wellbeing')
    ax.set_xticks(range(1930, 2010, 10))
    
    
def add_label(axarr, pos, size=20):
    """Label the panels of a figure with consecutive capital letters (A, B, C, ...).

    Parameters
    ----------
    axarr : array of axes; it is flattened and labelled in that order.
    pos : sequence with one entry per panel, each one of 'upper left',
        'upper right', 'lower left' or 'lower right'. Panels with any other
        entry are left unlabelled.
    size : font size of the labels.
    """
    # Label anchor points in axes-fraction coordinates (used with ax.transAxes)
    anchors = {
        'upper left': (0.075, 0.925),
        'upper right': (0.925, 0.925),
        'lower left': (0.075, 0.075),
        'lower right': (0.925, 0.075),
    }

    for i, ax in enumerate(axarr.flatten()):
        anchor = anchors.get(pos[i])
        if anchor is None:
            # Unknown position: skip the panel, as before
            continue

        x, y = anchor
        ax.text(x, y, string.ascii_uppercase[i], horizontalalignment='center', verticalalignment='center',
                transform=ax.transAxes, size=size)
            
            
def plot_survey_response_per_vitals(ax, df, vital_key, question_key, color='r', bins=40, label=None, xlabel=None, err_fac=1.96, norm=False, min_count=50):
    """Plot the mean of `question_key` as a function of binned `vital_key`.

    Rows with a missing value in either column are discarded. The remaining
    rows are binned along `vital_key` with `pandas.cut`; for each bin the mean
    of `question_key` is drawn at the bin midpoint, together with a band of
    +/- `err_fac` * std / sqrt(count).

    Parameters
    ----------
    ax : matplotlib axes to draw on.
    df : data frame containing the columns `vital_key` and `question_key`.
    vital_key, question_key : names of the x and y columns.
    color : colour of line and band.
    bins : passed to `pandas.cut` (number of bins or sequence of bin edges).
    label : legend label of the line.
    xlabel : x-axis label.
    err_fac : multiplier of the standard error used for the band.
    norm : if True, mean and std are divided by the mean of `question_key`
        over all rows used (computed before binning).
    min_count : only bins with more than `min_count` rows are drawn.
    """
    # Explicit copy, because a helper column is added below
    data = df[[vital_key, question_key]].dropna().copy()

    norm_val = data[question_key].mean()

    # Replace each interval by its midpoint. The midpoints are computed on the
    # categories, so values outside the bin range stay NaN and are ignored by
    # the groupby; converting to float means only occupied bins are grouped.
    binned = pd.cut(data[vital_key], bins)
    intervals = binned.cat.categories
    midpoints = intervals.left + 0.5 * (intervals.right - intervals.left)
    data['bins'] = binned.cat.rename_categories(midpoints).astype(float)

    stats = data.groupby('bins')[question_key].agg(['mean', 'count', 'std'])

    if norm:
        stats['mean'] = stats['mean'] / norm_val
        stats['std'] = stats['std'] / norm_val

    stats['err'] = err_fac * stats['std'] / np.sqrt(stats['count'])

    # Sparsely populated bins are too noisy to show
    stats = stats[stats['count'] > min_count]

    ax.fill_between(stats.index, stats['mean'] - stats['err'], stats['mean'] + stats['err'], alpha=.3, color=color)
    ax.plot(stats.index, stats['mean'], color=color, marker='o', markersize=3, label=label)

    hide_and_move_axis(ax)
    ax.set_xlabel(xlabel)

# Figure 1

In [None]:
f, axarr = plt.subplots(2, 3, figsize=(10, 6))

question_key = 'total_wellbeing'

# Curves drawn in the vitals panels, in drawing order: (group value, colour, legend label)
gender_curves = [
    ('M', colors.greensea, 'Male'),
    ('F', colors.pumpkin, 'Female'),
]
age_curves = [
    (0, colors.greensea, '<40'),
    (1, colors.pumpkin, '40-65'),
    (2, colors.wisteria, '65+'),
]

# Vitals shown in columns 1 and 2:
# (column, vital key, bin edges in the gender row, bin edges in the age row, x label)
vital_panels = [
    (1, 'v65', np.arange(30, 91, 1), np.arange(30, 91, 2), 'Resting heart rate'),
    (2, 'v9', np.arange(0, 25000, 500), np.arange(0, 25000, 1000), 'Daily Step Count'),
]

# Top row: wellbeing by gender
plot_wellbeing_per_gender_violin(axarr[0, 0], df)

for column, vital, gender_bins, age_bins, vital_label in vital_panels:
    for salutation, curve_color, curve_label in gender_curves:
        plot_survey_response_per_vitals(axarr[0, column], df[df.salutation == salutation], vital, question_key,
                                        bins=gender_bins, color=curve_color, label=curve_label)

# Bottom row: wellbeing by age
plot_wellbeing_per_age(axarr[1, 0], df)

for column, vital, gender_bins, age_bins, vital_label in vital_panels:
    for age_group, curve_color, curve_label in age_curves:
        plot_survey_response_per_vitals(axarr[1, column], df[df.age_group == age_group], vital, question_key,
                                        bins=age_bins, color=curve_color, label=curve_label)

# Mark the age-group boundaries in the birth-year panel. `age` is defined as
# 2020 - birth_date + 2.5, so an age level corresponds to birth year 2020 + 2.5 - level.
birth_year_level1 = 2020 + 2.5 - age_level1
birth_year_level2 = 2020 + 2.5 - age_level2

for birth_year in (birth_year_level1, birth_year_level2):
    axarr[1, 0].axvline(birth_year, c=colors.wetasphalt, ls=':')

# Coloured bands between the boundaries, using the colours of the age-group curves
age_bands = [
    ([1930, birth_year_level2 - 2.5], colors.wisteria),
    ([birth_year_level2 + 2.5, birth_year_level1 - 2.5], colors.pumpkin),
    ([birth_year_level1 + 2.5, 2000], colors.greensea),
]
for band_span, band_color in age_bands:
    axarr[1, 0].fill_between(band_span, 2.8, 2.9, color=band_color, alpha=0.3)

add_label(axarr.flatten(), pos=['upper right', 'upper right', 'upper left', 'lower left', 'upper right', 'upper left'], size=22)

# Common y-range for every panel except the violin plot
for ax in axarr.flatten()[1:]:
    ax.set_ylim(2.5, 3.8)

for column, vital, gender_bins, age_bins, vital_label in vital_panels:
    axarr[0, column].set_xlabel(vital_label)
    axarr[1, column].set_xlabel(vital_label)

axarr[0, 1].legend(loc='lower left')
axarr[1, 1].legend(loc='lower left')

axarr[1, 0].set_ylabel('WHO-5 Wellbeing')

for ax in axarr[:, 1:].flatten():
    ax.set_ylabel('Average WHO-5 Wellbeing')

plt.tight_layout()
plt.savefig('../output/draft/figure1_wellbeing_heartrate_steps.pdf')

# Figure 2a - Sleep and Wellbeing

In [None]:
# TODO: Add t-test!

In [None]:
def get_bins(vmin, vmax, binwidth):
    """Return bin edges of width `binwidth`, starting half a bin width below `vmin`.

    The edges are `np.arange(vmin - binwidth / 2, vmax + binwidth / 2, binwidth)`,
    i.e. the first bin is centred on `vmin`.
    """
    half_width = .5 * binwidth
    first_edge = vmin - half_width
    stop = vmax + half_width
    return np.arange(first_edge, stop, binwidth)

f, axarr = plt.subplots(2, 2, figsize=(6, 6), sharey=True)
df['v43_hr'] = df.v43 / 60

question_key = 'total_wellbeing_Z'

# One entry per panel:
# (axes, vital column, bin edges, position of the dotted reference line, x label)
sleep_panels = [
    # Sleep Onset
    (axarr[0, 0], 'v52', get_bins(-4 + 2/3, 2 + 1/3, 1/3), 0, 'Sleep Onset [hrs relative to midnight]'),
    # Sleep Offset
    (axarr[0, 1], 'v53', get_bins(4 + 2/3, 9, 1/3), 7, 'Sleep Offset [hrs relative to midnight]'),
    # Midsleep
    (axarr[1, 0], 'midsleep', get_bins(0.5 + 2 * 1/3, 6 - 2/3, 1/3), 3.5, 'Midsleep [hrs relative to midnight]'),
    # Sleep duration
    (axarr[1, 1], 'v43_hr', get_bins(5.5, 10, 0.25), 7.5, 'Sleep duration [hrs]'),
]

for panel, vital, bins, reference, panel_xlabel in sleep_panels:
    plot_survey_response_per_vitals(panel, df, vital, question_key, bins=bins, color=colors.midnightblue, err_fac=1.96)
    panel.axvline(reference, c=colors.midnightblue, ls=':')
    panel.set_xlabel(panel_xlabel)

# Finalize plot: the y axis is shared, so only the left column carries a y label
for left_panel, right_panel in axarr:
    left_panel.set_ylabel('Average WHO-5 Wellbeing Z-Scores')
    right_panel.set_ylabel(None)

add_label(axarr.flatten(), pos=['lower left', 'lower left', 'lower left', 'lower left'], size=22)

plt.tight_layout()

plt.savefig('../output/draft/figure2_wellbeing_sleep.pdf')

# Figure 2b - Variance of sleep and wellbeing

In [None]:
f, axarr = plt.subplots(2, 2, figsize=(6, 6), sharey=True)

# Curves drawn in every panel, in drawing order: (column suffix, colour, legend label)
variation_curves = [
    ('stdweekend', colors.pomegranate, 'Weekends'),
    ('stdweekday', colors.greensea, 'Weekdays'),
    ('std', colors.midnightblue, 'All Days'),
]

# One entry per panel: (axes, vital key, bin edges, x label)
variation_panels = [
    (axarr[0, 0], 'v52', np.arange(0, 3.1, 1/3), 'Standard Deviation in\nSleep Onset [hrs]'),
    (axarr[0, 1], 'v53', np.arange(0, 3.1, 1/3), 'Standard Deviation in\nSleep Offset [hrs]'),
    (axarr[1, 0], 'midsleep', np.arange(0, 2.5, 1/3), 'Standard Deviation in\nMidsleep [hrs]'),
    (axarr[1, 1], 'v43', np.arange(20, 120, 10), 'Standard Deviation in\nSleep Duration [mins]'),
]

for panel, vital_key, bins, panel_xlabel in variation_panels:
    for suffix, curve_color, curve_label in variation_curves:
        plot_survey_response_per_vitals(panel, df, f'{vital_key}{suffix}', 'total_wellbeing_Z', bins=bins,
                                        color=curve_color, err_fac=1.96, label=curve_label)
    panel.set_xlabel(panel_xlabel)

plt.tight_layout()

# The y axis is shared, so only the left column carries a y label
for left_panel, right_panel in axarr:
    left_panel.set_ylabel('Average WHO-5 Wellbeing Z-Scores')
    right_panel.set_ylabel(None)

add_label(axarr.flatten(), pos=['lower left', 'upper right', 'lower left', 'lower left'], size=22)

axarr[0, 1].legend(loc='lower left')

plt.savefig('../output/draft/figure3_wellbeing_sleep_variation.pdf')

# Figure 2c: 2a and 2b in one Figure

In [None]:
def get_bins(vmin, vmax, binwidth):
    """Return bin edges of width `binwidth`, starting half a bin width below `vmin`.

    The edges are `np.arange(vmin - binwidth / 2, vmax + binwidth / 2, binwidth)`,
    i.e. the first bin is centred on `vmin`.
    """
    # NOTE(review): identical to the `get_bins` defined in the Figure 2a cell;
    # consider defining the helper only once.
    half_width = .5 * binwidth
    first_edge = vmin - half_width
    stop = vmax + half_width
    return np.arange(first_edge, stop, binwidth)

f, axarr = plt.subplots(3, 4, figsize=(12, 9), sharey='row')
df['v43_hr'] = df.v43 / 60

question_key = 'total_wellbeing_Z'

# Top row, one entry per column:
# (vital column, bin edges, position of the dotted reference line, x label)
timing_panels = [
    # Sleep Onset
    ('v52', get_bins(-4 + 2/3, 2 + 1/3, 1/3), 0, 'Sleep Onset [hrs relative to midnight]'),
    # Sleep Offset
    ('v53', get_bins(4 + 2/3, 9, 1/3), 7, 'Sleep Offset [hrs relative to midnight]'),
    # Midsleep
    ('midsleep', get_bins(0.5 + 2 * 1/3, 6 - 2/3, 1/3), 3.5, 'Midsleep [hrs relative to midnight]'),
    # Sleep duration
    ('v43_hr', get_bins(5.5, 10, 0.25), 7.5, 'Sleep duration [hrs]'),
]

for panel, (vital_key, bins, reference, panel_xlabel) in zip(axarr[0], timing_panels):
    plot_survey_response_per_vitals(panel, df, vital_key, question_key, bins=bins, color=colors.midnightblue, err_fac=1.96)
    panel.axvline(reference, c=colors.midnightblue, ls=':')
    panel.set_xlabel(panel_xlabel)

# Middle and bottom row, one entry per column:
# (vital key, bin edges for the all-days row, bin edges for the weekend/weekday row, x label)
variation_panels = [
    ('v52', np.arange(1/6, 3.1, 1/6), np.arange(0, 3.1, 1/3), 'Standard Deviation in\nSleep Onset [hrs]'),
    ('v53', np.arange(1/6, 2.5, 1/6), np.arange(0, 3.1, 1/3), 'Standard Deviation in\nSleep Offset [hrs]'),
    ('midsleep', np.arange(0, 2.1, 1/6), np.arange(0, 2.5, 1/3), 'Standard Deviation in\nMidsleep [hrs]'),
    ('v43', np.arange(10, 120, 5), np.arange(10, 120, 10), 'Standard Deviation in\nSleep Duration [mins]'),
]

# Standard deviation over all days
for panel, (vital_key, bins, split_bins, panel_xlabel) in zip(axarr[1], variation_panels):
    plot_survey_response_per_vitals(panel, df, f'{vital_key}std', question_key, bins=bins,
                                    color=colors.midnightblue, err_fac=1.96, label='All Days')
    panel.set_xlabel(panel_xlabel)

# Standard deviation discriminated: (column suffix, colour, legend label), in drawing order
split_curves = [
    ('stdweekend', colors.pomegranate, 'Weekends'),
    ('stdweekday', colors.greensea, 'Weekdays'),
]

for panel, (vital_key, all_days_bins, bins, panel_xlabel) in zip(axarr[2], variation_panels):
    for suffix, curve_color, curve_label in split_curves:
        plot_survey_response_per_vitals(panel, df, f'{vital_key}{suffix}', question_key, bins=bins,
                                        color=curve_color, err_fac=1.96, label=curve_label)
    panel.set_xlabel(panel_xlabel)

# Finalize plot: the y axis is shared within each row, so only the first column is labelled
for panel_row in axarr:
    panel_row[0].set_ylabel('Average WHO-5 Wellbeing Z-Scores')

add_label(axarr.flatten(), pos=12 * ['lower left'], size=22)

axarr[2, 0].legend(loc='upper right')
plt.tight_layout()

plt.savefig('../output/draft/figure2_wellbeing_sleep_with_std.pdf')

# Figure 3

In [None]:
f, axarr = plt.subplots(2, 3, sharey=True)

question_key = 'total_wellbeing_Z'

# Every panel uses the same number of equal-width bins
bins = 50

# One vital per panel, in row-major panel order
difference_keys = [
    'social_jetlag', 'v52difference', 'v53difference',
    'v43difference', 'v9difference', 'v65difference',
]

for panel, difference_key in zip(axarr.flatten(), difference_keys):
    plot_survey_response_per_vitals(panel, df, difference_key, question_key, bins=bins, color=colors.midnightblue, err_fac=1.96)

for ax in axarr[:, 1:].flatten():
    ax.set_ylabel(None)

# Sandbox

In [None]:
# 2x2 grid for the violin plots below: raw and Z-scored wellbeing (columns)
# per salutation and per age group (rows)
f, axarr = plt.subplots(2, 2)

def plot_wellbeing_violin_alt(ax, df, question_key='total_wellbeing', group_key='salutation'):
    """Draw one violin of the per-user average of `question_key` per value of `group_key`.

    Rows of `df` are first reduced to one value per `user_id` (mean of
    `question_key`, max of `group_key`).
    """
    # Colours for the salutation values and for the age-group codes
    salutation_colors = {'M': colors.greensea, 'F': colors.pumpkin, 'D': colors.wisteria}
    age_group_colors = {0: colors.greensea, 1: colors.pumpkin, 2: colors.wisteria}
    palette = {**salutation_colors, **age_group_colors}

    per_user = df.groupby('user_id')[[group_key, question_key]].agg({group_key: 'max', question_key: 'mean'})

    violins = sns.violinplot(per_user, x=group_key, y=question_key, ax=ax, palette=palette, alpha=0.8)
    plt.setp(violins.collections, alpha=.4)

    hide_and_move_axis(ax)
    ax.set_xlabel(None)
    ax.set_ylabel('WHO-5 Wellbeing')

# Rows: grouping variable; columns: raw wellbeing and its Z-score
for row, grouping in enumerate(['salutation', 'age_group']):
    for column, response_key in enumerate(['total_wellbeing', 'total_wellbeing_Z']):
        plot_wellbeing_violin_alt(ax=axarr[row, column], df=df, question_key=response_key, group_key=grouping)

In [None]:
#['q49', 'q50', 'q54', 'q55', 'q56', 'total_wellbeing']

# English wording of the survey items, keyed by the name of the Z-score column
wording = dict(
    q49_Z='For the last four weeks I have been happy and in good spirits.',
    q50_Z='In the last four weeks I have felt calm and relaxed.',
    q54_Z='For the past four weeks, I have felt energetic and active.',
    q55_Z='For the past four weeks, I have felt fresh and rested when I wake up.',
    q56_Z='For the past four weeks, my daily life has been full of things that interest me.',
    total_wellbeing_Z='Average wellbeing',
)

## 