In [None]:
import pandas as pd
import numpy as np
from datenspende_who5.colors import flatuicolors as colors
from matplotlib import pyplot as plt
from datenspende_who5.styling import hide_and_move_axis
import string
from textwrap import wrap
import seaborn as sns

In [None]:
# Load the derived table of users, surveys and rolling vitals.
df = pd.read_feather('../data/03_derived/input_data_users_surveys_rolling_vitals.feather')

# Drop a fixed list of user ids and birth-year values
# (NOTE(review): exclusion criteria are not visible here — presumably outliers; confirm).
df = df[~df.user_id.isin([1143114, 1143193, 1144681, 1147298, 1144157, 1155559])]
# .copy() so that the column assignments below write to an independent frame
# instead of a filtered view (avoids SettingWithCopyWarning).
df = df[~df.birth_date.isin([2004, 1984, 2005])].copy()

# Age relative to 2020, shifted by 2.5 years
# (presumably birth_date is a 5-year bin label and +2.5 moves to the bin centre — TODO confirm).
df['age'] = (2020 - df.birth_date + 2.5)

# Age thresholds separating the three age groups. The same constants position the
# dotted lines / colour bands in the figure and match the legend labels '<40', '40-65', '65+'.
age_level1 = 40
age_level2 = 65

# Half-open intervals [lower, upper): 0 = below age_level1, 1 = between the two
# thresholds, 2 = age_level2 and above (up to 100). Rows outside [0, 100) keep NaN.
# Previously these cut-offs were hard-coded as 35 / 60, which disagreed with the
# thresholds declared above and with the labels used in the figure.
df.loc[df['age'].between(0, age_level1, inclusive='left'), 'age_group'] = 0
df.loc[df['age'].between(age_level1, age_level2, inclusive='left'), 'age_group'] = 1
df.loc[df['age'].between(age_level2, 100, inclusive='left'), 'age_group'] = 2

In [None]:
def plot_wellbeing_per_gender(ax, df, binwidth=0.2):
    """Draw back-to-back relative-frequency histograms of per-user wellbeing by gender.

    Parameters
    ----------
    ax : axes-like object
        Target axes; must provide ``bar``, ``legend``, ``set_xlabel`` and ``set_ylabel``.
    df : pandas.DataFrame
        Must contain the columns ``user_id``, ``salutation`` and ``total_wellbeing``.
    binwidth : float, optional
        Width of the histogram bins; the bin edges run from ``1 - binwidth / 2``
        to just past ``5 + binwidth / 2``.
    """
    # One row per user: max of the salutation values and mean wellbeing.
    per_user = (
        df.groupby('user_id')[['salutation', 'total_wellbeing']]
        .agg({'salutation': 'max', 'total_wellbeing': 'mean'})
    )

    # The .501 factor pushes the stop value slightly past the last edge so that
    # np.arange includes it.
    edges = np.arange(1 - .5 * binwidth, 5 + .501 * binwidth, binwidth)

    # (salutation code, bar direction, colour, legend label).
    # With align='edge', a positive width draws the bar to the right of the bin
    # centre and a negative width draws it to the left.
    gender_styles = (
        ('M', 1., colors.greensea, 'Male'),
        ('F', -1., colors.pumpkin, 'Female'),
    )

    for code, direction, bar_color, legend_label in gender_styles:
        scores = per_user.loc[per_user.salutation == code, 'total_wellbeing']

        counts, bin_edges = np.histogram(scores, bins=edges)
        centers = .5 * (bin_edges[1:] + bin_edges[:-1])
        bar_width = .4 * np.diff(centers)[0]
        rel_freq = counts / len(scores)

        ax.bar(centers, rel_freq, width=direction * bar_width, align='edge',
               edgecolor='w', label=legend_label, color=bar_color)

    hide_and_move_axis(ax)

    ax.legend()
    ax.set_xlabel('Average WHO-5 Wellbeing')
    ax.set_ylabel('Relative Frequency')
    
    
def plot_wellbeing_per_gender_violin(ax, df):
    """Draw violin plots of per-user mean wellbeing for female and male users.

    Parameters
    ----------
    ax : matplotlib axes
        Target axes.
    df : pandas.DataFrame
        Must contain the columns ``user_id``, ``salutation`` and ``total_wellbeing``.
        Only users whose aggregated salutation is 'F' or 'M' are drawn.
    """
    color = {'M': colors.greensea, 'F': colors.pumpkin}

    # Fix the category order explicitly. The tick labels and the x-spans of the
    # mean lines below assume 'F' on the left and 'M' on the right; without
    # `order`, seaborn uses the order of appearance in the data, which could
    # silently swap labels and mean lines.
    order = ['F', 'M']

    # One row per user: max of the salutation values and mean wellbeing.
    per_user = df.groupby('user_id')[['salutation', 'total_wellbeing']].agg(
        {'salutation': 'max', 'total_wellbeing': 'mean'})

    vio = sns.violinplot(data=per_user[per_user.salutation.isin(order)],
                         x='salutation', y='total_wellbeing', order=order, ax=ax,
                         palette=color, alpha=0.8, cut=0, inner=None)
    # Make the violin bodies translucent.
    plt.setp(vio.collections, alpha=.4)

    # Horizontal mean markers; xmin/xmax are axes fractions: right part for 'M',
    # left part for 'F'.
    ax.axhline(per_user[per_user.salutation == 'M'].total_wellbeing.mean(), xmin=.6, xmax=.9, c=color['M'], lw=2)
    ax.axhline(per_user[per_user.salutation == 'F'].total_wellbeing.mean(), xmin=.1, xmax=.4, c=color['F'], lw=2)

    hide_and_move_axis(ax)
    ax.set_xticklabels(['Female', 'Male'])
    ax.set_xlabel(None)
    ax.set_ylabel('WHO-5 Wellbeing')

    
def plot_wellbeing_per_age(ax, df, color=colors.wetasphalt):
    """Plot mean wellbeing per birth year with a 1.96 * standard-error band.

    Parameters
    ----------
    ax : matplotlib axes
        Target axes.
    df : pandas.DataFrame
        Must contain the columns ``user_id``, ``birth_date`` and ``total_wellbeing``.
    color : colour spec, optional
        Colour used for both the line and the shaded band.
    """
    # Stage 1: collapse to one row per user (mean birth_date and mean wellbeing).
    per_user = df.groupby('user_id')[['birth_date', 'total_wellbeing']].mean()

    # Stage 2: summarise the per-user wellbeing within each birth_date value.
    stats = per_user.groupby('birth_date')['total_wellbeing'].agg(['mean', 'std', 'count'])
    stats['err'] = 1.96 * stats['std'] / np.sqrt(stats['count'])

    lower = stats['mean'] - stats['err']
    upper = stats['mean'] + stats['err']
    ax.fill_between(stats.index, lower, upper, alpha=.3, color=color)
    ax.plot(stats['mean'], color=color, marker='o', markersize=4)

    hide_and_move_axis(ax)

    ax.set_xlabel('Birth Year')
    ax.set_ylabel('Average WHO-5 Wellbeing')
    ax.set_xticks(range(1930, 2010, 10))
    
    
def add_label(axarr, pos, size=20):
    """Write a panel letter ('A', 'B', ...) into a corner of each axes.

    Parameters
    ----------
    axarr : array of axes
        Object with a ``flatten()`` method yielding the axes; the i-th axes
        receives the i-th uppercase ASCII letter.
    pos : sequence of str
        One position keyword per axes: 'upper left', 'upper right',
        'lower left' or 'lower right'. Any other value leaves that axes
        unlabelled.
    size : int, optional
        Font size passed to ``ax.text``.

    Raises
    ------
    IndexError
        If ``pos`` has fewer entries than there are axes, or if there are
        more than 26 axes.
    """
    # Label centre in axes-fraction coordinates for every supported keyword.
    anchors = {
        'upper left': (0.075, 0.925),
        'upper right': (0.925, 0.925),
        'lower left': (0.075, 0.075),
        'lower right': (0.925, 0.075),
    }

    for i, ax in enumerate(axarr.flatten()):
        label = string.ascii_uppercase[i]

        anchor = anchors.get(pos[i])
        if anchor is None:
            # Unknown keyword: skip silently (same as the previous behaviour).
            continue

        x, y = anchor
        ax.text(x, y, label, horizontalalignment='center', verticalalignment='center',
                transform=ax.transAxes, size=size)
            
            
def plot_survey_response_per_vitals(ax, df, vital_key, question_key, color='r', bins=40, label=None, xlabel=None, min_count=50):
    """Plot the binned mean of a survey answer against a vital sign.

    Rows with a missing value in either column are dropped, the vital is cut
    into ``bins``, and for each bin the mean answer is drawn together with a
    1.96 * standard-error band.

    Parameters
    ----------
    ax : matplotlib axes
        Target axes.
    df : pandas.DataFrame
        Must contain the columns ``vital_key`` and ``question_key``.
        The frame passed in is not modified.
    vital_key : str
        Column that is binned and shown on the x axis.
    question_key : str
        Column that is averaged within each bin.
    color : colour spec, optional
        Colour of the line and the band.
    bins : int or sequence, optional
        Forwarded to ``pandas.cut``.
    label : str, optional
        Legend label of the line.
    xlabel : str, optional
        Label of the x axis.
    min_count : int, optional
        Only bins with strictly more than ``min_count`` rows are drawn
        (default 50, the previously hard-coded threshold).
    """
    data = df[[vital_key, question_key]].dropna()

    # Replace each interval by its midpoint so bins can be placed on a numeric axis.
    intervals = pd.cut(data[vital_key], bins)
    centers = intervals.apply(lambda iv: iv.left + 0.5 * (iv.right - iv.left))

    # observed=False keeps the established behaviour for a categorical key
    # explicit (empty bins get count 0 and are removed by the filter below) and
    # avoids the pandas FutureWarning about the changing default.
    stats = data.groupby(centers, observed=False)[question_key].agg(['mean', 'count', 'std'])
    stats['err'] = 1.96 * stats['std'] / np.sqrt(stats['count'])
    stats = stats[stats['count'] > min_count]

    ax.fill_between(stats.index, stats['mean'] - stats['err'], stats['mean'] + stats['err'], alpha=.3, color=color)
    ax.plot(stats['mean'], color=color, marker='o', markersize=3, label=label)

    hide_and_move_axis(ax)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Average WHO-5 Wellbeing')

In [None]:
f, axarr = plt.subplots(2, 3, figsize=(10, 6))

# (salutation code, line colour, legend label) for the top-row curves.
gender_series = [
    ('M', colors.greensea, 'Male'),
    ('F', colors.pumpkin, 'Female'),
]
# (age_group code, line colour, legend label) for the bottom-row curves.
age_series = [
    (0, colors.greensea, '<40'),
    (1, colors.pumpkin, '40-65'),
    (2, colors.wisteria, '65+'),
]
# (panel column, vital column, bins for the gender split, bins for the age split).
vital_panels = [
    (1, 'v65', np.arange(30, 91, 1), np.arange(30, 91, 2)),
    (2, 'v9', np.arange(0, 25000, 500), np.arange(0, 25000, 1000)),
]

# Top row: wellbeing by gender, then wellbeing vs. each vital split by gender.
plot_wellbeing_per_gender_violin(axarr[0, 0], df)

for column, vital_key, gender_bins, _ in vital_panels:
    for salutation, line_color, legend_label in gender_series:
        plot_survey_response_per_vitals(axarr[0, column], df[df.salutation == salutation], vital_key,
                                        'total_wellbeing', bins=gender_bins, color=line_color, label=legend_label)

# Bottom row: wellbeing by birth year, then wellbeing vs. each vital split by age group.
age_ax = axarr[1, 0]
plot_wellbeing_per_age(age_ax, df)

for column, vital_key, _, age_bins in vital_panels:
    for group, line_color, legend_label in age_series:
        plot_survey_response_per_vitals(axarr[1, column], df[df.age_group == group], vital_key,
                                        'total_wellbeing', bins=age_bins, color=line_color, label=legend_label)

# Birth years at which the `age` column (2020 - birth_date + 2.5) equals each age threshold.
birth_cut_young = -(age_level1 - 2.5 - 2020)
birth_cut_old = -(age_level2 - 2.5 - 2020)

for birth_cut in (birth_cut_young, birth_cut_old):
    age_ax.axvline(birth_cut, c=colors.wetasphalt, ls=':')

# Thin colour bands marking the three age groups, inset by 2.5 years around each threshold.
age_bands = [
    ([1930, birth_cut_old - 2.5], colors.wisteria),
    ([birth_cut_old + 2.5, birth_cut_young - 2.5], colors.pumpkin),
    ([birth_cut_young + 2.5, 2000], colors.greensea),
]
for span, band_color in age_bands:
    age_ax.fill_between(span, 2.8, 2.9, color=band_color, alpha=0.3)

add_label(axarr.flatten(), pos=['upper right', 'upper right', 'upper left', 'lower left', 'upper right', 'upper left'], size=22)

# Shared y range for every panel except the violin plot.
for row, column in [(0, 1), (0, 2), (1, 0), (1, 1), (1, 2)]:
    axarr[row, column].set_ylim(2.5, 3.8)

for row in (0, 1):
    axarr[row, 1].legend(loc='lower left')

for row in (0, 1):
    axarr[row, 1].set_xlabel('Resting heart rate')

for row in (0, 1):
    axarr[row, 2].set_xlabel('Daily Step Count')

age_ax.set_ylabel('WHO-5 Wellbeing')

plt.tight_layout()
plt.savefig('../output/figure1_wellbeing_heartrate_steps.pdf')