In [None]:
import pandas as pd
import numpy as np
from datenspende_who5.colors import flatuicolors as colors
from matplotlib import pyplot as plt
from datenspende_who5.styling import hide_and_move_axis
import string

In [None]:
# Load the derived per-user survey / vitals table.
df = pd.read_feather('../data/03_derived/input_data_users_surveys_rolling_vitals.feather')

# Drop a hand-picked set of user ids and birth_date values.
# NOTE(review): the reason for these exclusions is not recorded in this notebook — confirm.
df = df[~df.user_id.isin([1143114, 1143193, 1144681, 1147298, 1144157, 1155559])]
# Take an explicit copy so that the column assignments below write to an
# independent frame (avoids SettingWithCopyWarning on the filtered result).
df = df[~df.birth_date.isin([2004, 1984, 2005])].copy()

# Age relative to 2020.
# NOTE(review): the +2.5 offset presumably centres a 5-year birth cohort — confirm.
df['age'] = (2020 - df.birth_date + 2.5)

# Lower bounds of the middle and the oldest age group (left-inclusive).
age_level1 = 35
age_level2 = 60

# age_group: 0 -> [0, age_level1), 1 -> [age_level1, age_level2), 2 -> [age_level2, 100).
# Rows whose age falls outside [0, 100) keep a missing age_group.
df.loc[df['age'].between(0, age_level1, inclusive='left'), 'age_group'] = 0
df.loc[df['age'].between(age_level1, age_level2, inclusive='left'), 'age_group'] = 1
df.loc[df['age'].between(age_level2, 100, inclusive='left'), 'age_group'] = 2

In [None]:
def plot_wellbeing_per_gender(ax, df, binwidth=0.2):
    """Draw paired histograms of per-user mean wellbeing for 'M' and 'F' users.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw into.
    df : pandas.DataFrame
        Must contain the columns 'user_id', 'salutation' and 'total_wellbeing'.
        Rows are first collapsed to one per user ('max' of salutation,
        'mean' of total_wellbeing).
    binwidth : float
        Width of the histogram bins; bin centres start at 1 and the edges
        run up to 5 + binwidth / 2.

    Bar heights are the bin counts divided by the number of users with that
    salutation. Both groups share the same bin centres: the 'M' bars extend
    to the right of each centre and the 'F' bars to the left.
    """
    per_user = df.groupby('user_id')[['salutation', 'total_wellbeing']].agg(
        {'salutation': 'max', 'total_wellbeing': 'mean'}
    )

    # The bin geometry is identical for both groups, so build it once.
    edges = np.arange(1 - .5 * binwidth, 5 + .501 * binwidth, binwidth)
    centers = .5 * (edges[1:] + edges[:-1])
    bar_width = .4 * np.diff(centers)[0]

    # (salutation code, sign of the bar width, bar colour, legend label)
    groups = (
        ('M', 1., colors.greensea, 'Male'),
        ('F', -1, colors.pumpkin, 'Female'),
    )

    for code, direction, bar_color, legend_label in groups:
        scores = per_user.loc[per_user.salutation == code, 'total_wellbeing']
        counts, _ = np.histogram(scores, bins=edges)
        rel_freq = counts / len(scores)

        # align='edge' with a negative width anchors the bar's right edge at
        # the centre, so the two groups sit on either side of it.
        ax.bar(
            centers,
            rel_freq,
            width=direction * bar_width,
            align='edge',
            edgecolor='w',
            label=legend_label,
            color=bar_color,
        )

    hide_and_move_axis(ax)

    ax.legend()
    ax.set_xlabel('Average WHO-5 Wellbeing')
    ax.set_ylabel('Relative Frequency')
    
    
def plot_wellbeing_per_age(ax, df, color=colors.wisteria):
    """Plot mean wellbeing per birth_date with a shaded error band.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw into.
    df : pandas.DataFrame
        Must contain the columns 'user_id', 'birth_date' and 'total_wellbeing'.
    color : matplotlib colour
        Used for both the line and the shaded band.

    Values are averaged per user first, then per birth_date. The band spans
    mean +/- 1.96 * std / sqrt(count) of the per-user averages in each
    birth_date group.
    """
    # Stage 1: one row per user.
    per_user = df.groupby('user_id')[['birth_date', 'total_wellbeing']].mean()

    # Stage 2: summary statistics of the per-user averages for each birth_date.
    stats = per_user.groupby('birth_date')['total_wellbeing'].agg(['mean', 'std', 'count'])

    centre = stats['mean']
    half_width = 1.96 * stats['std'] / np.sqrt(stats['count'])

    ax.fill_between(stats.index, centre - half_width, centre + half_width, alpha=.3, color=color)
    ax.plot(centre, color=color, marker='o', markersize=4)

    hide_and_move_axis(ax)

    ax.set_xlabel('Birth Year')
    ax.set_ylabel('Average WHO-5 Wellbeing')
    ax.set_xticks(range(1930, 2010, 10))
    
    
def add_label(axarr, pos, size=20):
    """Write consecutive uppercase letters ('A', 'B', ...) onto a set of axes.

    Parameters
    ----------
    axarr : Axes, sequence of Axes, or ndarray of Axes
        The axes to label. Any shape is accepted (a single Axes, a list, or
        the array returned by ``plt.subplots``); it is traversed in
        flattened order.
    pos : sequence of str
        One entry per axes, indexed in the same flattened order. Recognised
        values are 'upper left', 'upper right', 'lower left' and
        'lower right'. Any other value leaves that axes unlabelled.
    size : int or float
        Font size passed to ``ax.text``.

    The text position is computed from the current x/y limits of each axes,
    so call this after plotting, once the limits are final.
    """
    # Position of the text anchor as a fraction of the x-range and y-range.
    anchor_fractions = {
        'upper left': (0.05, .87),
        'upper right': (0.95, .925),
        'lower left': (0.05, .05),
        'lower right': (0.95, .05),
    }

    # np.ravel (unlike ndarray.flatten) also accepts a bare Axes or a list.
    for i, ax in enumerate(np.ravel(axarr)):
        label = string.ascii_uppercase[i]

        fractions = anchor_fractions.get(pos[i])
        if fractions is None:
            # Unrecognised position: skip this axes without labelling it.
            continue
        x_frac, y_frac = fractions

        xmin, xmax = ax.get_xlim()
        ymin, ymax = ax.get_ylim()

        ax.text(xmin + x_frac * (xmax - xmin), ymin + y_frac * (ymax - ymin), label, size=size)

In [None]:
# Two-panel figure: wellbeing by gender (left) and by birth year (right).
f, axarr = plt.subplots(nrows=1, ncols=2, figsize=(10, 3.5))
gender_ax, age_ax = axarr

plot_wellbeing_per_gender(gender_ax, df, binwidth=.5)
plot_wellbeing_per_age(age_ax, df)

# Panel letters are placed from the final axis limits, so add them last.
add_label(axarr, size=25, pos=['upper left', 'lower left'])

f.savefig('../output/socio-demographics.pdf')