In [None]:
import pandas as pd
from matplotlib import pyplot as plt
from long_covid.colors import flatuicolors
from long_covid import styling
import numpy as np

In [None]:
# Load the processed users table from its feather file; it is passed to the
# plotting function further down.
USERS = pd.read_feather(path='../data/02_processed/users_processed.feather')

In [None]:
def figure_si_age_groups(
    users,
    population_path='../data/00_external/statistic_id1365_bevoelkerung-deutschlands-nach-relevanten-altersgruppen-2020.xlsx',
    output_path='../output/si_figure5_age_groups_new.jpg',
):
    """Plot the cohort's age-group distribution next to a reference population.

    The ``age`` column of ``users`` is binned into six age groups and drawn as
    relative frequencies, side by side with the relative group sizes taken
    from the ``2020`` column of the population spreadsheet. The figure is
    written to ``output_path``.

    Parameters
    ----------
    users : table with an ``age`` column (e.g. a pandas DataFrame)
        Study participants; presumably ages are given in years - verify
        against the processing step that creates the table.
    population_path : str, optional
        Excel workbook with the reference population (sheet ``Daten``).
        Defaults to the file this figure has always used.
    output_path : str, optional
        Target file of the saved figure (written at 400 dpi). Defaults to the
        original output location.

    Returns
    -------
    None
    """
    # Half-open bins [0, 21), [21, 25), [25, 40), [40, 60), [60, 65) plus the
    # closed last bin [65, 100]; np.histogram does not count values outside
    # the outer edges.
    # NOTE(review): the last group is labelled '65+', yet ages above 100 are
    # dropped by the upper edge - confirm this is intended.
    age_bin_edges = [0, 21, 25, 40, 60, 65, 100]
    # Labels mirror the bin edges. The second bin starts at 21, so it is
    # '21-24' (it used to read '20-24', overlapping the '0-20' group).
    age_group_labels = ['0-20', '21-24', '25-39', '40-59', '60-64', '65+']

    fig, ax = plt.subplots(figsize=(5, 3.8))

    cohort_counts, _ = np.histogram(users['age'], bins=age_bin_edges)
    cohort_shares = cohort_counts / cohort_counts.sum()

    german_pop = pd.read_excel(
        population_path,
        sheet_name='Daten',
        header=4,
        usecols=[1, 2],
        nrows=10,
    )
    population_2020 = german_pop['2020']

    # Collapse the first five spreadsheet rows into a single group so the
    # reference data lines up with the six cohort bins.
    # NOTE(review): presumably those rows are the age brackets below 21 -
    # verify against the spreadsheet.
    german_vals = np.append(
        [population_2020.iloc[:5].sum()],
        population_2020.iloc[5:].to_numpy(),
    )
    # Divide out of place: an in-place `/=` raises a casting error when the
    # column was read as an integer dtype.
    german_vals = german_vals / german_vals.sum()

    # Two bar series per group, shifted left/right around the shared tick.
    ax.bar(np.arange(len(cohort_shares)) - 0.22, cohort_shares, width=0.37, label='Study cohort', color=flatuicolors.midnightblue)
    ax.bar(np.arange(len(german_vals)) + 0.22, german_vals, width=0.37, label='German Population', color=flatuicolors.concrete)

    ax.set_xticks(np.arange(len(cohort_shares)))
    ax.set_xticklabels(age_group_labels)

    styling.hide_and_move_axis(ax)
    ax.legend(loc='upper left')

    ax.set_xlabel('Age group')
    ax.set_ylabel('Relative frequency')

    ax.set_ylim(0, 0.6)
    fig.tight_layout()
    fig.savefig(output_path, dpi=400)

In [None]:
# Draw the age-group comparison for the loaded users and save the figure.
figure_si_age_groups(users=USERS)