In [None]:
import pandas as pd
import numpy as np
# Route DataFrame.plot() calls in this notebook through the plotly backend.
pd.options.plotting.backend = 'plotly'

In [None]:
# Dose labels used as column names throughout the notebook.
doses = ['1+', '2+']
# Lower bounds of the 5-year age strata: 0, 5, ..., 75.
age_strata = [*range(0, 80, 5)]
# Release dates as they appear in the spreadsheet file names.
dates_to_read = ['28-february-2022', '22-december-2022']
# One 'Value' column per release, keyed by release date and indexed by the
# first spreadsheet column.
raw_data = {
    date: pd.read_excel(
        f'../data/covid-19-vaccination-vaccination-data-{date}.xlsx',
        index_col=0,
    )['Value']
    for date in dates_to_read
}

In [None]:
def get_cleaned_dec_data(raw_data, doses):
    """Gather per-age-group dose counts and population from the December release.

    Parameters
    ----------
    raw_data : label-indexed values (a pandas Series in this notebook)
        Must contain, for every age group, the
        'Age group - <group> - Number of people who have received at least
        <n> dose(s)' entries and the 'Age group - <group> - Population' entry.
    doses : iterable of str
        Dose labels; the first character of each label is used as the dose
        number in the lookup key, and '2+' selects the plural 'doses'.

    Returns
    -------
    pandas.DataFrame
        Indexed by age group ('16-19', '20-24', ..., '90-94', '95+') with one
        column per dose label plus a 'pop' column.
    """
    bands = ['16-19', *(f'{lower}-{lower + 4}' for lower in range(20, 95, 5)), '95+']

    def count_labels(dose):
        # Only the '2+' label uses the plural wording in the lookup key.
        plural = 's' if dose == '2+' else ''
        return [
            f'Age group - {band} - Number of people who have received at least {dose[0]} dose{plural}'
            for band in bands
        ]

    counts_by_dose = {dose: raw_data[count_labels(dose)].values for dose in doses}
    cleaned = pd.DataFrame(counts_by_dose, index=bands)
    population_labels = [f'Age group - {band} - Population' for band in bands]
    cleaned['pop'] = raw_data[population_labels].values
    return cleaned

def get_cleaned_feb_data(raw_data, doses):
    """Gather per-age-group dose counts and population from the February release.

    Parameters
    ----------
    raw_data : label-indexed values (a pandas Series in this notebook)
        Must contain the 'Number of people with 1 dose', 'Number of people
        fully vaccinated' and 'Population' entries for every age group.
    doses : unused
        Accepted so the signature matches the December cleaner; the output
        columns are always '1+' and '2+'.

    Returns
    -------
    pandas.DataFrame
        Indexed by age group ('16-19', '20-24', ..., '90-94', '95+') with
        columns '1+', '2+' and 'pop'.
    """
    bands = ['16-19']
    bands.extend(f'{lower}-{lower + 4}' for lower in range(20, 95, 5))
    bands.append('95+')

    first_dose_labels = [f'Age group - {band} - Number of people with 1 dose' for band in bands]

    # The fully-vaccinated lookup key for '95+' is built without a space
    # before the hyphen, unlike every other group (presumably mirroring the
    # labels in the spreadsheet -- verify against the file).
    fully_vaccinated_labels = []
    for band in bands:
        gap = '' if band == '95+' else ' '
        fully_vaccinated_labels.append(f'Age group - {band}{gap}- Number of people fully vaccinated')

    table = pd.DataFrame(
        {
            '1+': raw_data[first_dose_labels].values,
            '2+': raw_data[fully_vaccinated_labels].values,
        },
        index=bands,
    )
    table['pop'] = raw_data[[f'Age group - {band} - Population' for band in bands]].values
    return table

def get_modelled_dec_estimates(raw_data, cleaned_data, dose_labels=None, strata=None):
    """Map the December coverage data onto the model's 5-year age strata.

    Parameters
    ----------
    raw_data : label-indexed values (a pandas Series in this notebook)
        Must contain the 'National - Number of people 5-11 / 12-15 who have
        received at least <n> dose(s)' entries and the matching
        'National - Population 5-11 / 12-15' entries.
    cleaned_data : pandas.DataFrame
        Indexed by age group ('16-19', '20-24', ..., '95+') with one column
        per dose label plus a 'pop' column.
    dose_labels : list of str, optional
        Dose labels to compute. Defaults to the notebook-level ``doses``.
    strata : list of int, optional
        Lower bounds of the age strata. Defaults to the notebook-level
        ``age_strata``. The mapping below writes to strata 5, 10, 15 and 75
        and to ``strata[4:-1]``, so it assumes 5-year strata running from
        0 to 75.

    Returns
    -------
    pandas.DataFrame
        Coverage proportion per stratum (rows) and dose label (columns).
        Stratum 0 stays at 0.0. Strata 15 and above are capped at 1.0; the
        5 and 10 strata are written after the cap and are not capped.
    """
    # Fall back to the notebook-level configuration when not passed in.
    if dose_labels is None:
        dose_labels = doses
    if strata is None:
        strata = age_strata

    modelled_vacc = pd.DataFrame(0.0, index=strata, columns=dose_labels)
    population = cleaned_data['pop']
    for dose in dose_labels:
        counts = cleaned_data[dose]
        # Single-step .loc writes: chained `frame[col][row] = value` does not
        # update the frame under pandas Copy-on-Write.
        modelled_vacc.loc[15, dose] = counts['16-19'] / population['16-19']
        for age in strata[4:-1]:
            band = f'{age}-{age + 4}'
            modelled_vacc.loc[age, dose] = counts[band] / population[band]
        # Pool every group from '75-79' upward into the last stratum, using
        # the counts for the dose being processed (not always '1+').
        modelled_vacc.loc[75, dose] = (
            cleaned_data.loc['75-79':, dose].sum() / cleaned_data.loc['75-79':, 'pop'].sum()
        )
        # Cap proportions above 1.0.
        modelled_vacc.loc[modelled_vacc[dose] > 1.0, dose] = 1.0
        # Only the '2+' label uses the plural wording in the lookup key.
        s = 's' if dose == '2+' else ''
        # The 5-11 and 12-15 national figures fill the 5 and 10 strata.
        modelled_vacc.loc[5, dose] = (
            raw_data[f'National - Number of people 5-11 who have received at least {dose[0]} dose{s}']
            / raw_data['National - Population 5-11']
        )
        modelled_vacc.loc[10, dose] = (
            raw_data[f'National - Number of people 12-15 who have received at least {dose[0]} dose{s}']
            / raw_data['National - Population 12-15']
        )
    return modelled_vacc

def get_modelled_feb_estimates(raw_data, cleaned_data, dose_labels=None, strata=None):
    """Map the February coverage data onto the model's 5-year age strata.

    Parameters
    ----------
    raw_data : label-indexed values (a pandas Series in this notebook)
        Must contain the national 5-11 entries ('with 1 dose', 'fully
        vaccinated', 'Population 5-11'), the per-state
        '... Residence state - Number of people 12-15 ...' entries and
        'Age group - 12-15 - Population'.
    cleaned_data : pandas.DataFrame
        Indexed by age group ('16-19', '20-24', ..., '95+') with one column
        per dose label plus a 'pop' column.
    dose_labels : list of str, optional
        Dose labels to compute. Defaults to the notebook-level ``doses``.
        The 5 and 10 strata are always written to the '1+' and '2+' columns,
        so both labels should be included.
    strata : list of int, optional
        Lower bounds of the age strata. Defaults to the notebook-level
        ``age_strata``. The mapping below writes to strata 5, 10, 15 and 75
        and to ``strata[4:-1]``, so it assumes 5-year strata running from
        0 to 75.

    Returns
    -------
    pandas.DataFrame
        Coverage proportion per stratum (rows) and dose label (columns).
        Stratum 0 stays at 0.0. Strata 15 and above are capped at 1.0; the
        5 and 10 strata are written after the cap and are not capped.
    """
    # Fall back to the notebook-level configuration when not passed in.
    if dose_labels is None:
        dose_labels = doses
    if strata is None:
        strata = age_strata

    modelled_vacc = pd.DataFrame(0.0, index=strata, columns=dose_labels)
    population = cleaned_data['pop']
    for dose in dose_labels:
        counts = cleaned_data[dose]
        # Single-step .loc writes: chained `frame[col][row] = value` does not
        # update the frame under pandas Copy-on-Write.
        modelled_vacc.loc[15, dose] = counts['16-19'] / population['16-19']
        for age in strata[4:-1]:
            band = f'{age}-{age + 4}'
            modelled_vacc.loc[age, dose] = counts[band] / population[band]
        # Pool every group from '75-79' upward into the last stratum, using
        # the counts for the dose being processed (not always '1+').
        modelled_vacc.loc[75, dose] = (
            cleaned_data.loc['75-79':, dose].sum() / cleaned_data.loc['75-79':, 'pop'].sum()
        )
        # Cap proportions above 1.0.
        modelled_vacc.loc[modelled_vacc[dose] > 1.0, dose] = 1.0

    # The national 5-11 figures fill the 5 stratum.
    population_5_11 = raw_data['National - Population 5-11']
    modelled_vacc.loc[5, '1+'] = raw_data['National - Number of people 5-11 with 1 dose'] / population_5_11
    modelled_vacc.loc[5, '2+'] = raw_data['National - Number of people 5-11 fully vaccinated'] / population_5_11

    def residence_state_total(suffix):
        # Sum every row whose label contains the per-state 12-15 suffix;
        # non-string labels are skipped so the substring test cannot fail.
        matching = [label for label in raw_data.index if isinstance(label, str) and suffix in label]
        return raw_data[matching].sum()

    # The 12-15 figures are summed over the state rows and fill the 10 stratum.
    population_12_15 = raw_data['Age group - 12-15 - Population']
    modelled_vacc.loc[10, '1+'] = (
        residence_state_total(' Residence state - Number of people 12-15 with 1 dose') / population_12_15
    )
    modelled_vacc.loc[10, '2+'] = (
        residence_state_total(' Residence state - Number of people 12-15 fully vaccinated') / population_12_15
    )
    return modelled_vacc

In [None]:
# Each release has its own label wording, so each gets its own cleaner.
cleaned_data_dict = {
    '22-december-2022': get_cleaned_dec_data(raw_data['22-december-2022'], doses),
    '28-february-2022': get_cleaned_feb_data(raw_data['28-february-2022'], doses),
}

In [None]:
# Coverage proportions per model age stratum, one table per release.
modelled_vacc_dict = {
    '22-december-2022': get_modelled_dec_estimates(
        raw_data['22-december-2022'],
        cleaned_data_dict['22-december-2022'],
    ),
    '28-february-2022': get_modelled_feb_estimates(
        raw_data['28-february-2022'],
        cleaned_data_dict['28-february-2022'],
    ),
}

In [None]:
# Empty placeholder frame; none of the cells shown here fill or read it.
# NOTE(review): looks unused -- confirm before removing.
final_vacc_data = pd.DataFrame()

In [None]:
# Prefix the February columns so they stay distinguishable from the December
# ones once both tables are placed side by side. Columns that already carry
# the prefix are left alone, so re-running this cell does not produce
# 'feb_feb_...' names.
modelled_vacc_dict['28-february-2022'].columns = [
    col if str(col).startswith('feb_') else f'feb_{col}'
    for col in modelled_vacc_dict['28-february-2022'].columns
]

In [None]:
# December and February estimates side by side, one line per column.
coverage_by_release = pd.concat(
    [
        modelled_vacc_dict['22-december-2022'],
        modelled_vacc_dict['28-february-2022'],
    ],
    axis=1,
)
coverage_by_release.plot()