In [None]:
# Directory of the CSV files used by this notebook. The path is relative, so it
# resolves against the working directory the kernel was started in.
BASE_PATH = '../../covid19-opendata-vaccini/dati'

In [None]:
def get_fn(fn):
    """Return the path of the file ``<fn>.csv`` inside ``BASE_PATH``."""
    csv_name = f'{fn}.csv'
    return os.path.join(BASE_PATH, csv_name)

# Vaccines

In [None]:
def collapse_to_80_plus(x):
    """Fold the '80-89' and '90+' age bands into a single '80+' band.

    Any other value is returned unchanged.
    """
    return '80+' if x in ('90+', '80-89') else x

def get_complete_vacc(r):
    """Return the doses of row ``r`` that count as a completed cycle.

    Rows whose ``fornitore`` is 'Janssen' contribute their ``prima_dose``;
    every other row contributes its ``seconda_dose``.
    """
    is_janssen = r.fornitore == 'Janssen'
    return r.prima_dose if is_janssen else r.seconda_dose

def get_got_first_no_jansen(r):
    """Return the ``prima_dose`` of row ``r``, or 0 when ``fornitore`` is 'Janssen'."""
    return 0 if r.fornitore == 'Janssen' else r.prima_dose

def get_in_attesa_seconda(r):
    """Return ``prima_dose - seconda_dose`` for row ``r``, or 0 for 'Janssen' rows."""
    if r.fornitore != 'Janssen':
        return r.prima_dose - r.seconda_dose
    return 0

In [None]:
def get_daily_vacc_by_dim(df, dims=None, by_dose=True, rolling=7):
    """Aggregate administrations per day and smooth them with a rolling mean.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``data_somministrazione`` (parsed with ``pd.to_datetime``),
        ``solo_prima``, ``ciclo_completo`` and every column named in ``dims``.
    dims : str or iterable of str, optional
        Extra grouping columns. ``None`` or an empty iterable aggregates by
        date only; a bare string is treated as a single column name.
    by_dose : bool, default True
        If True, keep one row per date/dims/dose and smooth each dose series
        separately. If False, add the two dose columns together first and
        smooth the total.
    rolling : int, default 7
        Window of the rolling mean. The window counts rows of each series, not
        calendar days, so dates missing from ``df`` are skipped rather than
        counted as zero. ``min_periods`` is 1, so the first rows are averaged
        over a shorter window instead of being NaN.

    Returns
    -------
    pandas.DataFrame
        Columns ``date``, ``*dims``, ``value`` (daily total), ``n_vacc``
        (rolling mean of ``value``) and, when ``by_dose`` is True, ``dose``
        (``'solo prima'`` or ``'ciclo completo'``).
    """
    # Normalise ``dims`` to a plain list: an empty list would otherwise reach
    # ``groupby([])`` (which raises), a tuple would be read by ``groupby`` as a
    # single key, and a bare string would be unpacked character by character.
    if dims is None:
        dims = []
    elif isinstance(dims, str):
        dims = [dims]
    else:
        dims = list(dims)

    dose_cols = ['solo_prima', 'ciclo_completo']
    keys = ['date', *dims]

    def _rolling_mean(series):
        """Rolling mean over ``rolling`` rows, tolerating a short leading window."""
        return series.rolling(rolling, min_periods=1).mean()

    def _smooth(frame, group_cols):
        """Rolling mean of ``value`` in row order, restarted for every group."""
        if not group_cols:
            return _rolling_mean(frame['value'])
        return frame.groupby(group_cols)['value'].transform(_rolling_mean)

    long_df = (
        df
        .assign(date=lambda x: pd.to_datetime(x.data_somministrazione))
        .groupby(keys)
        [dose_cols]
        .sum()
        .reset_index()
        .melt(id_vars=keys, value_vars=dose_cols)
        # Date is the leading sort key, so each per-group series handed to the
        # rolling mean below is in chronological order.
        .sort_values(keys)
        .reset_index(drop=True)
    )

    if not by_dose:
        return (
            long_df
            .groupby(keys)[['value']].sum().reset_index()
            .assign(n_vacc=lambda x: _smooth(x, dims))
        )

    return (
        long_df
        .assign(n_vacc=lambda x: _smooth(x, [*dims, 'variable']))
        .assign(dose=lambda x: x.variable.str.replace('_', ' '))
        .drop('variable', axis=1)
    )

In [None]:
# Total population per age band: `totale_popolazione` from platea.csv summed
# over all rows sharing the same `fascia_anagrafica`.
pop_by_age_df = (
    pd.read_csv(os.path.join(BASE_PATH, 'platea.csv'))
    .groupby('fascia_anagrafica', as_index=False)
    [['totale_popolazione']]
    .sum()
)

In [None]:
# Load the administration records, drop the geographic code columns, merge the
# '80-89' and '90+' age bands into '80+', and derive three per-row dose counts
# with the helper functions defined above.
vacc_df = (
    pd.read_csv(os.path.join(BASE_PATH, 'somministrazioni-vaccini-latest.csv'))
    .drop(columns=['codice_NUTS1', 'codice_NUTS2', 'codice_regione_ISTAT'])
    .assign(
        fascia_anagrafica=lambda rows: rows.fascia_anagrafica.map(collapse_to_80_plus),
        ciclo_completo=lambda rows: rows.apply(get_complete_vacc, axis=1),
        solo_prima=lambda rows: rows.apply(get_got_first_no_jansen, axis=1),
        in_attesa_seconda=lambda rows: rows.apply(get_in_attesa_seconda, axis=1),
    )
)
vacc_df.shape  # (rows, columns); original author's note: 89182, 11

In [None]:
# Cumulative dose counts per age band, joined with the band's population and
# expressed as fractions of it (ratios, not percentages). `merge` uses its
# default inner join, so bands missing from either frame are dropped.
overall_age_df = (
    vacc_df
    .groupby('fascia_anagrafica', as_index=False)
    [['prima_dose', 'seconda_dose', 'ciclo_completo', 'solo_prima', 'in_attesa_seconda']]
    .sum()
    .merge(pop_by_age_df, on='fascia_anagrafica')
    .rename(columns={'fascia_anagrafica': 'age'})
    .assign(
        prima=lambda band: band.prima_dose.div(band.totale_popolazione),
        seconda=lambda band: band.seconda_dose.div(band.totale_popolazione),
        complete=lambda band: band.ciclo_completo.div(band.totale_popolazione),
        waiting_second=lambda band: band.in_attesa_seconda.div(band.totale_popolazione),
    )
)

In [None]:
# Smoothed daily vaccinations per age band (both dose columns added together),
# plus `share`: that daily figure as a percentage of the band's population.
vacc_by_age_df = (
    get_daily_vacc_by_dim(vacc_df, dims=['fascia_anagrafica'], by_dose=False, rolling=7)
    .merge(pop_by_age_df, how='inner', on='fascia_anagrafica')
    .assign(share=lambda daily: daily.n_vacc.mul(100).div(daily.totale_popolazione))
)

In [None]:
# Export the smoothed daily series per age band and dose to
# `daily_vaccines_by_age_doses.csv` under BASE_PATH. `to_csv` is left at its
# defaults, so the row index is written as well.
get_daily_vacc_by_dim(
    vacc_df, dims=['fascia_anagrafica'], by_dose=True, rolling=7
).to_csv(get_fn('daily_vaccines_by_age_doses'))