In [6]:
# libraries
import pandas as pd 
import numpy as np
import pickle as pkl
from datetime import timedelta, datetime

# Italian regions, grouped into the six macro-areas processed below.
# The names are compared verbatim with the `nome_area` column of the
# vaccination dataset, so spelling and punctuation must not be altered.
north_west_regions = [
    "Piemonte",
    "Valle d'Aosta / Vallée d'Aoste",
    "Liguria",
    "Lombardia",
]
north_east_regions = [
    "Provincia Autonoma Trento",
    "Provincia Autonoma Bolzano / Bozen",
    "Veneto",
    "Friuli-Venezia Giulia",
    "Emilia-Romagna",
]
center_regions = [
    "Toscana",
    "Umbria",
    "Marche",
    "Lazio",
]
south_regions = [
    "Abruzzo",
    "Molise",
    "Campania",
    "Puglia",
    "Basilicata",
    "Calabria",
]
# the two islands are single-region macro-areas
sicily_regions = ["Sicilia"]
sardinia_regions = ["Sardegna"]

# Vaccine suppliers: label found in the dataset's `fornitore` column -> short
# tag used when composing the name of the output pickle file.
fornitori_names = dict([
    ("Pfizer/BioNTech", "pfizer"),
    ("Moderna", "moderna"),
    ("Vaxzevria (AstraZeneca)", "astrazeneca"),
    ("Janssen", "JJ"),
])

# supplier labels, in the order they are processed (dict insertion order)
fornitori = list(fornitori_names)

# 1. Vaccinations 

In [7]:
# Age band of the source dataset -> slot(s) of the 10-element model age vector.
# A band that lists several slots has its doses split evenly among them.
# Slot 0 has no source band and therefore always stays at zero.
_AGE_BAND_TO_SLOTS = {
    '16-19': (1,),    # 10-19
    '20-29': (2, 3),  # 20-24 and 25-29, half each
    '30-39': (4,),    # 30-39
    '40-49': (5,),    # 40-49
    '50-59': (6,),    # 50-59
    '60-69': (7,),    # 60-69
    '70-79': (8,),    # 70-79
    '80-89': (9,),    # 80+
    '90+':   (9,),    # 80+
}


def get_vacc_regions(regions, fornitore, savename,
                     start_date=datetime(2020, 9, 1), end_date=datetime(2021, 7, 26)):
    """Build the daily first-dose counts per model age group for a set of regions.

    The national administration file is downloaded, restricted to the given
    supplier and regions, and `prima_dose` is summed per day and age band.
    The bands are then mapped onto the 10 model age groups listed in
    `_AGE_BAND_TO_SLOTS`. The result is pickled to
    ``../../basins/<savename>/vaccinations/vaccinations_<tag>.pkl`` where
    ``<tag>`` is ``fornitori_names[fornitore]``.

    Parameters
    ----------
    regions : list of str
        Values of the `nome_area` column to keep.
    fornitore : str
        Value of the `fornitore` column to keep; must be a key of `fornitori_names`.
    savename : str
        Name of the basin folder under ``../../basins/`` receiving the pickle.
    start_date, end_date : datetime
        First and last day (inclusive) of the returned series, stepping one day
        at a time from `start_date`. Defaults reproduce the original window.

    Returns
    -------
    dict
        ``{datetime: np.ndarray of length 10}`` with one entry per day of the
        window; days without data hold zeros.

    Raises
    ------
    KeyError
        If `fornitore` is not a key of `fornitori_names`.
    """
    url = "https://raw.githubusercontent.com/italia/covid19-opendata-vaccini/master/dati/somministrazioni-vaccini-latest.csv"

    # import data and keep only the requested supplier / regions
    df = pd.read_csv(url)
    df["data_somministrazione"] = pd.to_datetime(df["data_somministrazione"])
    selected = df.loc[(df["fornitore"] == fornitore) & df["nome_area"].isin(regions)]

    # first doses summed over the selected regions, indexed by (date, age band)
    first_doses = selected.groupby(["data_somministrazione", "fascia_anagrafica"])["prima_dose"].sum()

    # one zero vector per day of the window (10 = n. of our age groups)
    vaccinations = {}
    day = start_date
    while day <= end_date:
        vaccinations[day] = np.zeros(10)
        day += timedelta(days=1)

    # Single pass over the aggregated rows instead of filtering the whole frame
    # once per (day, age band). The index holds pandas Timestamps, which hash and
    # compare like the equivalent datetime, so they address the dict keys directly.
    for (day, age), doses in first_doses.items():
        slots = _AGE_BAND_TO_SLOTS.get(age)
        # rows outside the window, or with an age band that has no mapping, are ignored
        if slots is None or day not in vaccinations:
            continue
        share = doses / len(slots)
        for slot in slots:
            vaccinations[day][slot] += share

    with open("../../basins/" + savename + "/vaccinations/vaccinations_" + fornitori_names[fornitore] + ".pkl", "wb") as file:
        pkl.dump(vaccinations, file)

    return vaccinations

In [8]:
# For every supplier, compute (and pickle, via get_vacc_regions) the first-dose
# series of each macro-area. The six result names are rebound on every pass, so
# after the loop they hold the series of the last supplier in `fornitori`.
for fornitore in fornitori:
    print(fornitore)
    (
        vacc_northwest_primadose,
        vacc_northeast_primadose,
        vacc_center_primadose,
        vacc_south_primadose,
        vacc_sicily_primadose,
        vacc_sardinia_primadose,
    ) = [
        get_vacc_regions(area_regions, fornitore, basin_name)
        for area_regions, basin_name in (
            (north_west_regions, "Italy-northwest"),
            (north_east_regions, "Italy-northeast"),
            (center_regions, "Italy-center"),
            (south_regions, "Italy-south"),
            (sicily_regions, "Italy-sicily"),
            (sardinia_regions, "Italy-sardinia"),
        )
    ]

Pfizer/BioNTech
Moderna
Vaxzevria (AstraZeneca)
Janssen
