In [1]:
import pandas as pd
import numpy as np

# Population of every Italian region / autonomous province, keyed by the
# region name. The source and reference year of the figures are not
# recorded here.
population = {
    "Piemonte": 4_311_217,
    "Valle d'Aosta": 125_034,
    "Liguria": 1_524_826,
    "Lombardia": 10_027_602,
    "Provincia autonoma Trento": 538_223,
    "Provincia autonoma Bolzano": 520_891,
    "Veneto": 4_879_133,
    "Friuli Venezia Giulia": 1_206_216,
    "Emilia-Romagna": 4_464_119,
    "Toscana": 3_692_555,
    "Umbria": 870_165,
    "Marche": 1_512_672,
    "Lazio": 5_755_700,
    "Abruzzo": 1_293_941,
    "Molise": 300_516,
    "Campania": 5_712_143,
    "Puglia": 3_953_305,
    "Basilicata": 553_254,
    "Calabria": 1_894_110,
    "Sicilia": 4_875_290,
    "Sardegna": 1_611_621,
}

def convert_color(row):
    """Translate the colour stored in ``row["colore"]`` into a numeric level.

    The levels are 'bianco' -> 0, 'giallo' -> 1, 'arancione' -> 2 and
    'rosso' -> 3. Any other value (different spelling or casing, missing
    data, ...) matches nothing and the function returns ``None``.
    """
    # Position in this tuple is the numeric level of the colour.
    ordered_colors = ('bianco', 'giallo', 'arancione', 'rosso')
    for level, color_name in enumerate(ordered_colors):
        if row["colore"] == color_name:
            return level
    return None
    
# Italian regions, grouped into the macro-regions aggregated in this notebook.
north_west_regions = [
    "Piemonte",
    "Valle d'Aosta",
    "Liguria",
    "Lombardia",
]
north_east_regions = [
    "Provincia autonoma Trento",
    "Provincia autonoma Bolzano",
    "Veneto",
    "Friuli Venezia Giulia",
    "Emilia-Romagna",
]
center_regions = [
    "Toscana",
    "Umbria",
    "Marche",
    "Lazio",
]
south_regions = [
    "Abruzzo",
    "Molise",
    "Campania",
    "Puglia",
    "Basilicata",
    "Calabria",
]
# The two islands are single-region groups.
sicily_regions = ["Sicilia"]
sardinia_regions = ["Sardegna"]

# Load the per-region, per-day restriction colours from GitHub and add a
# numeric version of the colour (None/NaN where convert_color finds no match).
url = 'https://raw.githubusercontent.com/imcatta/restrizioni_regionali_covid/main/dataset.csv'
df_colori = pd.read_csv(url)
# convert_color already takes a row, so it can be handed to apply directly.
df_colori["colore_num"] = df_colori.apply(convert_color, axis=1)
df_colori.head()

Unnamed: 0,data,denominazione_regione,colore,colore_num
0,2020-11-06,Abruzzo,giallo,1
1,2020-11-07,Abruzzo,giallo,1
2,2020-11-08,Abruzzo,giallo,1
3,2020-11-09,Abruzzo,giallo,1
4,2020-11-10,Abruzzo,arancione,2


In [2]:
def get_macroregion_color(regions, colors_df=None, populations=None):
    """Population-weighted average restriction level of a group of regions, per date.

    Parameters
    ----------
    regions : list of str
        Values of ``denominazione_regione`` to aggregate.
    colors_df : pandas.DataFrame, optional
        Frame with the columns ``data``, ``denominazione_regione`` and
        ``colore_num``. Defaults to the notebook-level ``df_colori``.
    populations : mapping, optional
        Region name -> weight. Defaults to the notebook-level ``population``.

    Returns
    -------
    pandas.DataFrame
        One row per distinct ``data`` value of ``colors_df`` (sorted), with
        the columns ``date``, ``colore_num`` (the weighted mean) and
        ``colore_num_round`` (``np.round`` of that mean). A date gets NaN
        when none of ``regions`` has a row for it, or when any contributing
        ``colore_num`` is missing.

    Raises
    ------
    KeyError
        If a selected region has no entry in ``populations``.
    """
    if colors_df is None:
        colors_df = df_colori
    if populations is None:
        populations = population

    # The output always spans every date of the full dataset, not only the
    # dates on which the selected regions happen to have rows.
    dates = np.sort(colors_df["data"].unique())

    selected = colors_df.loc[colors_df["denominazione_regione"].isin(regions)]

    # Fail loudly (as a plain dict lookup would) instead of letting an
    # unknown region silently turn into a NaN weight.
    missing = [name for name in selected["denominazione_regione"].unique()
               if name not in populations]
    if missing:
        raise KeyError("no population for region(s): {}".format(missing))

    weights = selected["denominazione_regione"].map(populations)
    weighted = selected["colore_num"] * weights

    by_date = selected["data"]
    num = weighted.groupby(by_date).sum()
    den = weights.groupby(by_date).sum()
    # GroupBy.sum skips NaN; a date with any missing colour must stay NaN
    # rather than be averaged over the remaining regions only.
    incomplete = weighted.isna().groupby(by_date).any()

    # Reindexing on all dates leaves NaN where the selection has no rows,
    # which previously ended in a 0 / 0 ZeroDivisionError.
    values = (num / den).mask(incomplete).reindex(dates).to_numpy(dtype=float)

    df_aggr = pd.DataFrame(data={"date": dates, "colore_num": values})
    df_aggr["colore_num_round"] = np.round(df_aggr["colore_num"])
    return df_aggr

In [3]:
def _aggregate_and_save(regions, csv_path):
    """Aggregate the colours of `regions` and write them to `csv_path` (no index column)."""
    df_aggr = get_macroregion_color(regions)
    df_aggr.to_csv(csv_path, index=False)
    return df_aggr


# One aggregated frame and one color.csv per basin, computed and written in turn.
df_aggr_north_west = _aggregate_and_save(
    north_west_regions, "../../basins/Italy-northwest/restrictions/color.csv")
df_aggr_north_east = _aggregate_and_save(
    north_east_regions, "../../basins/Italy-northeast/restrictions/color.csv")
df_aggr_center = _aggregate_and_save(
    center_regions, "../../basins/Italy-center/restrictions/color.csv")
df_aggr_south = _aggregate_and_save(
    south_regions, "../../basins/Italy-south/restrictions/color.csv")
df_aggr_sicily = _aggregate_and_save(
    sicily_regions, "../../basins/Italy-sicily/restrictions/color.csv")
df_aggr_sardinia = _aggregate_and_save(
    sardinia_regions, "../../basins/Italy-sardinia/restrictions/color.csv")