In [5]:
# libraries
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

# Population of each Italian region, keyed by the region name that is looked
# up from the mobility data's `sub_region_1` column.
population = {
    "Piedmont": 4311217,
    "Aosta": 125034,
    "Liguria": 1524826,
    "Lombardy": 10027602,
    "Trentino-South Tyrol": 1078069,
    "Veneto": 4879133,
    "Friuli-Venezia Giulia": 1206216,
    "Emilia-Romagna": 4464119,
    "Tuscany": 3692555,
    "Umbria": 870165,
    "Marche": 1512672,
    "Lazio": 5755700,
    "Abruzzo": 1293941,
    "Molise": 300516,
    "Campania": 5712143,
    "Apulia": 3953305,
    "Basilicata": 553254,
    "Calabria": 1894110,
    "Sicily": 4875290,
    "Sardinia": 1611621,
}

# Italian regions grouped into the macro-areas that are aggregated separately
north_west_regions = [
    "Piedmont",
    "Aosta",
    "Liguria",
    "Lombardy",
]
north_east_regions = [
    "Trentino-South Tyrol",
    "Veneto",
    "Friuli-Venezia Giulia",
    "Emilia-Romagna",
]
center_regions = [
    "Tuscany",
    "Umbria",
    "Marche",
    "Lazio",
]
south_regions = [
    "Abruzzo",
    "Molise",
    "Campania",
    "Apulia",
    "Basilicata",
    "Calabria",
]
sicily_regions = ["Sicily"]
sardinia_regions = ["Sardinia"]

# Mobility columns feeding each contact setting.
# NOTE(review): home_cols is not referenced by the code visible in this section.
home_cols = [
    "residential_percent_change_from_baseline",
]
work_cols = [
    "workplaces_percent_change_from_baseline",
]
other_loc_cols = [
    "retail_and_recreation_percent_change_from_baseline",
    "transit_stations_percent_change_from_baseline",
]

# define functions
def add_week_of_year(date):
    """Return the ISO year and ISO week of ``date`` as ``"<year>-<week>"``.

    The week number is zero-padded to two digits (e.g. ``"2020-07"``). Both
    parts come from ``date.isocalendar()``, so days around New Year are
    labelled with the ISO year their week belongs to.

    Parameters
    ----------
    date : object exposing ``isocalendar()`` (e.g. ``datetime`` or a pandas
        ``Timestamp``)

    Returns
    -------
    str
    """
    iso_parts = date.isocalendar()
    iso_year, iso_week = iso_parts[0], iso_parts[1]
    # zfill(2) reproduces the explicit "-0" prefix used for weeks 1-9
    return str(iso_year) + "-" + str(iso_week).zfill(2)


def save_reductions(df_basin, savename):
    """Compute weekly contact-reduction factors for one basin and save them as CSV.

    For every day the percent change from baseline is turned into a factor
    ``(1 + pct / 100) ** 2``; the factors are then averaged per ``year_week``
    and written to

    * ``../../basins/<savename>/restrictions/work.csv``       (from ``work_cols``)
    * ``../../basins/<savename>/restrictions/other_loc.csv``  (from ``other_loc_cols``)

    Weekend workplace values are replaced by the value of the preceding row
    (Saturday takes Friday's value, Sunday then takes Saturday's replaced
    value). When there is no usable preceding value - notably when the series
    starts on a weekend - the day keeps its own measurement instead of
    becoming NaN, so the first week is not lost from ``work.csv``.

    Parameters
    ----------
    df_basin : pandas.DataFrame
        Must contain a datetime ``date`` column, a ``year_week`` column and
        the columns listed in ``work_cols`` / ``other_loc_cols``.
        The frame is modified in place: it is sorted by ``date`` and gains the
        columns ``work_red`` and ``oth_red``.
    savename : str
        Name of the basin directory under ``../../basins/``. The target
        ``restrictions`` directory is not created here.

    Returns
    -------
    None
    """
    # sort by date so that shift(1) below means "previous day"
    df_basin.sort_values(by="date", inplace=True)

    # loc work columns, fix weekends, compute contacts reduction
    raw_work = df_basin[work_cols].mean(axis=1)
    df_basin["work_red"] = raw_work
    weekday = df_basin["date"].dt.weekday
    for weekend_day in (5, 6):
        # recomputed on every pass so Sunday sees Saturday's replaced value
        previous_day = df_basin["work_red"].shift(1)
        df_basin.loc[weekday == weekend_day, "work_red"] = previous_day
    # a weekend with no preceding value (start of the series) keeps its own value
    df_basin["work_red"] = df_basin["work_red"].fillna(raw_work)
    df_basin["work_red"] = (1 + df_basin["work_red"] / 100)**2
    df_basin.groupby(by="year_week")["work_red"].mean().to_csv("../../basins/" + savename + "/restrictions/work.csv")

    # loc oth columns, compute contacts reduction
    df_basin["oth_red"] = df_basin[other_loc_cols].mean(axis=1)
    df_basin["oth_red"] = (1 + df_basin["oth_red"] / 100)**2
    df_basin.groupby(by="year_week")["oth_red"].mean().to_csv("../../basins/" + savename + "/restrictions/other_loc.csv")


def weighted_avg_by_regions(regions):
    """Population-weighted daily mobility averages over a group of regions.

    Reads three module-level objects: ``df`` (the mobility rows), ``dates``
    (the dates to report, in output order) and ``population`` (weights keyed
    by ``sub_region_1`` name).

    Only region-level rows are used, i.e. rows whose ``sub_region_1`` is in
    ``regions`` and whose ``sub_region_2`` is missing. For each date and each
    metric the result is ``sum(value * population) / sum(population)`` taken
    over the regions that actually report a value for that metric on that
    date; a region with a missing value is left out of both sums instead of
    turning the whole average into NaN. A date with no usable value yields NaN
    rather than a division by zero.

    Parameters
    ----------
    regions : list of str
        ``sub_region_1`` names to aggregate.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``dates`` with the columns ``date`` (datetime),
        ``workplaces_percent_change_from_baseline``,
        ``retail_and_recreation_percent_change_from_baseline``,
        ``transit_stations_percent_change_from_baseline`` and ``year_week``.

    Raises
    ------
    KeyError
        If a selected row belongs to a region without a ``population`` entry.
    """
    metric_cols = ["workplaces_percent_change_from_baseline",
                   "retail_and_recreation_percent_change_from_baseline",
                   "transit_stations_percent_change_from_baseline"]

    # loc regions (region-level rows only); a fresh RangeIndex keeps the
    # index-aligned arithmetic below safe even if df has repeated labels
    is_selected = df.sub_region_1.isin(regions) & df.sub_region_2.isna()
    df_region = df.loc[is_selected, ["sub_region_1", "date"] + metric_cols].reset_index(drop=True)

    unknown = sorted(set(df_region["sub_region_1"]) - set(population))
    if unknown:
        raise KeyError("no population available for: " + ", ".join(unknown))

    weights = df_region["sub_region_1"].map(population).astype(float)
    values = df_region[metric_cols]

    # numerator: sum of value * population per date (NaN only if every value is missing)
    weighted_sum = values.mul(weights, axis=0).groupby(df_region["date"]).sum(min_count=1)
    # denominator: population of the regions that reported each metric on that date
    weight_sum = values.notna().astype(float).mul(weights, axis=0).groupby(df_region["date"]).sum()

    # one row per requested date, in the order given by `dates`
    df_wavg = weighted_sum.div(weight_sum).reindex(pd.Index(dates, name="date")).reset_index()
    df_wavg["date"] = pd.to_datetime(df_wavg["date"])
    df_wavg["year_week"] = df_wavg["date"].apply(add_week_of_year)
    return df_wavg

In [6]:
# Load the 2020 and 2021 regional mobility reports
df20 = pd.read_csv("./2020_IT_Region_Mobility_Report.csv")
df21 = pd.read_csv("./2021_IT_Region_Mobility_Report.csv")

# Concatenate 2020 and 2021 (2020 rows first, original row labels kept)
df = pd.concat([df20, df21])

# Unique report dates, in order of first appearance
dates = df["date"].unique()

df.head()

Unnamed: 0,country_region_code,country_region,sub_region_1,sub_region_2,metro_area,iso_3166_2_code,census_fips_code,place_id,date,retail_and_recreation_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline,parks_percent_change_from_baseline,transit_stations_percent_change_from_baseline,workplaces_percent_change_from_baseline,residential_percent_change_from_baseline
0,IT,Italy,,,,,,ChIJA9KNRIL-1BIRb15jJFz1LOI,2020-02-15,3.0,-1.0,35.0,9.0,0.0,-1.0
1,IT,Italy,,,,,,ChIJA9KNRIL-1BIRb15jJFz1LOI,2020-02-16,3.0,2.0,26.0,11.0,1.0,-2.0
2,IT,Italy,,,,,,ChIJA9KNRIL-1BIRb15jJFz1LOI,2020-02-17,0.0,1.0,7.0,3.0,1.0,0.0
3,IT,Italy,,,,,,ChIJA9KNRIL-1BIRb15jJFz1LOI,2020-02-18,3.0,1.0,16.0,4.0,1.0,0.0
4,IT,Italy,,,,,,ChIJA9KNRIL-1BIRb15jJFz1LOI,2020-02-19,0.0,-1.0,11.0,3.0,1.0,0.0


In [7]:
# Population-weighted daily averages, one frame per macro-area
df_north_west = weighted_avg_by_regions(regions=north_west_regions)
df_north_east = weighted_avg_by_regions(regions=north_east_regions)
df_center = weighted_avg_by_regions(regions=center_regions)
df_south = weighted_avg_by_regions(regions=south_regions)
df_sicily = weighted_avg_by_regions(regions=sicily_regions)
df_sardinia = weighted_avg_by_regions(regions=sardinia_regions)

In [8]:
# Write the weekly work / other-location reduction files for every basin
save_reductions(df_basin=df_north_west, savename="Italy-northwest")
save_reductions(df_basin=df_north_east, savename="Italy-northeast")
save_reductions(df_basin=df_center, savename="Italy-center")
save_reductions(df_basin=df_south, savename="Italy-south")
save_reductions(df_basin=df_sicily, savename="Italy-sicily")
save_reductions(df_basin=df_sardinia, savename="Italy-sardinia")