In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

pd.set_option('display.max_rows', 500)
import os
plots_folder = "./plots/mortality"
# Create the output directory with the standard library instead of shelling
# out to `mkdir -p`, so the cell also works where no POSIX shell is available.
os.makedirs(plots_folder, exist_ok=True)


# Load the mortality table straight from the world_mortality GitHub repository.
df_mor = pd.read_csv("https://raw.githubusercontent.com/akarlinsky/world_mortality/main/world_mortality.csv")

# Harmonise a few country names in a single pass (exact-match replacement).
# NOTE(review): presumably these match the spelling used by the population
# table merged further down — confirm against that file.
df_mor["country_name"] = df_mor["country_name"].replace(
    {
        "Bosnia": "Bosnia and Herzegovina",
        "Czechia": "Czech Republic",
        "Macao": "Macau",
        "Réunion": "Reunion",
    }
)

In [None]:
# Distinct country names present in the mortality table.
df_mor["country_name"].unique()

In [None]:
# Baseline statistics: for every (country, time slot) pair, the mean, standard
# deviation and variance of `deaths` over all rows with year < 2020.
df_mor_baseline = (
    df_mor.loc[df_mor["year"] < 2020]
    .groupby(["country_name", "time"])["deaths"]
    .agg(mean_deaths="mean", std_deaths="std", var_deaths="var")
    .reset_index()
)

In [None]:
# Display the baseline table (one row per country and time slot).
df_mor_baseline

In [None]:
# Attach the baseline statistics to every row. Any baseline columns left over
# from a previous run of this cell are dropped first, so re-running the cell
# does not produce suffixed duplicates (mean_deaths_x / mean_deaths_y).
df_mor = (
    df_mor.drop(columns=["mean_deaths", "std_deaths", "var_deaths"], errors="ignore")
    .merge(df_mor_baseline, on=["country_name", "time"], how="left")
)

# Excess deaths relative to the baseline mean, with a band of +/- 1.645
# baseline standard deviations.
# NOTE(review): 1.645 is presumably the normal z-score for a 90% two-sided
# interval — confirm the intended confidence level.
df_mor["excess_deaths"] = df_mor["deaths"] - df_mor["mean_deaths"]
df_mor["excess_deaths_hi"] = df_mor["deaths"] - (df_mor["mean_deaths"] - 1.645 * df_mor["std_deaths"])
df_mor["excess_deaths_lo"] = df_mor["deaths"] - (df_mor["mean_deaths"] + 1.645 * df_mor["std_deaths"])

# Running totals within each (country, year). GroupBy.cumsum keeps the original
# row index, so the result aligns with df_mor on assignment; the previous
# `.apply(lambda x: x.cumsum())` form returns a group-key-prefixed index on
# pandas >= 2.0 and cannot be assigned back as a column.
df_mor["cumulative_excess_deaths"] = (
    df_mor.groupby(["country_name", "year"])["excess_deaths"].cumsum()
)
# Standard deviation of the running total: square root of the summed baseline
# variances (i.e. the time slots are treated as independent).
df_mor["std_cumulative_excess_deaths"] = np.sqrt(
    df_mor.groupby(["country_name", "year"])["var_deaths"].cumsum()
)
df_mor["cumulative_excess_deaths_hi"] = df_mor["cumulative_excess_deaths"] + 1.645 * df_mor["std_cumulative_excess_deaths"]
df_mor["cumulative_excess_deaths_lo"] = df_mor["cumulative_excess_deaths"] - 1.645 * df_mor["std_cumulative_excess_deaths"]


In [None]:
# Inspect the cumulative columns next to their per-period inputs.
df_mor.loc[
    :,
    [
        "cumulative_excess_deaths",
        "excess_deaths",
        "var_deaths",
        "std_cumulative_excess_deaths",
    ],
]

In [None]:
# Colour list of matplotlib's currently configured property cycle.
colors = (
    plt.rcParams["axes.prop_cycle"]
    .by_key()["color"]
)
colors

In [None]:
def plot_mortality(varname: str, country: str):
    """Plot one mortality variable for ``country`` and save the figure as PNG.

    One line is drawn per year from 2020 onwards, against the ``time`` column
    (week or month of the year). When ``varname`` is ``"deaths"`` the baseline
    mean is overlaid as a dashed black line with a +/- 1.645 std band. For any
    other variable a dashed zero line is drawn and every yearly line gets a
    shaded band between its ``<varname>_lo`` and ``<varname>_hi`` columns.

    The function reads the notebook-level ``df_mor``, ``df_mor_baseline`` and
    ``plots_folder`` and writes ``{plots_folder}/{country}_{varname}.png``.

    Args:
        varname: Column of ``df_mor`` to plot. Unless it is ``"deaths"``, the
            columns ``<varname>_lo`` and ``<varname>_hi`` must exist as well.
        country: Value of ``df_mor.country_name`` to select.

    Raises:
        ValueError: If ``df_mor`` contains no rows for ``country``.
    """
    df_country = df_mor[df_mor.country_name == country]
    # Validate before touching matplotlib. The check used to sit inside the
    # per-year loop, which never runs for an unknown country, so callers got an
    # UnboundLocalError on x_max instead of this message.
    if len(df_country.index) == 0:
        raise ValueError(f"Wrong country name: {country}")

    plt.figure(figsize=(10, 5), dpi=100)

    # Weekly axis settings by default, so they are defined even when the
    # country has no rows from 2020 onwards.
    x_max = 55
    interval = 2

    years = [y for y in df_country["year"].unique() if y >= 2020]
    for year in years:
        df_plt = df_country[df_country.year == year]
        # As before, the last plotted year decides the axis settings.
        if (df_plt.time_unit == "monthly").all():
            x_max = 13
            interval = 1
        else:
            x_max = 55
            interval = 2
        (line,) = plt.plot(df_plt.time, df_plt[varname], label=year, linewidth=1.8)
        if varname != "deaths":
            # Take the band colour from the line itself rather than indexing a
            # global palette, which raised IndexError past the palette length.
            plt.fill_between(df_plt.time,
                             df_plt[varname + "_lo"],
                             df_plt[varname + "_hi"],
                             alpha=0.1,
                             color=line.get_color())

    df_base = df_mor_baseline[df_mor_baseline.country_name == country]
    if varname == "deaths":
        # Raw deaths are compared against the baseline mean and its band.
        plt.plot(df_base.time, df_base.mean_deaths, label="baseline",
                 linewidth=1.8, color="black", linestyle="dashed")
        plt.fill_between(df_base.time,
                         df_base.mean_deaths - 1.645 * df_base.std_deaths,
                         df_base.mean_deaths + 1.645 * df_base.std_deaths,
                         alpha=0.1,
                         color="black")
    else:
        # Excess variables are already relative to the baseline: reference is 0.
        plt.axhline(y=0, color='black', linestyle='dashed', linewidth=1.8)

    plt.grid()
    plt.legend()
    label = "Week of the year"
    if x_max < 50:
        label = "Month of the year"
    plt.xlabel(label, fontsize=12)
    plt.title(f"{country} {varname}".replace("_", " "), fontsize=15)
    plt.ylabel("N. deaths", fontsize=12)
    plt.xticks(np.arange(0, x_max, interval))
    plt.savefig(f"{plots_folder}/{country}_{varname}.png", bbox_inches="tight", facecolor="w")

In [None]:
# Countries for which the full set of mortality plots is generated.
countries = [
    "Israel",
    "Netherlands",
    "South Korea",
    "Italy",
    "United States",
    "Denmark",
    "Singapore",
]

In [None]:
# Render every variable for every selected country, in alphabetical order.
for country in sorted(countries):
    for varname in ("deaths", "excess_deaths", "cumulative_excess_deaths"):
        plot_mortality(country=country, varname=varname)

In [None]:
# Raw deaths against the baseline for Japan.
plot_mortality(country="Japan", varname="deaths")

In [None]:
# Cumulative excess deaths for Albania.
plot_mortality(country="Albania", varname="cumulative_excess_deaths")

### Population

In [None]:
# Load the population table and keep only the country name (renamed to match
# the mortality table's key) and the `pop2020` column.
df_pop = (
    pd.read_csv("./data/population-by-country.csv")
    .rename(columns={"name": "country_name"})
    [["country_name", "pop2020"]]
)

In [None]:
# Attach the population to every mortality row. A `pop2020` column left over
# from a previous run of this cell is dropped first; otherwise a re-run yields
# pop2020_x / pop2020_y and the division below fails with a KeyError.
df_mor = (
    df_mor.drop(columns=["pop2020"], errors="ignore")
    .merge(df_pop, on="country_name", how="left")
)
# Cumulative excess deaths divided by 1000 * pop2020.
# NOTE(review): the factor 1000 suggests pop2020 is stored in thousands, which
# would make this a per-capita figure — confirm against the population file.
df_mor["cumulative_excess_deaths_pop"] = df_mor["cumulative_excess_deaths"] / (1000 * df_mor["pop2020"])

In [None]:
# Display the mortality table after the population merge.
df_mor

In [None]:
# Italy only: population-scaled cumulative excess deaths by year and time slot.
df_mor.loc[
    df_mor["country_name"] == "Italy",
    ["year", "time", "country_name", "cumulative_excess_deaths_pop"],
]

In [None]:
# Sorted list of country names in the population table. The column was renamed
# from `name` to `country_name` when df_pop was loaded, so it has to be read
# under the new name; `df_pop.name` raises AttributeError on a fresh run.
sorted(df_pop["country_name"].unique())