In [1]:
import pandas as pd
import numpy as np
import openpyxl

import warnings
# Installs a blanket "ignore" filter: every warning category is silenced
# from this point on in the kernel session.
# NOTE(review): this also hides pandas/numpy deprecation notices; consider
# narrowing the filter to the specific warning that motivated it.
warnings.filterwarnings("ignore")

import sys
import os

# os.path.abspath('') resolves to the current working directory; taking
# dirname() of it yields the PARENT of the working directory.
# NOTE(review): the variable is called notebook_dir but it is one level above
# the cwd -- confirm that this is the intended anchor.
cwd_path = os.path.abspath('')
notebook_dir = os.path.dirname(cwd_path)

# Pick the directory to expose on sys.path (presumably so the `paths` module
# can be imported afterwards). The check is a plain substring test on the
# whole path string: if 'PythonPrep' appears anywhere in it, use notebook_dir
# as-is; otherwise use the 'PythonPrep' folder next to notebook_dir.
if 'PythonPrep' in notebook_dir:
    pythonprep_dir = notebook_dir
else:
    pythonprep_dir = os.path.join(os.path.dirname(notebook_dir), 'PythonPrep')
sys.path.append(pythonprep_dir)

from paths import main_path

In [6]:
# ------------------------------------------------------------------
# Read the raw CSV and normalise column names / dtypes
# ------------------------------------------------------------------
file_path = main_path + "/Democracy/PythonData/excess-deaths-cumulative-per-100k.csv"

# Only these three columns are read from the file; everything else is skipped.
wanted_cols = [
    "Entity",
    "Day",
    "Cumulative excess deaths per 100,000 people (central estimate)",
]
df = pd.read_csv(file_path, usecols=wanted_cols)

# The rest of the notebook refers to the entity column as "country".
df = df.rename(columns={"Entity": "country"})

# Parse Day so that the .dt accessor can be used on it later.
df["Day"] = pd.to_datetime(df["Day"])

# ------------------------------------------------------------------
# Convert the cumulative series to per-observation increments
# ------------------------------------------------------------------
# Name of the cumulative column exactly as it appears in the CSV.
cum_col = "Cumulative excess deaths per 100,000 people (central estimate)"

# groupby().diff() is positional, so the rows of each country must be in
# chronological order. Rows with a missing cumulative value are dropped first:
# otherwise the row AFTER a gap has a NaN diff, and back-filling that NaN with
# the cumulative value would count the whole running total as one increment.
df = (
    df.dropna(subset=[cum_col])
      .sort_values(["country", "Day"], kind="stable")
      .reset_index(drop=True)
)

# Change since the previous observation of the same country (this is a daily
# change only if the file has one row per day -- the column name is kept
# because later cells refer to it).
step = df.groupby("country")[cum_col].diff()

# Only the first observation of each country has no predecessor; seed it with
# the cumulative value itself so the increments sum back to the last total.
first_obs = ~df["country"].duplicated()
df["excess_mortality_daily_100k"] = step.where(~first_obs, df[cum_col])

# ------------------------------------------------------------------
# Yearly totals per country, then the average of the 2020-2022 totals
# ------------------------------------------------------------------
df["Year"] = df["Day"].dt.year

# One row per (country, Year) holding the sum of that year's increments.
yearly_groups = df.groupby(["country", "Year"], as_index=False)
df_yearly = yearly_groups["excess_mortality_daily_100k"].sum()

# Restrict to calendar years 2020-2022 (both bounds inclusive) and average the
# yearly totals per country. A country with fewer than three years in the
# window is averaged over the years it actually has.
in_window = df_yearly["Year"].between(2020, 2022)
window_means = (
    df_yearly[in_window]
    .groupby("country", as_index=False)["excess_mortality_daily_100k"]
    .mean()
)

# Final table: renamed value column, largest rate first, fresh 0..n-1 index.
df1 = (
    window_means
    .rename(columns={"excess_mortality_daily_100k": "excess_mortality_rate_100k"})
    .sort_values("excess_mortality_rate_100k", ascending=False)
    .reset_index(drop=True)
)

# Preview the five highest-ranked rows.
print(df1.head())

           country  excess_mortality_rate_100k
0         Bulgaria                  363.855800
1           Russia                  316.301767
2           Serbia                  306.731667
3        Lithuania                  306.558567
4  North Macedonia                  278.824633


In [7]:
# Alphabetical list of entity names treated as countries, one line per
# initial letter.
# NOTE(review): no cell shown in this notebook applies this list to df1 --
# confirm whether a filtering step is missing or lives elsewhere.
countries_only = [
    # A
    "Afghanistan", "Albania", "Algeria", "Andorra", "Angola", "Antigua and Barbuda", "Argentina", "Armenia", "Australia", "Austria", "Azerbaijan",
    # B
    "Bahamas", "Bahrain", "Bangladesh", "Barbados", "Belarus", "Belgium", "Belize", "Benin", "Bhutan", "Bolivia", "Bosnia and Herzegovina", "Botswana", "Brazil", "Brunei", "Bulgaria", "Burkina Faso", "Burundi",
    # C
    "Cambodia", "Cameroon", "Canada", "Cape Verde", "Chile", "China", "Colombia", "Comoros", "Congo", "Costa Rica", "Cote d'Ivoire", "Croatia", "Cuba", "Cyprus", "Czechia",
    # D
    "Denmark", "Djibouti", "Dominica", "Dominican Republic",
    # E
    "East Timor", "Ecuador", "Egypt", "El Salvador", "Equatorial Guinea", "Eritrea", "Estonia", "Eswatini", "Ethiopia",
    # F
    "Fiji", "Finland", "France",
    # G
    "Gabon", "Gambia", "Georgia", "Germany", "Ghana", "Greece", "Grenada", "Guatemala", "Guinea", "Guinea-Bissau", "Guyana",
    # H
    "Haiti", "Honduras", "Hungary",
    # I
    "Iceland", "India", "Indonesia", "Iran", "Iraq", "Ireland", "Israel", "Italy",
    # J
    "Jamaica", "Japan", "Jordan",
    # K
    "Kazakhstan", "Kenya", "Kiribati", "Kosovo", "Kuwait", "Kyrgyzstan",
    # L
    "Laos", "Latvia", "Lebanon", "Lesotho", "Liberia", "Libya", "Liechtenstein", "Lithuania", "Luxembourg",
    # M
    "Madagascar", "Malawi", "Malaysia", "Maldives", "Mali", "Malta", "Mauritania", "Mauritius", "Mexico", "Moldova", "Monaco", "Mongolia", "Montenegro", "Morocco", "Mozambique", "Myanmar",
    # N
    "Namibia", "Nepal", "Netherlands", "New Zealand", "Nicaragua", "Niger", "Nigeria", "North Macedonia", "Norway",
    # O
    "Oman",
    # P
    "Pakistan", "Palestine", "Panama", "Papua New Guinea", "Paraguay", "Peru", "Philippines", "Poland", "Portugal",
    # Q
    "Qatar",
    # R
    "Romania", "Russia", "Rwanda",
    # S
    "Saint Kitts and Nevis", "Saint Lucia", "Saint Vincent and the Grenadines", "Samoa", "San Marino", "Sao Tome and Principe", "Saudi Arabia", "Senegal", "Serbia", "Seychelles", "Sierra Leone", "Singapore", "Slovakia", "Slovenia", "Solomon Islands", "Somalia", "South Africa", "South Korea", "South Sudan", "Spain", "Sri Lanka", "Sudan", "Suriname", "Sweden", "Switzerland", "Syria",
    # T
    "Taiwan", "Tajikistan", "Tanzania", "Thailand", "Togo", "Tonga", "Trinidad and Tobago", "Tunisia", "Turkey", "Turkmenistan",
    # U
    "Uganda", "Ukraine", "United Arab Emirates", "United Kingdom", "United States", "Uruguay", "Uzbekistan",
    # V
    "Vanuatu", "Venezuela", "Vietnam",
    # Y
    "Yemen",
    # Z
    "Zambia", "Zimbabwe",
]
# Cell output: number of names in the list.
len(countries_only)

187

In [8]:
# Unweighted average of the rate over every row of df1.
# NOTE(review): df1 is not filtered by countries_only in the cells shown, so
# any non-country entities present in the source file are included -- confirm.
df1["excess_mortality_rate_100k"].mean()

86.41256396005625

In [None]:
# Write the country-level table to the outcomes folder, without the index.
out_csv = main_path + '/Democracy/Democracy_Main/MainAnalysis/input/outcomes/old_outcomes/excess_deaths_who.csv'
df1.to_csv(out_csv, index=False)

