In [1]:
import pandas as pd
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import pyprind
import eurostat
import os
import datetime as D

# Use matplotlib's "ggplot" style sheet for figures.
plt.style.use("ggplot")
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the jupyter_black extension (formats code cells with Black).
%load_ext jupyter_black

In [2]:
def analyse_dataset(dataset_code):
    """Print the parameters of a Eurostat dataset and the values of each one.

    The list of parameter names returned by ``eurostat.get_pars`` is printed
    first. Then, for every parameter, its name and the values returned by
    ``eurostat.get_par_values`` are printed on separate lines.

    Args:
        dataset_code: Eurostat dataset identifier passed through to the
            ``eurostat`` helper functions.

    Returns:
        None. Output goes to stdout only.
    """
    par_names = eurostat.get_pars(dataset_code)
    print(par_names)
    for name in par_names:
        values = eurostat.get_par_values(dataset_code, name)
        # One name line followed by one values line, as two prints would give.
        print(name, values, sep="\n")

# Download data

## Excess mortality

In [3]:
# Eurostat dataset code for the excess-mortality table; print its parameters
# and the values each parameter can take.
dataset_code = "demo_mexrt"
analyse_dataset(dataset_code)

['freq', 'unit', 'geo']
freq
['M']
unit
['PC']
geo
['EU27_2020', 'BE', 'BG', 'CZ', 'DK', 'DE', 'EE', 'IE', 'EL', 'ES', 'FR', 'HR', 'IT', 'CY', 'LV', 'LT', 'LU', 'HU', 'MT', 'NL', 'AT', 'PL', 'PT', 'RO', 'SI', 'SK', 'FI', 'SE', 'IS', 'LI', 'NO', 'CH']


In [4]:
# Fetch the code -> description dictionary of the "unit" parameter and show
# the entry for the "PC" code.
dic = eurostat.get_dic(dataset_code, "unit")
df = pd.DataFrame(dic, columns=["val", "descr"]).set_index("val")
df.loc["PC"]

descr    Percentage
Name: PC, dtype: object

In [5]:
dataset_code = "demo_mexrt"

# Download the table once and cache it as NetCDF; later runs skip the download.
path = f"data/ds_{dataset_code}.nc"
if not os.path.exists(path):
    # Create the cache directory so the write below works on a fresh checkout.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    df = eurostat.get_data_df(dataset_code)
    # Raw string: the column label contains a literal backslash, and "\T" is
    # not a valid escape sequence in a normal string literal.
    df = df.set_index(r"geo\TIME_PERIOD")
    df.index.name = "location"
    # Keep only the time-period columns (labels containing "20"), then
    # transpose so that rows are dates and columns are locations.
    df = df[[col for col in df.columns if "20" in col]].T
    df.index = pd.to_datetime(df.index)
    df.index.name = "date"
    # Long (date, location) series -> xarray object.
    ds = df.stack().to_xarray()
    ds.to_netcdf(path)
    print("Data downloaded")
else:
    print("Data already downloaded")

Data downloaded


## Deaths by week

In [6]:
# Eurostat dataset code for the weekly-deaths table.
dataset_code = "demo_r_mwk_05"

In [7]:
# Print the dataset's parameters and the values each parameter can take.
analyse_dataset(dataset_code)

['freq', 'age', 'sex', 'unit', 'geo']
freq
['W']
age
['TOTAL', 'Y_LT5', 'Y5-9', 'Y10-14', 'Y15-19', 'Y20-24', 'Y25-29', 'Y30-34', 'Y35-39', 'Y40-44', 'Y45-49', 'Y50-54', 'Y55-59', 'Y60-64', 'Y65-69', 'Y70-74', 'Y75-79', 'Y80-84', 'Y85-89', 'Y_GE90', 'UNK']
sex
['T', 'M', 'F']
unit
['NR']
geo
['EU27_2020', 'BE', 'BG', 'CZ', 'DK', 'DE', 'EE', 'IE', 'EL', 'ES', 'FR', 'HR', 'IT', 'CY', 'LV', 'LT', 'LU', 'HU', 'MT', 'NL', 'AT', 'PL', 'PT', 'RO', 'SI', 'SK', 'FI', 'SE', 'IS', 'LI', 'NO', 'CH', 'UK', 'ME', 'GE', 'AL', 'RS', 'AD', 'AM']


In [8]:
# Fetch the code -> description dictionary of the "unit" parameter and show
# the entry for the "NR" code.
dic = eurostat.get_dic(dataset_code, "unit")
df = pd.DataFrame(dic, columns=["val", "descr"]).set_index("val")
df.loc["NR"]

descr    Number
Name: NR, dtype: object

In [9]:
dataset_code = "demo_r_mwk_05"

# Download the table once and cache it as NetCDF; later runs skip the download.
path = f"data/ds_{dataset_code}.nc"
if not os.path.exists(path):
    # Create the cache directory so the write below works on a fresh checkout.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    df = eurostat.get_data_df(dataset_code)
    # Raw string: the column label contains a literal backslash, and "\T" is
    # not a valid escape sequence in a normal string literal.
    df = df.set_index(["age", "sex", r"geo\TIME_PERIOD"])
    df.index.names = ["age", "sex", "location"]
    # Keep only week columns whose label starts with "20". A plain substring
    # test would also keep a week-20 label from another century (for example
    # "1999-W20") and shift the positional dates assigned below.
    df = df[[col for col in df.columns if col.startswith("20")]].T
    # One Sunday per row, starting 2000-01-02. `periods=` sizes the range to
    # the data, so the index no longer breaks once the table holds more weeks
    # than a fixed end date allows.
    # NOTE(review): dates are assigned by position, not parsed from the week
    # labels. This assumes the first kept column is the first week of 2000 and
    # that the weeks are contiguous and in ascending order; confirm.
    df.index = pd.date_range("2000-01-01", periods=df.shape[0], freq="W")
    df.index.name = "date"
    # Long (age, sex, location, date) series -> xarray object.
    ds = df.unstack().to_xarray()
    ds.to_netcdf(path)
    print("Data downloaded")
else:
    print("Data already downloaded")

Data downloaded


## Population by age

In [10]:
# Eurostat dataset code for the population-by-age table; print its parameters
# and the values each parameter can take.
dataset_code = "demo_pjan"
analyse_dataset(dataset_code)

['freq', 'unit', 'age', 'sex', 'geo']
freq
['A']
unit
['NR']
age
['TOTAL', 'Y_LT1', 'Y1', 'Y2', 'Y3', 'Y4', 'Y5', 'Y6', 'Y7', 'Y8', 'Y9', 'Y10', 'Y11', 'Y12', 'Y13', 'Y14', 'Y15', 'Y16', 'Y17', 'Y18', 'Y19', 'Y20', 'Y21', 'Y22', 'Y23', 'Y24', 'Y25', 'Y26', 'Y27', 'Y28', 'Y29', 'Y30', 'Y31', 'Y32', 'Y33', 'Y34', 'Y35', 'Y36', 'Y37', 'Y38', 'Y39', 'Y40', 'Y41', 'Y42', 'Y43', 'Y44', 'Y45', 'Y46', 'Y47', 'Y48', 'Y49', 'Y50', 'Y51', 'Y52', 'Y53', 'Y54', 'Y55', 'Y56', 'Y57', 'Y58', 'Y59', 'Y60', 'Y61', 'Y62', 'Y63', 'Y64', 'Y65', 'Y66', 'Y67', 'Y68', 'Y69', 'Y70', 'Y71', 'Y72', 'Y73', 'Y74', 'Y75', 'Y76', 'Y77', 'Y78', 'Y79', 'Y80', 'Y81', 'Y82', 'Y83', 'Y84', 'Y85', 'Y86', 'Y87', 'Y88', 'Y89', 'Y90', 'Y91', 'Y92', 'Y93', 'Y94', 'Y95', 'Y96', 'Y97', 'Y98', 'Y99', 'Y_OPEN', 'UNK']
sex
['T', 'M', 'F']
geo
['EU27_2020', 'EU28', 'EU27_2007', 'EA20', 'EA19', 'BE', 'BG', 'CZ', 'DK', 'DE', 'DE_TOT', 'EE', 'IE', 'EL', 'ES', 'FR', 'FX', 'HR', 'IT', 'CY', 'LV', 'LT', 'LU', 'HU', 'MT', 'NL', 'AT', 'PL'

In [11]:
dataset_code = "demo_pjan"

# Download the table once and cache it as NetCDF; later runs skip the download.
path = f"data/ds_{dataset_code}.nc"
if not os.path.exists(path):
    # Create the cache directory so the write below works on a fresh checkout.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    df = eurostat.get_data_df(dataset_code)
    # Raw string: the column label contains a literal backslash, and "\T" is
    # not a valid escape sequence in a normal string literal.
    df = df.set_index(["age", "sex", r"geo\TIME_PERIOD"])
    df.index.names = ["age", "sex", "location"]
    # Select the yearly columns "1960" .. "2023" (a missing year raises
    # KeyError), then transpose so that rows are years.
    df = df[[str(year) for year in range(1960, 2024)]].T
    df.index = pd.to_datetime(df.index, format="%Y")
    df.index.name = "date"
    # Long (age, sex, location, date) series -> xarray object.
    ds = df.unstack().to_xarray()
    ds.to_netcdf(path)
    print("Data downloaded")
else:
    print("Data already downloaded")

Data downloaded
