# Weekly ASMRs for Europe

Notebook to attempt to reproduce and understand the computation of __A__ge __S__tandardised __M__ortality __R__ates

Once this can be produced at the national level, I have all the data to then drill down by:
 - Sex
 - Age
 - NUTS3 regional code

In [1]:
import datetime as dt
import math
import numpy as np
import pandas as pd
import seaborn as sns
import sys
import warnings

%config Completer.use_jedi = False
warnings.filterwarnings('ignore')
pd.set_option('display.max_rows', 500)

## 1. Import Data

To compute we need the following:
 - Weekly population data for the countries in Europe
 - Weekly Death figures for the countries in Europe
 - ESP Table: European Standard Population Table to do the 'Age Standardisation'

### 1a. Import the Weekly Death Data

In [2]:
# pull in all data
df_death = pd.read_csv('demo_r_mweek3.tsv', sep='\t')
# rename and fix id data column
df_death = df_death.rename(columns={"unit,sex,age,geo\\time": "Headings"})
# parse to 4 cols
df_death["Headings"] = df_death["Headings"].apply(lambda x: x.split(','))
df_death[['Unit', 'Sex', 'Age', 'Code']] = pd.DataFrame(df_death.Headings.tolist(), index= df_death.index)
df_death = df_death.drop(columns=['Headings', 'Unit'])

Select only codes with 2 digits as these denom countries and this cuts the data size down hugely

In [3]:
df_death = df_death[df_death.Code.str.len() == 2]
df_death.head(10)

Unnamed: 0,2021W99,2021W01,2020W99,2020W53,2020W52,2020W51,2020W50,2020W49,2020W48,2020W47,...,2000W07,2000W06,2000W05,2000W04,2000W03,2000W02,2000W01,Sex,Age,Code
0,:,:,:,:,:,:,:,:,:,:,...,:,:,:,:,:,:,:,F,TOTAL,AL
20,:,:,:,:,:,1175 p,1270 p,1318 p,1199 p,1169 p,...,914,967,1071,1076,1141,1062,1053,F,TOTAL,AT
68,:,:,:,1234 p,1336 p,1364 p,1385 p,1397 p,1474 p,1584 p,...,1103 p,1147 p,1172 p,1324 p,1369 p,1387 p,1434 p,F,TOTAL,BE
127,:,1211 p,:,1315 p,1494 p,1765 p,1964 p,2107 p,2114 p,1975 p,...,:,:,:,:,:,:,:,F,TOTAL,BG
164,:,:,:,:,:,:,938 p,1037 p,1008 p,1018 p,...,699,717,753,797,833,891,844,F,TOTAL,CH
199,:,:,:,:,:,47 p,62 p,69 p,52 p,46 p,...,:,:,:,:,:,:,:,F,TOTAL,CY
203,:,:,:,:,:,:,:,1455 p,1512 p,1648 p,...,:,:,:,:,:,:,:,F,TOTAL,CZ
230,:,561 p,:,624 p,591 p,649 p,517 p,536 p,558 p,500 p,...,:,:,:,:,:,:,:,F,TOTAL,DK
248,:,176 p,:,199 p,184 p,163 p,170 p,186 p,171 p,154 p,...,219,246,236,269,233,210,210,F,TOTAL,EE
256,:,:,:,:,:,:,:,1589 p,1589 p,1490 p,...,:,:,:,:,:,:,:,F,TOTAL,EL


In [4]:
# melt the table to purely col format, removing W99 columns
week_cols = [x for x in df_death.columns if 'W' in x]
#week_cols = [x for x in week_cols if any(str(y) in x for y in [2015, 2016, 2017, 2018, 2019, 2020, 2021]) and '99' not in x]
week_cols = [x for x in week_cols if '99' not in x]

# select only these cols
df_death = df_death[['Sex', 'Age', 'Code'] + week_cols]
# melt data
df_death = pd.melt(df_death, id_vars=['Sex', 'Age', 'Code'], var_name=['Week'], value_vars=week_cols, value_name='Deaths')
df_death.head()

Unnamed: 0,Sex,Age,Code,Week,Deaths
0,F,TOTAL,AL,2021W01,:
1,F,TOTAL,AT,2021W01,:
2,F,TOTAL,BE,2021W01,:
3,F,TOTAL,BG,2021W01,1211 p
4,F,TOTAL,CH,2021W01,:


Now we need to:
 - Convert the deaths column to a number type
 - Split the YYYYWWW column to 'Year' and 'Week' and as number format

In [5]:
# remove iregs from number col (e.g. p means provisional)
num_iregs = [":", "b", "p", "e", " "]
for ireg in num_iregs:
    df_death['Deaths'] = df_death['Deaths'].str.replace(ireg, '')
    
# split the col to Year and Week
df_death[['Year', 'Week']] = pd.DataFrame(df_death['Week'].str.split('W').tolist(), index=df_death.index)

# cast to numeric
num_cols = ['Deaths', 'Year', 'Week']
for col in num_cols:
    df_death[col] = pd.to_numeric(df_death[col])
    
df_death.head()

Unnamed: 0,Sex,Age,Code,Week,Deaths,Year
0,F,TOTAL,AL,1,,2021
1,F,TOTAL,AT,1,,2021
2,F,TOTAL,BE,1,,2021
3,F,TOTAL,BG,1,1211.0,2021
4,F,TOTAL,CH,1,,2021


In [6]:
df_death.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2286148 entries, 0 to 2286147
Data columns (total 6 columns):
 #   Column  Dtype  
---  ------  -----  
 0   Sex     object 
 1   Age     object 
 2   Code    object 
 3   Week    int64  
 4   Deaths  float64
 5   Year    int64  
dtypes: float64(1), int64(2), object(3)
memory usage: 104.7+ MB


Now we only select 2015 onwards

In [7]:
df_death = df_death[df_death.Year > 2014]

### 1b. Import the Population Data

In [103]:
df_pop = pd.read_csv("demo_r_pjangrp3.tsv", sep='\t')

In [104]:
# rename and fix id data column
df_pop = df_pop.rename(columns={"sex,unit,age,geo\\time": "Headings"})
# parse to 4 cols
df_pop["Headings"] = df_pop["Headings"].apply(lambda x: x.split(','))
df_pop[['Sex', 'Unit', 'Age', 'Code']] = pd.DataFrame(df_pop.Headings.tolist(), index= df_pop.index)
df_pop = df_pop.drop(columns=['Headings', 'Unit'])

In [105]:
df_pop = df_pop[df_pop.Code.str.len() == 2]
df_pop.head(10)

Unnamed: 0,2019,2018,2017,2016,2015,2014,Sex,Age,Code
0,1432833,1431715,1423050,1417141,1424597,1430827,F,TOTAL,AL
20,4501742,4483749,4460424,4427918,4384529,4352447,F,TOTAL,AT
68,5810693,5784371,5762455,5741853,5713206,5687048 b,F,TOTAL,BE
127,3604338,3627625,3651881,3676607,3700183,3720732,F,TOTAL,BG
164,4307406,4277696,4246113,4205655,4163786,4117540,F,TOTAL,CH
199,448068,442728,438139,435627,435183,440462,F,TOTAL,CY
203,5405606,5390264,5378133,5367513,5361348,5350039,F,TOTAL,CZ
227,42052522,41948786,41824535,41661561,41362080,41210540,F,TOTAL,DE
683,2917008,2904717,2888591,2869364,2848701,2834956,F,TOTAL,DK
701,699185,698049,698097,699236,700166 b,700900,F,TOTAL,EE


In [106]:
df_pop = pd.melt(df_pop, id_vars=['Sex', 'Age', 'Code'], var_name=['Year'], value_vars=['2014 ', '2015 ', '2016 ', '2017 ', '2018 ', '2019 '], value_name='Pop')

In [107]:
# remove iregs from number col (e.g. p means provisional)
num_iregs = [":", "b", "p", "e", " "]
for ireg in num_iregs:
    df_pop.Pop = df_pop.Pop.str.replace(ireg, "")

# cast to numeric
num_cols = ['Pop', 'Year']
for col in num_cols:
    df_pop[col] = pd.to_numeric(df_pop[col])
    
df_pop.head()

Unnamed: 0,Sex,Age,Code,Year,Pop
0,F,TOTAL,AL,2014,1430827.0
1,F,TOTAL,AT,2014,4352447.0
2,F,TOTAL,BE,2014,5687048.0
3,F,TOTAL,BG,2014,3720732.0
4,F,TOTAL,CH,2014,4117540.0


### 1c. Create Weekly Population Data Using Linear and Constant Interp

We only have annual 1st Jan estimates, so we need to:
 - Create linear interp between for weekly data
 - Expand out beyond 2019 to 2020 and 2021 using constant interp

In [108]:
# df to map week to interpolation between previous 1st Jan pop and next 1st Jan pop
df_weeks = pd.DataFrame({'Week': [x for x in range(1,54)], 'Portion': [min((x-1)/52, 1) for x in range(1,54)]})

In [109]:
# create cross join so we form a weekly key
df_pop2 = df_pop.merge(df_weeks, how='cross')
df_pop2 = df_pop2.rename(columns={'Pop': 'PrevPop'})
df_pop2.head()

Unnamed: 0,Sex,Age,Code,Year,PrevPop,Week,Portion
0,F,TOTAL,AL,2014,1430827.0,1,0.0
1,F,TOTAL,AL,2014,1430827.0,2,0.019231
2,F,TOTAL,AL,2014,1430827.0,3,0.038462
3,F,TOTAL,AL,2014,1430827.0,4,0.057692
4,F,TOTAL,AL,2014,1430827.0,5,0.076923


In [110]:
# create a df with the shifted pop i.e. Year 2014 will show pop for 2015 and rename
#df_pop_shift = df_pop[['Sex', 'Age', 'Code', 'Year', 'Pop']].groupby(['Sex', 'Age', 'Code', 'Year']).sum().shift(-1).reset_index()
df_pop_shift = df_pop.copy()
df_pop_shift['Year'] = df_pop_shift['Year'] - 1
df_pop_shift = df_pop_shift.rename(columns={'Pop': 'NextPop'})
df_pop_shift.head()

Unnamed: 0,Sex,Age,Code,Year,NextPop
0,F,TOTAL,AL,2013,1430827.0
1,F,TOTAL,AT,2013,4352447.0
2,F,TOTAL,BE,2013,5687048.0
3,F,TOTAL,BG,2013,3720732.0
4,F,TOTAL,CH,2013,4117540.0


In [114]:
# merge this on so we have PrevPop, NextPop and linear blend factor
# can use this to compute interp'ed pop
df_pop = pd.merge(left=df_pop2, right=df_pop_shift, how='left', on=['Sex', 'Age', 'Code', 'Year'])
# pop NextPop with PrevPop if na
df_pop['NextPop'] = np.where(df_pop['NextPop'].isna(), df_pop['PrevPop'], df_pop['NextPop'])

# create const interp of 2019 for 2020 and 2021
df_const_interp_2020 = df_pop[df_pop.Year == 2019]
df_const_interp_2020['Year'] = 2020
df_const_interp_2021 = df_pop[df_pop.Year == 2019]
df_const_interp_2021['Year'] = 2021

df_pop = pd.concat([df_pop, df_const_interp_2020, df_const_interp_2021])
df_pop.head()

Unnamed: 0,Sex,Age,Code,Year,PrevPop,Week,Portion,NextPop
0,F,TOTAL,AL,2014,1430827.0,1,0.0,1424597.0
1,F,TOTAL,AL,2014,1430827.0,2,0.019231,1424597.0
2,F,TOTAL,AL,2014,1430827.0,3,0.038462,1424597.0
3,F,TOTAL,AL,2014,1430827.0,4,0.057692,1424597.0
4,F,TOTAL,AL,2014,1430827.0,5,0.076923,1424597.0


In [115]:
# create interp'ed pop
df_pop['Pop'] = df_pop['PrevPop'] * (1 - df_pop['Portion']) + df_pop['NextPop'] * df_pop['Portion']

### 1d. Join the Death and Population Data

In [119]:
df = pd.merge(left=df_death, right=df_pop, how='left', on=['Sex', 'Age', 'Code', 'Year', 'Week'])
df.head()

Unnamed: 0,Sex,Age,Code,Week,Deaths,Year,PrevPop,Portion,NextPop,Pop
0,F,TOTAL,AL,1,,2021,1432833.0,0.0,1432833.0,1432833.0
1,F,TOTAL,AT,1,,2021,4501742.0,0.0,4501742.0,4501742.0
2,F,TOTAL,BE,1,,2021,5810693.0,0.0,5810693.0,5810693.0
3,F,TOTAL,BG,1,1211.0,2021,3604338.0,0.0,3604338.0,3604338.0
4,F,TOTAL,CH,1,,2021,4307406.0,0.0,4307406.0,4307406.0


### 1e. Import ESP Data

As explained [here](https://www.isdscotland.org/products-and-services/gpd-support/population/standard-populations/), we need population standardised stats:

_"Different countries across Europe have different population structures - some have higher percentages of young people, whilst others have a greater proportion of old people. Therefore in order to compare more accurately information on the rates of illness and death it is helpful to adjust the figures for each country to show what would be happening if each country had the same population structure. The European Standard Population (ESP) is a theoretical population adding up to a total of 100,000 which is widely used to produce European age-standardised rates or EASRs."_

In [123]:
df_esp = pd.read_csv('esp.txt', sep='\t')

In [126]:
df = pd.merge(left=df, right=df_esp, how='left', on='Age')

In [128]:
df['ASMR'] = df['Deaths'] / df['Pop'] * df['ESP']

In [144]:
df[(~df.Age.isin(['TOTAL', 'UNK'])) & (df.Sex == 'T')][['Code', 'Year', 'Week', 'ASMR']].groupby(['Code', 'Year', 'Week']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ASMR
Code,Year,Week,Unnamed: 3_level_1
AL,2015,1,11.885110
AL,2015,2,22.364058
AL,2015,3,18.337410
AL,2015,4,22.461249
AL,2015,5,19.433535
...,...,...,...
UK,2020,50,21.951750
UK,2020,51,23.064653
UK,2020,52,0.000000
UK,2020,53,0.000000
