# Manual - Netherlands

In [None]:
import datetime
import itertools

import numpy as np
import pandas as pd

In [None]:
from weekly_mort.core import DOWNLOAD_DIR_BASE, PROCESSED_DIR_BASE

## Manual steps

When I was writing this notebook (2020/04/21), the API was not working. To update the Netherlands data, do the following steps:

1. Go to [the Dutch statistics bureau website for the number of deaths](https://opendata.cbs.nl/portal.html?_la=en&_catalog=CBS&tableId=70895ENG&_theme=1052)
2. Click on `Dataset for Graphical Representation`, then `Download CSV` (all periods are auto-selected for me already).
3. Move the .csv file to `_downloads/Netherlands/all_deaths.csv`
4. Update the LAST_MODIFIED cell below with the latest date, then run all cells.

In [None]:
#export
# Date of the most recent manual refresh of the downloaded CSV (see "Manual steps").
# Bump this whenever a new all_deaths.csv is dropped in place.
LAST_MODIFIED = datetime.date(year=2020, month=4, day=21)
LAST_MODIFIED

datetime.date(2020, 4, 21)

## Process Data

In [None]:
# Resolve the download and processed directories for this country.
# NOTE(review): create_country_dirs is not imported in the visible cells —
# presumably it lives in weekly_mort.core next to DOWNLOAD_DIR_BASE; confirm
# and import it so the notebook survives Restart & Run All.
country_dirs = create_country_dirs('Netherlands')
down_dir, proc_dir = country_dirs

In [None]:
# Load the manually downloaded export; the file is semicolon-delimited.
raw_csv_path = down_dir/'all_deaths.csv'
df = pd.read_csv(raw_csv_path, sep=';')

In [None]:
# Preview the first rows of the frame exactly as loaded from the CSV.
df.head()

Unnamed: 0,ID,Sex,Age31December,Periods,Deaths_1
0,0,T001038,10000,1995X000,394.0
1,1,T001038,10000,1995W101,2719.0
2,2,T001038,10000,1995W102,2823.0
3,3,T001038,10000,1995W103,2609.0
4,4,T001038,10000,1995W104,2664.0


In [None]:
# Shorten the source column names. Reassigning the result (instead of
# inplace=True) keeps the cell idempotent and chain-friendly.
df = df.rename(columns={'Age31December': 'Age', 'Deaths_1': 'Deaths'})

In [None]:
# Raw codes found in the `Sex` column -> human-readable labels.
sex_map = {
    '3000': 'Male',
    '4000': 'Female',
    'T001038': 'Total',
}

In [None]:
# Raw codes found in the `Age` column -> age-band labels.
# Keys are strings because the codes are stringified before lookup.
age_map = {
    '10000': 'Total',
    '21700': '80+',
    '41700': '0-64',
    '53950': '65-79',
}

In [None]:
# Translate the raw codes into labels. Lookups go through __getitem__ so an
# unexpected code raises KeyError instead of silently becoming NaN.
# Note: this cell is not re-runnable — once recoded, the labels are no longer
# valid keys of the maps.
df['Sex'] = df['Sex'].str.strip().map(sex_map.__getitem__)  # codes carry padding whitespace
df['Age'] = df['Age'].astype(str).map(age_map.__getitem__)  # stringify before lookup

In [None]:
# Check that the Sex and Age codes were replaced by readable labels.
df.head()

Unnamed: 0,ID,Sex,Age,Periods,Deaths
0,0,Total,Total,1995X000,394.0
1,1,Total,Total,1995W101,2719.0
2,2,Total,Total,1995W102,2823.0
3,3,Total,Total,1995W103,2609.0
4,4,Total,Total,1995W104,2664.0


In [None]:
# The first four characters of a period code are the calendar year.
df['Year'] = df['Periods'].str[:4].astype('int64')

In [None]:
# Keep only rows from 2017 onwards.
df = df.loc[df['Year'].ge(2017)]

In [None]:
# Sanity check: every year before 2020 must contribute exactly 648 rows.
rows_per_past_year = df.loc[df.Year < 2020, 'Year'].value_counts()
assert rows_per_past_year.eq(648).all()

Periods whose code contains `JJ` hold the total for the entire year rather than for a single week, so they are removed in the next cell.

In [None]:
# Remove the whole-year ('JJ') rows and verify exactly 36 of them were dropped.
# presumably 36 = 3 sexes x 4 age groups x 3 years with a yearly total — confirm
old_len = len(df)
is_year_total = df.Periods.str.contains('JJ', regex=False)
df = df[~is_year_total]
assert len(df) + 36 == old_len

In [None]:
# The last two characters of a period code are the week number
# (e.g. '2017X000' -> 0, '2017W101' -> 1).
df['Week'] = df['Periods'].str[-2:].astype('int64')

In [None]:
# Each week number should occur a multiple-of-12 number of times.
# presumably 12 = 3 sexes x 4 age groups, times 1–4 years — confirm
rows_per_week = df.Week.value_counts()
all(rows_per_week.isin([12, 24, 36, 48]))

True

In [None]:
# Rows with any missing value are only tolerated in 2020, week 15 or later.
# NOTE(review): these bounds look tied to the LAST_MODIFIED snapshot —
# confirm whether they need bumping when the data is refreshed.
# Build the mask once with the DataFrame's own reduction instead of calling
# np.any with a positional axis twice.
rows_with_missing = df[df.isna().any(axis=1)]
assert all(rows_with_missing.Year == 2020)
assert all(rows_with_missing.Week >= 15)

In [None]:
# Drop every row that has a missing value in any column.
# dropna() (axis=0, how='any' by default) is the built-in equivalent of the
# hand-rolled ~np.any(df.isna(), 1) mask.
df = df.dropna()

In [None]:
# Deaths was parsed as float; make sure truncating to int would lose nothing.
deaths_round_trip = df.Deaths.map(int).map(float)
assert df.Deaths.eq(deaths_round_trip).all()

In [None]:
# Store the death counts as integers.
df['Deaths'] = df['Deaths'].astype('int64')

In [None]:
# Preview after filtering to 2017+ and adding the Year and Week columns.
df.head()

Unnamed: 0,ID,Sex,Age,Periods,Deaths,Year,Week
1189,1189,Total,Total,2017X000,469,2017,0
1190,1190,Total,Total,2017W101,3568,2017,1
1191,1191,Total,Total,2017W102,3637,2017,2
1192,1192,Total,Total,2017W103,3487,2017,3
1193,1193,Total,Total,2017W104,3626,2017,4


In [None]:
# Columns (and their order) written to the processed CSV.
cols = [
    'Week',
    'Year',
    'Age',
    'Sex',
    'Deaths',
]

In [None]:
# Keep only the output columns, in the order given by `cols`.
df = df.loc[:, cols]

In [None]:
# Final layout of the frame that is about to be written to disk.
df.head()

Unnamed: 0,Week,Year,Age,Sex,Deaths
1189,0,2017,Total,Total,469
1190,1,2017,Total,Total,3568
1191,2,2017,Total,Total,3637
1192,3,2017,Total,Total,3487
1193,4,2017,Total,Total,3626


In [None]:
# Write the processed frame without the index column.
deaths_csv_path = proc_dir/'deaths.csv'
df.to_csv(deaths_csv_path, index=False)

## Save Week Dates

The Netherlands reports according to a standard week.

In [None]:
# Save the standard week table alongside the processed deaths.
# NOTE(review): STANDARD_WEEK is not imported in the visible cells —
# presumably it comes from weekly_mort.core; confirm and import it explicitly.
week_dates_path = proc_dir/'week_dates.csv'
STANDARD_WEEK.to_csv(week_dates_path, index=False)