In [1]:
import pandas as pd
import numpy as np
import altair as alt
import locale

# Notebook-wide configuration; everything below takes effect for all later cells.

# Select Altair's 'data_server' data transformer for every chart in this notebook
# (per the Altair docs it serves chart data from a local server instead of
# embedding it in the notebook output).
alt.data_transformers.enable('data_server')

# Alternative kept for testing: default transformer with the row limit lifted.
#alt.data_transformers.enable('default')
#alt.data_transformers.disable_max_rows() # Warning: remove this after testing

# Switch the process locale to Swedish. The loading cell parses dates with the
# '%B' (month name) directive -- presumably the Swedish month names in the
# source workbook need this locale to parse; TODO confirm.
locale.setlocale(locale.LC_ALL, 'sv_SE')
# Ask the inline plotting backend for 'retina' figure output.
%config InlineBackend.figure_format = 'retina'

In [13]:
# Preliminary death statistics from Statistics Sweden (scb.se). read_excel
# below expects this URL to serve an Excel workbook.
location="https://scb.se/hitta-statistik/statistik-efter-amne/befolkning/befolkningens-sammansattning/befolkningsstatistik/pong/tabell-och-diagram/preliminar-statistik-over-doda/"

# Sheet "Tabell 3", columns A:W. As used below: the first column is a
# "day month" label, 'Year' is the year, and the remaining columns are
# per-county counts. ".." marks missing values in the sheet.
data = pd.read_excel(location, sheet_name="Tabell 3", skiprows=5, header=1, usecols='A:W', na_values="..")

# Combine label + year into a timestamp. '%B' is a month *name*, so parsing
# depends on the active locale. Labels that are not dates become NaT.
data['date'] = pd.to_datetime(data['Unnamed: 0'] + data['Year'].apply(str), format='%d %B%Y', errors='coerce')
data = data.drop(columns=['Unnamed: 0', 'Year'])

# The first two rows are dropped by position (they appear to hold the second
# half of the wrapped column headers -- see the rename map below).
data = data.drop(data.index[[0, 1]])

# Drop summary rows. They do not carry a parseable date, so select them by
# NaT instead of by fixed row labels: the table grows with every refresh, and
# hard-coded labels would then remove real days and keep the summaries.
data = data[data['date'].notna()]

# This should really be done by combining column names with the first row. Brute force will do for now
rename_columns = {'Söderman-':'Södermanland', 'Öster-': 'Östergötland', 'Västra': 'Västra Götaland', 'Västman-': 'Västmanland', 'Västernorr-': 'Västernorrland', 'Väster-': 'Västerbotten', 'Norr-': 'Norrbotten'}

# Wide (one column per county) -> long (date, county, deaths). stack() drops
# missing cells, which is what makes the integer cast below possible.
data = (
    data
    .rename(columns=rename_columns)
    .set_index('date')
    .stack()
    .reset_index()
    .rename(columns={'level_1': 'county', 0: 'deaths'})
)
data = data.assign(
    deaths=data['deaths'].astype(int),
    county=data['county'].astype('category'),
)

# Weekly totals per county ('W' resampling on the date column).
weekly = data.groupby('county').resample('W', on='date').sum().reset_index()

In [22]:
df = weekly

# Multi-selection on the county field, bound to the chart legend.
selection = alt.selection_multi(fields=['county'], bind='legend')

# Selected counties are fully opaque, the others are faded.
county_opacity = alt.condition(selection, alt.value(1), alt.value(0.2))

# Bars per date, coloured by county, with the summed deaths on the y axis.
weekly_bars = alt.Chart(df, height=600, width=800).mark_bar().encode(
    x=alt.X('yearmonthdate(date):O'),
    y=alt.Y('sum(deaths):Q'),
    color='county:N',
    opacity=county_opacity,
)

weekly_bars.add_selection(selection)

In [18]:
# Counties to include in this chart.
county_list = ['Skåne', 'Västra Götaland', 'Östergötland', 'Stockholm']

df = weekly
df = df[df['county'].isin(county_list)]

# Don't show last week due to lag in reporting
# The cut-off is embedded in a Vega expression and parsed by JavaScript's Date,
# so emit it in ISO 8601 form ("YYYY-MM-DDTHH:MM:SS"); the space-separated
# str(Timestamp) form is not guaranteed to parse in every browser.
last_date_to_show = (df.date.max() - pd.Timedelta('7d')).isoformat()

# Vega expressions use '&&' for logical AND ('&' is the bitwise operator and
# only happened to work through operator precedence).
current_filter = f'year(datum.date) == 2020 && datetime(datum.date) < datetime("{last_date_to_show}")'
history_filter = 'year(datum.date) < 2020'

# Shared chart: every layer is coloured by county.
base = alt.Chart(df, height=600, width=800, title="Weekly Deaths in Selected Counties").encode(
    color='county',
)

# Earlier years: faint points, positioned by month/day so the years overlap.
history = base.mark_point(opacity=0.4).encode(
    x='monthdate(date)',
    y='deaths',
).transform_filter(
    history_filter
)

# 2020 up to the cut-off: stepped line with point markers.
current = base.mark_line(point=True, interpolate='step-before').encode(
    x='monthdate(date)',
    y='deaths',
).transform_filter(
    current_filter
)

# Horizontal rule at the average weekly deaths of the earlier years.
rule = base.mark_rule(opacity=0.5).encode(
    y='average(deaths)',
    size=alt.value(5)
).transform_filter(
    history_filter
)

history + current + rule

In [19]:
# Only rows with a parsed date can be placed on the time axis.
df = data[data.date.notna()]

# Exclude the most recent days from the 2020 rolling mean. The cut-off is
# embedded in a Vega expression and parsed by JavaScript's Date, so emit it in
# ISO 8601 form; the space-separated str(Timestamp) form is not guaranteed to
# parse in every browser.
last_date_for_mean_calculation = (df.date.max() - pd.Timedelta('7d')).isoformat()

# Vega filter expressions, one per year. '&&' is the logical AND in Vega
# expressions ('&' is bitwise and only worked through operator precedence).
current = f'year(datum.date) == 2020 && datetime(datum.date) < datetime("{last_date_for_mean_calculation}")'
history_2019 = 'year(datum.date) == 2019'
history_2018 = 'year(datum.date) == 2018'

# Shared chart: sum the per-county rows into one total per date, place dates by
# month/day so the years overlap, and colour by year using the three listed colours.
base = alt.Chart(
    df,
    height=600,
    width=800,
    title="Daily Deaths in Sweden 2018-2020"
).transform_aggregate(
    sum_deaths='sum(deaths)',
    groupby=['date']
).encode(
    x='monthdate(date)',
    color=alt.Color('year(date):N', title='Year', scale=alt.Scale(range=['#d9d9d9', '#b9b9b9', '#e6550d']))
)

# One point per date with the total deaths for that date.
points = base.mark_point().encode(
    y='sum_deaths:Q',
)

# https://vega.github.io/vega/docs/expressions/#datetime-functions
def plot_mean(time_filter):
    """Build a line layer showing the rolling mean of daily deaths.

    The layer is derived from the notebook-level ``base`` chart, so it shares
    that chart's per-date aggregation (``sum_deaths``), x axis and colours.

    Parameters
    ----------
    time_filter : str
        Vega expression that selects the rows to include, e.g.
        ``'year(datum.date) == 2019'``. The filter is applied before the
        window, so the mean never mixes rows from outside the selection.

    Returns
    -------
    An Altair chart with a line mark encoding ``rolling_mean`` on the y axis.
    """
    return (
        base
        .transform_filter(time_filter)
        .transform_window(
            rolling_mean='mean(sum_deaths)',
            # Window spans 10 rows before through 10 rows after the current row.
            frame=[-10, 10],
            # Without an explicit sort the window order is undefined in
            # Vega-Lite; order by date so the mean is a true rolling mean over
            # time. Dates are unique after the aggregation in ``base``, so
            # sorting introduces no peer rows.
            sort=[{'field': 'date'}],
        )
        .mark_line(size=4)
        .encode(y=alt.Y('rolling_mean:Q'))
    )

# Layer the chart with `+`, in this order: 2018 rolling mean, 2019 rolling mean,
# the daily points, then the 2020 rolling mean. As the cell's last expression,
# the combined chart is what the notebook displays.
plot_mean(history_2018) + plot_mean(history_2019) + points + plot_mean(current)