In [2]:
import pandas as pd
import numpy as np
import altair as alt
import locale

alt.data_transformers.enable('data_server')

#alt.data_transformers.enable('default')
#alt.data_transformers.disable_max_rows() # Warning: remove this after testing

locale.setlocale(locale.LC_ALL, 'sv_SE')
%config InlineBackend.figure_format = 'retina'

In [3]:
# Source workbook: SCB's preliminary statistics on deaths.
location = "https://scb.se/hitta-statistik/statistik-efter-amne/befolkning/befolkningens-sammansattning/befolkningsstatistik/pong/tabell-och-diagram/preliminar-statistik-over-doda/"
# Local copy for offline work (remove before committing to GitHub):
#location = "/Users/niklas/Downloads/2020-04-24---preliminar-statistik-over-doda-inkl-eng.xlsx"

raw = pd.read_excel(
    location,
    sheet_name="Tabell 3",
    skiprows=5,
    header=1,
    usecols='A:W',
    na_values="..",
)

# The first column joined with the year is parsed as "<day> <month name><year>";
# rows where that fails (errors='coerce') get NaT as their date.
day_month_year = raw['Unnamed: 0'] + raw['Year'].apply(str)

# Some column headers are truncated in the header row that is read (they end in
# '-'). Ideally they would be combined with the first data row; a fixed mapping
# will do for now.
rename_columns = {'Söderman-':'Södermanland', 'Öster-': 'Östergötland', 'Västra': 'Västra Götaland', 'Västman-': 'Västmanland', 'Västernorr-': 'Västernorrland', 'Väster-': 'Västerbotten', 'Norr-': 'Norrbotten'}

data = (
    raw
    .assign(date=pd.to_datetime(day_month_year, format='%d %B%Y', errors='coerce'))
    .drop(columns=['Unnamed: 0', 'Year'])
    .drop(index=raw.index[[0, 1]])        # first two rows are not data rows
    .drop(index=[850, 851, 852])          # summary rows
    .rename(columns=rename_columns)
    # Wide -> long: one row per (date, county) pair.
    .set_index('date')
    .stack()
    .reset_index()
    .rename(columns={'level_1': 'county', 0: 'deaths'})
    .astype({'deaths': int, 'county': 'category'})
)

In [6]:
# Keep only the rows that have a date.
df = data[data.date.notna()]

# Area chart of summed deaths per calendar day (month + day), coloured by
# county, with one facet row per year.
(
    alt.Chart(df)
    .mark_area()
    .encode(
        x=alt.X('monthdate(date):T'),
        y='sum(deaths)',
        color='county:N',
        row='year(date)',
    )
    .properties(height=300, width=800)
)

In [7]:
county_list = ['Skåne', 'Stockholm', 'Västra Götaland', 'Norrbotten']

# Rows dated after 2010-01-01 that belong to one of the selected counties.
df = data[(data['date'] > '2010-01-01') & data['county'].isin(county_list)]

# Reference period: everything before 2020.
df_history = df[df['date'] < '2020-01-01']
# Current period: 2020-01-01 up to, but not including, 2020-04-15.
df_current = df[(df['date'] >= '2020-01-01') & (df['date'] < '2020-04-15')]

# Both layers use the same channels: calendar day on x, deaths on y, one
# colour per county.
shared_channels = {'x': 'monthdate(date)', 'y': 'deaths', 'color': 'county'}

# Earlier years as faint points ...
history = (
    alt.Chart(df_history)
    .mark_point(opacity=0.3, size=15)
    .encode(**shared_channels)
    .properties(height=600, width=800)
)

# ... with the 2020 values drawn on top as lines.
current = alt.Chart(df_current).mark_line().encode(**shared_channels)

history + current

In [201]:
# Keep only the rows that have a date.
df = data[data.date.notna()]

# Cut-off for the 2020 rolling mean: one week before the newest date in the data.
# NOTE(review): presumably because the most recent days are still incomplete in
# the preliminary figures -- confirm.
# The value is embedded in a Vega expression and parsed by JavaScript's Date, so
# it is formatted as ISO 8601 ("YYYY-MM-DDTHH:MM:SS"). The space-separated
# str(Timestamp) form is not a standard date-time string and is not parsed
# consistently by all browsers.
last_date_for_mean_calculation = (df.date.max() - pd.Timedelta('7d')).isoformat()

# Vega expression filters. `&&` is the logical AND; the bitwise `&` only worked
# by way of operator precedence and 0/1 truthiness.
current = f'year(datum.date) == 2020 && datetime(datum.date) < datetime("{last_date_for_mean_calculation}")'
history = 'year(datum.date) < 2020'

# Base chart: deaths summed over all rows sharing a date (i.e. across counties),
# plotted against the calendar day and coloured by year. The colour range lists
# two greys followed by an orange.
base = alt.Chart(
    df,
    height=600,
    width=800,
    title="Daily Deaths in Sweden 2018-2020"
).transform_aggregate(
    sum_deaths='sum(deaths)',
    groupby=['date']
).mark_point().encode(
    x='monthdate(date)',
    color=alt.Color('year(date):N', title='Year', scale=alt.Scale(range=['#d9d9d9', '#b9b9b9', '#e6550d']))
)

# One point per date showing the daily total.
points = base.encode(
    y='sum_deaths:Q',
)

# https://vega.github.io/vega/docs/expressions/#datetime-functions
def plot_mean(time_filter):
    """Build a line layer showing a rolling mean of the daily death totals.

    Starts from the notebook-level ``base`` chart, so ``base`` must be defined
    before this function is called.

    Parameters
    ----------
    time_filter
        Filter handed unchanged to ``transform_filter``; the notebook passes
        Vega expression strings. Only rows passing the filter enter the mean.

    Returns
    -------
    The filtered ``base`` chart with a window mean of ``sum_deaths`` over the
    frame ``[-10, 10]`` (ten rows before through ten rows after each row),
    drawn as a line of size 4 with ``rolling_mean`` on the y axis.
    """
    filtered = base.transform_filter(time_filter)
    smoothed = filtered.transform_window(
        rolling_mean='mean(sum_deaths)',
        frame=[-10, 10],
    )
    return smoothed.mark_line(size=4).encode(y=alt.Y('rolling_mean:Q'))

# Layer the daily points with two rolling-mean lines: one over the years before
# 2020 (`history` filter) and one over 2020 (`current` filter).
points + plot_mean(history) + plot_mean(current)