In [1]:
import pandas as pd
import numpy as np
import altair as alt
import locale

# Needed for datasets with more than 5k entries. Does not work in all cloud environments.
#alt.data_transformers.enable('data_server')

# Uncomment these if 'data_server' is not working in your environment
# Might result in large notebooks, see https://altair-viz.github.io/user_guide/faq.html#why-does-altair-lead-to-such-extremely-large-notebooks
alt.data_transformers.enable('default') #
alt.data_transformers.disable_max_rows() # Warning: remove this after testing

locale.setlocale(locale.LC_ALL, 'sv_SE') # Needed to parse date strings from SCB
%config InlineBackend.figure_format = 'retina'

In [2]:
location="https://scb.se/hitta-statistik/statistik-efter-amne/befolkning/befolkningens-sammansattning/befolkningsstatistik/pong/tabell-och-diagram/preliminar-statistik-over-doda/"

source = pd.read_excel(location, sheet_name="Tabell 3", skiprows=5, header=1, usecols='A:W', na_values="..")
source['date'] = pd.to_datetime(source['Unnamed: 0'] + source['Year'].apply(str), format='%d %B%Y', errors='coerce')
source.drop(['Unnamed: 0', 'Year'], axis=1, inplace=True)
source.drop(source.index[[0,1]], inplace=True)
source.drop([850, 851, 852], inplace=True) # Drop summary rows

# This should really be done by combining column names with the first row. Brute force will do for now
rename_columns = {'Söderman-':'Södermanland', 'Öster-': 'Östergötland', 'Västra': 'Västra Götaland', 'Västman-': 'Västmanland', 'Västernorr-': 'Västernorrland', 'Väster-': 'Västerbotten', 'Norr-': 'Norrbotten'}
source.rename(columns=rename_columns, inplace=True)
source=source.set_index('date').stack().reset_index()
source.rename(columns={'level_1': 'county', 0: 'deaths'}, inplace=True)
source.deaths = source.deaths.astype(int)
source.county = source.county.astype('category')
weekly = source.groupby('county').resample('W', on='date').sum().reset_index()

In [3]:
df = weekly

selection = alt.selection_multi(fields=['county'], bind='legend')

alt.Chart(df, height=600, width=800).mark_bar().encode(
    alt.X('yearmonthdate(date):O'),
    alt.Y('sum(deaths):Q'),
    color='county:N',
    opacity=alt.condition(selection, alt.value(1), alt.value(0.2))
).add_selection(selection)

In [4]:
df = weekly

# Don't show last week due to lag in reporting
#last_date_to_show = str(df.date.max() - pd.Timedelta('7d'))
# This filter will not work when saving to HTML and rendering in browser. Will debug later if at all.
#current_filter = f'year(datum.date) == 2020 & datetime(datum.date) < datetime("{last_date_to_show}")'

# Don't show last week due to lag in reporting
last_week_to_show = df.date.max().week - 1
current_filter = f'year(datum.date) == 2020 & week(datum.date) < {last_week_to_show}'

click = alt.selection_multi(encodings=['color'])

base = alt.Chart(df, height=600, width=800, title="Weekly Deaths in 2020 Compared to Weekly Average for 2018-2019").encode(
    color=alt.Color('county', legend=None, scale=alt.Scale(scheme='category20')),
).transform_filter(
    click
)

history = base.mark_point(opacity=0.4).encode(
    alt.X('monthdate(date)', title='Date'),
    y='deaths',
).transform_filter(
    'year(datum.date) < 2020'
)

current = base.mark_line(point=True, interpolate='step-before').encode(
    x='monthdate(date)',
    y='deaths',
    tooltip=['county', 'deaths', 'date']
).transform_filter(
    current_filter
)

rule = base.mark_rule(opacity=0.5).encode(
    y=alt.Y('average(deaths)', title='Deaths'),
    size=alt.value(5)
).transform_filter(
    'year(datum.date) < 2020'
)

legend = alt.Chart(df).mark_bar().encode(
    y=alt.Y('county:N', axis=alt.Axis(orient='right'), title=None),
    color=alt.condition(click, 'county', alt.value('lightgray')),
).add_selection(
    click
)

chart = history + rule + current | legend
chart.save('charts/county.html')
chart

In [5]:
df=source[source.date.notna()]

last_date_for_mean_calculation = str(df.date.max() - pd.Timedelta('7d'))
current = f'year(datum.date) == 2020 & datetime(datum.date) < datetime("{last_date_for_mean_calculation}")'
history_2019 = 'year(datum.date) == 2019'
history_2018 = 'year(datum.date) == 2018'

base = alt.Chart(
    df,
    height=600,
    width=800,
    title="Daily Deaths in Sweden 2018-2020"
).transform_aggregate(
    sum_deaths='sum(deaths)',
    groupby=['date']
).encode(
    x='monthdate(date)',
    color=alt.Color('year(date):N', title='Year', scale=alt.Scale(range=['#d9d9d9', '#b9b9b9', '#e6550d']))
)

points = base.mark_point().encode(
    y='sum_deaths:Q',
)

# https://vega.github.io/vega/docs/expressions/#datetime-functions
def plot_mean(time_filter):
    mean = base.transform_filter(
        time_filter
    ).transform_window(
        rolling_mean='mean(sum_deaths)',
        frame=[-10, 10]
    ).mark_line(
        size=4
    ).encode(
        y=alt.Y('rolling_mean:Q'),
    )

    return mean

chart = plot_mean(history_2018) + plot_mean(history_2019) + points + plot_mean(current)
chart.save('charts/daily-deaths.html')
chart

In [6]:
df = weekly

click = alt.selection_multi(encodings=['color'])
brush = alt.selection_interval(encodings=['x'])

bars = alt.Chart(
    df,
    width=800,
    title="Weekly Deaths in Sweden 2018-2020"
).mark_bar(
).encode(
    alt.X('yearmonthdate(date):O', title='Date'),
    alt.Y('sum(deaths):Q', title="Weekly Deaths"),
    color=alt.Color('county:N', legend=None),
    opacity=alt.condition(brush, alt.value(1), alt.value(0.2)),
    tooltip=['county', 'deaths', 'date'],
).add_selection(
    brush
).transform_filter(
    click
).interactive()

county_list = alt.Chart(
    df,
    width=800,
    height=100
).mark_bar(
).encode(
    x=alt.X('county:N', title=''),
    y=alt.Y('sum(deaths)', title='Total Deaths'),
    opacity=alt.condition(click, alt.value(1), alt.value(0.2)),
    color='county',
    tooltip=['sum(deaths)']
).add_selection(
    click
).transform_filter(
    brush
)

legend = alt.Chart(df, height=300).mark_point().encode(
    y=alt.Y('county:N', axis=alt.Axis(orient='right'), title=None),
    color=alt.condition(click, 'county', alt.value('lightgray')),
).add_selection(
    click
).interactive()

chart = bars & county_list | legend
chart.save('charts/weekly-deaths.html')
chart

In [7]:
df=weekly[weekly['county'] != 'Stockholm']

last_date_to_use = str(df.date.max() - pd.Timedelta('7d'))
filter = f'year(datum.date) == 2020 & datetime(datum.date) < datetime("{last_date_to_use}")'

base = alt.Chart(df, title='Weekly Deaths in Sweden excl. Stockholm 2018-2020').mark_line(opacity=0.4).encode(
    x='monthdate(date)',
    y='sum(deaths)',
    color='year(date):N'
)

history = base.encode().transform_filter(
    "year(datum.date) < 2020"
)

current = base.encode().mark_line(opacity=1).transform_filter(
    filter
)

history + current

In [8]:
df=source[source.date.notna()]

last_date_for_mean_calculation = str(df.date.max() - pd.Timedelta('7d'))
current = f'year(datum.date) == 2020 & datetime(datum.date) < datetime("{last_date_for_mean_calculation}")'
history_2019 = 'year(datum.date) == 2019'
history_2018 = 'year(datum.date) == 2018'

click = alt.selection_multi(encodings=['color'])

base = alt.Chart(
    df,
    height=600,
    width=800,
    title="Daily Deaths in Sweden 2018-2020"
).transform_aggregate(
    sum_deaths='sum(deaths)',
    groupby=['date']
).encode(
    x='monthdate(date)',
)

points = base.mark_point().encode(
    y='sum_deaths:Q',
    color=alt.Color('year(date):N', title='Year', scale=alt.Scale(range=['#d9d9d9', '#b9b9b9', '#e6550d']))
)

legend = alt.Chart(df, height=400).mark_point().encode(
    y=alt.Y('county:N', axis=alt.Axis(orient='right'), title=None),
#    color=alt.Color('county:N', scale=alt.Scale(scheme='category20')),
#    color=alt.condition(click, 'county', alt.value('lightgray')),
#    color=alt.Color('county', legend=None, scale=alt.Scale(range=['#e6550d']))
    color=alt.Color('year(date):N', legend=None, scale=alt.Scale(range=['#e6550d']))

).add_selection(
    click
).interactive()


# https://vega.github.io/vega/docs/expressions/#datetime-functions
def plot_mean(time_filter):
    mean = base.transform_filter(
        time_filter
    ).transform_window(
        rolling_mean='mean(sum_deaths)',
        frame=[-10, 10]
    ).mark_line(
        size=4
    ).encode(
        y=alt.Y('rolling_mean:Q'),
        color=alt.Color('year(date):N', title='Year', scale=alt.Scale(range=['#d9d9d9', '#b9b9b9', '#e6550d']))
    )

    return mean

chart = plot_mean(history_2018) + plot_mean(history_2019) + points + plot_mean(current) #| legend
#chart.save('charts/daily-deaths.html')
#chart = legend
chart

In [11]:
date_cols = ['date', 'publication_date']
data_fhm = pd.read_csv('https://raw.githubusercontent.com/adamaltmejd/covid/master/data/covid_deaths_latest.csv', parse_dates=date_cols)

df = source[source.date.notna()].groupby('date').sum()
df2 = data_fhm.set_index('date').max(level='date')[['N']]

result = pd.concat([df, df2], axis=1, sort=False)
result = result[result.index.notna()]
result.columns=['non-covid', 'covid']
result['non-covid'] = result['non-covid'].fillna(0) - result['covid'].fillna(0)
result = result.reset_index()
result = result.set_index('date').stack().reset_index()
result.columns = ['date', 'death_cause', 'deaths']
result

Unnamed: 0,date,death_cause,deaths
0,2018-01-01,non-covid,273.0
1,2018-01-02,non-covid,307.0
2,2018-01-03,non-covid,292.0
3,2018-01-04,non-covid,276.0
4,2018-01-05,non-covid,288.0
...,...,...,...
913,2020-05-03,covid,31.0
914,2020-05-04,non-covid,-19.0
915,2020-05-04,covid,19.0
916,2020-05-05,non-covid,-4.0


In [12]:
df = result[result.date < '2020-04-20']
#df = df[df.death_cause == 'non-covid']

normal = alt.Chart(df, height=600, width=800).mark_bar().encode(
    x='date',
    y=alt.Y('deaths', stack=True),
    color=alt.Color('death_cause', scale=alt.Scale(scheme='dark2'), sort='descending'),
    tooltip=['date']
).transform_filter(
    'year(datum.date) == 2020'
).interactive()

normal