In [1]:
import altair as alt
import pandas as pd
import numpy as np
import datetime as dt
from covid import dataimport

In [2]:
# Remote copy of the data file; the charts in this notebook are built from this URL.
url = "https://raw.githubusercontent.com/morberg/covid-notebook/master/data/fhm.json"

# Fetch the lag data and dump it to the local JSON file as a list of records.
local_url = "data/fhm.json"
fhm_data, labels = dataimport.get_lag_data()
fhm_data.to_json(path_or_buf=local_url, orient="records")


# Covid-19 graphs for Sweden

## Covid-19 death date, reporting date, and lag

Interactive graph showing the number of reported Covid-19 deaths in Sweden. Select dates in the bottom *Publication Date* graph by clicking and dragging. Gray bars are a prediction based on the average lag for the past three weeks. See [Adam Altmejd's repo](https://github.com/adamaltmejd/covid) for details.

In [3]:
# Reversing the lag labels makes the color scheme look better.
# NOTE(review): this reverses in place, so re-running the cell flips the order back.
labels.reverse()

# Sum the per-date counts from the most recent publication only.
latest_publication = fhm_data.publication_date.max()
is_latest = fhm_data.publication_date == latest_publication
total_deaths = fhm_data.loc[is_latest, "N"].sum()
print(f'Total deaths reported by {latest_publication:%A, %B %d}: {total_deaths}')

Total deaths reported by Wednesday, October 28: 5927


In [167]:
# First death date shown. It drives both the x-axis domain and the death-date
# filter on `deceased`, so the two cannot drift apart.
start_date = dt.date(2020, 3, 13)

# Shared x-axis range: from the start date to one day after the latest death date.
domain = [
    start_date.isoformat(),
    (fhm_data.date.max() + pd.Timedelta("1D")).date().isoformat(),
]

# Interval selection along the x-axis.
brush = alt.selection(type="interval", encodings=["x"])

# Stacked bars of deaths per death date, colored by reporting lag and
# restricted to the rows selected by `brush`.
deceased = (
    alt.Chart(url, height=400)
    .mark_bar()
    .encode(
        x=alt.X(
            "yearmonthdate(date):T",
            title="Date Deceased",
            scale=alt.Scale(domain=domain),
            axis=alt.Axis(grid=False),
        ),
        y=alt.Y(
            "sum(n_diff):Q",
            title="Deceased",
            # Fixed y-range taken from the data, independent of the brush selection.
            scale=alt.Scale(domain=[0, fhm_data.N.max()]),
            axis=alt.Axis(gridDash=[0.2]),
        ),
        order=alt.Order(
            # Sort the segments of the bars by this field
            "days_since_publication:N",
            sort="ascending",
        ),
        color=alt.Color(
            "lag:O",
            title="Lag in Days",
            sort=labels,
        ),
    )
    .transform_filter(brush)
    .transform_filter(
        # Vega's datetime() takes a zero-based month, hence the "- 1".
        f"datum.date >= datetime({start_date.year}, {start_date.month - 1}, {start_date.day})"
    )
)

# Lower panel: deaths summed per publication date. The `brush` selection is
# attached here, so dragging over this chart defines the selected interval.
reported_tooltip = [
    alt.Tooltip("sum(n_diff):Q", title="Reported Deaths"),
    "publication_date:T",
]

reported = (
    alt.Chart(url)
    .properties(height=100)
    .add_selection(brush)
    .mark_bar()
    .encode(
        alt.X(
            "yearmonthdate(publication_date):T",
            title="Publication Date",
            scale=alt.Scale(domain=domain),
            axis=alt.Axis(grid=False),
        ),
        alt.Y(
            "sum(n_diff):Q",
            title="Reported Deaths",
            axis=alt.Axis(gridDash=[0.2]),
        ),
        tooltip=reported_tooltip,
    )
)

# Horizontal bars of deaths per lag bucket for the brushed interval. The chart
# stands in for a color legend, so the built-in legend is switched off.
legend_vert = (
    alt.Chart(url, title="Lag in Days")
    .properties(width=50)
    .transform_filter(brush)
    .mark_bar()
    .encode(
        alt.X(
            "sum(n_diff):Q",
            title="Reported Deaths",
            axis=alt.Axis(gridDash=[0.2]),
        ),
        alt.Y("lag:O", title="", sort=labels),
        alt.Color("lag:O", sort=labels, legend=None),
    )
)

# Text mark with the summed deaths for the brushed interval, drawn at a fixed
# pixel position inside the chart it is layered onto.
total_label = "Total deaths: " + alt.datum.sum_deaths

text = (
    alt.Chart(url)
    .mark_text(align="right", x=692, y=19, fontSize=18)
    .encode(text="text:N")
    # Transforms run in this order: filter, then aggregate, then build the label.
    .transform_filter(brush)
    .transform_aggregate(sum_deaths="sum(n_diff):Q")
    .transform_calculate(text=total_label)
)

# Light-gray bars with the predicted deaths per death date, using the
# `prediction` value from the row with the greatest publication date.
prediction = (
    alt.Chart(url)
    .transform_filter(brush)
    .mark_bar(color="#E8E8E8")
    .encode(
        alt.X("yearmonthdate(date):T"),
        alt.Y("prediction:Q", aggregate={"argmax": "publication_date"}),
    )
)

# Layered main panel on top of the publication-date selector, with the lag
# legend to the right of both.
main_panel = (prediction + deceased + text).properties(width=750)
selector_panel = reported.properties(width=750)

((main_panel & selector_panel) | legend_vert).configure_view(strokeWidth=0)

## Deaths for the last five weeks

Same information as in the graph above, but showing only death dates from the last five weeks.

In [173]:
# Show the last 35 days of death dates. The cutoff is a calendar date rather
# than a day-of-year number: a day-of-year comparison breaks once the data
# spans a year boundary (the cutoff goes negative in January, and dates from
# other years with a high day-of-year would match).
cutoff = fhm_data.date.max() - pd.Timedelta(days=34)  # first death date shown

(
    (
        (
            (
                deceased.mark_bar().encode(
                    x=alt.X(
                        "yearmonthdate(date):T",
                        title="Date Deceased",
                        scale=alt.Scale(),  # replaces the fixed domain set on `deceased`
                        axis=alt.Axis(grid=False),
                    ),
                    y=alt.Y(
                        "sum(n_diff):Q",
                        title="Deceased",
                        scale=alt.Scale(),  # replaces the fixed domain set on `deceased`
                        axis=alt.Axis(tickMinStep=1, grid=False),
                    ),
                    color=alt.Color(
                        "lag:O",
                        title="Lag in Days",
                        sort=labels,
                    ),
                )
                + text.mark_text(align="right", x=585, y=30, fontSize=18)
            ).properties(width=600)
            & reported.encode(
                x=alt.X(
                    "yearmonthdate(publication_date):T",
                    title="Publication Date",
                    scale=alt.Scale(),
                    axis=alt.Axis(grid=False),
                ),
                y=alt.Y(
                    "sum(n_diff):Q",
                    title="Reported Deaths",
                    axis=alt.Axis(gridDash=[0.1]),
                ),
            ).properties(width=600)
        )
        | legend_vert
    )
    .transform_filter(
        # Vega's datetime() takes a zero-based month, hence the "- 1".
        f"datum.date >= datetime({cutoff.year}, {cutoff.month - 1}, {cutoff.day})"
    )
    .configure_view(strokeWidth=0)
)

## Weekly reported deaths

Number of deaths reported each week, with each publication weekday shown individually. Since week 25, no numbers have been published on weekends. Since September 14, numbers have only been published Tuesday to Friday.

In [166]:
# Legend order for the publication weekday colors.
week_order = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]

# "Updated on" refers to the latest publication date, not the latest death date.
last_update = fhm_data.publication_date.max()

# Shared base: deaths summed per publication week, with the y-axis hidden.
base = (
    alt.Chart(
        url,
        title={
            'text': 'Weekly Reported Covid-19  Deaths in Sweden',
            'subtitle': f'Updated on {last_update:%A, %B %d}',
        },
    )
    .mark_bar()
    .encode(
        x=alt.X('publication_week:O', title='Publication Week', axis=alt.Axis(offset=-30)),
        y=alt.Y('sum(n_diff):Q', stack='zero', axis=None),
    )
)

# One stacked segment per publication date, colored by weekday.
bars = base.mark_bar().encode(
    order=alt.Order('publication_date:N'),
    color=alt.Color(
        "day(publication_date):N", title="Publication Day", sort=week_order
    ),
    tooltip=[
        alt.Tooltip("sum(n_diff):Q", title="Reported Deaths"),
        "publication_date:T",
    ],
)

# Weekly totals as text marks. Deliberately not named `text`: that name holds
# the total-deaths label chart used by the lag graphs, and rebinding it here
# would break those cells when they are re-run.
week_totals = base.mark_text(color='black', baseline='bottom', fontSize=9).encode(
    text=alt.Text('sum(n_diff):Q')
)

(bars + week_totals).properties(width=700, height=400).configure_view(strokeWidth=0).configure_legend(orient='top-right')

## Why is the total number different from official numbers?

The total death toll in these graphs differs from the official numbers because:

* Old data points with few deaths have been trimmed to make graphs more legible
* Some death dates are unknown and are not included in these numbers
