In [1]:
import datetime as dt
import logging
from functools import wraps

import altair as alt
import altair_morberg.core as morberg
import numpy as np
import pandas as pd
from covid import dataimport

#%load_ext lab_black

In [2]:
# Activate the project's Altair theme for every chart rendered below.
alt.themes.register("morberg_theme", morberg.theme)
alt.themes.enable("morberg_theme");

# Reporting-lag data plus `labels`, which the charts pass as the sort order
# for the "lag" field.
fhm_data, labels = dataimport.get_lag_data()

# The charts read their data from `url`; the frame is also dumped to a local
# JSON file (presumably the file that gets published at that URL — confirm).
local_url = "data/fhm.json"
url = "https://raw.githubusercontent.com/morberg/covid-notebook/master/data/fhm.json"
fhm_data.to_json(local_url, orient="records")

In [3]:
# Functions for processing cases data
def log_step(func):
    """Wrap a pipeline step so that every call is timed and logged.

    The wrapped function is called unchanged and its result is returned
    as-is. After each call one DEBUG record is emitted on the logger named
    after the step's module, containing the step name, the ``shape`` of the
    result (``None`` when the result has no such attribute) and the elapsed
    wall-clock time. DEBUG records are hidden under the default logging
    configuration, so nothing is shown unless logging is turned up, e.g.
    ``logging.basicConfig(level=logging.DEBUG)``.

    Parameters
    ----------
    func : callable
        The pipeline step to wrap.

    Returns
    -------
    callable
        A wrapper with the same name and docstring as ``func``.
    """
    logger = logging.getLogger(func.__module__)

    @wraps(func)
    def wrapper(*args, **kwargs):
        tic = dt.datetime.now()
        result = func(*args, **kwargs)
        time_taken = dt.datetime.now() - tic
        # getattr keeps the decorator usable on steps that return something
        # other than a DataFrame/array.
        logger.debug(
            "%s shape=%s took %s",
            func.__name__,
            getattr(result, "shape", None),
            time_taken,
        )
        return result

    return wrapper


@log_step
def start_pipeline(dataf):
    """Return a deep copy of ``dataf`` so later steps never touch the input frame."""
    working_copy = dataf.copy(deep=True)
    return working_copy


@log_step
def remove_columns(dataf):
    """Return ``dataf`` without the ``Totalt_antal_fall`` column.

    A new frame is returned; ``dataf`` itself is left unchanged.
    """
    unwanted = ["Totalt_antal_fall"]
    return dataf.drop(columns=unwanted)


@log_step
def to_long_format(dataf):
    """Reshape the wide frame into one row per (``Statistikdatum``, column).

    Every column other than ``Statistikdatum`` is unpivoted: its name goes
    into ``Län`` and its value into ``Fall``.
    """
    return pd.melt(
        dataf,
        id_vars=["Statistikdatum"],
        var_name="Län",
        value_name="Fall",
    )


@log_step
def add_week_numbers(dataf):
    """Return a copy of ``dataf`` with a ``Vecka`` column of ISO week labels.

    Labels have the form ``"YY-WW"``: the last two digits of the ISO 8601
    year followed by the zero-padded ISO week number, e.g. ``"20-53"`` for
    3 January 2021.

    Parameters
    ----------
    dataf : pandas.DataFrame
        Must contain a datetime column named ``Statistikdatum``.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``dataf`` itself is not modified.
    """
    # "%G"/"%V" are the ISO 8601 year/week directives documented by Python;
    # the two-digit "%g" is not in that list, so the century is sliced off
    # here instead of relying on it.
    week_labels = dataf["Statistikdatum"].dt.strftime("%G-%V").str[2:]
    # assign() builds a new frame, which keeps the step free of side effects
    # on the caller's data.
    return dataf.assign(Vecka=week_labels)


# Data from FHM for number of cases
input_excel = "https://www.arcgis.com/sharing/rest/content/items/b5e7488e117749c19881cce45db13f7e/data"

# The weekly cases chart reads from `cases_url`; the processed frame is also
# written to `local_cases_url` (presumably the file published there — confirm).
local_cases_url = "data/cases.json"
cases_url = (
    "https://raw.githubusercontent.com/morberg/covid-notebook/master/data/cases.json"
)

df = pd.read_excel(input_excel)

# Run the processing steps one after another; `df` keeps the raw download.
cases = start_pipeline(df)
cases = remove_columns(cases)
cases = to_long_format(cases)
cases = add_week_numbers(cases)

cases.to_json(local_cases_url, orient="records")

# Covid-19 graphs for Sweden

## Covid-19 death date, reporting date, and lag

Interactive graph showing the number of reported Covid-19 deaths in Sweden. Select dates in the bottom *Reported* graph by clicking and dragging. Gray bars are a prediction based on the average lag for the past three weeks. See [Adam Altmejd's repo](https://github.com/adamaltmejd/covid) for details.

In [4]:
# Sum N over the rows belonging to the most recent publication date.
latest_publication = fhm_data.publication_date.max()
is_latest = fhm_data.publication_date == latest_publication
total_deaths = fhm_data.loc[is_latest, "N"].sum()

print(
    f"Total deaths reported by {latest_publication:%A, %B %d}: {int(total_deaths)}"
)

Total deaths reported by Friday, February 05: 12115


In [18]:
# Shared x-axis range: from 13 March 2020 up to the day after the latest
# date present in the data.
domain = [
    "2020-03-13",
    (fhm_data.date.max() + pd.Timedelta(days=1)).date().isoformat(),
]

# Horizontal interval selection; it is attached to the `reported` chart and
# used as a filter by the other panels.
brush = alt.selection_interval(encodings=["x"])

# Daily deaths by date of death, stacked by reporting lag and restricted to
# the publication dates currently selected with `brush`.
deceased = (
    alt.Chart(url, height=400, title="Daily Covid-19 deaths in Sweden")
    .mark_bar(binSpacing=0)
    .encode(
        x=alt.X(
            "yearmonthdate(date):T",
            title="Occurred",
            scale=alt.Scale(domain=domain),
        ),
        y=alt.Y(
            "sum(n_diff):Q",
            title="",
            # Fixed y-range so the axis does not rescale while brushing.
            scale=alt.Scale(domain=[0, fhm_data.N.max()]),
        ),
        order=alt.Order(
            # Sort the segments of the bars by this field
            "days_since_publication:N",
            sort="ascending",
        ),
        color=alt.Color(
            "lag:N",
            title="Lag in Days",
            sort=labels,
        ),
    )
    .transform_filter(brush)
    # Vega's datetime() takes a 0-based month, so datetime(2020,2,13) is
    # 13 March 2020 — the same day the x-axis `domain` starts on.
).transform_filter("datum.date >= datetime(2020,2,13)")

# Lower panel: deaths per publication date. Dragging horizontally here sets
# `brush`, which the other panels use as a filter.
reported = (
    alt.Chart(url, height=100)
    .mark_bar(binSpacing=0)
    .encode(
        tooltip=[
            alt.Tooltip("sum(n_diff):Q", title="Reported Deaths"),
            "publication_date:T",
        ],
        y=alt.Y(
            "sum(n_diff):Q",
            title="",
            scale=alt.Scale(domain=[0, 350]),
        ),
        x=alt.X(
            "yearmonthdate(publication_date):T",
            title="Reported",
            axis=alt.Axis(format="%b %Y", tickCount=12),
            scale=alt.Scale(domain=domain),
        ),
    )
    .add_selection(brush)
)

# Side panel: horizontal bars with the number of deaths per reporting lag for
# the brushed selection. It carries its own y-axis labels, so the colour
# legend is switched off.
legend_vert = (
    alt.Chart(url, title="", width=50)
    .transform_filter(brush)
    .mark_bar()
    .encode(
        y=alt.Y("lag:O", sort=labels, title="Reporting lag in days"),
        x=alt.X("sum(n_diff):Q", title=""),
        color=alt.Color("lag:O", legend=None, sort=labels),
    )
)

# Text mark showing the summed deaths of the brushed selection, drawn at a
# fixed pixel position inside the main chart.
text = (
    alt.Chart(url)
    .mark_text(align="right", x=692, y=19, fontSize=16)
    .encode(text="text:N")
    # The transforms run in this order: filter -> aggregate -> label string.
    .transform_filter(brush)
    .transform_aggregate(sum_deaths="sum(n_diff):Q")
    .transform_calculate(text="Total deaths: " + alt.datum.sum_deaths)
)

# Light-gray bars drawn behind the reported deaths: for each date of death,
# the `prediction` value taken from the row with the latest publication_date
# in the brushed selection.
prediction = (
    alt.Chart(url)
    .transform_filter(brush)
    .mark_bar(color="#E8E8E8")
    .encode(
        y=alt.Y("prediction:Q", aggregate={"argmax": "publication_date"}),
        x=alt.X(
            "yearmonthdate(date):T",
            axis=alt.Axis(format="%b %Y", tickCount=12),
        ),
    )
)

# Layout: the layered main chart stacked on top of the `reported` brush
# panel, with the lag breakdown placed to the right of that column.
alt.hconcat(
    alt.vconcat(
        (prediction + deceased + text).properties(width=750),
        reported.properties(width=750),
    ),
    legend_vert,
)

## Deaths for the past six weeks

Same information as in the graph above, but showing only death dates for the past six weeks.

In [17]:
# Show the last 42 days of death dates.
# The cutoff is expressed in milliseconds since the epoch because it is
# compared against `datum.date` in the chart filter.
cutoff = (fhm_data.date.max() - pd.Timedelta(days=42)).timestamp() * 1000

# Same stacked-by-lag bars as the main chart, without the fixed axis domains.
# The six-week window is applied where the panels are combined.
deceased = (
    alt.Chart(
        url,
        title={
            "text": "Daily Covid-19 Deaths in Sweden Past Six Weeks",
            "subtitle": f"Updated on {fhm_data.date.max().date():%A, %B %d}",
        },
    )
    .mark_bar()
    .encode(
        color=alt.Color("lag:N", title="Lag in Days", sort=labels),
        order=alt.Order(
            # Sort the segments of the bars by this field
            "days_since_publication:N",
            sort="ascending",
        ),
        x=alt.X(
            "yearmonthdate(date):T",
            title="Occurred",
            axis=alt.Axis(tickCount=5),
        ),
        y=alt.Y("sum(n_diff):Q", title=""),
    )
)

# Stack the six-week chart on top of the `reported` panel, overriding that
# panel's fixed axis domains with default scales, then keep only rows whose
# `date` is later than the cutoff.
alt.vconcat(
    deceased,
    reported.mark_bar().encode(
        x=alt.X(
            "yearmonthdate(publication_date):T",
            title="Reported",
            scale=alt.Scale(),
            axis=alt.Axis(tickCount=5),
        ),
        y=alt.Y("sum(n_diff):Q", title=""),
    ),
).transform_filter(alt.datum.date > cutoff)

## Weekly reported deaths

Number of deaths reported each week, with each publication weekday shown individually. Since week 25 no numbers are published on weekends. Since week 38 numbers are only published Tuesdays through Fridays.

In [7]:
# Only show a label for every fifth week on the x-axis.
# The labels have to match the week strings in the data exactly. The cases
# pipeline formats weeks as zero-padded "YY-WW" (strftime "%g-%V"), so
# single-digit weeks must be padded too ("20-04", not "20-4").
weeks2020 = [f"20-{week:02}" for week in range(4, 54, 5)]
weeks2021 = [f"21-{week:02}" for week in range(1, 54, 5)]
values = weeks2020 + weeks2021

In [8]:
# Weekday abbreviations in calendar order; used to sort the colour legend.
week_order = "Mon Tue Wed Thu Fri Sat Sun".split()

# Shared encoding for the weekly chart: one stacked bar per publication week,
# with no y-axis drawn and x tick labels only at the weeks listed in `values`.
base = (
    alt.Chart(
        url,
        title={
            "text": "Weekly Reported Covid-19  Deaths in Sweden",
            "subtitle": f"Updated on {fhm_data.date.max().date():%A, %B %d}",
        },
    )
    .mark_bar()
    .encode(
        y=alt.Y("sum(n_diff):Q", stack="zero", axis=None),
        x=alt.X(
            "publication_week:O",
            title=None,
            axis=alt.Axis(values=values, offset=-30),
        ),
    )
)

# Bars split and coloured by the weekday of publication, with the segments
# ordered by publication date.
bars = base.mark_bar().encode(
    tooltip=[
        alt.Tooltip("sum(n_diff):Q", title="Reported Deaths"),
        "publication_date:T",
    ],
    color=alt.Color(
        "day(publication_date):N",
        sort=week_order,
        title="Publication Day",
    ),
    order=alt.Order("publication_date:N"),
)

# Weekly total drawn as a small label sitting on top of each bar.
text = base.mark_text(
    baseline="bottom",
    color="black",
    fontSize=9,
).encode(text=alt.Text("sum(n_diff):Q"))

# Layer the labels over the bars and put the legend on the right.
alt.layer(bars, text).properties(width=700).configure_legend(orient="right")

## Weekly number of cases

Number of Covid-19 cases per week. The date is when the test was performed. Cases are reported Tue-Fri.

In [9]:
# Weekday abbreviations in calendar order; used to sort the colour legend.
week_order = "Mon Tue Wed Thu Fri Sat Sun".split()

# One stacked bar per week (`Vecka`), split and coloured by the weekday of
# `Statistikdatum`. The y-axis is drawn on the right-hand side.
alt.Chart(
    cases_url,
    width=700,
    title={
        "text": "Weekly Covid-19 Cases in Sweden",
        "subtitle": f"Updated on {cases.Statistikdatum.max().date():%A, %B %d}",
    },
).mark_bar().encode(
    tooltip=[
        alt.Tooltip("sum(Fall):Q", title="Number of Cases"),
        alt.Tooltip("Statistikdatum:T", title="Date reported"),
    ],
    color=alt.Color(
        "day(Statistikdatum):N",
        title="Day reported",
        sort=week_order,
        legend=alt.Legend(orient="top-left"),
    ),
    order=alt.Order("Statistikdatum:N"),
    y=alt.Y(
        "sum(Fall):Q",
        axis=alt.Axis(title=None, orient="right", offset=10),
    ),
    x=alt.X("Vecka:O", title=None, axis=alt.Axis(values=values)),
)

## Why is the total number different from official numbers?

The total death toll in these graphs differs from the official totals because:

* Old data points with few deaths have been trimmed to make graphs more legible
* Some death dates are unknown and are not included in these numbers
