# Covid data dashboard for Norway

Collecting national metrics (tests, deaths), regional metrics (hospital admissions), and local metrics (new case counts) into charts with Altair,
trying to make a dashboard with Voilà.

Data from https://www.covid19data.no


[EU travel status thresholds](https://www.ecdc.europa.eu/en/covid-19/situation-updates/weekly-maps-coordinated-restriction-free-movement)


> - Green:
>   - if the 14-day notification rate is less than 50 and the test positivity rate is less than 4%; or
>   - if the 14-day notification rate is less than 75 and the test positivity rate less than 1%
> - Orange:
>   - if the 14-day notification rate is less than 50 and the test positivity rate is 4% or more; or
>   - the 14-day notification rate is 50 or more and less than 75 and the test positivity rate is 1% or more; or
>   - the 14-day notification rate is between 75 and 200 and the test positivity rate is less than 4%
> - Red:
>   - if the 14-day cumulative COVID-19 case notification rate ranges from 75 to 200 and the test positivity rate of tests for COVID-19 infection is 4% or more, or
>   - if the 14-day cumulative COVID-19 case notification rate is more than 200 but less than 500
> - Dark red:
>   - if the 14-day cumulative COVID-19 case notification rate is 500 or more
> - Grey:
>   if there is insufficient information or if the testing rate is lower than 300 cases per 100 000.


The US CDC defines transmission levels based on a *28*-day sum:

- low: < 50
- moderate: < 100
- high: < 500


Currently, data is presented as daily averages, not 14-day sums, so the thresholds need to be divided by 14:

| 14-day | daily |
| ------ | ----- |
| 50     | 3.6  |
| 75 | 5.4 |
| 200 | 14.3 |
| 500 | 35.7 |



In [1]:
import io

import altair as alt 
import pandas as pd
import requests
import requests_cache

# The upstream data is said to refresh once a day, so cached responses are kept for 24 hours.
CACHE_LIFETIME_SECONDS = 24 * 3600
requests_cache.install_cache(expire_after=CACHE_LIFETIME_SECONDS)

In [2]:
data_url = "https://raw.githubusercontent.com/thohan88/covid19-nor-data/HEAD/data"
# temporary fix waiting for PR https://github.com/thohan88/covid19-nor-data/pull/12
test_data_url = data_url.replace("thohan88", "minrk")

def download_dataset(path, parse_dates=("date",), data_url=data_url, timeout=60):
    """Download one CSV file from the covid19-nor-data archive into a DataFrame.

    Parameters
    ----------
    path : str
        Location of the CSV file relative to ``data_url``.
    parse_dates : sequence of str, bool or None
        Forwarded to ``pandas.read_csv``; pass ``None`` to read a file
        without date parsing.
    data_url : str
        Base URL that ``path`` is appended to.
    timeout : float
        Seconds to wait on the server before the request is abandoned.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV content.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    url = f"{data_url}/{path}"
    print(f"Downloading {url}")
    # Without a timeout a stalled connection would hang the notebook indefinitely.
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()
    print(f"Downloaded {len(r.content) // 1024}kB")
    if isinstance(parse_dates, tuple):
        # read_csv expects a list of column names, not a tuple
        parse_dates = list(parse_dates)
    return pd.read_csv(io.BytesIO(r.content), parse_dates=parse_dates)

In [3]:
# (name, path inside the archive, extra download options) for every table the dashboard reads.
# Tests and deaths are fetched from test_data_url; the lookup table is read without date parsing.
_dataset_sources = [
    ("lookup", "00_lookup_tables_and_maps/01_lookup_tables/msis.csv", {"parse_dates": None}),
    ("tests", "03_covid_tests/national_tests_lab.csv", {"data_url": test_data_url}),
    ("cases", "01_infected/msis/municipality_and_district.csv", {}),
    ("deaths", "04_deaths/deaths_total_fhi.csv", {"data_url": test_data_url}),
    ("admissions", "02_admissions/admissions.csv", {}),
    ("respirator-admissions", "02_admissions/admissions_with_respirators.csv", {}),
]
datasets = {
    name: download_dataset(path, **options)
    for name, path, options in _dataset_sources
}

Downloading https://raw.githubusercontent.com/thohan88/covid19-nor-data/HEAD/data/00_lookup_tables_and_maps/01_lookup_tables/msis.csv
Downloaded 18kB
Downloading https://raw.githubusercontent.com/minrk/covid19-nor-data/HEAD/data/03_covid_tests/national_tests_lab.csv
Downloaded 24kB
Downloading https://raw.githubusercontent.com/thohan88/covid19-nor-data/HEAD/data/01_infected/msis/municipality_and_district.csv
Downloaded 15420kB
Downloading https://raw.githubusercontent.com/minrk/covid19-nor-data/HEAD/data/04_deaths/deaths_total_fhi.csv
Downloaded 5kB
Downloading https://raw.githubusercontent.com/thohan88/covid19-nor-data/HEAD/data/02_admissions/admissions.csv
Downloaded 425kB
Downloading https://raw.githubusercontent.com/thohan88/covid19-nor-data/HEAD/data/02_admissions/admissions_with_respirators.csv
Downloaded 49kB


In [4]:
# Add a "region" column: the health region name minus its first six characters
# (presumably a fixed prefix such as "Helse " — confirm against the data).
respirators = datasets["respirator-admissions"]
respirators["region"] = respirators["health_reg_name"].str[6:]

In [5]:
# The deaths table is cumulative; the day-over-day difference gives daily deaths.
deaths_table = datasets["deaths"]
deaths_table.sort_values("date", inplace=True)
deaths_table["daily deaths"] = deaths_table["deaths"].diff()

In [6]:
# Normalise the case counts to cases per 100 000 inhabitants.
cases = datasets["cases"]
population_in_100k = cases["population"] / 1e5
cases["cases_per_100k"] = cases["cases"] / population_in_100k

In [7]:
# National table: sum the regional case counts per date, then derive the
# per-100k and day-over-day ("new") columns from those totals.
norway_population = 5367580
national_cases = cases.groupby("date")["cases"].sum()
per_100k = national_cases / (norway_population / 1e5)

norway = pd.DataFrame(
    {"total cases": national_cases, "total cases per 100k": per_100k}
)
norway["new cases"] = national_cases.diff()
norway["new cases per 100k"] = per_100k.diff()
# keep the date as a regular column as well as the index
norway["date"] = national_cases.index
max_new = norway["new cases"].max()
norway;

In [8]:
# The "Ukjent" (unknown) bydel reports a population that cannot be used for
# per-100k figures.  Keep one population per kommune instead: sum the rows of
# each date and take the largest daily sum.  get_subset() uses the Oslo value.

kommune_populations = {
    name: cases.loc[cases["kommune_name"] == name].groupby("date")["population"].sum().max()
    for name in ("Oslo", "Bergen")
}


In [9]:
def get_subset(key, column="kommune_name"):
    """Build the per-date case table for one region.

    Rows of the cases dataset whose ``column`` equals ``key`` are summed per
    date.  The result has the columns date, total cases, new cases,
    total cases per 100k and new cases per 100k, where the "new" columns are
    day-over-day differences of the totals.
    """
    frame = datasets["cases"]
    if column == "bydel_name":
        # bydel lookups only consider the Oslo rows
        frame = frame[frame["kommune_name"] == "Oslo"]
        if key == "Ukjent":
            # swap in the Oslo population for the "Ukjent" rows (on a copy)
            frame = frame.copy()
            frame.loc[frame[column] == key, "population"] = kommune_populations["Oslo"]

    by_date = frame[frame[column] == key].groupby("date")
    case_sum = by_date["cases"].sum()
    # the +1 keeps the denominator non-zero when the population sums to 0
    population_in_100k = (by_date["population"].sum() + 1) / 1e5

    totals = case_sum.rename("total cases")
    totals_per_100k = (case_sum / population_in_100k).round(0).rename("total cases per 100k")

    columns = [
        totals.index.to_series(),
        totals,
        totals.diff().rename("new cases"),
        totals_per_100k,
        totals_per_100k.diff().rename("new cases per 100k"),
    ]
    return pd.concat(columns, axis=1)


# oslo = get_subset("Oslo")
# oslo["new cases"].plot(x="date", linestyle=":", label="new cases")
# oslo["new cases per 100k"].plot(x="date", linestyle=":", label="new cases/100k")
# viken = get_subset("Viken", column="fylke_name")
# viken["new cases"].plot(x="date", label="Viken cases")


In [10]:
# One interval selection bound to the x scale, shared by every chart below.
x_domain = alt.selection_interval(encodings=["x"], bind="scales")

In [11]:
def plot_daily_deaths(label=None, window=7):
    """Chart daily deaths as faint points plus a rolling-average line.

    Parameters
    ----------
    label
        Unused by this function.
    window : int
        Number of days covered by the rolling average.

    Returns
    -------
    The layered Altair chart (line + points) with the shared ``x_domain``
    selection added.
    """
    df = datasets["deaths"]

    # Span exactly `window` rows around the current day.  The previous
    # `[-window // 2, window // 2]` floored the negative bound (e.g. [-4, 3]
    # for window=7), which averaged one row more than requested.
    rows_before = (window - 1) // 2
    rows_after = window - 1 - rows_before

    line = (
        alt.Chart(df, title=f"Daily deaths ({window}-day rolling average)")
        .mark_line(
            size=3,
            tooltip=alt.TooltipContent("encoding"),
        )
        .transform_window(
            avg="mean(daily deaths)",
            frame=[-rows_before, rows_after],
        )
        .encode(x="date:T", y=alt.Y("avg:Q", axis=alt.Axis(title="Daily deaths")))
    )

    points = (
        alt.Chart(df)
        .mark_point(opacity=0.3)
        .encode(
            x="date:T",
            y=alt.Y("daily deaths:Q"),
            tooltip=[
                "date",
                "daily deaths",
                "deaths",
            ],
        )
    )

    return (line + points).add_selection(x_domain).interactive()


# plot_daily_deaths()

In [12]:
def plot_daily_tests(label=None, window=7, ymax=None, per_capita=False):
    """Chart the daily test volume and the positivity rate on independent y axes.

    Layers a red rolling-average line of ``n_tests``, a blue rolling-average
    line of ``pr100_pos`` with its faint daily points, and horizontal rules at
    the 1% (green) and 4% (orange) positivity thresholds.

    Parameters
    ----------
    label, ymax, per_capita
        Unused by this function.
    window : int
        Number of days covered by the rolling averages.

    Returns
    -------
    The layered Altair chart with the shared ``x_domain`` selection added.
    """
    df = datasets["tests"]

    # Span exactly `window` rows around the current day.  The previous
    # `[-window // 2, window // 2]` floored the negative bound (e.g. [-4, 3]
    # for window=7), which averaged one row more than requested.
    rows_before = (window - 1) // 2
    rows_after = window - 1 - rows_before
    frame = [-rows_before, rows_after]

    n_tests = (
        alt.Chart(df, title=f"Daily tests ({window}-day rolling average)")
        .mark_line(
            color="red",
            size=3,
            tooltip=alt.TooltipContent("encoding"),
        )
        .transform_window(
            avg="mean(n_tests)",
            frame=frame,
        )
        .encode(
            x="date:T",
            y=alt.Y("avg:Q", axis=alt.Axis(title="Daily tests", titleColor="red")),
        )
    )

    pos_rate = (
        alt.Chart(df)
        .mark_line(
            # blue like its points and axis title; green is reserved for the 1% rule
            color="blue",
            size=2,
            tooltip=alt.TooltipContent("encoding"),
        )
        .transform_window(
            pos_rate="mean(pr100_pos)",
            frame=frame,
        )
        .encode(
            x="date:T",
            y=alt.Y(
                "pos_rate:Q",
                axis=alt.Axis(title="Positive tests rate (%)", titleColor="blue"),
            ),
        )
    )
    # add 1%, 4% threshold marks
    pos_rate += alt.Chart(pd.DataFrame({'y': [1]})).mark_rule(color="green").encode(y='y')
    pos_rate += alt.Chart(pd.DataFrame({'y': [4]})).mark_rule(color="orange").encode(y='y')

    pos_points = (
        alt.Chart(df)
        .mark_point(opacity=0.3, color="blue")
        .encode(
            x="date:T",
            y=alt.Y("pr100_pos:Q"),
            tooltip=[
                "date",
                "pr100_pos",
                "n_tests",
                "n_pos",
                "n_neg",
            ],
        )
    )

    # positivity and test volume have very different magnitudes, hence separate y scales
    return (
        alt.layer(pos_rate + pos_points, n_tests)
        .resolve_scale(y="independent")
        .add_selection(x_domain)
    ).interactive()


# Render the tests chart as this cell's output.
plot_daily_tests()

In [13]:
# Tests and deaths side by side.
alt.hconcat(plot_daily_tests(), plot_daily_deaths())

In [14]:
def plot_daily_admissions(
    label=None,
    window=7,
):
    """Plot daily hospital admissions and respirator use as stacked areas per region.

    Parameters
    ----------
    label
        Unused by this function.
    window : int
        Number of days covered by the rolling average.

    Returns
    -------
    Two area charts ("admissions" and "respirators") concatenated
    horizontally, with the shared ``x_domain`` selection added.
    """
    df = datasets["respirator-admissions"]

    # Span exactly `window` rows around the current day.  The previous
    # `[-window // 2, window // 2]` floored the negative bound (e.g. [-4, 3]
    # for window=7), which averaged one row more than requested.
    rows_before = (window - 1) // 2
    rows_after = window - 1 - rows_before

    charts = []
    for key in ("admissions", "respirators"):
        chart = (
            alt.Chart(df, title=f"Daily {key} ({window}-day rolling average)")
            .mark_area(
                tooltip=alt.TooltipContent("encoding"),
            )
            .transform_window(
                avg=f"mean({key})",
                frame=[-rows_before, rows_after],
                # average each region over its own rows; without the grouping
                # the window slides across rows of different regions
                groupby=["region"],
            )
            .encode(
                x="date:T",
                y=alt.Y("avg:Q", axis=alt.Axis(title=f"Daily {key}")),
                color="region",
            )
        )
        charts.append(chart.interactive())
    return alt.hconcat(*charts).add_selection(x_domain)


# Render the admissions charts as this cell's output.
plot_daily_admissions()

In [15]:
def collect_regions(min_population=5000, min_cases=100):
    """Rank regions at kommune, fylke and bydel level by cases per 100k.

    For every region the rows of its most recent date are summed, giving its
    latest case count and population.

    Parameters
    ----------
    min_population : int
        Only regions with a population strictly above this are listed.
    min_cases : int
        Only regions with a case count strictly above this are listed.

    Returns
    -------
    pandas.DataFrame
        Indexed by "<name> <level>" with the columns ``cases``,
        ``population`` and ``per 100k`` (truncated to an integer), sorted by
        ``per 100k`` in descending order.  "Ukjent" entries and the
        redundant "Oslo fylke" entry are left out.
    """

    def get_last_count_pop(g):
        """Sum cases and population over the rows of the group's last date.

        A region can have several rows per date (such as Oslo kommune), so
        the rows of the last date are summed rather than picked.
        """
        last_day = g["date"].max()
        return g.loc[g["date"] == last_day, ["cases", "population"]].sum()

    groups = []
    for level in ("kommune", "fylke", "bydel"):
        grouped = cases.groupby(f"{level}_name").apply(get_last_count_pop)
        # Copy the filtered rows: adding a column to the bare filter result
        # triggers pandas' SettingWithCopyWarning.
        grouped = grouped[grouped["population"] > 0].copy()
        grouped["per 100k"] = (1e5 * grouped["cases"] / grouped["population"]).astype(int)
        # tag each name with its level, e.g. "Oslo" -> "Oslo kommune"
        grouped.index = grouped.index + " " + level
        groups.append(grouped)

    df = pd.concat(groups).sort_values("per 100k", ascending=False)
    # drop Ukjent, redundant Oslo fylke
    is_unknown = df.index.str.startswith("Ukjent")
    df = df[~is_unknown & (df.index != "Oslo fylke")]
    # drop regions that are too small to rank meaningfully
    df = df[(df["cases"] > min_cases) & (df["population"] > min_population)]
    return df
# Rank all regions once; the region selector takes its options from this index.
all_regions = collect_regions()
# Show the ten regions with the most cases per 100k.
all_regions.iloc[:10]

Unnamed: 0,cases,population,per 100k
Stovner bydel,3416,33316,10253
Grorud bydel,2284,27707,8243
Alna bydel,4057,49801,8146
Søndre Nordstrand bydel,3174,39066,8124
Bjerke bydel,2391,33422,7153
Lørenskog kommune,2672,41460,6444
Gamle Oslo bydel,3518,58671,5996
Ullensaker kommune,2349,39625,5928
Oslo kommune,38034,693494,5484
Rælingen kommune,1015,18530,5477


In [16]:
# Thresholds on the 14-day notification rate, keyed by the colour of the rule drawn for them.
fortnight_thresholds = dict(orange=[50], red=[200], darkred=[500])

# The charts show daily values, so scale every 14-day threshold down to a per-day rate.
daily_thresholds = {}
for color, counts in fortnight_thresholds.items():
    daily_thresholds[color] = [n / 14.0 for n in counts]



def plot_daily_cases(subset, label=None, window=7, ymax=None, per_capita=False, key="new cases"):
    """Altair plot of a subset of case data.

    Scatter-plots the daily values of ``key`` with a red rolling-average
    trendline; per-capita charts also get the ``daily_thresholds`` rules.

    Parameters
    ----------
    subset : pandas.DataFrame
        Table with a date column and the "new cases", "total cases",
        "new cases per 100k" and "total cases per 100k" columns.
    label : str, optional
        Region name shown in the chart title; left out of the title if None.
    window : int
        Number of days covered by the rolling average.
    ymax : number, optional
        Upper bound of the y axis.  By default the data maximum rounded up to
        the next multiple of 100, but at least 300 (50 for per-capita charts).
    per_capita : bool
        Plot the "<key> per 100k" column instead of ``key``.
    key : str
        Column to plot.

    Returns
    -------
    The layered, interactive Altair chart.
    """
    if per_capita:
        key += " per 100k"
        # label is optional; appending to None used to raise a TypeError
        if label is not None:
            label += " (per 100k)"
    if ymax is None:
        # use 300 for uniform comparability of all kommuner
        min_max = 50 if per_capita else 300
        peak = subset[key].max()
        # Round UP to the next 100 line.  `(peak + 50) // 100 * 100` rounded to
        # the nearest 100 and could end up below the data maximum.  Negated
        # floor division is a ceiling that also lets a NaN peak (empty data)
        # fall through to min_max.
        ymax = max(min_max, -(-peak // 100) * 100)

    # Span exactly `window` rows around the current day.  The previous
    # `[-window // 2, window // 2]` floored the negative bound (e.g. [-4, 3]
    # for window=7), which averaged one row more than requested.
    rows_before = (window - 1) // 2
    rows_after = window - 1 - rows_before

    where = f" in {label}" if label is not None else ""
    line = (
        alt.Chart(
            subset, title=f"Daily {key}{where} ({window}-day rolling average)"
        )
        .mark_line(
            color="red",
            size=3,
            tooltip=alt.TooltipContent("encoding"),
        )
        .transform_window(
            rolling_mean=f"mean({key})",
            frame=[-rows_before, rows_after],
        )
        .encode(
            x="date:T",
            y="rolling_mean:Q",
        )
    )

    points = (
        alt.Chart(subset)
        .mark_point(opacity=0.5)
        .encode(
            x="date:T",
            y=alt.Y(f"{key}:Q", scale=alt.Scale(domain=(0, ymax))),
            tooltip=[
                "date",
                "new cases",
                "total cases",
                "new cases per 100k",
                "total cases per 100k",
            ],
        )
    )

    chart = points + line

    if per_capita:
        # one horizontal rule per threshold value, drawn in the threshold's colour
        threshold_rules = [
            alt.Chart(pd.DataFrame({'y': values})).mark_rule(color=color).encode(y='y')
            for color, values in daily_thresholds.items()
        ]
        if threshold_rules:
            chart += alt.layer(*threshold_rules)

    return chart.interactive()

In [17]:
from IPython.display import display
from ipywidgets import Select, SelectMultiple, interact, interactive, HBox, VBox, Layout


# Regions selected when the dashboard first loads.
default_regions = ("Søndre Nordstrand bydel", "Oslo kommune", "Viken fylke", "Bergen kommune")
region_options = tuple(all_regions.index)

item_selector = SelectMultiple(
    # SelectMultiple rejects values that are not among its options, so skip
    # any default that collect_regions() filtered out of all_regions.
    value=tuple(region for region in default_regions if region in region_options),
    options=region_options,
)

# store recent values so when we switch category, we remember the last selection
# item_selector._recent_values = {
#     "fylke": ("Oslo", "Viken"),
#     "bydel": ("Søndre Nordstrand", "Ukjent", "Alna", "Stovner"),
#     "kommune": item_selector.value,
# }

# group_selector = MultiSelect(value="kommune", options=["fylke", "kommune", "bydel"])


# def change_group(change):
#     # remember previous value for when we switch back
#     item_selector._recent_values[change.old] = item_selector.value
#     print("Saving", item_selector.value, change.old, change.new)
#     if change.new == "fylke":
#         new_options = tuple(fylker.index)
#     elif change.new == "kommune":
#         new_options = tuple(kommuner.index)
#     elif change.new == "bydel":
#         new_options = tuple(bydeler.index)

#     else:
#         return
#     print(
#         "Loading",
#         change.old,
#         change.new,
#         item_selector._recent_values.get(change.new, ()),
#     )

#     item_selector.options = new_options
#     item_selector.value = item_selector._recent_values.get(change.new, ())


# group_selector.observe(change_group, "value")


# @interact(
#     level=group_selector,
#     regions=item_selector,
#     window=(1, 30),
# )
def plot_trends(regions, window=7):
    """Display the national case charts followed by one row per selected region.

    ``regions`` is a single label or an iterable of labels of the form
    "<name> <level>"; the level selects the "<level>_name" column that
    get_subset() filters on.  Every row pairs the absolute chart with its
    per-100k counterpart, and all rows are stacked vertically.
    """

    def chart_row(data, name):
        # absolute counts first, per-100k second; both get the shared x selection
        pair = []
        for per_capita in (False, True):
            chart = plot_daily_cases(
                data, label=name, window=window, per_capita=per_capita
            )
            pair.append(chart.add_selection(x_domain))
        return alt.hconcat(*pair)

    rows = [chart_row(norway, "Norway")]

    # a single selection may arrive as a bare string
    if isinstance(regions, str):
        regions = [regions]

    for region_label in regions:
        name, level = region_label.rsplit(None, 1)
        region_data = get_subset(name, column=level + "_name")
        rows.append(chart_row(region_data, name))

    display(alt.vconcat(*rows))


# Wrap plot_trends in an interactive widget driven by the region selector and a window of 1 to 30.
widget = interactive(plot_trends, regions=item_selector, window=(1, 30))
# The last child is treated as the output area; everything before it is a control.
*control_widgets, output = widget.children
controls = HBox(control_widgets, layout=Layout(flex_flow="row wrap"))
display(VBox([controls, output]))
# draw the charts for the initial selection
widget.update()

VBox(children=(HBox(children=(SelectMultiple(description='regions', index=(3, 8, 25, 51), options=('Stovner by…