### Setup

In [1]:
# Enable IPython's autoreload extension in mode 2 so that imported modules are
# reloaded before each cell runs; edits to local modules are then picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path

import numpy as np
import pandas as pd

import altair as alt

from covidviz import data, plotting

In [3]:
# Notebook-wide settings.
# MIN_N_CASES is the case threshold that the day-counter column name refers to.
MIN_N_CASES = 50
# Directory handed to data.PlotData as its `out_path`; resolved relative to the
# current working directory.
DATA_PATH = Path.cwd().joinpath("data")
# Column name of the form "days_since_<threshold>_cases".
day_counter = "days_since_{}_cases".format(MIN_N_CASES)

### Data preparation

In [4]:
# Unfortunately, the Morgenpost ("mopo") data seems to be refreshed at most once a week, so this download is disabled.
# mopo = pd.read_csv(
#     "https://interaktiv.morgenpost.de/corona-virus-karte-infektionen-deutschland-weltweit/data/Coronavirus.history.v2.csv"
# ).query("parent == 'Deutschland'")
# mopo["date"].max()

In [5]:
# We'll use the RKI data instead and combine it with Google's mobility reports: https://www.google.com/covid19/mobility/
# PlotData receives DATA_PATH as its output path; its `df` attribute is the
# frame that every chart in this notebook is built from.
# NOTE(review): presumably PlotData downloads/caches the raw data under
# `out_path` — confirm in covidviz.data.
plot_data = data.PlotData(out_path=DATA_PATH)
plot_df = plot_data.df

In [6]:
# Show the first two rows as a quick check of the available columns.
plot_df.head(2)

Unnamed: 0,Bundesland,Meldedatum,Neuinfektionen,infections_cumulative,date_50_cases,days_since_50_cases,Maßnahmen,absolute_growth,cases_logratio,num_measures,Retail And Recreation,Grocery And Pharmacy,Parks,Transit Stations,Workplaces,Residential,total_activity,total_neg_activity,total_pos_activity
8,Baden-Württemberg,2020-03-01,1.0,23.0,2020-03-03,-2.0,,1.0,0.044452,0.0,9.0,20.0,40.0,6.0,-1.0,0.0,0.74,1.0,75.0
9,Baden-Württemberg,2020-03-02,10.0,33.0,2020-03-03,-1.0,,10.0,0.361013,0.0,0.0,8.0,-2.0,-3.0,-1.0,1.0,0.03,6.0,9.0


#### Relative increase

In [None]:
# Build one chart per state showing how fast cumulative case counts grow,
# either as daily relative increase or as doubling time (selected through
# Y_VARIABLE). Results are collected in two lists:
#   line_charts     - only the LOESS trend line of each state
#   combined_charts - raw points + measure markers + trend line of each state
X_VARIABLE = "days_since_50_cases"
Y_VARIABLE = "daily_increase"

# Per-metric settings. `expression` is a Vega expression evaluated per row on
# `cases_logratio`, the natural log of the day-over-day ratio of cumulative
# cases (e.g. 23 -> 33 cases gives 0.361). `measure_level` is the constant y
# value at which the measure markers are drawn; it is passed to Vega as an
# expression, hence a string.
if Y_VARIABLE == "daily_increase":
    # exp(logratio) - 1 turns the log-ratio back into a relative increase.
    expression = "pow(E, datum.cases_logratio) - 1"
    y_title = "Daily Increase in Cumulative Cases"
    y_format = "%"
    y_domain = (0, 0.9)
    measure_level = "0.85"
    title = "Daily Increase of COVID-19 Cases in German States"
elif Y_VARIABLE == "doubling_time":
    # ln(2) / logratio is the number of days needed to double at this rate.
    expression = "log(2) / datum.cases_logratio"
    y_title = "Doubling Time (Days)"
    y_format = ""
    # Reversed domain: short doubling times end up at the top of the axis.
    y_domain = (50, 0)
    measure_level = "45"
    title = "Doubling Time of COVID-19 Cases in German States"
elif Y_VARIABLE == "absolute_growth":
    # Absolute growth is not handled by this cell.
    raise NotImplementedError("This doesn't work yet.")
else:
    raise NotImplementedError(f"y variable {Y_VARIABLE} is not implemented.")

combined_charts = []
line_charts = []
for state in plot_df["Bundesland"].unique():
    # A boolean mask avoids building a query string from the state name.
    state_df = plot_df[plot_df["Bundesland"] == state]
    base = alt.Chart(state_df, title=state).encode(
        x=alt.X(
            X_VARIABLE,
            axis=alt.Axis(title=X_VARIABLE.replace("_", " ").title(), offset=5),
        ),
        y=alt.Y("cases_logratio:Q"),
    )
    # Raw daily values of the selected metric.
    points = (
        base.transform_calculate(as_=Y_VARIABLE, calculate=expression)
        .mark_point()
        .encode(
            y=alt.Y(
                f"{Y_VARIABLE}:Q",
                scale=alt.Scale(domain=y_domain),
                axis=alt.Axis(format=y_format, title=y_title),
            ),
            color="Bundesland:N",
        )
    )
    # Diamond markers at a fixed height, sized by the number of measures taken
    # on that day. The count lives in the `num_measures` column; the legend
    # keeps the German label "Anzahl Maßnahmen".
    measure_points = (
        base.mark_point(size=300, shape="diamond", color="grey", fill=None)
        .transform_calculate(y_level=measure_level)
        .encode(
            y="y_level:Q",
            size=alt.Size("num_measures:Q", title="Anzahl Maßnahmen"),
            tooltip=["Meldedatum", "Maßnahmen"],
        )
        .interactive()
    )
    # LOESS-smoothed trend of the metric.
    lines = (
        points.transform_loess(
            on=X_VARIABLE,
            loess=Y_VARIABLE,
            as_=[X_VARIABLE, f"{Y_VARIABLE}_loess"],
            groupby=["Bundesland"],
        )
        .mark_line()
        .encode(
            y=alt.Y(
                f"{Y_VARIABLE}_loess:Q",
                scale=alt.Scale(domain=y_domain),
                axis=alt.Axis(format=y_format, title=y_title),
            ),
            tooltip=[X_VARIABLE],
        )
    )
    line_charts.append(lines.properties(width=900, height=300, title=title))
    combined_charts.append(
        (points + measure_points + lines).properties(width=900, height=300)
    )

In [None]:
# Overlay the LOESS trend lines of all states in a single chart.
alt.layer(*line_charts)

In [None]:
# Stack the per-state charts (points, measure markers, trend line) vertically.
alt.vconcat(*combined_charts)

#### Absolute increase

In [None]:
# Plot configuration for the absolute-growth charts: x is the day counter
# relative to the 50-case threshold, y is the `absolute_growth` column.
X_VARIABLE = "days_since_50_cases"
Y_VARIABLE = "absolute_growth"
y_title = "Absolute Growth in Cumulative Cases"
y_format = ""
title = "Absolute Growth of COVID-19 Cases in German States"

In [None]:
# Build one chart per state with the raw absolute growth, markers for the
# measures taken, and a LOESS trend line; collect them in combined_charts.
combined_charts = []
line_charts = []
for state in plot_df["Bundesland"].unique():
    # A boolean mask avoids building a query string from the state name.
    state_df = plot_df[plot_df["Bundesland"] == state]
    base = alt.Chart(state_df, title=state).encode(
        x=alt.X(
            X_VARIABLE, axis=alt.Axis(title=X_VARIABLE.replace("_", " ").title(), offset=5)
        ),
        y=alt.Y(f"{Y_VARIABLE}:Q"),
    )
    # Raw daily values.
    points = base.mark_point().encode(y=alt.Y(f"{Y_VARIABLE}:Q"), color="Bundesland:N")
    # Diamond markers on the zero line, sized by the number of measures taken
    # on that day. The count lives in the `num_measures` column; the legend
    # keeps the German label "Anzahl Maßnahmen".
    measure_points = (
        base.mark_point(size=600, shape="diamond", color="grey", fill=None)
        .transform_calculate(y_level="0")
        .encode(
            y="y_level:Q",
            size=alt.Size("num_measures:Q", title="Anzahl Maßnahmen"),
            tooltip=["Meldedatum", "Maßnahmen"],
        )
        .interactive()
    )
    # LOESS-smoothed trend of the raw values.
    lines = (
        points.transform_loess(
            on=X_VARIABLE,
            loess=Y_VARIABLE,
            as_=[X_VARIABLE, f"{Y_VARIABLE}_loess"],
            groupby=["Bundesland"],
        )
        .mark_line()
        .encode(
            y=alt.Y(
                f"{Y_VARIABLE}_loess:Q", axis=alt.Axis(format=y_format, title=y_title)
            ),
            tooltip=[X_VARIABLE],
        )
    )
    combined_charts.append(
        (points + measure_points + lines).properties(width=900, height=300)
    )

In [None]:
# Stack the per-state absolute-growth charts vertically.
alt.vconcat(*combined_charts)

#### Absolute increase + mobility data

In [None]:
# Plot configuration for the growth-plus-mobility charts: x is the reporting
# date column (`Meldedatum`), y is the `absolute_growth` column.
X_VARIABLE = "Meldedatum"
Y_VARIABLE = "absolute_growth"
y_title = "Absolute Growth in Cumulative Cases"
y_format = ""
title = "Absolute Growth of COVID-19 Cases in German States"

In [None]:
# Build one chart per state that layers the absolute growth (points, measure
# markers, LOESS trend) over a stacked area of the mobility changes, each on
# its own y axis; collect them in combined_charts.
combined_charts = []
line_charts = []

# Mobility columns to stack. Taken from the PlotData object so the cell runs
# on a fresh kernel.
# NOTE(review): assumes plot_data.activity_cols is the list of mobility column
# names in plot_df ("Retail And Recreation", "Parks", ...) — confirm.
activity_cols = list(plot_data.activity_cols)
activity_fields = plot_df[activity_cols]

# Symmetric y-axis limit for the mobility area: take the larger of the most
# negative and most positive per-row stacked totals (in percent), round up to
# the next multiple of 50 and convert to a fraction (50 % -> 0.5).
max_activity = (
    max(
        abs(activity_fields[activity_fields < 0].sum(axis=1).min()),
        abs(activity_fields[activity_fields > 0].sum(axis=1).max()),
    )
    // 50
    + 1
) * 0.5

# De-duplicate while keeping a stable order (a set has no defined order).
point_tooltip = list(dict.fromkeys(["Meldedatum", X_VARIABLE]))

for state in plot_df["Bundesland"].unique():
    # A boolean mask avoids building a query string from the state name.
    state_df = plot_df[plot_df["Bundesland"] == state]
    base = alt.Chart(state_df, title=state).encode(
        x=alt.X(
            X_VARIABLE,
            axis=alt.Axis(title=X_VARIABLE.replace("_", " ").title(), offset=5),
        ),
        y=alt.Y(f"{Y_VARIABLE}:Q"),
    )
    # Raw daily values.
    points = base.mark_point(color="DarkSlateBlue").encode(
        y=alt.Y(f"{Y_VARIABLE}:Q"), tooltip=point_tooltip
    )
    # Filled diamonds on the zero line for days with at least one measure.
    measure_points = (
        base.mark_point(
            size=400, shape="diamond", color="DarkSlateGrey", fill="DarkSlateGrey"
        )
        .transform_calculate(y_level="0")
        .encode(y="y_level:Q", tooltip=["Meldedatum", "Maßnahmen"])
        .transform_filter("datum.num_measures > 0")
    )
    # LOESS-smoothed trend of the raw values.
    lines = (
        points.transform_loess(
            on=X_VARIABLE,
            loess=Y_VARIABLE,
            as_=[X_VARIABLE, f"{Y_VARIABLE}_loess"],
            groupby=["Bundesland"],
        )
        .mark_line(color="DarkSlateBlue")
        .encode(
            y=alt.Y(
                f"{Y_VARIABLE}_loess:Q", axis=alt.Axis(format=y_format, title=y_title)
            )
        )
    )
    # Stacked area of the mobility categories: fold the wide columns into
    # (category, value) pairs and convert percent values to fractions so the
    # "%" axis format renders them correctly.
    activity = (
        base.mark_area()
        .transform_fold(
            fold=activity_cols,
            as_=["Mobility Category", "mobility_change_percent"],
        )
        .transform_calculate(
            as_="Mobility Change", calculate="datum.mobility_change_percent / 100"
        )
        .encode(
            y=alt.Y(
                "Mobility Change:Q",
                axis=alt.Axis(format="%", orient="right"),
                scale=alt.Scale(domain=(-max_activity, max_activity)),
            ),
            color=alt.Color("Mobility Category:N", scale=alt.Scale(scheme="blues")),
            opacity=alt.value(0.5),
        )
    )
    combined_charts.append(
        (activity + (points + measure_points + lines))
        .resolve_scale(y="independent")
        .properties(width=900, height=300)
    )

In [None]:
# Stack the per-state growth-plus-mobility charts vertically.
alt.vconcat(*combined_charts)

#### Absolute increase + interactive activity data

In [7]:
# Symmetric axis limit for the activity plots: largest absolute value across
# the two total-activity columns (in percent), rounded up to the next multiple
# of 50 and expressed as a fraction (50 % -> 0.5).
activity_totals = plot_df[["total_neg_activity", "total_pos_activity"]]
largest_total = activity_totals.abs().max().max()
max_activity = (largest_total // 50 + 1) * 0.5

In [9]:
# Summary plots of absolute growth over the reporting date, sharing the
# activity axis limit computed above.
# NOTE(review): presumably renders one small 250x150 panel per state — confirm
# in covidviz.plotting.
plotting.combine_summary_plots(
    df=plot_df,
    x_var="Meldedatum",
    x_title="Date",
    y_var="absolute_growth",
    y_title="Absolute Growth",
    max_activity=max_activity,
    width=250,
    height=150,
)

In [11]:
# Infection detail plot for a single state (Bayern): absolute growth over the
# reporting date, using the same activity axis limit as the summary plots.
plotting.plot_infection_details(
    df=plot_df,
    state="Bayern",
    title="Infection Details: Bayern",
    x_var="Meldedatum",
    x_title="Date",
    y_var="absolute_growth",
    y_title="Absolute Growth in Cumulative Cases",
    max_activity=max_activity,
)

In [12]:
# Mobility detail plot for a single state (Bayern) over the reporting date,
# using the activity columns exposed by the PlotData object.
plotting.plot_activity_details(
    df=plot_df,
    state="Bayern",
    title="Mobility Details: Bayern",
    x_var="Meldedatum",
    x_title="Date",
    activity_cols=plot_data.activity_cols,
    max_activity=max_activity,
    width=830,
)