In [1]:
import pickle
import pandas as pd
import numpy as np
import datetime
from scipy import stats
from matplotlib import pyplot as plt
from matplotlib import dates as mdates
from ipywidgets import interact
import seaborn as sn
%matplotlib inline

from scrape_data import query

In [2]:
# Step between the successive page snapshots requested below.
delta_dt = datetime.timedelta(hours=6)
# Start four steps (4 x 6 h = 24 h) before the current local time.
initial_dt = datetime.datetime.now() - delta_dt * 4
query(initial_dt, delta_dt)

Requested page from 2020-03-08 19:13:34.575540. Status code: 200
Found 1 table(s)
Stand: 8.3.2020, 15:00 Uhr
Requested page from 2020-03-09 01:13:34.575540. Status code: 200
Found 1 table(s)
Stand: 8.3.2020, 15:00 Uhr
Requested page from 2020-03-09 07:13:34.575540. Status code: 200
Found 1 table(s)
Stand: 9.3.2020, 8:00 Uhr
Requested page from 2020-03-09 13:13:34.575540. Status code: 200
Found 1 table(s)
Stand: 9.3.2020, 8:00 Uhr
Requested page from 2020-03-09 19:13:34.575540. Status code: 200
Found 1 table(s)
Stand: 9.3.2020, 8:00 Uhr


In [3]:
# Read back the (tables, timestamps) pair stored in data.pkl.
# NOTE(review): pickle.load can execute arbitrary code from the file --
# only open a data.pkl that was produced locally by a trusted run.
with open("data.pkl", "rb") as pkl_file:
    loaded = pickle.load(pkl_file)
df_list, dt_list = loaded

In [4]:
# Build one wide table of case counts: one row per snapshot time, one column
# per Bundesland, plus a leading "Total" column taken from the "Gesamt" row.
series_list = []

for table, dt in zip(df_list, dt_list):
    # Work on a copy so the frames held in df_list are never mutated and this
    # cell gives the same result when it is re-run.
    table = table.copy()
    if "Bundesland" not in table.columns:
        # The header is not in the column labels for this table; take it from
        # the first row instead.
        table.columns = table.iloc[0]
    # Remove any row that merely repeats the header.
    table = table[table["Bundesland"] != "Bundesland"]
    series = table.set_index("Bundesland")["Fälle"]
    series.name = dt
    series_list.append(series)

df = pd.concat(series_list, axis=1, sort=True).transpose()
# A state missing from a given snapshot is treated as having zero cases.
df = df.fillna(0).astype(int)
# "Repatriierte" is not a Bundesland; tolerate snapshots sets that lack it.
df = df.drop(columns=["Repatriierte"], errors="ignore")
# Only add a zero-filled Sachsen-Anhalt column when no snapshot lists the
# state; assigning unconditionally would wipe out real counts once it appears.
if "Sachsen-Anhalt" not in df.columns:
    df["Sachsen-Anhalt"] = 0
df = df.reindex(sorted(df.columns), axis=1)
df.insert(0, "Total", df.pop("Gesamt"))
df = df.reset_index().rename(columns={"index": "Time"})
df["Time"] = pd.to_datetime(df["Time"])

df.to_csv("covid19-germany-lands.csv")

df

Unnamed: 0,Time,Total,Baden-Württemberg,Bayern,Berlin,Brandenburg,Bremen,Hamburg,Hessen,Mecklenburg-Vorpommern,Niedersachsen,Nordrhein-Westfalen,Rheinland-Pfalz,Saarland,Sachsen,Sachsen-Anhalt,Schleswig Holstein,Thüringen
0,2020-02-28 10:00:00,53,10,15,0,0,0,0,0,0,0,25,0,0,0,0,1,0
1,2020-02-29 10:00:00,66,14,15,0,0,0,0,3,0,0,30,1,0,0,0,1,0
2,2020-03-01 10:00:00,117,15,19,0,0,1,1,8,0,1,66,2,0,0,0,2,0
3,2020-03-01 15:00:00,129,15,23,0,0,1,1,8,0,1,74,2,0,0,0,2,0
4,2020-03-02 10:00:00,150,19,25,1,0,1,1,10,0,1,86,2,0,0,0,2,0
5,2020-03-02 15:00:00,157,20,26,1,0,1,2,10,0,1,90,2,0,0,0,2,0
6,2020-03-03 10:00:00,188,26,35,3,1,1,2,10,0,1,101,2,0,1,0,2,1
7,2020-03-03 15:00:00,196,28,37,3,1,2,2,12,0,2,103,2,0,1,0,2,1
8,2020-03-04 10:00:00,240,44,48,6,1,2,2,12,3,4,111,2,1,1,0,2,1
9,2020-03-04 15:00:00,262,50,48,7,1,3,3,12,4,7,115,7,1,1,0,2,1


In [11]:
def _fit_exponential(times, counts, n_points=100):
    """Fit ``counts ~ 2**(a*t + b)`` by linear regression on ``log2(counts)``.

    Parameters
    ----------
    times : pandas.Series
        Datetime values of the observations.
    counts : pandas.Series
        Strictly positive case counts, aligned with ``times``.
    n_points : int
        Number of samples of the fitted curve to return.

    Returns
    -------
    tuple or None
        ``(model_times, model_counts, doubling_days)``, or ``None`` when fewer
        than two observations are available (a line cannot be fitted).
    """
    if len(times) < 2:
        return None

    # Measure time as seconds elapsed since the first observation. This avoids
    # converting to POSIX timestamps and back, which mixes UTC and local time
    # for naive datetimes and shifts the fitted curve against the data.
    origin = times.iloc[0]
    t = (times - origin).dt.total_seconds().to_numpy()
    log_n = np.log2(counts.to_numpy())
    fit = stats.linregress(t, log_n)

    t_dense = np.linspace(t.min(), t.max(), n_points)
    model_times = (origin + pd.to_timedelta(t_dense, unit="s")).to_pydatetime()
    model_counts = 2 ** (fit.slope * t_dense + fit.intercept)

    # The slope is in doublings per second; invert and convert to days.
    if fit.slope != 0:
        doubling_days = 1. / (fit.slope * 3600 * 24)
    else:
        doubling_days = float("inf")
    return model_times, model_counts, doubling_days


@interact(land=['Total', 'Baden-Württemberg', 'Bayern', 'Berlin', 'Brandenburg',
       'Bremen', 'Hamburg', 'Hessen', 'Mecklenburg-Vorpommern',
       'Niedersachsen', 'Nordrhein-Westfalen', 'Rheinland-Pfalz', 'Saarland',
       'Sachsen', 'Sachsen-Anhalt', 'Schleswig Holstein', 'Thüringen'])
def plot_cases(land="Total", log_scale=False):
    """Plot the case counts of one column of the global ``df`` over time.

    The measured counts are drawn as a marked line, an exponential fit to the
    non-zero counts is drawn behind them, and the doubling time derived from
    the fit is shown in the title. The figure is also written to
    ``covid19-cases-Germany.png`` (the same file for every ``land``).

    Parameters
    ----------
    land : str
        Column of ``df`` to plot; ``"Total"`` is labelled "Germany".
    log_scale : bool
        Use a logarithmic y axis instead of a linear one starting at zero.
    """
    fig, ax = plt.subplots(figsize=(6,5), dpi=120)
    # x and y must be passed by keyword: recent seaborn releases no longer
    # accept them positionally.
    sn.lineplot(x=df["Time"], y=df[land], marker="o", ax=ax)

    if log_scale:
        ax.set_yscale('log')
    else:
        ax.set_ylim([0, None])

    ax.xaxis.set_minor_locator(mdates.DayLocator())
    plt.xticks(rotation=45)
    ax.grid()
    ax.set_ylabel("Cases")

    # Zero counts are excluded from the fit because log2(0) is not finite.
    positive = df.loc[df[land] > 0, ["Time", land]]
    fit = _fit_exponential(positive["Time"], positive[land])
    if fit is None:
        doubling_text = "n/a"
    else:
        model_times, model_counts, doubling_time = fit
        doubling_text = f"{doubling_time:.1f} days"
        # zorder=-1 keeps the fitted curve behind the measured data.
        ax.plot(model_times, model_counts, zorder=-1)

    region = "Germany" if land == "Total" else land
    ax.set_title(f"COVID-19 cases in {region} (RKI data) \n Doubling time: {doubling_text}")
    fig.tight_layout()
    fig.savefig("covid19-cases-Germany.png")
    plt.show()

interactive(children=(Dropdown(description='land', options=('Total', 'Baden-Württemberg', 'Bayern', 'Berlin', …