In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.dates as dates
import datetime

# Silence pandas' SettingWithCopyWarning for the whole notebook (default='warn').
# The cells below add columns to sliced DataFrames, which would otherwise warn.
pd.options.mode.chained_assignment = None  # default='warn'

# Render figures with matplotlib's interactive "notebook" backend.
%matplotlib notebook

Deutsche COVID Daten von https://npgeo-corona-npgeo-de.hub.arcgis.com/datasets/dd4580c810204019a7b8eb3e0b329dd6_0

In [2]:
def daily_cases(state_rows):
    """Return the per-day case total of a slice of the RKI export.

    Parameters
    ----------
    state_rows : pandas.DataFrame
        Rows of the RKI export; must contain "AnzahlFall" and "Meldedatum".

    Returns
    -------
    pandas.DataFrame
        Single column "AnzahlFall", summed per report day, indexed by "Date".
    """
    # "Meldedatum" is "<date> <time>"; only the date part is needed.
    report_day = state_rows["Meldedatum"].str.split(" ", expand=True)[0]
    return (
        state_rows[["AnzahlFall"]]
        .assign(Date=pd.DatetimeIndex(report_day))
        # Sum the numeric column only. Summing the whole frame relied on pandas
        # silently dropping string columns, which pandas >= 2.0 no longer does.
        .groupby("Date")[["AnzahlFall"]]
        .sum()
    )


rki_raw = pd.read_csv('RKI_COVID19.csv')
# NOTE: IdBundesland 8 is Baden-Württemberg ("BaWu"). The historical names
# `bav` / "Bayern" are kept because the plotting cell reads ger['Bayern'].
bav_data = rki_raw[rki_raw["IdBundesland"] == 8]      # BaWu
nieders_data = rki_raw[rki_raw["IdBundesland"] == 3]  # Niedersachsen

bav = daily_cases(bav_data)
ns = daily_cases(nieders_data)

# Calendar of the analysis window: 2020-01-28 .. 2020-08-22 (stop value is exclusive).
date_column = pd.Series(np.arange('2020-01-28', '2020-08-23', dtype='datetime64[D]'))
step0 = date_column.to_frame().rename(columns={0: "Date"})

# Left-join onto the calendar so the result always has exactly one row per
# calendar day. The previous outer merge followed by dropping the last row was
# only aligned when the CSV contained exactly one report date past the window.
step1 = pd.merge(step0, bav, how="left", on="Date").set_index("Date")
ger = (
    pd.merge(step1, ns, how="left", on="Date")
    .fillna(0)  # days without reported cases
    .rename(columns={"AnzahlFall_x": "Bayern", "AnzahlFall_y": "Niedersachsen"})
)

Österreichische COVID Daten von https://www.data.gv.at/katalog/dataset/covid-19-epidemiologische-kurve/resource/a04e4c69-ed72-4fd8-92b5-68eac2299286

In [428]:
def date_convert(date_ger):
    """Reorder a dot-separated ``DD.MM.YYYY`` string into ``YYYY-MM-DD``.

    The first three dot-separated fields are rearranged as-is; no padding or
    validation is applied.
    """
    fields = date_ger.split(".")
    day, month, year = fields[0], fields[1], fields[2]
    return f"{year}-{month}-{day}"

# The export is semicolon-delimited with the header
# "time;tägliche Erkrankungen;Timestamp"; let pandas split it and give the
# three columns short names instead of splitting one merged column by hand.
aut_raw = pd.read_csv(
    "Epikurve.csv", sep=";", header=0, names=["Date", "Cases", "Timestamp"]
)

aut_data = (
    aut_raw[["Date", "Cases"]]
    .assign(
        # Dates are written day.month.year.
        Date=lambda frame: pd.to_datetime(frame["Date"], format="%d.%m.%Y"),
        Cases=lambda frame: frame["Cases"].astype(int),
    )
    .set_index("Date")
    .iloc[:-1]  # the last row of the export is discarded, as before
)

# Left-join onto the shared calendar (step0) so the frame has exactly one row
# per calendar day; days without an entry count as 0 cases.
aut = pd.merge(step0, aut_data, how="left", on="Date").set_index("Date").fillna(0)
aut

Unnamed: 0_level_0,Cases
Date,Unnamed: 1_level_1
2020-01-28,0.0
2020-01-29,0.0
2020-01-30,0.0
2020-01-31,0.0
2020-02-01,0.0
...,...
2020-08-18,245.0
2020-08-19,358.0
2020-08-20,314.0
2020-08-21,272.0


In [429]:
# One pandas Timestamp per calendar day from 2020-01-28 up to and including
# 2020-08-22 (np.arange excludes the stop value).
calendar_days = np.arange('2020-01-28', '2020-08-23', dtype='datetime64[D]')
dates_x = [pd.to_datetime(day) for day in calendar_days]

In [482]:
# 'seaborn-white' was renamed to 'seaborn-v0_8-white' in matplotlib 3.6 and the
# old alias was removed later; pick whichever this installation provides.
style_name = next(
    (name for name in ('seaborn-v0_8-white', 'seaborn-white')
     if name in plt.style.available),
    'default',
)
plt.style.use(style_name)

fig, ax = plt.subplots()

# The 'Bayern' column holds the IdBundesland 8 series, i.e. Baden-Württemberg,
# hence the label.
ax.plot(dates_x, ger['Bayern'], label="Baden-Württemberg, GER")
ax.plot(dates_x, ger['Niedersachsen'], label="Lower Saxony, GER")
ax.plot(dates_x, aut['Cases'], label="Austria")
ax.legend(facecolor='white', framealpha=1)
ax.set_title("Daily New SARS2 Cases in 2020 in selected German States and Austria")

# One tick per month, labelled with the abbreviated month name.
ax.xaxis.set_major_locator(dates.MonthLocator())
ax.xaxis.set_major_formatter(dates.DateFormatter('%b'))
ax.set_xlim([datetime.date(2020, 2, 20), datetime.date(2020, 8, 22)])
ax.set_ylim([0, 1500])
ax.set_xlabel('Date')
ax.set_ylabel('Daily New Cases')

# Drop the top/right frame lines to reduce clutter.
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

<IPython.core.display.Javascript object>

The graph shows the daily new COVID cases of the three regions of interest: Lower Saxony, GER (population: 7.9 million), Baden-Württemberg, GER (population: 11.1 million), and Austria (population: 8.8 million). I chose a line plot instead of a bar plot, even though case counts are discrete numbers, because it is easier to read: with three overlapping series, a bar plot would need semi-transparent bars and would look cluttered.

As expected, the Austrian peak of the first wave came earlier than the German one, as Austria was quicker to introduce preventive measures such as a lockdown and mandatory masks. From July onwards, in line with Austria reopening its economy earlier than Germany, we see cases rising again, and the trend is upward for all three regions. Lower Saxony, a low-density rural state, never had a high peak and has had low incidence throughout the whole pandemic, with fewer new cases in recent times.

Note that the weekly dips in the graph are caused by county health authorities not processing new cases on weekends.

In [535]:
rki_all = pd.read_csv('RKI_COVID19.csv')

# "Meldedatum" is "<date> <time>"; keep only the date part as the report day.
report_day = pd.DatetimeIndex(rki_all["Meldedatum"].str.split(" ", expand=True)[0])

# Nationwide cases per report day. Only the numeric column is summed: summing
# the whole frame relied on pandas silently dropping string columns, which
# pandas >= 2.0 no longer does.
germany = (
    rki_all[["AnzahlFall"]]
    .assign(Date=report_day)
    .groupby("Date")[["AnzahlFall"]]
    .sum()
    .reset_index()
)
germany["day"] = germany["Date"].dt.dayofweek  # Monday == 0

# Keep Wednesday-Friday (dayofweek 2..4) after 2020-07-01. The row labels from
# reset_index() are kept on purpose: the fitting code below uses germany.index
# as its x values.
germany = germany[germany["day"].between(2, 4) & (germany["Date"] > '2020-07-01')]


from scipy.optimize import curve_fit

def func(x, a, b):
    """Exponential model ``a * exp(b * x)`` used as the curve_fit target.

    x is the independent variable, a the scale factor and b the growth rate.
    """
    exponent = b * x
    growth = np.exp(exponent)
    return a * growth

# Fit on plain float arrays. x is the row label of each kept report day, i.e.
# its position in the full daily series.
x = germany.index.to_numpy(dtype=float)
cases = germany["AnzahlFall"].to_numpy(dtype=float)

# Without p0, curve_fit starts at a = b = 1, i.e. exp(x) evaluated at large row
# positions, which is far from the data and prone to overflow or
# non-convergence. Seed the optimiser from a straight-line fit of log(cases);
# the clip guards against log(0).
slope, intercept = np.polyfit(x, np.log(np.clip(cases, 1, None)), 1)
popt, pcov = curve_fit(func, x, cases, p0=(np.exp(intercept), slope))
y = func(x, *popt)

fig1, ax1 = plt.subplots()
ax1.bar(x, cases, label="Reported cases")
ax1.plot(x, y, c='g', label="Exponential fit")
ax1.set_xlabel("Report day (row position in the daily series)")
ax1.set_ylabel("Daily New Cases")
ax1.set_title("Daily new cases in Germany (Wed-Fri, after 2020-07-01) with exponential fit")
ax1.legend();

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1c3a94690a0>]