In [1]:
from coronadata import getdata
import pandas as pd

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px

import numpy as np
import datetime
from tqdm import tqdm

%matplotlib inline

In [2]:
%load_ext autoreload
%autoreload 2

# RKI data preparation

In [3]:
rki = getdata.get_rki()

  0%|          | 1/788 [00:00<02:00,  6.52it/s]

Querying data from RKI...


100%|██████████| 788/788 [01:29<00:00,  8.77it/s]
100%|██████████| 789/789 [00:00<00:00, 17389.94it/s]


Creating pandas dataframe...


In [4]:
df = rki.raw_data.groupby(['Meldedatum']).sum()
df.reset_index(inplace=True)
df['Cases confirmed'] = df['AnzahlFall'].cumsum()
df['Cases deaths'] = df['AnzahlTodesfall'].cumsum()

df['Cases non-lethal'] = df['AnzahlFall'] - df['AnzahlTodesfall']

In [5]:
dates = []
for i in df[['Meldedatum', 'Cases non-lethal']].iterrows():
    for n in range(i[1]['Cases non-lethal']):
        dates.append(i[1]['Meldedatum'])

In [7]:
len(dates)

148496

In [8]:
df.shape

(75, 12)

In [35]:
df.tail(5)

Unnamed: 0,Meldedatum,IdBundesland,AnzahlFall,AnzahlTodesfall,ObjectId,NeuerFall,NeuerTodesfall,NeuGenesen,AnzahlGenesen,Cases confirmed,Cases deaths,Cases non-lethal
70,2020-04-21,12917,2171,39,6570035099,6,-15105,-13414,228,147486,5582,2132
71,2020-04-22,13615,2418,27,6908251059,23,-16008,-14816,156,149904,5609,2391
72,2020-04-23,11589,1985,14,5804192002,143,-13523,-12552,121,151889,5623,1971
73,2020-04-24,9484,1650,12,4699592866,584,-10945,-10446,65,153539,5635,1638
74,2020-04-25,3394,594,2,1733390243,452,-4057,-3967,11,154133,5637,592


In [38]:
df.head()

Unnamed: 0,Meldedatum,IdBundesland,AnzahlFall,AnzahlTodesfall,ObjectId,NeuerFall,NeuerTodesfall,NeuGenesen,AnzahlGenesen,Cases confirmed,Cases deaths,Cases non-lethal
0,2020-01-28,18,2,0,7693792,0,-18,0,2,2,0,2
1,2020-01-29,18,2,0,7684083,0,-18,0,2,4,0,2
2,2020-01-31,36,4,0,15408726,0,-36,0,4,8,0,4
3,2020-02-03,9,1,0,3838927,0,-9,0,1,9,0,1
4,2020-02-04,42,5,0,19185700,0,-45,-9,4,14,0,5


# JH data preparation

In [23]:
file_path = r'../COVID-19/csse_covid_19_data/csse_covid_19_time_series'

In [24]:
data = getdata.DataLoader(file_path)

In [26]:
df_jh_de = data.df[data.df['Country/Region']=='Germany']

In [42]:
df_jh_de.head(10)

Unnamed: 0,Country/Region,Date,Cases confirmed,Cases deaths,Cases Recovered,Cases active,Cases non-lethal
Germany_01/22/2020,Germany,2020-01-22,0,0,0.0,0.0,0
Germany_01/23/2020,Germany,2020-01-23,0,0,0.0,0.0,0
Germany_01/24/2020,Germany,2020-01-24,0,0,0.0,0.0,0
Germany_01/25/2020,Germany,2020-01-25,0,0,0.0,0.0,0
Germany_01/26/2020,Germany,2020-01-26,0,0,0.0,0.0,0
Germany_01/27/2020,Germany,2020-01-27,1,0,0.0,1.0,1
Germany_01/28/2020,Germany,2020-01-28,4,0,0.0,4.0,4
Germany_01/29/2020,Germany,2020-01-29,4,0,0.0,4.0,4
Germany_01/30/2020,Germany,2020-01-30,4,0,0.0,4.0,4
Germany_01/31/2020,Germany,2020-01-31,5,0,0.0,5.0,5


In [36]:
df_jh_de.tail()

Unnamed: 0,Country/Region,Date,Cases confirmed,Cases deaths,Cases Recovered,Cases active,Cases non-lethal
Germany_04/21/2020,Germany,2020-04-21,148291,5033,95200.0,48058.0,143258
Germany_04/22/2020,Germany,2020-04-22,150648,5279,99400.0,45969.0,145369
Germany_04/23/2020,Germany,2020-04-23,153129,5575,103300.0,44254.0,147554
Germany_04/24/2020,Germany,2020-04-24,154999,5760,109800.0,39439.0,149239
Germany_04/25/2020,Germany,2020-04-25,156513,5877,109800.0,40836.0,150636


In [28]:
df_jh_de.shape

(95, 6)

In [33]:
df_jh_de['Cases non-lethal'] = df_jh_de['Cases confirmed'] - df_jh_de['Cases deaths']



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



# Comparing the datasets 
The JH data seems to be much more up-to-date than the official RKI German dataset. 
In the following a comparison of the behaviour of the two datasets. 

 1. Compare the number of cases for both sets

In [54]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=df['Meldedatum'], 
                         y=df['Cases confirmed'], 
                         mode='lines', 
                         name='RKI'))
fig.add_trace(go.Scatter(x=df_jh_de['Date'], 
                         y=df_jh_de['Cases confirmed'],
                         mode='lines',
                         name='JH'))
fig.update_layout(title='Cases confirmed')

In [50]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=df['Meldedatum'], 
                         y=df['Cases confirmed'], 
                         mode='lines', 
                         name='RKI'))
fig.add_trace(go.Scatter(x=df_jh_de['Date'], 
                         y=df_jh_de['Cases confirmed'],
                         mode='lines',
                         name='JH'))
fig.update_layout(yaxis_type="log", 
                  title='Cases confirmed')

In [53]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=df['Meldedatum'], 
                         y=df['Cases deaths'], 
                         mode='lines', 
                         name='RKI'))
fig.add_trace(go.Scatter(x=df_jh_de['Date'], 
                         y=df_jh_de['Cases deaths'],
                         mode='lines',
                         name='JH'))
fig.update_layout(title='Cases deaths')