In [1]:
import pandas as pd
from pandas.api.types import is_numeric_dtype
# Render floats with thousands separators and no decimals (e.g. 1,234,567).
pd.options.display.float_format = '{0:,.0f}'.format
# Show up to 100 rows before pandas truncates a displayed frame.
# (A preceding `max_rows = 20` assignment was dropped: it was overwritten
# on the very next line and never took effect.)
pd.options.display.max_rows = 100

In [2]:
# Johns Hopkins source
# https://github.com/CSSEGISandData/COVID-19/tree/web-data/data/
# Path prefix that is prepended to each CSV filename when loading the data.
# Presumably a local checkout of the repository above, relative to the
# notebook's working directory -- TODO confirm.
source = 'COVID-19/data/'

In [3]:
# Load both CSV extracts from the `source` directory in one pass:
#   df_curr <- cases_country.csv
#   df_time <- cases_time.csv
df_curr, df_time = (
    pd.read_csv(source + csv_name)
    for csv_name in ('cases_country.csv', 'cases_time.csv')
)

In [4]:
# Discard the columns that are not used further down: the coordinates in the
# country frame and the two delta columns in the time frame.
df_curr = df_curr.drop(columns=['Lat', 'Long_'])
df_time = df_time.drop(columns=['Delta_Confirmed', 'Delta_Recovered'])

In [5]:
# Append an end-of-day time to every `Last_Update` string of the time frame.
# NOTE(review): presumably these values are date-only strings, so that they
# sort after any same-day timestamp once parsed -- confirm against the CSV.
df_time = df_time.assign(
    Last_Update=lambda frame: frame['Last_Update'] + ' 23:59:59'
)

In [6]:
# Stack the country frame on top of the time frame (row-wise). The original
# row labels of both inputs are kept, so the resulting index is not unique.
df = pd.concat((df_curr, df_time), axis=0)

In [7]:
# Parse `Last_Update` into a datetime column named `date` (appended as the
# last column) and remove the raw string column.
# NOTE(review): the two concatenated inputs may use different timestamp
# layouts; verify that pd.to_datetime parses both with the installed pandas.
df = (
    df
    .assign(date=pd.to_datetime(df['Last_Update']))
    .drop(columns=['Last_Update'])
)

In [8]:
# Positional rename: the i-th existing column receives the i-th name below.
# This relies on the frame having exactly six columns in this order at this
# point; a different column count raises a length-mismatch error.
df.columns = [
    'country',
    'confirmed',
    'deaths',
    'recovered',
    'active',
    'date',
]

In [9]:
# Order rows by country, then chronologically within each country, and
# replace the row labels with a fresh 0..n-1 range.
# drop=True discards the old labels instead of inserting them as an extra
# 'index' column, which nothing downstream reads and which would otherwise be
# zero-filled and cast to float along with the real data columns.
df = (
    df
    .sort_values(['country', 'date'])
    .reset_index(drop=True)
)

In [10]:
# Replace every missing value in the frame with 0.
# NOTE(review): this applies to all columns, not only the numeric counts --
# confirm that `country` and `date` never contain missing values.
df = df.fillna(value=0)

In [11]:
# Cast every numeric column to float so the float display format configured
# for pandas applies to all of them uniformly.
df = df.astype({
    column: 'float'
    for column, column_dtype in df.dtypes.items()
    if is_numeric_dtype(column_dtype)
})

In [12]:
# Human-readable label derived from `date`: zero-padded day, a dot, and the
# abbreviated month name (strftime pattern '%d. %b').
df = df.assign(date_string=df['date'].dt.strftime('%d. %b'))

In [13]:
# Keep only the columns listed here, in this display order; any other column
# still present in the frame is dropped by this selection.
df = df.loc[:, [
    'date',
    'date_string',
    'country',
    'confirmed',
    'active',
    'deaths',
    'recovered',
]]

In [14]:
# Sanity check: report the most recent timestamp among the rows whose
# country is 'Germany'.
print(
    'Last data for germany:',
    df.loc[df['country'].eq('Germany'), 'date'].max(),
)

Last data for germany: 2020-03-28 10:37:41
