In [None]:
import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

import subprocess
import os

# Show up to 500 rows when a DataFrame is displayed.
# Use the documented option key 'display.max_rows'; the previous spelling
# 'display.max.rows' only resolved because pandas matches option keys as
# regular expressions (where '.' matches any character, including '_').
pd.set_option('display.max_rows', 500)

* RKI (Robert Koch Institute), via web scraping: https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Fallzahlen.html/
* Johns Hopkins University (GitHub): https://github.com/CSSEGISandData/COVID-19
* REST API services for retrieving data: https://npgeo-corona-npgeo-de.hub.arcgis.com/

## Data from Github

git clone/pull: https://github.com/CSSEGISandData/COVID-19.git

In [None]:
# Update the local data clone by running `git pull` inside it.
# Upstream repository: https://github.com/CSSEGISandData/COVID-19.git

git_pull = subprocess.Popen( [ "/usr/bin/git", "pull" ],   # argument list: no shell is needed for a fixed command
                     cwd = os.path.dirname( './ads_covid-19/data/raw/COVID-19/' ),
                     stdout = subprocess.PIPE,
                     stderr = subprocess.PIPE,
                     text = True )   # decode the pipes so the output prints as text, not as a bytes repr
(out, error) = git_pull.communicate()

# communicate() waits for the process to finish, so returncode is set here.
if git_pull.returncode != 0:
    print("git pull failed with exit code " + str(git_pull.returncode))

# stderr is printed unconditionally; it is not by itself proof of a failure.
print("Error : " + error)
print("out : " + out)


In [None]:
# Dictionary mapping each country to its number of inhabitants.
# The values are kept as numeric strings.
# Sources: Italy: https://de.statista.com/statistik/daten/studie/19304/umfrage/gesamtbevoelkerung-in-italien/
#          US: https://de.statista.com/statistik/daten/studie/19320/umfrage/gesamtbevoelkerung-der-usa/
#          Germany: https://de.statista.com/statistik/daten/studie/1217/umfrage/entwicklung-der-gesamtbevoelkerung-seit-2002/
inhibitants = dict(
    Italy='59190000',
    US='333390000',
    Germany='83200000',
)

In [None]:
# Load the confirmed-cases time series CSV into a DataFrame.
# The path is relative to the notebook's working directory.
data_path = './ads_covid-19/data/raw/COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
pd_raw = pd.read_csv(filepath_or_buffer=data_path)

# Last expression of the cell: render the loaded frame.
pd_raw

In [None]:
# The column labels from position 4 onwards are the dates; collect them as the time index.
first_date_col = 4
time_idx = pd_raw.columns[first_date_col:]

In [None]:
# Start the plotting frame with a single 'date' column taken from time_idx;
# the per-country columns are added to it later.
df_plot = pd.DataFrame(data={'date': time_idx})

# Last expression of the cell: render the frame.
df_plot

In [None]:
# Countries to include in the plot.
country_list = ['Italy', 'US', 'Germany']

In [None]:
# For every selected country: sum the date columns over all rows of that
# country, divide by its number of inhabitants, and store the resulting
# relative case counts as a new column of df_plot.
for country in country_list:
    country_rows = pd_raw[pd_raw['Country/Region'] == country]
    # Columns from position 4 onwards hold the per-date values.
    cases_per_date = country_rows.iloc[:, 4:].sum(axis=0)
    population = int(inhibitants[country])
    # Convert to a plain array so the values are assigned by position.
    df_plot[country] = np.array(cases_per_date / population)

In [None]:
# Plot one line per country against the date and save the figure as a JPG.
relative_cases_by_date = df_plot.set_index('date')
ax = relative_cases_by_date.plot()

# Label both axes in a single call.
ax.set(ylabel='Relative cases of Covid infectors', xlabel='Date')

# Write the current figure to disk at 300 dpi.
plt.savefig('relative-cases-over-time.jpg', dpi=300)