# Robert Koch-Institut: COVID-19-Dashboard
 Data from the https://experience.arcgis.com/experience/478220a4c454480e823b17327b2bf1d4 dashboard, retrieved via the ArcGIS REST API.

In [1]:
import pandas as pd
import requests
import json
from datetime import datetime
import re

In [2]:
# papermill parameters
# Directory prefix for the exported CSV. It is joined to the file name by plain
# string concatenation, so it must end with a path separator.
output_folder = "../output/"

### Get record count, set query parameters

In [3]:
# Ask the feature service how many records match the query, so that the
# retrieval loop knows when it has fetched everything.
response = requests.get(
    "https://services7.arcgis.com/mOBPykOjAyBO2ZKk/arcgis/rest/services/RKI_COVID19/FeatureServer/0/query?where=1=1&returnCountOnly=true&f=json",
    timeout=60,  # never hang the notebook on an unresponsive server
)

# Compare by value: `is 200` tests object identity, which only works by
# accident for small ints, and a bare `assert` is stripped under `python -O`.
if response.status_code != 200:
    raise RuntimeError(
        f"Record count request failed with HTTP status {response.status_code}"
    )

# load to json
data = json.loads(response.text)

# NOTE(review): the service may describe a failure inside an HTTP 200 body;
# fail with the payload instead of an opaque KeyError on 'count'.
if 'count' not in data:
    raise RuntimeError(f"Record count response has no 'count' field: {data}")

# paging state consumed by the retrieval loop
record_count = data['count']   # total number of records to fetch
offset = 0                     # index of the next record to request
record_fetch_count = 2000      # page size requested per call
fields = None                  # column names, filled in from the first page
records = []                   # accumulated rows
print(record_count)

37766


### Retrieve data

In [4]:
# Page through the feature service until `record_count` rows were collected,
# then build the DataFrame `df` with one column per service field.
while offset < record_count:
    response = requests.get(
        f"https://services7.arcgis.com/mOBPykOjAyBO2ZKk/arcgis/rest/services/RKI_COVID19/FeatureServer/0/query?where=1=1&outFields=*&f=pjson&resultRecordCount={record_fetch_count}&resultOffset={offset}",
        timeout=60,  # never hang the notebook on an unresponsive server
    )

    # Compare by value: `is 200` tests object identity, and a bare `assert`
    # is stripped under `python -O`.
    if response.status_code != 200:
        raise RuntimeError(
            f"Page request at offset {offset} failed with HTTP status {response.status_code}"
        )

    # load to json
    data = json.loads(response.text)

    # column names are taken from the first page's field metadata
    if not fields:
        fields = [field['name'] for field in data['fields']]

    # Keep each row as its attribute dict so values are matched to columns by
    # name rather than by the order the service happened to emit them in.
    page = [feature['attributes'] for feature in data['features']]
    if not page:
        # Nothing more to read; the completeness check below reports a
        # shortfall instead of this loop spinning forever.
        break

    # add to records
    records.extend(page)

    # Advance by what was actually returned, so a server that caps the page
    # size below `record_fetch_count` does not cause rows to be skipped.
    offset += len(page)

if len(records) != record_count:
    raise RuntimeError(
        f"Expected {record_count} records but retrieved {len(records)}"
    )
df = pd.DataFrame(records, columns=fields)

### Cleanse data

In [5]:
# German source column name -> English output column name
column_name_map = dict(
    Bundesland="federal_state",
    Landkreis="district",
    Altersgruppe="age_group",
    Geschlecht="sex",
    AnzahlFall="number_of_cases",
    AnzahlTodesfall="number_of_deaths",
    Meldedatum="epoch_reporting_date",
    IdLandkreis="district_code",
    NeuerFall="new_cases",
    NeuerTodesfall="new_deaths",
    Datenstand="last_update",
)
# Apply the translation in place; columns without an entry keep their name.
df.rename(mapper=column_name_map, axis='columns', inplace=True)
# Remove the ObjectId column so it does not end up in the export.
df.drop(labels=['ObjectId'], axis='columns', inplace=True)

# last_update -> epoch
def _last_sunday_day(year, month):
    """Return the day-of-month of the last Sunday of a 31-day month."""
    # weekday(): Monday == 0 ... Sunday == 6, so a Sunday steps back 0 days.
    return 31 - (datetime(year, month, 31).weekday() + 1) % 7


def _german_utc_offset(local):
    """Return the UTC offset string for a naive German wall-clock datetime.

    Uses the EU summer-time rule: CEST (+02:00) from 02:00 on the last Sunday
    of March until 03:00 on the last Sunday of October, CET (+01:00) otherwise.
    Wall-clock times inside the autumn repeated hour are treated as CEST.
    """
    dst_start = datetime(local.year, 3, _last_sunday_day(local.year, 3), 2)
    dst_end = datetime(local.year, 10, _last_sunday_day(local.year, 10), 3)
    return '+02:00' if dst_start <= local < dst_end else '+01:00'


def resolve_last_update(row):
    """Convert a 'DD.MM.YYYY, HH:MM' stamp to epoch milliseconds.

    The stamp is interpreted as German local time. The UTC offset is derived
    from the date instead of being fixed at +02:00, which was only correct
    during summer time and shifted winter stamps by one hour.

    Args:
        row: Text containing a date in the form 'DD.MM.YYYY, HH:MM'; any
            surrounding text is ignored.

    Returns:
        Milliseconds since the Unix epoch as an int (whole-second precision).

    Raises:
        ValueError: If no date is found in `row` or it is not a valid date.
    """
    date_regex = re.search(r'(\d{2})\.(\d{2})\.(\d{4}), (\d{2}):(\d{2})', row)
    if date_regex is None:
        raise ValueError(f"No 'DD.MM.YYYY, HH:MM' date found in {row!r}")

    day, month, year, hour, minute = (int(part) for part in date_regex.groups())
    local = datetime(year, month, day, hour, minute)
    aware = datetime.fromisoformat(f'{local.isoformat()}{_german_utc_offset(local)}')
    return int(aware.timestamp()) * 1000

# Convert the textual data-state stamp of every row to epoch milliseconds.
df['last_update'] = [resolve_last_update(value) for value in df['last_update']]

# Assign the results back instead of calling replace(..., inplace=True) on
# df[col]: that chained in-place form acts on a temporary Series and leaves
# `df` unchanged when pandas Copy-on-Write is active.
# Every 'A' in an age-group label becomes 'Y'.
df['age_group'] = df['age_group'].replace('A', 'Y', regex=True)
# Every 'W' becomes 'F'; a value that is exactly 'unbekannt' becomes the
# string 'None'.
df['sex'] = df['sex'].replace('W', 'F', regex=True).replace('unbekannt', 'None')

In [6]:
# Write the cleansed frame without its index column to the output folder.
csv_path = output_folder + "RKI_GER_COVID19_DASHBOARD.csv"
df.to_csv(csv_path, index=False)