# Robert Koch-Institut: COVID-19-Dashboard
Data from the https://experience.arcgis.com/experience/478220a4c454480e823b17327b2bf1d4 dashboard, retrieved via the ArcGIS REST API.

In [None]:
import pandas as pd
import requests
import json
from datetime import datetime
import re
import pycountry

In [None]:
# papermill parameters
# Folder prefix for the exported CSV; it is concatenated directly with the file name
# when the result is written, so it must end with a path separator.
output_folder = "../output/"

### Get record count, set query parameters

In [None]:
# Base query URL for the RKI_Landkreisdaten feature layer: selects every row (where=1=1)
# with all attribute fields (outFields=*), without geometry, as JSON (f=pjson).
endpoint = "https://services7.arcgis.com/mOBPykOjAyBO2ZKk/ArcGIS/rest/services/RKI_Landkreisdaten/FeatureServer/0/query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&resultType=none&distance=0.0&units=esriSRUnit_Meter&returnGeodetic=false&outFields=*&returnGeometry=false&returnCentroid=false&featureEncoding=esriDefault&multipatchOption=xyFootprint&maxAllowableOffset=&geometryPrecision=&outSR=&datumTransformation=&applyVCSProjection=false&returnIdsOnly=false&returnUniqueIdsOnly=false&returnExtentOnly=false&returnQueryGeometry=false&returnDistinctValues=false&cacheHint=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having=&returnZ=false&returnM=false&returnExceededLimitFeatures=true&quantizationParameters=&sqlFormat=none&f=pjson"

# Ask the service only for the number of matching records.
# A timeout keeps the notebook from hanging forever on an unresponsive server.
response = requests.get(endpoint + "&returnCountOnly=true", timeout=60)

# Compare the status by value (`is` tests object identity, which is not guaranteed for
# ints) and raise explicitly so the check is not stripped when Python runs with -O.
if response.status_code != 200:
    raise RuntimeError(
        f"Record count request failed with HTTP status {response.status_code}"
    )

# decode the JSON payload
data = response.json()

# set params
record_count = data['count']   # total number of records reported by the service
offset = 0                     # index of the first record of the next page
record_fetch_count = 2000      # number of records requested per page
fields = None                  # column names, filled in from the first page
records = []                   # rows collected across all pages
print(record_count)

### Retrieve data

In [None]:
# Page through the layer, `record_fetch_count` rows at a time, until every record
# reported by the count query has been requested.
while offset < record_count:
    # A timeout keeps the notebook from hanging forever on an unresponsive server.
    response = requests.get(
        endpoint + f"&resultRecordCount={record_fetch_count}&resultOffset={offset}",
        timeout=60,
    )

    # Compare the status by value (`is` tests object identity, which is not guaranteed
    # for ints) and raise explicitly so the check is not stripped under -O.
    if response.status_code != 200:
        raise RuntimeError(
            f"Record request at offset {offset} failed with HTTP status {response.status_code}"
        )

    # decode the JSON payload
    data = response.json()

    # take the column names from the first page that provides them
    if not fields:
        fields = [field['name'] for field in data['fields']]

    # one row per feature: its attribute values, in the order the service returned them
    records += [list(feature['attributes'].values()) for feature in data['features']]

    # advance to the next page
    offset += record_fetch_count

# every record announced by the count query must have been fetched
if len(records) != record_count:
    raise RuntimeError(f"Expected {record_count} records but fetched {len(records)}")

# key each row by its position so it becomes the row index of the frame
records = dict(enumerate(records))
df = pd.DataFrame.from_dict(records, orient="index", columns=fields)

In [None]:
# Build the frame from the fetched rows: the keys of `records` become the row index
# and `fields` supplies the column labels.
df = pd.DataFrame.from_dict(
    data=records,
    orient="index",
    columns=fields,
)

### Cleanse data

In [None]:
# Raw field name -> output column name. Only the fields listed here are kept, and the
# order of this mapping is the column order of the resulting frame.
column_name_map = {
    "RS": "district_id",
    "county": "county",
    "BL_ID": "state_id",
    "BL": "state",
    "BEZ": "district_type",
    "EWZ": "population",
    "death_rate": "death_rate",
    "cases": "cases",
    "deaths": "deaths",
    "cases_per_100k": "cases_per_100k",
    "cases_per_population": "cases_per_population",
    "cases7_per_100k": "cases7_per_100k",
    "last_update": "date",
}
df = df.rename(columns=column_name_map)

# German district type -> English label; values missing from this map become None
district_type_map = {
    "Landkreis": "Rural District",
    "Kreis": "Rural District",
    "Kreisfreie Stadt": "District-free City",
    "Stadtkreis": "District-free City",
}
df['district_type'] = df['district_type'].apply(district_type_map.get)

# NOTE: the district id stays a regular column. The former `df.set_index("district_id")`
# call discarded its result (set_index is not in-place), so it never changed the frame,
# and the selection below requires `district_id` to still be a column.

# keep only the mapped columns, in mapping order
df = df.loc[:, list(column_name_map.values())]


In [None]:
# Parse the textual update stamp (day.month.year, hour:minute followed by "Uhr") and
# mark the naive result as Europe/Berlin local time. The vectorised `.dt` accessor
# replaces a per-element `apply` that called `tz_localize` on every timestamp.
df['date'] = pd.to_datetime(df['date'], format="%d.%m.%Y, %H:%M Uhr")
df['date'] = df['date'].dt.tz_localize("Europe/Berlin")


In [None]:
from datetime import timezone

# set ISO3166_1, ISO3166_2 fields
df['ISO3166_1'] = 'DE'

# subdivision name -> ISO 3166-2 code without the "DE-" country prefix
subdivisions = {
    subdivision.name: subdivision.code.replace("DE-", "")
    for subdivision in pycountry.subdivisions.get(country_code="DE")
}
# states whose name has no entry in the lookup get None
df['ISO3166_2'] = df['state'].apply(subdivisions.get)

# Stamp every row with the current UTC time as a naive datetime. This yields the same
# value `datetime.utcnow()` did, without relying on that deprecated call.
df['Last_Update_Date'] = datetime.now(timezone.utc).replace(tzinfo=None)

# Boolean flag: True for rows whose date equals the most recent date in the frame
df['Last_Reported_Date'] = df['date'] == df['date'].max()

In [None]:
import os

# Write the result without the row index. os.path.join produces a valid path whether or
# not `output_folder` ends with a path separator.
df.to_csv(os.path.join(output_folder, "RKI_GER_COVID19_DASHBOARD.csv"), index=False)