# Robert Koch-Institut: COVID-19-Dashboard
Data from the https://experience.arcgis.com/experience/478220a4c454480e823b17327b2bf1d4 dashboard, retrieved via the ArcGIS REST API.

In [1]:
import json
import os
import re
from datetime import datetime

import pandas as pd
import requests

In [2]:
# papermill parameters
# Destination directory for the CSV file written at the end of the notebook.
output_folder = "../output/"

### Get record count, set query parameters

In [3]:
# Base query URL for layer 0 of the RKI_Landkreisdaten feature service.
# where=1%3D1 (i.e. "1=1") matches every row, outFields=* requests all
# attributes, returnGeometry=false leaves out the geometries and f=pjson asks
# for a JSON response. Additional parameters are appended to this string for
# each request.
endpoint = "https://services7.arcgis.com/mOBPykOjAyBO2ZKk/ArcGIS/rest/services/RKI_Landkreisdaten/FeatureServer/0/query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&resultType=none&distance=0.0&units=esriSRUnit_Meter&returnGeodetic=false&outFields=*&returnGeometry=false&returnCentroid=false&featureEncoding=esriDefault&multipatchOption=xyFootprint&maxAllowableOffset=&geometryPrecision=&outSR=&datumTransformation=&applyVCSProjection=false&returnIdsOnly=false&returnUniqueIdsOnly=false&returnExtentOnly=false&returnQueryGeometry=false&returnDistinctValues=false&cacheHint=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having=&returnZ=false&returnM=false&returnExceededLimitFeatures=true&quantizationParameters=&sqlFormat=none&f=pjson"

# Ask the service only for the number of matching records.
response = requests.get(endpoint + "&returnCountOnly=true")

# Abort unless the request returned HTTP 200. The status is compared with
# "!=" / "==": "is" tests object identity and only happens to work for small
# integers, and an assert would be stripped when Python runs with -O.
if response.status_code != 200:
    raise RuntimeError(f"Record count request failed with HTTP status {response.status_code}")

# Parse the JSON body.
data = json.loads(response.text)

# Paging state used by the retrieval loop.
record_count = data['count']  # total number of records reported by the service
offset = 0                    # index of the next record to request
record_fetch_count = 2000     # number of records requested per page
fields = None                 # column names, taken from the first page
records = []                  # accumulated records
print(record_count)

412


### Retrieve data

In [4]:
# Page through the service until every reported record has been collected.
while offset < record_count:
    response = requests.get(endpoint + f"&resultRecordCount={record_fetch_count}&resultOffset={offset}")

    # Abort unless the request returned HTTP 200 ("==" semantics rather than
    # an identity test, and a real exception rather than an assert).
    if response.status_code != 200:
        raise RuntimeError(f"Record request at offset {offset} failed with HTTP status {response.status_code}")

    # Parse the JSON body.
    data = json.loads(response.text)

    # Read the field names once, from the first page that is received.
    if not fields:
        fields = [field['name'] for field in data['fields']]

    # Keep every record as a dict so that columns are matched by field name
    # instead of relying on the attribute order of each record.
    page = [feature['attributes'] for feature in data['features']]
    if not page:
        # An empty page would never advance the offset; stop instead of
        # looping forever and let the count check below report the problem.
        break
    records.extend(page)

    # Advance by the number of records actually received, so nothing is
    # skipped if the service returns fewer records than requested.
    offset += len(page)

# Every record announced by the count query must have been retrieved.
if len(records) != record_count:
    raise RuntimeError(f"Expected {record_count} records but received {len(records)}")
df = pd.DataFrame(records, columns=fields)

### Cleanse data

In [5]:
# Maps the field names delivered by the service to the column names used in
# the output. Only the fields listed here are kept, in this order.
column_name_map = {
    "RS": "district_id",  # rename this field
    "county": "county",
    "GEN": "GEN",  # rename this field
    "BL_ID": "state_id",
    "BL": "state",
    "BEZ": "BEZ",  # rename this field
    "EWZ": "population",
    "death_rate": "death_rate",
    "cases": "cases",
    "deaths": "deaths",
    "cases_per_100k": "cases_per_100k",
    "cases_per_population": "cases_per_population",
    "cases7_per_100k": "cases7_per_100k",
    "last_update": "last_update",
}

# Rename the columns, then keep only the mapped ones. district_id stays a
# regular column (it is not turned into the index) because the selection
# below needs it; a missing column raises a KeyError here.
df = df.rename(columns=column_name_map)
df = df.loc[:, list(column_name_map.values())]

# last_update -> epoch
def resolve_last_update(row, utc_offset="+02:00"):
    """Convert a ``DD.MM.YYYY, HH:MM`` timestamp string to epoch milliseconds.

    Args:
        row: Text containing a date and time in the form
            ``DD.MM.YYYY, HH:MM``; anything around that pattern is ignored.
        utc_offset: UTC offset, written as ``+HH:MM`` or ``-HH:MM``, in which
            the timestamp is interpreted. Defaults to ``"+02:00"``.

    Returns:
        Milliseconds since the Unix epoch as an ``int`` (whole seconds
        multiplied by 1000).

    Raises:
        ValueError: If ``row`` contains no date/time in the expected format,
            or the matched digits do not form a valid date and time.
    """
    # NOTE(review): a fixed offset ignores daylight-saving changes. Presumably
    # the source reports German local time, in which case "+01:00" would be
    # needed for winter dates - confirm before relying on the default then.
    match = re.search(r'(\d{2})\.(\d{2})\.(\d{4}), (\d{2}):(\d{2})', row)
    if match is None:
        raise ValueError(f"Unrecognised last_update value: {row!r}")

    day, month, year, hour, minute = match.groups()
    moment = datetime.fromisoformat(f'{year}-{month}-{day}T{hour}:{minute}{utc_offset}')
    return int(moment.timestamp()) * 1000

# Replace each textual last_update value by the result of resolve_last_update.
df['last_update'] = [resolve_last_update(value) for value in df['last_update']]

In [6]:
# Show the dtype of every column. Text columns are left as "object" because
# string as a dtype is not supported in the current version of pandas.
df.dtypes

district_id              object
county                   object
GEN                      object
state_id                 object
state                    object
BEZ                      object
population                int64
death_rate              float64
cases                     int64
deaths                    int64
cases_per_100k          float64
cases_per_population    float64
cases7_per_100k         float64
last_update               int64
dtype: object

In [7]:
# Display the cleansed DataFrame.
df

Unnamed: 0,district_id,county,GEN,state_id,state,BEZ,population,death_rate,cases,deaths,cases_per_100k,cases_per_population,cases7_per_100k,last_update
0,01001,SK Flensburg,Flensburg,1,Schleswig-Holstein,Kreisfreie Stadt,89504,3.571429,28,1,31.283518,0.031284,6.703611,1586124000000
1,01002,SK Kiel,Kiel,1,Schleswig-Holstein,Kreisfreie Stadt,247548,0.609756,164,1,66.249778,0.066250,21.006027,1586124000000
2,01003,SK Lübeck,Lübeck,1,Schleswig-Holstein,Kreisfreie Stadt,217198,0.990099,101,1,46.501349,0.046501,7.826960,1586124000000
3,01004,SK Neumünster,Neumünster,1,Schleswig-Holstein,Kreisfreie Stadt,79487,0.000000,32,0,40.258155,0.040258,21.387145,1586124000000
4,01051,LK Dithmarschen,Dithmarschen,1,Schleswig-Holstein,Kreis,133210,2.500000,40,1,30.027776,0.030028,12.761805,1586124000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
407,11005,SK Berlin Spandau,Berlin Spandau,11,Berlin,Kreisfreie Stadt,243977,2.040816,147,3,60.251581,0.060252,22.952983,1586124000000
408,11006,SK Berlin Steglitz-Zehlendorf,Berlin Steglitz-Zehlendorf,11,Berlin,Kreisfreie Stadt,308697,0.000000,308,0,99.774212,0.099774,36.605474,1586124000000
409,11001,SK Berlin Mitte,Berlin Mitte,11,Berlin,Kreisfreie Stadt,384172,0.000000,537,0,139.781140,0.139781,35.921410,1586124000000
410,11002,SK Berlin Friedrichshain-Kreuzberg,Berlin Friedrichshain-Kreuzberg,11,Berlin,Kreisfreie Stadt,289762,0.852273,352,3,121.479007,0.121479,31.750195,1586124000000


In [8]:
# Write the cleansed data to the output folder without the index column.
# os.path.join supplies the path separator when output_folder has no trailing
# one, which plain string concatenation would silently get wrong.
df.to_csv(os.path.join(output_folder, "RKI_GER_COVID19_DASHBOARD.csv"), index=False)