In [1]:
import atexit
import io
import os

In [2]:
import pandas as pd
import requests
from apscheduler.schedulers.background import BackgroundScheduler
from flask import Flask, jsonify, request
from flask_cors import CORS
from pushover import Client

In [3]:
# Module-level state, empty until update() has completed successfully.
# `data` receives the cleaned DataFrame.
data = None
# `last_updated` receives the newest "Datenstand" timestamp found in `data`.
last_updated = None

In [4]:
# strptime pattern used by update() to parse the "Datenstand" column
# (German notation, e.g. "06.05.2020, 00:00 Uhr").
UPDATE_DATE_FORM = "%d.%m.%Y, %H:%M Uhr"
# ISO-style day pattern used by update() for the snapshot file name and the
# status message (e.g. "2020-05-06").
QUERY_DATE_FORM = "%Y-%m-%d"

In [5]:
# Columns that update() parses as dates and strips of timezone information.
DATE_COLUMNS = [
    "Meldedatum",
    "Refdatum",
    "Datenstand",
]

# Split option -> column data; every value starts as None and is filled in by
# update() with the matching column of the cleaned frame.
SPLIT_OPTIONS = {"age": None, "gender": None}

# German special characters -> ASCII transliteration. "Ã¼" is what "ü" looks
# like when its UTF-8 bytes are decoded as a single-byte Western encoding.
UMLAUTS = {
    "ä": "ae",
    "ö": "oe",
    "ü": "ue",
    "Ã¼": "ue",
    "ß": "ss",
}

# Option name -> name of the column in the cleaned frame.
TRANSLATE = {
    "age": "Altersgruppe",
    "gender": "Geschlecht",
    "time": "Refdatum",
}

In [6]:
# Setup pushover notifications
# The user key and API token are read from the environment so that no
# credentials are stored in the notebook. Set PUSHOVER_USER_KEY and
# PUSHOVER_API_TOKEN before starting the kernel; a missing variable raises
# KeyError here instead of failing later on the first notification.
# NOTE(review): the keys that used to be hardcoded in this cell must be
# treated as leaked and revoked in the Pushover dashboard.
PushoverClient = Client(os.environ["PUSHOVER_USER_KEY"],
                        api_token=os.environ["PUSHOVER_API_TOKEN"])

### Helper

In [7]:
def clean_string(s, replacements=None):
    """Lowercase ``s`` and transliterate German special characters.

    Args:
        s (str): Text to normalise.
        replacements (dict | None): Mapping of substring -> replacement.
            Defaults to the module-level ``UMLAUTS`` table.

    Returns:
        str: The lowercased string with every mapped substring replaced.
    """
    if replacements is None:
        replacements = UMLAUTS
    s = s.lower()
    for initial, new in replacements.items():
        # The text is already lowercased, so a key containing uppercase
        # characters (e.g. the mojibake "Ã¼") could never match as written;
        # compare against the lowercased key instead.
        s = s.replace(initial.lower(), new)
    return s

### Update

In [8]:
def update():
    """Download the RKI case CSV, clean it and publish it in module state.

    On success the cleaned frame is stored in the global ``data``, its newest
    "Datenstand" value in the global ``last_updated``, a CSV snapshot is
    written to ``data/<YYYY-MM-DD>.csv`` and every entry of ``SPLIT_OPTIONS``
    is replaced by the matching column of the cleaned frame. The globals are
    only touched after all cleaning steps and the snapshot write succeeded,
    so a failed run leaves the previously loaded data intact.

    A Pushover notification is sent on success, when the download raises an
    exception and when cleaning fails. A non-OK HTTP status is only reported
    through the return value.

    Returns:
        str: Human-readable status message (success or error description).
    """
    global data, last_updated

    # Request .csv file
    def request_file():
        """Fetch the CSV; returns ``(response, error)`` or ``(False, error)``."""
        URL = "https://www.arcgis.com/sharing/rest/content/items/f10774f1c63e40168479a1feb6c7ca74/data"
        # (connect, read) timeouts in seconds. requests waits indefinitely by
        # default, so a stalled server would otherwise block this call forever.
        TIMEOUT = (10, 60)

        try:
            r = requests.get(URL, timeout=TIMEOUT)

            # Check status status_code
            print("Request status code", r.status_code)
            if r.status_code != requests.codes.ok:
                error = {"status": True,
                         "message": f"Request status code {r.status_code}."}
                return r, error

            error = {"status": False, "message": "File retrieved"}
            return r, error
        except Exception as e:
            error = {"status": True, "message": "Error retrieving file"}
            PushoverClient.send_message(
                f"{error['message']} + {e}", title="Covid")
            print(f"{error['message']} + {e}")
            return False, error

    r, error = request_file()

    # Return on error
    if error["status"]:
        return error["message"]

    try:
        # Decode the payload, falling back to windows-1252 when it is not
        # valid UTF-8, then convert to dataframe
        try:
            encoding = "utf-8"
            text = r.content.decode(encoding)
        except UnicodeDecodeError:
            encoding = "windows-1252"
            text = r.content.decode(encoding)
        df = pd.read_csv(io.StringIO(text), parse_dates=DATE_COLUMNS)

        # Set index. Raise explicitly rather than assert: assert statements
        # are removed when Python runs with -O.
        if not df["FID"].is_unique:
            raise ValueError("FID column contains duplicate values")
        df = df.set_index("FID")

        # Date formatting
        df["Datenstand"] = pd.to_datetime(
            df["Datenstand"], format=UPDATE_DATE_FORM)

        # Localize (drop timezone information)
        for c in DATE_COLUMNS:
            df[c] = df[c].dt.tz_localize(None)

        # Encode regions. The result is assigned back to the column: calling
        # replace(..., inplace=True) on the df[c] selection is chained
        # assignment and does not update df under pandas copy-on-write.
        for c in ["Bundesland", "Landkreis"]:
            df[c] = (df[c].replace(UMLAUTS, regex=True)
                          .str.replace(' ', '-', regex=False)
                          .str.lower())

        # Clean age range (literal replacement, independent of the pandas
        # version's default for `regex`)
        df["Altersgruppe"] = df["Altersgruppe"].str.replace(
            "A", "", regex=False)

        newest = df["Datenstand"].max()

        # Write the snapshot before publishing; create the target directory
        # so a fresh checkout does not fail here.
        os.makedirs("data", exist_ok=True)
        df.to_csv("data/" + newest.strftime(QUERY_DATE_FORM) + ".csv")

        split_columns = {option: df[TRANSLATE[option]]
                         for option in SPLIT_OPTIONS}

        # Publish everything together now that no step can fail anymore.
        data = df
        last_updated = newest
        SPLIT_OPTIONS.update(split_columns)

    except Exception as e:
        error = {"status": True, "message": "Error while cleaning data"}
        PushoverClient.send_message(f"{error['message']} + {e}", title="Covid")
        return f"{error['message']} + {e}"

    message = f"Data loaded from RKI (encoded: {encoding}, last update: {last_updated.strftime(QUERY_DATE_FORM)})"
    PushoverClient.send_message(message, title="Covid")
    return message

In [9]:
# Run one download-and-clean cycle; the returned status message is the cell output.
update()

Request status code 200


'Data loaded from RKI (encoded: utf-8, last update: 2020-05-06)'

In [10]:
# Show the cleaned frame that update() stored in the module-level `data`.
data

Unnamed: 0_level_0,IdBundesland,Bundesland,Landkreis,Altersgruppe,Geschlecht,AnzahlFall,AnzahlTodesfall,Meldedatum,IdLandkreis,Datenstand,NeuerFall,NeuerTodesfall,Refdatum,NeuGenesen,AnzahlGenesen,IstErkrankungsbeginn,Altersgruppe2
FID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
5152083,1,schleswig-holstein,sk-flensburg,15-34,M,1,0,2020-03-14,1001,2020-05-06,0,-9,2020-03-16,0,1,1,Nicht übermittelt
5152084,1,schleswig-holstein,sk-flensburg,15-34,M,1,0,2020-03-19,1001,2020-05-06,0,-9,2020-03-13,0,1,1,Nicht übermittelt
5152085,1,schleswig-holstein,sk-flensburg,15-34,M,1,0,2020-03-19,1001,2020-05-06,0,-9,2020-03-16,0,1,1,Nicht übermittelt
5152086,1,schleswig-holstein,sk-flensburg,15-34,M,1,0,2020-03-21,1001,2020-05-06,0,-9,2020-03-13,0,1,1,Nicht übermittelt
5152087,1,schleswig-holstein,sk-flensburg,15-34,M,1,0,2020-03-27,1001,2020-05-06,0,-9,2020-03-22,0,1,1,Nicht übermittelt
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5280707,16,thueringen,lk-altenburger-land,60-79,W,1,0,2020-04-05,16077,2020-05-06,0,-9,2020-03-31,0,1,1,Nicht übermittelt
5280708,16,thueringen,lk-altenburger-land,60-79,W,1,0,2020-04-16,16077,2020-05-06,0,-9,2020-04-15,-9,0,1,Nicht übermittelt
5280709,16,thueringen,lk-altenburger-land,60-79,W,1,0,2020-04-22,16077,2020-05-06,0,-9,2020-04-17,0,1,1,Nicht übermittelt
5280710,16,thueringen,lk-altenburger-land,80+,M,1,1,2020-03-24,16077,2020-05-06,0,0,2020-03-16,-9,0,1,Nicht übermittelt


In [19]:
# Rows whose state is Baden-Wuerttemberg and whose district name contains "stuttgart".
data.loc[
    data["Bundesland"].eq("baden-wuerttemberg")
    & data["Landkreis"].str.contains("stuttgart")
]

Unnamed: 0_level_0,IdBundesland,Bundesland,Landkreis,Altersgruppe,Geschlecht,AnzahlFall,AnzahlTodesfall,Meldedatum,IdLandkreis,Datenstand,NeuerFall,NeuerTodesfall,Refdatum,NeuGenesen,AnzahlGenesen,IstErkrankungsbeginn,Altersgruppe2
FID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
5206163,8,baden-wuerttemberg,sk-stuttgart,00-04,M,1,0,2020-03-14,8111,2020-05-06,0,-9,2020-03-14,0,1,0,Nicht übermittelt
5206164,8,baden-wuerttemberg,sk-stuttgart,00-04,M,1,0,2020-03-20,8111,2020-05-06,0,-9,2020-03-17,0,1,1,Nicht übermittelt
5206165,8,baden-wuerttemberg,sk-stuttgart,00-04,M,2,0,2020-03-24,8111,2020-05-06,0,-9,2020-03-24,0,2,0,Nicht übermittelt
5206166,8,baden-wuerttemberg,sk-stuttgart,00-04,M,1,0,2020-04-23,8111,2020-05-06,0,-9,2020-04-23,-9,0,0,Nicht übermittelt
5206167,8,baden-wuerttemberg,sk-stuttgart,00-04,M,1,0,2020-04-29,8111,2020-05-06,0,-9,2020-04-29,-9,0,0,Nicht übermittelt
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5207146,8,baden-wuerttemberg,sk-stuttgart,80+,W,1,0,2020-04-21,8111,2020-05-06,0,-9,2020-03-26,0,1,1,Nicht übermittelt
5207147,8,baden-wuerttemberg,sk-stuttgart,80+,W,1,0,2020-04-21,8111,2020-05-06,0,-9,2020-04-21,-9,0,0,Nicht übermittelt
5207148,8,baden-wuerttemberg,sk-stuttgart,80+,W,1,0,2020-04-26,8111,2020-05-06,0,-9,2020-04-26,-9,0,0,Nicht übermittelt
5207149,8,baden-wuerttemberg,sk-stuttgart,unbekannt,M,1,0,2020-03-16,8111,2020-05-06,0,-9,2020-03-10,0,1,1,Nicht übermittelt
