In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's default theme to matplotlib figures drawn from here on.
sns.set()

# %matplotlib inline

# Load Dataframes from CSV

[Download the RKI COVID-19 case data (`RKI_COVID19.csv`) here.](https://npgeo-corona-npgeo-de.hub.arcgis.com/datasets/dd4580c810204019a7b8eb3e0b329dd6_0/data)

In [2]:
# Read the RKI case table from the current user's Downloads folder instead of
# a path that only exists on one particular machine.
rki_covid = pd.read_csv(Path.home() / 'Downloads' / 'RKI_COVID19.csv')

# Add Geoinformation to RKI Data

[Download the district geodata (`landkreise-in-germany.csv`) from opendatasoft here.](https://public.opendatasoft.com/explore/dataset/landkreise-in-germany/api/)

In [3]:
# Read the district geodata (semicolon-separated) from the current user's
# Downloads folder instead of a path that only exists on one particular machine.
geodata_bundesland = pd.read_csv(Path.home() / 'Downloads' / 'landkreise-in-germany.csv', sep=';')

## Match Districts

The districts are identified by `IdLandkreis` in RKI Data and by `Cca 2` in opendatasoft's data. In what follows, we match the two after fixing a few mismatches between them.

In [4]:
# Column-wise snapshot of the three geo columns: {column name: {row label: value}}.
geo_districts = geodata_bundesland[['Geo Point', 'Cca 2', 'Geo Shape']].to_dict()

# Lookup tables keyed by the district code ('Cca 2') converted to float.
latitude_district_ids = {}
longitude_district_ids = {}
polygons_district_ids = {}

for row_label in range(len(geodata_bundesland)):
    district_key = float(geo_districts['Cca 2'][row_label])

    # 'Geo Point' is a comma-separated string: the first field feeds the
    # latitude table, the second field feeds the longitude table.
    point_fields = geo_districts['Geo Point'][row_label].split(',')
    latitude_district_ids[district_key] = float(point_fields[0])
    longitude_district_ids[district_key] = float(point_fields[1])

    # The shape is stored exactly as it appears in the 'Geo Shape' column.
    polygons_district_ids[district_key] = geo_districts['Geo Shape'][row_label]

These mismatches are: 
* Berlin is assigned `IdLandkreis` 11001-11012 in RKI Data where each number corresponds to a borough (Stadtkreis), whereas in opendatasoft data the entire city is filed under 11000.
* Göttingen is mismatched for some reason I don't understand. It should be 3152 according to opendatasoft, but is 3159 in RKI data. 
* Some entries in RKI data do not specify a district ID (they carry the placeholder `0-1`), so we assign them the ID -1 and place them at the mean coordinates of all districts.

In [5]:
# District codes involved in the two manual corrections.
berlin_district_id = 11000.0
gtgn_district_id_rki = 3159.
gtgn_district_id_geo = 3152.

# Apply the same corrections to each of the three lookup tables in turn.
for lookup in (latitude_district_ids, longitude_district_ids, polygons_district_ids):
    # Berlin: give the codes 11001 .. 11012 the entry stored for the whole
    # city (11000).
    for borough_offset in range(1, 13):
        lookup[berlin_district_id + borough_offset] = lookup[berlin_district_id]

    # Göttingen: make the code used by RKI point at the entry that the
    # geodata files under its own code.
    lookup[gtgn_district_id_rki] = lookup[gtgn_district_id_geo]

In [6]:
# Average of all stored latitudes / longitudes; used further down as the
# position of records whose district is unknown.
mean_latitude = np.fromiter(latitude_district_ids.values(), dtype=float).mean()
mean_longitude = np.fromiter(longitude_district_ids.values(), dtype=float).mean()

With the data fixed, we can now do the assignments. 

In [7]:
# Convert every raw 'IdLandkreis' entry to a float; the placeholder '0-1'
# becomes the sentinel -1.
district_ids = []
for raw_id in rki_covid['IdLandkreis']:
    district_ids.append(-1. if raw_id == '0-1' else float(raw_id))

In [8]:
# Look up the coordinates of every record's district. Records with the
# sentinel ID -1 (no district given) are placed at the mean coordinates.
latitude_districts = [(latitude_district_ids[idx] if idx != -1 else mean_latitude) for idx in district_ids]
longitude_districts = [(longitude_district_ids[idx] if idx != -1 else mean_longitude) for idx in district_ids]

# An unknown district has no shape. The fallback is None rather than
# mean_longitude, which would put a float into a column of polygon strings.
polygons_districts = [(polygons_district_ids[idx] if idx != -1 else None) for idx in district_ids]

In [9]:
# Append the per-record geo lookups to the RKI table as three new columns.
for column_name, column_values in (
    ('Landkreis Breitengrad', latitude_districts),
    ('Landkreis Längengrad', longitude_districts),
    ('Landkreis Geopolygonen', polygons_districts),
):
    rki_covid[column_name] = pd.Series(column_values)

# Trim Data

In [10]:
# Columns that identify a district, and the counts that get summed per district.
district_columns = ['Landkreis', 'Landkreis Breitengrad', 'Landkreis Längengrad']
count_columns = ['AnzahlFall', 'AnzahlTodesfall']

# Keep only the columns needed for the maps.
small_rki_covid = rki_covid[[
    'Landkreis',
    'AnzahlFall',
    'AnzahlTodesfall',
    'Meldedatum',
    'Landkreis Breitengrad',
    'Landkreis Längengrad',
]]

# Total cases and deaths per district, with the group keys kept as columns.
per_district = small_rki_covid.groupby(district_columns, as_index=False)
cumulative_rki_covid = per_district[count_columns].sum()

# Plot Magnitudes

In [12]:
# Bubble map of the per-district totals: marker size shows the number of cases,
# marker colour the number of deaths.
# 'open-street-map' tiles need no access token. The Stamen tile styles are no
# longer served without a Stadia Maps account, so 'stamen-terrain' would leave
# the base map empty.
fig = px.scatter_mapbox(
    cumulative_rki_covid,
    lat='Landkreis Breitengrad',
    lon='Landkreis Längengrad',
    size='AnzahlFall',
    color='AnzahlTodesfall',
    hover_name='Landkreis',
    mapbox_style='open-street-map',
)
fig.show()

TODO: The animation needs to be fixed. 

In [14]:
# Build one row per reporting day and district. The raw table holds several
# records per district and day, which would draw overlapping markers in every
# frame. Plotly Express also orders animation frames by first appearance in
# the data, so the rows are sorted by date to make the animation run
# chronologically.
daily_rki_covid = (
    rki_covid
    .groupby(
        ['Meldedatum', 'Landkreis', 'Landkreis Breitengrad', 'Landkreis Längengrad'],
        as_index=False,
    )[['AnzahlFall', 'AnzahlTodesfall']]
    .sum()
    .sort_values('Meldedatum')
)

# One frame per reporting day; animation_group ties each district's marker
# together across frames.
fig = px.scatter_mapbox(
    daily_rki_covid,
    lat='Landkreis Breitengrad',
    lon='Landkreis Längengrad',
    size='AnzahlFall',
    color='AnzahlTodesfall',
    hover_name='Landkreis',
    mapbox_style='open-street-map',
    animation_frame='Meldedatum',
    animation_group='Landkreis',
)
fig.show()

# Tables

In [141]:
# Preview the first rows of the enriched table instead of displaying the whole frame.
rki_covid.head(10)

Unnamed: 0,IdBundesland,Bundesland,Landkreis,Altersgruppe,Geschlecht,AnzahlFall,AnzahlTodesfall,ObjectId,Meldedatum,IdLandkreis,Landkreis Breitengrad,Landkreis Längengrad,Landkreis Geopolygonen
0,15,Sachsen-Anhalt,SK Magdeburg,A35-A59,M,2,0,154936,2020-03-18T00:00:00.000Z,15003,52.116598,11.641482,"{""type"": ""Polygon"", ""coordinates"": [[[11.52090..."
1,15,Sachsen-Anhalt,SK Magdeburg,A35-A59,W,1,0,154937,2020-03-12T00:00:00.000Z,15003,52.116598,11.641482,"{""type"": ""Polygon"", ""coordinates"": [[[11.52090..."
2,15,Sachsen-Anhalt,SK Magdeburg,A35-A59,W,1,0,154938,2020-03-17T00:00:00.000Z,15003,52.116598,11.641482,"{""type"": ""Polygon"", ""coordinates"": [[[11.52090..."
3,15,Sachsen-Anhalt,SK Magdeburg,A60-A79,M,1,0,154939,2020-03-10T00:00:00.000Z,15003,52.116598,11.641482,"{""type"": ""Polygon"", ""coordinates"": [[[11.52090..."
4,15,Sachsen-Anhalt,SK Magdeburg,A60-A79,M,1,0,154940,2020-03-19T00:00:00.000Z,15003,52.116598,11.641482,"{""type"": ""Polygon"", ""coordinates"": [[[11.52090..."
5,15,Sachsen-Anhalt,SK Magdeburg,A60-A79,W,1,0,154941,2020-03-18T00:00:00.000Z,15003,52.116598,11.641482,"{""type"": ""Polygon"", ""coordinates"": [[[11.52090..."
6,15,Sachsen-Anhalt,SK Magdeburg,A80+,M,1,0,154942,2020-03-17T00:00:00.000Z,15003,52.116598,11.641482,"{""type"": ""Polygon"", ""coordinates"": [[[11.52090..."
7,15,Sachsen-Anhalt,SK Magdeburg,A80+,W,1,0,154943,2020-03-13T00:00:00.000Z,15003,52.116598,11.641482,"{""type"": ""Polygon"", ""coordinates"": [[[11.52090..."
8,15,Sachsen-Anhalt,SK Magdeburg,A80+,W,1,0,154944,2020-03-18T00:00:00.000Z,15003,52.116598,11.641482,"{""type"": ""Polygon"", ""coordinates"": [[[11.52090..."
9,15,Sachsen-Anhalt,LK Altmarkkreis Salzwedel,A15-A34,M,1,0,154945,2020-03-12T00:00:00.000Z,15081,52.680048,11.227045,"{""type"": ""Polygon"", ""coordinates"": [[[11.51008..."
