In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# Preprocessing for time stamped Corona Cases on german county level

## Load Dataset from Robert-Koch-Institut with included time stamps.

In [3]:
df_time = pd.read_csv('data/df_time.csv')
df_time.head()

Unnamed: 0,IdBundesland,Bundesland,Landkreis,Altersgruppe,Geschlecht,AnzahlFall,AnzahlTodesfall,ObjectId,Meldedatum,IdLandkreis,NUTS3,Flaeche,Bevoelkerung,maennlich,weiblich,Dichte
0,15,Sachsen-Anhalt,SK Magdeburg,A35-A59,M,2,0,154936,2020-03-18T00:00:00.000Z,15003,,,,,,
1,15,Sachsen-Anhalt,SK Magdeburg,A35-A59,W,1,0,154937,2020-03-12T00:00:00.000Z,15003,,,,,,
2,15,Sachsen-Anhalt,SK Magdeburg,A35-A59,W,1,0,154938,2020-03-17T00:00:00.000Z,15003,,,,,,
3,15,Sachsen-Anhalt,SK Magdeburg,A60-A79,M,1,0,154939,2020-03-10T00:00:00.000Z,15003,,,,,,
4,15,Sachsen-Anhalt,SK Magdeburg,A60-A79,M,1,0,154940,2020-03-19T00:00:00.000Z,15003,,,,,,


In [109]:
df_time = df_time[df_time['IdLandkreis']!='0-1']

In [110]:
df_time.shape

(8230, 10)

### Load and merge data from other RKI_Corona_Landkreise.csv

In [111]:
nuts = pd.read_csv('data/RKI_Corona_Landkreise.csv')
nuts = nuts.filter(['county', 'NUTS'])

In [112]:
df_time = pd.merge(left=df_time, right=nuts, how='left', left_on='Landkreis', right_on='county').drop(columns=['county'])
df_time.head()

Unnamed: 0,IdBundesland,Bundesland,Landkreis,Altersgruppe,Geschlecht,AnzahlFall,AnzahlTodesfall,ObjectId,Meldedatum,IdLandkreis,NUTS
0,15,Sachsen-Anhalt,SK Magdeburg,A35-A59,M,2,0,154936,2020-03-18T00:00:00.000Z,15003,DEE03
1,15,Sachsen-Anhalt,SK Magdeburg,A35-A59,W,1,0,154937,2020-03-12T00:00:00.000Z,15003,DEE03
2,15,Sachsen-Anhalt,SK Magdeburg,A35-A59,W,1,0,154938,2020-03-17T00:00:00.000Z,15003,DEE03
3,15,Sachsen-Anhalt,SK Magdeburg,A60-A79,M,1,0,154939,2020-03-10T00:00:00.000Z,15003,DEE03
4,15,Sachsen-Anhalt,SK Magdeburg,A60-A79,M,1,0,154940,2020-03-19T00:00:00.000Z,15003,DEE03


### Load and merge data about population numbers, sex and density

In [113]:
BV = pd.read_csv('data/Landkreise1.csv', encoding = "utf-8", sep=';').drop(columns=['LandkreisId'])
BV.head()

Unnamed: 0,NUTS3,Flaeche,Bevoelkerung,maennlich,weiblich,Dichte
0,DEF01,5673,89 504,44 599,44 905,1 578
1,DEF02,11865,247 548,120 566,126 982,2 086
2,DEF03,21419,217 198,104 371,112 827,1 014
3,DEF04,7166,79 487,39 241,40 246,1 109
4,DEF05,"1 428,17",133 210,65 720,67 490,93


In [114]:
BV['NUTS3'].unique().shape

(245,)

In [115]:
df_time['NUTS'].unique().shape

(394,)

In [116]:
df_time = pd.merge(left=df_time, right=BV, how='left', left_on='NUTS', right_on='NUTS3').drop(columns=['NUTS'])
df_time.head()

Unnamed: 0,IdBundesland,Bundesland,Landkreis,Altersgruppe,Geschlecht,AnzahlFall,AnzahlTodesfall,ObjectId,Meldedatum,IdLandkreis,NUTS3,Flaeche,Bevoelkerung,maennlich,weiblich,Dichte
0,15,Sachsen-Anhalt,SK Magdeburg,A35-A59,M,2,0,154936,2020-03-18T00:00:00.000Z,15003,,,,,,
1,15,Sachsen-Anhalt,SK Magdeburg,A35-A59,W,1,0,154937,2020-03-12T00:00:00.000Z,15003,,,,,,
2,15,Sachsen-Anhalt,SK Magdeburg,A35-A59,W,1,0,154938,2020-03-17T00:00:00.000Z,15003,,,,,,
3,15,Sachsen-Anhalt,SK Magdeburg,A60-A79,M,1,0,154939,2020-03-10T00:00:00.000Z,15003,,,,,,
4,15,Sachsen-Anhalt,SK Magdeburg,A60-A79,M,1,0,154940,2020-03-19T00:00:00.000Z,15003,,,,,,


In [None]:
df_time.to_csv('data/df_time.csv')