## Corona Cases Forecasting for Germany on a County Level
### Visualize data based on county:

In [None]:
import pandas as pd 
import geopandas as gpd
import descartes # to plot
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('seaborn')
%matplotlib inline

# autoreload packages
%load_ext autoreload
%autoreload 2
import utils.plotting as plotting
import utils.preprocessing as prep

#### Get Corona data:
RKI_Corona_Landkreise.csv:

GEN                   | NUTS  | cases | deaths | county
--------------------- | ----- | ----- | ------ | ----------
Flensburg             | DEF06 | 21    | 0      |  SK Flensburg
Herzogtum Lauenburg   | DEF07 | 15    | 0      |  LK Herzogtum Lauenburg


df_time: 

Landkreis                 | AnzahlFall | AnzahlTodesfall | Meldedatum               | IdLandkreis | NUTS3  | Bevoelkerung
------------------------- | ---------- | --------------- | ------------------------ | ----------- | ------ | ------------
SK Magdeburg              | 2          | 0               | 2020-03-18T00:00:00.000Z | 15003       | NaN    | NaN
SK Magdeburg              | 1          | 0               | 2020-03-12T00:00:00.000Z | 15003       | NaN    | NaN
LK Altmarkkreis Salzwedel | 1          | 0               | 2020-03-12T00:00:00.000Z | 15081       | NaN    | NaN

In [None]:
# Load the case records that carry a reporting date (Meldedatum) per county.
# The aggregated snapshot is not needed below; to inspect it, use:
#   data = pd.read_csv("data/RKI_Corona_Landkreise.csv"); data.describe()
df_time = pd.read_csv(filepath_or_buffer="data/df_time.csv")
# df_time.head(2)

##### Get additional information about the Counties:
e.g. population density, public transport information, gender distribution, age distribution

### Load county shape information

https://public.opendatasoft.com/explore/dataset/landkreise-in-germany/export/

The most important columns are `cca_2` (the county identifier) and `geometry`:

name_2                   | cca_2  | geometry
------------------------ | ------ | -------------------------------------------------
Freiburg im Breisgau     | 08311  | POLYGON ((7.790447235107478 47.96881103515624,...
Dillingen an der Donau   | 09773  | POLYGON ((10.6144847869873 48.75083541870145, ...


In [None]:
# Read the county shapes from the shapefile.
county_df = gpd.read_file(
    'data/landkreise-in-germany.shp',
    encoding='utf-8',
    dtype={'cca_2': str},
)
# Cast the county identifier to float64 so it can be used as merge key later.
county_df['cca_2'] = county_df['cca_2'].astype('float64')
# county_df.head(2)

### Aggregate Information for different dates

In [None]:
# Cumulative number of reported cases per county up to (and including) each
# reporting date.
# IdLandkreis is cast to float64 so it can be merged on as a float key later.
df_time = df_time.astype({'IdLandkreis': 'float64'})
df_agg = (
    df_time
    # groupby() sorts its keys in ascending order by default, so no explicit
    # pre-sort is required (assumes Meldedatum is an ISO-8601 string, for which
    # lexicographic order equals chronological order).
    .groupby(['IdLandkreis', 'Meldedatum'])
    # The string "sum" selects the built-in groupby reduction; passing the
    # Python builtin `sum` is deprecated in recent pandas versions.
    .agg({'AnzahlFall': 'sum'})
    # Running total of the daily sums within each county.
    .groupby(level='IdLandkreis')
    .cumsum()
    .reset_index()
)

### Merge Geometric and Corona Information and plot:

#### Plot for individual date:

In [None]:
# Build the map frame for one individual reporting date.
date = "2020-03-20T00:00:00.000Z"
df_date = prep.cases_at_date(df_agg, date)
# Left join: every county shape is kept, even without a matching case row.
df = pd.merge(left=county_df, right=df_date, how='left', left_on='cca_2', right_on='IdLandkreis')
# Counties without a match have NaN cases; treat them as zero. Assigning the
# filled column back avoids chained indexing (df[col][mask] = ...), which
# triggers SettingWithCopyWarning and has no effect under pandas Copy-on-Write.
df['AnzahlFall'] = df['AnzahlFall'].fillna(0)
# df.head(3)

In [None]:
# Plot the merged county frame, passing "AnzahlFall" as the column to display
# (presumably drawn as a map coloured by case count -- see utils.plotting).
plotting.plot_map(df, "AnzahlFall")

#### Plot for all dates:

In [None]:
# Smallest and largest Meldedatum value present in the data.
date_1, date_cur = df_time['Meldedatum'].agg(['min', 'max'])

In [None]:
# Plot one map per reporting date.
import time

# Distinct reporting dates as strings, in ascending order (unique() keeps the
# order of first appearance, which is the sorted order here).
dates = df_time['Meldedatum'].sort_values().astype(str).unique()

plt.rcParams['figure.figsize'] = [16, 11]
for date in dates:
    df_date = prep.cases_at_date(df_agg, date)
    # Left join: every county shape is kept, even without a matching case row.
    df = pd.merge(left=county_df, right=df_date, how='left', left_on='cca_2', right_on='IdLandkreis')
    # Counties without a match have NaN cases; treat them as zero. Assigning
    # the filled column back avoids chained indexing, which triggers
    # SettingWithCopyWarning and has no effect under pandas Copy-on-Write.
    df['AnzahlFall'] = df['AnzahlFall'].fillna(0)
    plotting.plot_map(df, "AnzahlFall")
    time.sleep(2)  # pause between consecutive maps