# Covid-19 Analysis Germany
What we do
1. Import Data 
2. Do some Analysis

In [None]:

import requests
from requests_ntlm import HttpNtlmAuth

import json
import geojson as gj

# Standard plotly imports
import plotly.graph_objs as go
import plotly.express as px
from plotly.offline import iplot, init_notebook_mode
# Using plotly + cufflinks in offline mode
import cufflinks
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)

import numpy as np

import pandas as pd
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

from datetime import datetime



## Import Data
Data is imported from NPGEO Corona https://npgeo-corona-npgeo-de.hub.arcgis.com/datasets/917fc37a709542548cc3be077a786c17_0
    1. This is the service with an aggregated statistics per Landkreis and area boundaries https://services7.arcgis.com/mOBPykOjAyBO2ZKk/arcgis/rest/services/RKI_Landkreisdaten/FeatureServer/0/query?where=1%3D1&outFields=*&returnDistinctValues=true&outSR=4326&f=json
    2. This is the service for the case numbers per Landkreis on a detailed level without geometric data https://opendata.arcgis.com/datasets/dd4580c810204019a7b8eb3e0b329dd6_0.geojson
    3. This is the service for the boundaries of the Bundesländer of 2019 without inhabitants
    https://opendata.arcgis.com/datasets/9ae4f23075d340adb6580a6d9603f9fa_0.geojson
    4. Inhabitants of Bundesländer https://services7.arcgis.com/mOBPykOjAyBO2ZKk/arcgis/rest/services/Coronaf%C3%A4lle_in_den_Bundesl%C3%A4ndern/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=false&outSR=4326&f=json

In [None]:
# Download the RKI_Landkreisdaten feature service response and keep the decoded
# JSON in `jsonData1`; the cell displays the HTTP status code.
url1 = 'https://services7.arcgis.com/mOBPykOjAyBO2ZKk/arcgis/rest/services/RKI_Landkreisdaten/FeatureServer/0/query?where=1%3D1&outFields=*&returnDistinctValues=true&outSR=4326&f=json'
# Without a timeout a stalled server would block the notebook indefinitely.
r1 = requests.get(url1, timeout=60)
# Surface HTTP errors directly instead of as a confusing JSON decode failure.
r1.raise_for_status()
jsonData1 = r1.json()
r1.status_code

In [None]:
# Download the detailed case GeoJSON and keep the decoded JSON in `jsonData2`;
# the cell displays the HTTP status code.
url2 = 'https://opendata.arcgis.com/datasets/dd4580c810204019a7b8eb3e0b329dd6_0.geojson'
# Without a timeout a stalled server would block the notebook indefinitely.
r2 = requests.get(url2, timeout=60)
# Surface HTTP errors directly instead of as a confusing JSON decode failure.
r2.raise_for_status()
jsonData2 = r2.json()
r2.status_code

In [None]:
# Download the Bundesland boundary GeoJSON and keep the decoded JSON in
# `jsonData3`; the cell displays the HTTP status code.
url3 = 'https://opendata.arcgis.com/datasets/9ae4f23075d340adb6580a6d9603f9fa_0.geojson'
# Without a timeout a stalled server would block the notebook indefinitely.
r3 = requests.get(url3, timeout=60)
# Surface HTTP errors directly instead of as a confusing JSON decode failure.
r3.raise_for_status()
jsonData3 = r3.json()
r3.status_code

In [None]:
# Download the per-Bundesland feature service response (no geometry requested)
# and keep the decoded JSON in `jsonData4`; the cell displays the HTTP status code.
url4 = 'https://services7.arcgis.com/mOBPykOjAyBO2ZKk/arcgis/rest/services/Coronaf%C3%A4lle_in_den_Bundesl%C3%A4ndern/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=false&outSR=4326&f=json'
# Without a timeout a stalled server would block the notebook indefinitely.
r4 = requests.get(url4, timeout=60)
# Surface HTTP errors directly instead of as a confusing JSON decode failure.
r4.raise_for_status()
jsonData4 = r4.json()
r4.status_code

In [None]:
# Schema check: list the attribute names of the first feature in jsonData1.
jsonData1['features'][0]['attributes'].keys()

In [None]:
# Schema check: list the top-level keys of the first feature in jsonData1.
jsonData1['features'][0].keys()

In [None]:
# Schema check: list the keys of the first feature's geometry object in jsonData1.
jsonData1['features'][0]['geometry'].keys()

In [None]:
# Schema check: list the property names of the first feature in jsonData2;
# these become the DataFrame columns further down.
jsonData2['features'][0]['properties'].keys()

In [None]:
# Schema check: list the top-level keys of the first feature in jsonData3.
jsonData3['features'][0].keys()

In [None]:
# Schema check: list the property names of the first feature in jsonData3.
jsonData3['features'][0]['properties'].keys()

In [None]:
# Schema check: list the attribute names of the first feature in jsonData4.
jsonData4['features'][0]['attributes'].keys()

In [None]:
# Flatten the detailed case GeoJSON (jsonData2) into a DataFrame: one row per
# feature, one column per property, indexed by IdLandkreis.
# The column set is taken from the first feature; every feature is read with
# exactly these keys.
dataKeys = list(jsonData2['features'][0]['properties'].keys())
# Named `caseRows` rather than `all` so the builtin all() stays usable in the notebook.
caseRows = [[feature['properties'][key] for key in dataKeys] for feature in jsonData2['features']]
df = pd.DataFrame(caseRows, columns=dataKeys)
df = df.set_index('IdLandkreis')
df = df.sort_values(by=['Altersgruppe', 'Geschlecht', 'IdLandkreis', 'Meldedatum'])
#df.to_excel("c:/tmp/output.xlsx")
# Only rows whose NeuerFall / NeuerTodesfall flag is non-negative are counted.
# .copy() turns the selections into independent frames, so the in-place sorts
# applied to dfC and dfX in later cells do not raise SettingWithCopyWarning.
indexN = df['NeuerFall'] >= 0
dfC = df[indexN].copy()
indexX = df['NeuerTodesfall'] >= 0
dfX = df[indexX].copy()
print("Total Cases: " + str(dfC['AnzahlFall'].sum()))
print("Total Deaths: " + str(dfX['AnzahlTodesfall'].sum()))

In [None]:
# Turn every county feature of jsonData1 into a GeoJSON feature whose id is the
# RS key and whose geometry is a Polygon built from the feature's rings, so the
# map can match DataFrame rows to shapes through the 'lkid' column.
countiesGermanyGeojson = gj.FeatureCollection([
    gj.Feature(id=entry['attributes']['RS'], geometry=gj.Polygon(entry['geometry']['rings']))
    for entry in jsonData1['features']
])

#with open('germany.json', 'w') as outfile:
#    json.dump(countiesGermanyGeojson, outfile)

# One list per column, all in the order of jsonData1['features'].
objectIds = [entry['attributes']['RS'] for entry in jsonData1['features']]
counties = [entry['attributes']['county'] for entry in jsonData1['features']]
cases = [entry['attributes']['cases'] for entry in jsonData1['features']]
deaths = [entry['attributes']['deaths'] for entry in jsonData1['features']]
casesPer100k = [entry['attributes']['cases_per_100k'] for entry in jsonData1['features']]
dfCases = pd.DataFrame({
    'lkid': objectIds,
    'cases_per_100k': casesPer100k,
    'Counties': counties,
    'Cases': cases,
    'Deaths': deaths,
})

# County map colored by cumulative cases per 100k inhabitants.
fig = px.choropleth_mapbox(
    dfCases,
    geojson=countiesGermanyGeojson,
    locations='lkid',
    color='cases_per_100k',
    #color_continuous_scale="Blues",
    color_continuous_scale=["white", "blue"],
    mapbox_style="carto-positron",
    zoom=5,
    center={"lat": 51.0, "lon": 10.5},
    opacity=0.5,
    hover_name="Counties",
    hover_data=["Cases", "Deaths"],
    labels={'cases_per_100k': 'Cases per 100k'},
    width=800,
    height=800,
)

# The title carries the first 10 characters of the first feature's last_update
# value plus the case and death sums over all counties.
countyMapTitle = (
    f"Covid-19 Cases in Germany as of {str(jsonData1['features'][0]['attributes']['last_update'])[0:10]}"
    f": Cases: {dfCases['Cases'].sum()}, Deaths: {dfCases['Deaths'].sum()}"
)
fig.update_layout(margin=dict(r=0, t=100, l=0, b=0), title=countyMapTitle)
fig.show()

In [None]:
# County map of new cases per 100k inhabitants in the last 7 days.
# The county columns are rebuilt from jsonData1 in this cell because the name
# `objectIds` is rebound to other ids by the Bundesland cell; reusing it would
# break this cell when it is re-run afterwards.
countyAttributes = [feature['attributes'] for feature in jsonData1['features']]
cases7Per100k = [attrs['cases7_per_100k'] for attrs in countyAttributes]
countyCountZeroInfectionsLast7Days = cases7Per100k.count(0)
print (str(countyCountZeroInfectionsLast7Days) + ' counties with zero infections in the last 7 days marked in green')
dfCases = pd.DataFrame({
    'lkid': [attrs['RS'] for attrs in countyAttributes],
    'cases_per_100k': [attrs['cases_per_100k'] for attrs in countyAttributes],
    'cases7_per_100k': cases7Per100k,
    'Counties': [attrs['county'] for attrs in countyAttributes],
    'Cases': [attrs['cases'] for attrs in countyAttributes],
    'Deaths': [attrs['deaths'] for attrs in countyAttributes],
})
fig = px.choropleth_mapbox(dfCases, geojson=countiesGermanyGeojson, locations='lkid', color='cases7_per_100k',
                           # Scale positions: 0.0 is green (exactly zero new cases), then white -> blue
                           # up to 0.5, and solid red from 0.5 upwards.
                           color_continuous_scale=[[0.0, "green"], [10**(-6), "green"],[10**(-6), "white"], [0.5, "blue"], [0.5, "red"], [1.0, "red"]],
                           # Pin the color range to 0..100 so that position 0.0 is always the value 0
                           # and position 0.5 is always the value 50. A midpoint alone lets the range
                           # follow the data, which moves 0 off the green stop as soon as the maximum
                           # exceeds 100 or the minimum is above 0. Values above 100 stay red.
                           range_color=[0, 100],
                           #color_continuous_scale=["white", "blue"],
                           mapbox_style="carto-positron",
                           center= {"lat": 51.0, "lon": 10.5},
                           opacity = 1.0,
                           zoom = 5,
                           hover_name="Counties",
                           hover_data =["Cases", "Deaths", 'cases_per_100k'],
                           labels={'cases_per_100k':'Cases per 100k', 'cases7_per_100k' : 'New Cases in the last 7 days per 100k'},
                           width = 800,
                           height = 800
                          )
# Title: first 10 characters of the first feature's last_update value plus the
# case and death sums over all counties.
fig.update_layout(margin={"r":0,"t":100,"l":0,"b":0}, title=('New Covid-19 Cases in the last 7 days in Germany per County and 100k Inhabitants as of ' + str(jsonData1['features'][0]['attributes']['last_update'])[0:10] +
                                ': Cases: ' + str(dfCases.loc[:, "Cases"].sum()) + ', Deaths: ' + str(dfCases.loc[:, "Deaths"].sum())))
fig.show()

In [None]:

#with open('countries.json', 'w') as outfile:
#    json.dump(jsonData3, outfile)

# Copy each feature's FID property to a top-level 'id' so the map can match
# the 'IdBundesland' column against the GeoJSON features.
for feature in jsonData3['features']:
    feature['id'] = feature['properties']['FID']

# Per-Bundesland sums from the detailed case data, ordered by IdBundesland.
casesGroupedByState = dfC.groupby('IdBundesland')
casesBundesland = casesGroupedByState['AnzahlFall'].sum().sort_index()
deathsBundesland = dfX.groupby('IdBundesland')['AnzahlTodesfall'].sum().sort_index()
namesBundesland = casesGroupedByState['Bundesland'].first().sort_index()

# Inhabitants per Bundesland from jsonData4, keyed by OBJECTID_1.
# NOTE(review): the division below aligns OBJECTID_1 with IdBundesland by value;
# this assumes both use the same numbering - confirm against the service.
objectIds = [entry['attributes']['OBJECTID_1'] for entry in jsonData4['features']]
inhabitants = [entry['attributes']['LAN_ew_EWZ'] for entry in jsonData4['features']]
inhabitantsBundesland = pd.Series(inhabitants, index=objectIds).sort_index()
casesPer100kBundesland = casesBundesland / inhabitantsBundesland * 100000

dfCasesBundesland = pd.DataFrame({
    'IdBundesland': casesBundesland.index,
    'Bundesland': namesBundesland,
    'Cases per 100k': casesPer100kBundesland,
    'Cases': casesBundesland,
    'Deaths': deathsBundesland,
})

# Bundesland map colored by cases per 100k inhabitants.
fig = px.choropleth_mapbox(
    dfCasesBundesland,
    geojson=jsonData3,
    locations='IdBundesland',
    color='Cases per 100k',
    #color_continuous_scale="Blues",
    color_continuous_scale=["white", "blue"],
    mapbox_style="carto-positron",
    zoom=5,
    center={"lat": 51.0, "lon": 10.5},
    opacity=0.5,
    hover_name="Bundesland",
    hover_data=["Cases", "Deaths"],
    width=800,
    height=800,
)

# The title totals are read from the county-level frame dfCases built in an
# earlier cell, and the date from the first feature of jsonData1.
stateMapTitle = (
    f"Covid-19 Cases in Germany as of {str(jsonData1['features'][0]['attributes']['last_update'])[0:10]}"
    f": Cases: {dfCases['Cases'].sum()}, Deaths: {dfCases['Deaths'].sum()}"
)
fig.update_layout(margin=dict(r=0, t=100, l=0, b=0), title=stateMapTitle)
fig.show()

In [None]:
# Summed deaths per age group, colored by sex. The title shows the overall
# death count and the first 10 characters of the latest Meldedatum in dfX.
deathsTotal = dfX['AnzahlTodesfall'].sum()
deathsReportedUntil = dfX['Meldedatum'].max()[0:10]
fig = px.histogram(
    dfX,
    x='Altersgruppe',
    y='AnzahlTodesfall',
    color='Geschlecht',
    histfunc='sum',
)
fig.update_layout(
    title=f'Covid-19 Deaths (Total: {deathsTotal}) in Germany as of {deathsReportedUntil} by Sex'
)
fig.show()

In [None]:
# Summed cases per age group, colored by sex, for one county (IdLandkreis key).
county = '08416'
# Select from dfC (rows with NeuerFall >= 0), the same frame the Germany-wide
# case charts and the "as of" date use, so the county total matches them.
# The list selector [[county]] always yields a DataFrame, even if the county
# has a single row.
dfCCounty = dfC.loc[[county]]
# .iloc[0] takes the first row by position; plain [0] on this string-indexed
# Series relies on a deprecated positional fallback.
countyName = str(dfCCounty['Landkreis'].iloc[0])
fig = px.histogram(dfCCounty, x='Altersgruppe', y='AnzahlFall', color='Geschlecht', histfunc="sum")
fig.update_layout(title=('Covid-19 Cases (Total: ' + str(dfCCounty.loc[:,'AnzahlFall'].sum()) + ') in ' +
                         countyName +
                         ' as of ' + dfC.loc[:, 'Meldedatum'].max()[0:10] +
                         ' by Sex'))
fig.show()

In [None]:
# Summed cases per age group, colored by sex. The title shows the overall
# case count and the first 10 characters of the latest Meldedatum in dfC.
casesTotal = dfC['AnzahlFall'].sum()
casesReportedUntil = dfC['Meldedatum'].max()[0:10]
fig = px.histogram(
    dfC,
    x='Altersgruppe',
    y='AnzahlFall',
    color='Geschlecht',
    histfunc="sum",
)
fig.update_layout(
    title=f'Covid-19 Cases (Total: {casesTotal}) in Germany as of {casesReportedUntil} by Sex'
)
fig.show()

In [None]:
# Summed cases per Bundesland, colored by age group, with the Bundesländer
# ordered by descending total.
casesTotal = dfC['AnzahlFall'].sum()
casesReportedUntil = dfC['Meldedatum'].max()[0:10]
fig = px.histogram(
    dfC,
    x='Bundesland',
    y='AnzahlFall',
    color='Altersgruppe',
    histfunc="sum",
)
fig.update_xaxes(categoryorder="total descending")
fig.update_layout(
    title=f'Covid-19 Cases (Total: {casesTotal}) in Germany as of {casesReportedUntil} by Bundesland and Age'
)
fig.show()

In [None]:
# Summed deaths per Bundesland, colored by age group, with the Bundesländer
# ordered by descending total.
deathsTotal = dfX['AnzahlTodesfall'].sum()
deathsReportedUntil = dfX['Meldedatum'].max()[0:10]
fig = px.histogram(
    dfX,
    x='Bundesland',
    y='AnzahlTodesfall',
    color='Altersgruppe',
    histfunc="sum",
)
fig.update_xaxes(categoryorder="total descending")
fig.update_layout(
    title=f'Covid-19 Deaths (Total: {deathsTotal}) in Germany as of {deathsReportedUntil} by Bundesland and Age'
)
fig.show()

In [None]:
# Summed deaths per sex. The title shows the overall death count and the
# first 10 characters of the latest Meldedatum in dfX.
deathsTotal = dfX['AnzahlTodesfall'].sum()
deathsReportedUntil = dfX['Meldedatum'].max()[0:10]
fig = px.histogram(
    dfX,
    x='Geschlecht',
    y='AnzahlTodesfall',
    histfunc="sum",
)
fig.update_layout(
    title=f'Covid-19 Deaths (Total: {deathsTotal}) in Germany as of {deathsReportedUntil} by Sex'
)
fig.show()

In [None]:
# Summed cases per Meldedatum, colored by age group.
# dfC is a selection taken from df; sorting it in place triggers pandas'
# SettingWithCopyWarning, so the sorted result is bound back to dfC instead.
dfC = dfC.sort_values(by=['Meldedatum', 'Altersgruppe'])
fig = px.histogram(dfC, x='Meldedatum', y='AnzahlFall', color='Altersgruppe', histfunc="sum")
fig.update_layout(title=('Covid-19 Cases (Total: ' + str(dfC.loc[:,'AnzahlFall'].sum()) + ') in Germany as of ' + dfC.loc[:, 'Meldedatum'].max()[0:10] +
                         ' by Date and Age'))
fig.show()

In [None]:
# Summed cases per Meldedatum, colored by the NeuerFall flag.
# dfC is a selection taken from df; sorting it in place triggers pandas'
# SettingWithCopyWarning, so the sorted result is bound back to dfC instead.
dfC = dfC.sort_values(by=['Meldedatum', 'Altersgruppe'])
fig = px.histogram(dfC, x='Meldedatum', y='AnzahlFall', color='NeuerFall', histfunc="sum")
fig.update_layout(title=('Covid-19 Cases (Total: ' + str(dfC.loc[:,'AnzahlFall'].sum()) + ') in Germany as of ' + dfC.loc[:, 'Meldedatum'].max()[0:10] +
                         ' by new cases vs.existing cases'))
fig.show()

In [None]:
# Doubling time of the cumulative case count, smoothed over `rollingAvg` days.
rollingAvg = 7
dfByDate = dfC.set_index('Meldedatum').sort_values(by=['Meldedatum'])
# New cases per Meldedatum and their running total.
totalPerDay = dfByDate.groupby(['Meldedatum'])['AnzahlFall'].sum()
total = totalPerDay.cumsum()
# Daily factor as computed here: 1 + (new cases of the day / cumulative total
# including that day).
factor = 1 + totalPerDay / total
# The first rollingAvg - 1 entries of the rolling mean are NaN.
growthFactor = factor.rolling(rollingAvg).mean()
# Days needed to double at a constant factor g: log(2) / log(g).
timeToDouble = np.log(2) / np.log(growthFactor)
timeToDoubleRaw = np.log(2) / np.log(factor)
fig = px.line(x = timeToDouble.keys(), y=timeToDouble, range_x=['2020-03-01', datetime.now().strftime('%Y-%m-%d')])
# The "as of" date comes from the case frame dfC that this series is built
# from, not from the deaths frame dfX.
fig.update_layout(title=('Covid-19 Time to Double Rolling Average ' +
                         str(rollingAvg) +' Days in Germany as of ' +
                         dfC.loc[:, 'Meldedatum'].max()[0:10]))
fig.show()

In [None]:
# Unsmoothed doubling time per Meldedatum. The x axis is taken from the same
# series that is plotted so both axes are guaranteed to line up.
fig = px.line(x = timeToDoubleRaw.keys(), y=timeToDoubleRaw, range_x=['2020-03-01', datetime.now().strftime('%Y-%m-%d')])
# The "as of" date comes from the case frame dfC, because this is a chart of
# case data rather than deaths (dfX).
fig.update_layout(title=('Covid-19 Time to Double Raw in Germany as of ' +
                         dfC.loc[:, 'Meldedatum'].max()[0:10]))
fig.show()

In [None]:
# Rolling-mean growth factor per Meldedatum.
fig = px.line(x = growthFactor.keys(), y=growthFactor, range_x=['2020-03-01', datetime.now().strftime('%Y-%m-%d')])
# The "as of" date comes from the case frame dfC, because this is a chart of
# case data rather than deaths (dfX).
fig.update_layout(title=('Covid-19 Growth Factor in Germany as of ' +
                         dfC.loc[:, 'Meldedatum'].max()[0:10]))
fig.show()

In [None]:
# Cumulative case count per Meldedatum.
fig = px.line(x = total.keys(), y=total)
# Both the total and the "as of" date are taken from the case frame dfC so the
# title describes a single data set.
fig.update_layout(title=('Covid-19 Cases (Total: ' + str(dfC.loc[:,'AnzahlFall'].sum()) + ') in Germany as of ' +
                         dfC.loc[:, 'Meldedatum'].max()[0:10]))
fig.show()

In [None]:
# Summed deaths per Meldedatum, colored by age group.
# dfX is a selection taken from df; sorting it in place triggers pandas'
# SettingWithCopyWarning, so the sorted result is bound back to dfX instead.
dfX = dfX.sort_values(by=['Meldedatum'])
fig = px.histogram(dfX, x='Meldedatum', y='AnzahlTodesfall',  color='Altersgruppe', histfunc="sum")
fig.update_layout(title=('Covid-19 Deaths (Total: ' + str(dfX.loc[:,'AnzahlTodesfall'].sum()) + ') in Germany as of ' + dfX.loc[:, 'Meldedatum'].max()[0:10] +
                         ' by Date and Age'))
fig.show()