In [53]:
import os
import descartes
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('seaborn')

dir = "."
dataDir = os.path.abspath(dir)

In [54]:
#Read CoVerage data lake
df_coverage = pd.read_csv(dataDir + "/TestLake.csv", delimiter=",")
df_coverage.head()

Unnamed: 0,id (S),timestamp (S),coronaVirus (N),coughing (N),diarrhea (N),gender (N),generalHealth (N),headache (N),insomnia (N),limbPain (N),loneliness (N),numberOfContacts (N),postalCode (S),runnyNose (N),soreThroat (N),temperature (N),userId (S),yearOfBirth (N)
0,002-81-8444,1584808212496,2,2,1,2,2,1,2,1,1,486,82278,3,2,2,Escher,2001
1,003-66-7460,1584808207788,3,1,1,1,5,2,5,2,4,39,31100,2,2,2,Pollock,1971
2,011-71-5874,1584808207109,1,2,2,1,4,2,4,2,4,129,65623,2,1,2,Seurat,1959
3,022-09-5822,1584808219209,3,2,1,2,2,2,5,3,5,208,76623,2,2,1,Rubens,1986
4,037-19-9715,1584808219473,2,2,2,2,3,2,3,2,2,888,51846,1,2,2,Titian,2000


In [55]:
# Read PLZ polygons
# Make sure you read postal codes as strings, otherwise 
# the postal code 01110 will be parsed as the number 1110. 
plz_shape_df = gpd.read_file('plz-gebiete/plz-gebiete.shp', dtype={'plz': str})
plz_shape_df.head()



Unnamed: 0,plz,note,geometry
0,52538,"52538 Gangelt, Selfkant","POLYGON ((5.86632 51.05110, 5.86692 51.05124, ..."
1,47559,47559 Kranenburg,"POLYGON ((5.94504 51.82354, 5.94580 51.82409, ..."
2,52525,"52525 Waldfeucht, Heinsberg","POLYGON ((5.96811 51.05556, 5.96951 51.05660, ..."
3,52074,52074 Aachen,"POLYGON ((5.97486 50.79804, 5.97495 50.79809, ..."
4,52531,52531 Ãbach-Palenberg,"POLYGON ((6.01507 50.94788, 6.03854 50.93561, ..."


In [56]:
#Init plot parameter
plt.rcParams['figure.figsize'] = [16, 11]

In [57]:
# Define cities to plot 
top_cities = {
    'Berlin': (13.404954, 52.520008), 
    'Cologne': (6.953101, 50.935173),
    'Düsseldorf': (6.782048, 51.227144),
    'Frankfurt am Main': (8.682127, 50.110924),
    'Hamburg': (9.993682, 53.551086),
    'Leipzig': (12.387772, 51.343479),
    'Munich': (11.576124, 48.137154),
    'Dortmund': (7.468554, 51.513400),
    'Stuttgart': (9.181332, 48.777128),
    'Nuremberg': (11.077438, 49.449820),
    'Hannover': (9.73322, 52.37052)
}

In [58]:
# Merge plz_df and Coverage lake.
plz_shape_df = pd.merge(
    left=plz_shape_df, 
    right=df_coverage, 
    left_on='plz',
    right_on='postalCode',
    how='left'
)

plz_shape_df.head()

KeyError: 'postalCode'

In [None]:
# Define plot feature
plz_shape_df = plz_shape_df \
    .assign(first_dig_plz = lambda x: x['plz'].str.slice(start=4, stop=5))

In [None]:
#Plot
fig, ax = plt.subplots()



#plot plz shape
plz_shape_df.plot(
    ax=ax, 
    column='first_dig_plz', 
    categorical=True, 
    legend=True, 
    legend_kwds={'title':'CoVerage Map', 'loc':'lower right'},
    cmap='tab20',
    alpha=0.9
)

#plot cities
for c in top_cities.keys():
    ax.text(
        x=top_cities[c][0], 
        y=top_cities[c][1] + 0.08, 
        s=c, 
        fontsize=12,
        ha='center', 
    )

    ax.plot(
        top_cities[c][0], 
        top_cities[c][1], 
        marker='o',
        c='black', 
        alpha=0.5
    )

ax.set(
    title='CoVerage', 
    aspect=1.3,
    facecolor='white'
);