In [1]:
import geopandas as gpd
import pandas as pd

In [2]:
# Load the source dataset; each feature is a nursing home with point coordinates.
df = gpd.read_file('raw/pflegeeinrichtungen.geojson')

# Load the plr polygons: every point is matched to the polygon containing it later on.
plr = gpd.read_file('../plr/plr_only.geojson')

# Attribute-only copy of the plr table (geometry dropped), sorted by PLR_ID.
# It serves as the base table when the per-plr summaries are merged.
plr_no_geo = plr.drop(columns='geometry').sort_values(by='PLR_ID')

# Both layers must use the same crs (coordinate reference system); otherwise the
# spatial join would compare coordinates that are not comparable. Fail loudly
# instead of relying on someone reading the displayed boolean.
crs_match = df.crs == plr.crs
if not crs_match:
    raise ValueError(
        f"CRS mismatch: points use {df.crs}, plr polygons use {plr.crs}; "
        "reproject one layer with .to_crs() before the spatial join"
    )
crs_match

True

In [3]:
# Match every facility point to the plr polygon it lies within.
# how='inner' keeps matched points only, so the two shapes printed below
# should show the same row count if no facility fell outside all polygons.
df_with_plr = df.sjoin(plr, how='inner', predicate='within')

print("df shape:", df.shape, "\ndf_with_plr shape:", df_with_plr.shape)
print(f"Capacity: {df['platzzahl'].sum()} in {df.shape[0]} nursing homes")

df shape: (274, 15) 
df_with_plr shape: (274, 18)
Capacity: 31154 in 274 nursing homes


In [4]:
# Preview the first joined rows to sanity-check the PLR_ID / PLR_NAME assigned to each point.
df_with_plr.head()

Unnamed: 0,ik_nummer,einrichtung_name,gc_strasse,gc_haus,gc_plz,gc_ortsteil,platzzahl,traegername,traegerstrasse,traegerplz,traegerort,traegerverbandkurz,gc_xwert,gc_ywert,geometry,index_right,PLR_ID,PLR_NAME
0,511100229,Verbund Lichtenberger Seniorenheime Seniorenhe...,Judith-Auer-Straße,8,10369,Lichtenberg,122,Bezirksamt Lichtenberg von Berlin - Verbund Li...,Möllendorffstr. 6,10367,Berlin,n.o.-komm.,396096.8429,5821513.122,POINT (396096.843 5821513.122),541,11300616,Hohenschönhauser Straße
1,511100252,Haus Abendsonne,Volkradstraße,28,10319,Friedrichsfelde,113,Altenzentrum 'Erfülltes Leben' gGmbH,Volkradstr. 28,10319,Berlin,DPW,398648.8045,5817706.864,POINT (398648.805 5817706.864),315,11401136,Volkradstraße
2,511100309,Pflegewohnzentrum Wuhlepark,Bansiner Straße,21,12619,Kaulsdorf,296,Pflegewohnzentrum Kaulsdorf-Nord gGmbH,Tangermünder Straße 30,12627,Berlin,DPW,403984.4856,5819609.088,POINT (403984.486 5819609.088),160,10200629,Teterower Ring
3,511100321,Seniorenzentrum Werner-Bockelmann-Haus,Bundesallee,48,10715,Wilmersdorf,110,Werner-Bockelmann-Haus gGmbH Senioren-Zentrum,Bundesallee 49-50,10715,Berlin,DPW,386646.2325,5816277.863,POINT (386646.232 5816277.863),220,4501150,Wilhelmsaue
4,511100376,Sozialpsychiatrisches Pflegezentrum 'Am Körner...,Rübelandstraße,13,12051,Neukölln,62,Alloheim Senioren-Residenzen Fünfte SE & Co. KG,Am Seestern 1,40547,Düsseldorf,bpa,393966.0015,5814539.218,POINT (393966.002 5814539.218),300,8100208,Körnerpark


In [5]:
# List the columns available after the join (source columns plus index_right, PLR_ID, PLR_NAME).
df_with_plr.columns

Index(['ik_nummer', 'einrichtung_name', 'gc_strasse', 'gc_haus', 'gc_plz',
       'gc_ortsteil', 'platzzahl', 'traegername', 'traegerstrasse',
       'traegerplz', 'traegerort', 'traegerverbandkurz', 'gc_xwert',
       'gc_ywert', 'geometry', 'index_right', 'PLR_ID', 'PLR_NAME'],
      dtype='object')

In [6]:
# Restrict the joined table to the columns the per-plr summary needs.
# The geometry column is not part of the selection, so it is dropped here.
feature_list = [
    'PLR_ID',
    'platzzahl',
    'einrichtung_name',
    'traegerverbandkurz',
]
df_with_plr = df_with_plr[feature_list]
df_with_plr.head()

Unnamed: 0,PLR_ID,platzzahl,einrichtung_name,traegerverbandkurz
0,11300616,122,Verbund Lichtenberger Seniorenheime Seniorenhe...,n.o.-komm.
1,11401136,113,Haus Abendsonne,DPW
2,10200629,296,Pflegewohnzentrum Wuhlepark,DPW
3,4501150,110,Seniorenzentrum Werner-Bockelmann-Haus,DPW
4,8100208,62,Sozialpsychiatrisches Pflegezentrum 'Am Körner...,bpa


In [7]:
# Build one summary table per metric, each keyed by PLR_ID.
gb = df_with_plr.groupby('PLR_ID')

# number of facilities per plr
count = gb.size().rename('pe_count').reset_index()

# summed capacity (platzzahl) per plr
capacity = gb['platzzahl'].sum().rename('pe_capacity').reset_index()

# facility names per plr, collected into a list
names = gb['einrichtung_name'].apply(list).rename('pe_names').reset_index()

# facilities per traegerverbandkurz value: one column per value,
# with 0 where a value does not occur in a plr
traeger = (
    gb['traegerverbandkurz']
    .value_counts()
    .unstack()
    .reset_index()
    .fillna(0)
)

In [8]:
# Attach the summaries to the full plr table. Outer merges keep every plr,
# so a plr without any facility ends up with NaN in the summary columns.
result_simple = (
    plr_no_geo
    .merge(count, on='PLR_ID', how='outer')
    .merge(capacity, on='PLR_ID', how='outer')
    .merge(names, on='PLR_ID', how='outer')
)

# The verbose result is the simple one plus the per-traegerverbandkurz counts.
result_verbose = result_simple.merge(traeger, on='PLR_ID', how='outer')

result_simple.shape

(542, 5)

In [9]:
# Preview the simple summary; plr rows without any facility show NaN in the pe_* columns.
result_simple.head()

Unnamed: 0,PLR_ID,PLR_NAME,pe_count,pe_capacity,pe_names
0,1100101,Stülerstraße,,,
1,1100102,Großer Tiergarten,,,
2,1100103,Lützowstraße,2.0,285.0,"[Pro Seniore Krankenheim Genthiner Straße, Pfl..."
3,1100104,Körnerstraße,,,
4,1100205,Wilhelmstraße,,,


In [10]:
# Preview the verbose summary, which adds one count column per traegerverbandkurz value.
result_verbose.head()

Unnamed: 0,PLR_ID,PLR_NAME,pe_count,pe_capacity,pe_names,APH,AVG,AWO,B.A.H.,BKG,Caritas,DPW,DRK,Diakonie,EVAP,VDAB,VPK,bpa,n.o.,n.o.-komm.
0,1100101,Stülerstraße,,,,,,,,,,,,,,,,,,
1,1100102,Großer Tiergarten,,,,,,,,,,,,,,,,,,
2,1100103,Lützowstraße,2.0,285.0,"[Pro Seniore Krankenheim Genthiner Straße, Pfl...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
3,1100104,Körnerstraße,,,,,,,,,,,,,,,,,,
4,1100205,Wilhelmstraße,,,,,,,,,,,,,,,,,,


In [12]:
# Export both summaries as CSV, without the pandas row index.
result_simple.to_csv("pflegeeinrichtungen_per_plr_simple.csv", index=False)
# The verbose file must be written from result_verbose; writing result_simple
# here would produce a second copy of the simple table under the verbose name.
result_verbose.to_csv("pflegeeinrichtungen_per_plr_verbose.csv", index=False)