In [1]:
import geopandas as gpd
import pandas as pd

In [2]:
# Load the source dataset: the pharmacy locations as point geometries.
df = gpd.read_file('raw/apothekenstandorte.geojson')

# Load the PLR polygons; each point is matched to the polygon containing it
# in the spatial join further down.
plr = gpd.read_file('../plr/plr_only.geojson')

# PLR attributes without geometry, sorted by PLR_ID (base table for the
# final merge).
plr_no_geo = plr.drop(columns='geometry').sort_values(by='PLR_ID')

# A spatial join is only meaningful when both layers share one coordinate
# reference system (CRS). Enforce that instead of merely displaying the
# comparison: reproject the points onto the polygon CRS if the two differ.
if df.crs != plr.crs:
    df = df.to_crs(plr.crs)

# Displays True once both layers are in the same CRS.
df.crs == plr.crs

True

In [3]:
# Spatial join: assign each point to the PLR polygon it lies within.
df_with_plr = gpd.sjoin(df, plr, how='inner', predicate='within')

# An inner join silently drops points that fall in no polygon and repeats
# points that match more than one polygon; either case would distort the
# per-PLR counts, and equal row counts alone cannot rule out both happening
# at once. The join result keeps the index of `df`, so compare the indexes.
unmatched = df.index.difference(df_with_plr.index)
assert unmatched.empty, f"{len(unmatched)} points were not assigned to any PLR"
assert df_with_plr.index.is_unique, "some points were assigned to more than one PLR"

print("df shape:", df.shape, "\ndf_with_plr shape:", df_with_plr.shape)

df shape: (732, 7) 
df_with_plr shape: (732, 10)


In [4]:
# Peek at the first rows of the joined table (point attributes + PLR columns).
df_with_plr.head(n=5)

Unnamed: 0,gc_id,aponame,gc_strasse,gc_haus,gc_plz,gc_ortsteil,geometry,index_right,PLR_ID,PLR_NAME
0,DEBE00YY2DG0003W,Apotheke im Schultheiss Quartier,Stromstraße,14,10551,Moabit,MULTIPOINT (387650.663 5820963.508),514,1200625,Lübecker Straße
50,DEBE01YYK0002fB2,Alte Roland-Apotheke,Turmstraße,15,10559,Moabit,MULTIPOINT (388078.536 5820876.083),514,1200625,Lübecker Straße
1,DEBE00YY2dR001ZJ,Neptun Apotheke im S-Bahnhof Alexanderplatz,Dircksenstraße,2,10179,Mitte,MULTIPOINT (392585.920 5819674.175),528,1100311,Karl-Marx-Allee
28,DEBE01YYK0000VBL,Alexa-Apotheke,Grunerstraße,20,10179,Mitte,MULTIPOINT (392470.486 5820057.969),528,1100311,Karl-Marx-Allee
2,DEBE00YY2fJ0006W,Arena-Apotheke,Tamara-Danz-Straße,11,10243,Friedrichshain,MULTIPOINT (394487.479 5818426.037),231,2400625,Wriezener Bahnhof


In [5]:
# List the columns available after the spatial join.
df_with_plr.columns

Index(['gc_id', 'aponame', 'gc_strasse', 'gc_haus', 'gc_plz', 'gc_ortsteil',
       'geometry', 'index_right', 'PLR_ID', 'PLR_NAME'],
      dtype='object')

In [6]:
# Keep only the columns the summary needs. Without the geometry column the
# selection is a regular pandas DataFrame.
feature_list = [
    'PLR_ID',
    'aponame',
]
df_with_plr = df_with_plr[feature_list]
df_with_plr.head(n=5)

Unnamed: 0,PLR_ID,aponame
0,1200625,Apotheke im Schultheiss Quartier
50,1200625,Alte Roland-Apotheke
1,1100311,Neptun Apotheke im S-Bahnhof Alexanderplatz
28,1100311,Alexa-Apotheke
2,2400625,Arena-Apotheke


In [7]:
# Aggregate per PLR: how many pharmacies it has and what they are called.
gb = df_with_plr.groupby('PLR_ID')

# Group size -> one row per PLR with its number of pharmacies.
count = gb.size().rename('pharmacy_count').reset_index()

# Collect the pharmacy names of each PLR into a Python list.
names = gb['aponame'].apply(list).rename('pharmacy_names').reset_index()

In [8]:
# Merge the per-PLR aggregates onto the full PLR table.
# A left join keeps exactly one row per PLR, in the PLR_ID order of
# `plr_no_geo`; `validate` makes pandas raise if PLR_ID is duplicated on
# either side instead of silently multiplying rows.
# NOTE: PLRs without any pharmacy end up with NaN in both new columns, which
# also turns `pharmacy_count` into a float column.
result = (
    plr_no_geo
    .merge(count, on='PLR_ID', how='left', validate='one_to_one')
    .merge(names, on='PLR_ID', how='left', validate='one_to_one')
)

result.shape

(542, 4)

In [9]:
# Preview the merged per-PLR summary.
result.head(n=5)

Unnamed: 0,PLR_ID,PLR_NAME,pharmacy_count,pharmacy_names
0,1100101,Stülerstraße,,
1,1100102,Großer Tiergarten,1.0,[Pluspunkt Apotheke Potsdamer Platz]
2,1100103,Lützowstraße,,
3,1100104,Körnerstraße,2.0,"[Humboldt-Apotheke, Potsdamer-Apotheke]"
4,1100205,Wilhelmstraße,2.0,"[Apotheke am Leipziger Platz, Apotheke im Regi..."


In [10]:
# Write the per-PLR summary to disk, leaving out the DataFrame index.
result.to_csv(path_or_buf="pharmacies_per_plr.csv", index=False)