# Spatial join between the hospitals of a region and the population grid

Import necessary libraries and modules

In [None]:
from libadalina_core.readers import geopackage_to_dataframe
import pathlib
import os

Read the geopackages containing the regions, hospitals and population grid data from a local directory.

In [None]:
# Root folder of the sample datasets: read from the SAMPLES_DIR environment
# variable, falling back to an empty (relative) path when it is not set.
base_path = pathlib.Path(os.getenv("SAMPLES_DIR", ""))

# Population grid: keep only `T` (summed later as the population count) and
# the geometry column.
population_gpkg = base_path.joinpath("population-north-italy", "nord-italia.gpkg")
population = geopackage_to_dataframe(str(population_gpkg), "census2021")[['T', 'geometry']]

# Healthcare facilities: keep the descriptive columns and the geometry.
hospitals_gpkg = base_path.joinpath("healthcare", "EU_healthcare.gpkg")
hospitals = geopackage_to_dataframe(str(hospitals_gpkg), "EU")[
    ["hospital_name", "geometry", "city", "cap_beds"]
]

# NUTS regions: keep the level code, name, country code and geometry.
# NOTE(review): the layer name carries a ".gpkg" suffix — presumably this is
# how the layer is named inside the file; confirm against the dataset.
regions_gpkg = base_path.joinpath("regions", "NUTS_RG_20M_2024_4326.gpkg")
regions = geopackage_to_dataframe(str(regions_gpkg), "NUTS_RG_20M_2024_4326.gpkg")[
    ["LEVL_CODE", "NUTS_NAME", "CNTR_CODE", "geometry"]
]

Import libadalina-core spatial operators for performing spatial joins and aggregations.

In [None]:
from libadalina_core.spatial_operators import spatial_join, JoinType, spatial_aggregation, AggregationType, \
    AggregationFunction, polygonize

Select the Italian province of Cremona to obtain the hospitals that are located in this province.

In [None]:
# Select the province of Cremona: one boolean mask per criterion, combined below.
is_level_3 = regions['LEVL_CODE'] == 3          # NUTS level used for the province lookup
is_italian = regions['CNTR_CODE'] == "IT"       # restrict to Italy
is_cremona = regions['NUTS_NAME'].str.contains('Cremona', case=False)  # case-insensitive name match

filtered_regions = regions[is_level_3 & is_italian & is_cremona]

Join the province dataframe with the hospitals dataframe to get all the hospitals in Cremona.

In [None]:
# Left spatial join: the selected province rows on the left, hospitals on the right.
province_hospitals = spatial_join(filtered_regions, hospitals, join_type=JoinType.LEFT)

# The join operator suffixes the two geometry columns with _left and _right to
# avoid conflicts; rename them so the hospital geometry becomes the plain
# `geometry` column and the province geometry keeps an explicit name.
result = (
    province_hospitals
    .withColumnRenamed('geometry_left', 'geometry_provinces')
    .withColumnRenamed('geometry_right', 'geometry')
)

result.show(truncate=False)

Transform the points representing the hospitals on the map into circular polygons with a radius of 1000 meters.

In [None]:
# Polygonize the hospital geometries using a size argument of 1000.
# NOTE: this cell reads and rebinds `result`; re-running it on its own feeds the
# already-polygonized frame back in, so re-run the join cell first.
polygonized_hospitals = polygonize(result, 1000)

# Keep the original geometry under a new name, then promote the polygonized
# geometry to `geometry`. The order of the two renames matters.
result = (
    polygonized_hospitals
    .withColumnRenamed('geometry', 'original_geometry')
    .withColumnRenamed('polygonized_geometry', 'geometry')
)


Join the polygonized hospitals with the population grid to obtain the population living within 1000 meters of each hospital.

In [None]:
# Inner spatial join between the polygonized hospitals and the population grid.
# The join operator suffixes the geometry columns with _left and _right to avoid
# conflicts; the left (hospital) geometry is renamed back to `geometry`, while
# the population geometry stays available as `geometry_right`.
hospitals_with_population = (
    spatial_join(result, population, join_type=JoinType.INNER)
    .withColumnRenamed('geometry_left', 'geometry')
)

# Sum the `T` column into a `population` column.
# NOTE(review): `proportional='geometry_right'` presumably weights each value by
# the overlap with the population geometry — confirm against libadalina-core.
population_sum = AggregationFunction(
    "T",
    AggregationType.SUM,
    'population',
    proportional='geometry_right',
)

result = spatial_aggregation(
    hospitals_with_population,
    aggregate_functions=[population_sum],
)
result.show(truncate=False)