# Get POIs from OSM Data via Overpass API

**Import libraries**

In [1]:
# Import necessary libraries
import overpy
import geopandas as gpd
from shapely.geometry import Point
from shapely.wkt import loads
import pandas as pd
from pathlib import Path
import os

In [2]:
# Change working directory to parent folder, so that the paths built from
# Path.cwd() / 'data' and the relative "data/..." strings used further down
# resolve against that folder.
# NOTE(review): the change is relative to the current directory, so running
# this cell a second time moves up one more level — confirm the printed path.
os.chdir("..")  # Move up one directory level

# Check current working directory
print(os.getcwd())

c:\Users\Johan\PycharmProjects\heal_paper


**Define function to get POIs from Overpass API**

In [3]:
def download_pois(area_name, key, tags, output_name):
    """
    Download and process POIs from OpenStreetMap using Overpass API.

    Nodes, ways and relations carrying ``key=<tag>`` for any of the given tags
    are queried inside the ``admin_level=6`` area called ``area_name``. Every
    element is reduced to a point (node coordinates, or the centre returned by
    ``out center`` for ways and relations). Features sharing the same name and
    lying within 100 m of the preferred feature of that name (node before way
    before relation) are treated as duplicates and removed. The result is
    written to ``<cwd>/data/<output_name>.geojson`` and summary statistics are
    printed.

    Parameters:
    - area_name (str): Name of the area to query (e.g., "Heidelberg").
    - key (str): The key for the POIs (e.g., "amenity", "shop").
    - tags (list): List of tag values to query (e.g., ["hospital", "clinic"]).
    - output_name (str): Name of the output GeoJSON file (e.g., "pois_hd_hospitals_clinics").

    Returns:
    - geopandas.GeoDataFrame: The de-duplicated POIs in EPSG:4326 with the
      columns osm_id, name, category, tags, geom_type and geometry.

    Raises:
    - ValueError: If the query returns no elements.
    """
    # Initialize Overpass API
    api = overpy.Overpass()

    # Build query for the provided key and tags: one statement per tag and
    # OSM element type
    tag_queries = "\n".join(
        f'{osm_type}["{key}"="{tag}"](area.searchArea);'
        for tag in tags
        for osm_type in ("node", "way", "relation")
    )

    query = f"""
        [out:json];
        area["name"="{area_name}"]["admin_level"="6"]->.searchArea;
        (
          {tag_queries}
        );
        out center;
    """

    # Execute query
    result = api.query(query)

    # Prepare data for GeoDataFrame
    data = []

    for element in result.nodes + result.ways + result.relations:
        if isinstance(element, overpy.Node):
            element_type = 'node'
            lat, lon = element.lat, element.lon
        else:
            # Ways and relations only carry the centre point from "out center"
            element_type = 'way' if isinstance(element, overpy.Way) else 'relation'
            lat, lon = element.center_lat, element.center_lon

        data.append({
            'osm_id': f"{element_type}/{element.id}",
            'name': element.tags.get('name', 'N/A'),
            'category': element.tags.get(key, 'N/A'),
            'tags': dict(element.tags),
            'geom_type': element_type,
            # Explicit float conversion of the coordinates returned by overpy
            'geometry': Point(float(lon), float(lat))
        })

    if not data:
        # Without rows there is no geometry column to build a GeoDataFrame from
        raise ValueError(
            f"Overpass returned no elements for {key}={tags} in area '{area_name}'"
        )

    # Create GeoDataFrame; the priority makes nodes sort before ways before
    # relations so that the first row of every name group is the preferred one
    gdf = gpd.GeoDataFrame(data, crs="EPSG:4326", geometry='geometry')
    gdf['priority'] = gdf['geom_type'].map({'node': 0, 'way': 1, 'relation': 2})
    gdf = gdf.sort_values(['name', 'priority'])

    # Filter nearby duplicates
    def filter_nearby_duplicates(group):
        """Drop rows of a same-name group lying within 100 m of its first row."""
        # Features without a name all share the placeholder 'N/A'; they are
        # unrelated to each other and must not be collapsed as duplicates.
        if len(group) == 1 or group['name'].iloc[0] == 'N/A':
            return group

        # Project only for the distance test (EPSG:25832 uses metres); the
        # rows that are returned keep their original, untouched coordinates.
        projected = group.geometry.to_crs(epsg=25832)

        # 100m buffer around the first (preferred) point
        buffer = projected.iloc[0].buffer(100)

        is_duplicate = projected.within(buffer)
        is_duplicate.iloc[0] = False  # the first point itself is always kept
        return group[~is_duplicate]

    # Iterate the groups explicitly instead of groupby.apply, whose handling of
    # the grouping column is deprecated in recent pandas versions.
    gdf = pd.concat(
        [filter_nearby_duplicates(group) for _, group in gdf.groupby('name')]
    )
    gdf = gdf.drop(columns=['priority'])

    # Write the GeoDataFrame to a GeoJSON file
    output_path = Path.cwd() / 'data' / f"{output_name}.geojson"
    output_path.parent.mkdir(parents=True, exist_ok=True)
    gdf.to_file(output_path, driver='GeoJSON')

    print(f"Data saved to {output_path}")

    # Print statistics
    print("\nStatistics:")
    stats = gdf['category'].value_counts()
    print(stats)
    print(f"\nTotal number of POIs: {len(gdf)}")

    return gdf


In [4]:
# Supermarkets: shop=supermarket
gdf_supermarkets = download_pois(
    area_name="Heidelberg",
    key="shop",
    tags=["supermarket"],
    output_name="pois_hd_supermarket_osm",
)

# Hospitals, clinics, doctors, pharmacies and kindergartens: all amenity=* values
gdf_pois = download_pois(
    area_name="Heidelberg",
    key="amenity",
    tags=["hospital", "clinic", "doctors", "pharmacy", "kindergarten"],
    output_name="pois_hd_health_osm",
)

Data saved to c:\Users\Johan\PycharmProjects\heal_paper\data\pois_hd_supermarket_osm.geojson

Statistics:
supermarket    54
Name: category, dtype: int64

Total number of POIs: 54
Data saved to c:\Users\Johan\PycharmProjects\heal_paper\data\pois_hd_health_osm.geojson

Statistics:
doctors         172
kindergarten    147
pharmacy         38
hospital         32
clinic           10
Name: category, dtype: int64

Total number of POIs: 399


**Access transport stops in Heidelberg via the Overpass API (bus stops, train stations, tram stops)**

In [6]:
# Initialize Overpass API
api = overpy.Overpass()

# Build Query for Transport Stops in Heidelberg: bus stops, railway halts,
# railway stations and tram stops, each as node, way and relation
result = api.query("""
    [out:json];
    area["name"="Heidelberg"]["admin_level"="6"]->.searchArea;
    (
      node["highway"="bus_stop"](area.searchArea);
      node["railway"="halt"](area.searchArea);
      node["railway"="station"](area.searchArea);
      node["railway"="tram_stop"](area.searchArea);
      way["highway"="bus_stop"](area.searchArea);
      way["railway"="halt"](area.searchArea);
      way["railway"="station"](area.searchArea);
      way["railway"="tram_stop"](area.searchArea);
      relation["highway"="bus_stop"](area.searchArea);
      relation["railway"="halt"](area.searchArea);
      relation["railway"="station"](area.searchArea);
      relation["railway"="tram_stop"](area.searchArea);
    );
    out center;
""")

# Prepare data for GeoDataFrame
data = []

for element in result.nodes + result.ways + result.relations:
    name = element.tags.get('name', 'N/A')
    all_tags = dict(element.tags)

    if isinstance(element, overpy.Node):
        lat, lon = element.lat, element.lon
        osm_id = f"node/{element.id}"
        element_type = 'node'
    elif isinstance(element, overpy.Way):
        # Ways only carry the centre point requested with "out center"
        lat, lon = element.center_lat, element.center_lon
        osm_id = f"way/{element.id}"
        element_type = 'way'
    else:  # Relation
        lat, lon = element.center_lat, element.center_lon
        osm_id = f"relation/{element.id}"
        element_type = 'relation'

    data.append({
        'osm_id': osm_id,
        'name': name,
        # Every stop type is stored under one common category
        'category': 'transport',
        'tags': all_tags,
        'geom_type': element_type,
        'geometry': Point(lon, lat)
    })

# Create GeoDataFrame; the priority makes nodes sort before ways before
# relations so that the first row of every name group is the preferred one
gdf = gpd.GeoDataFrame(data, crs="EPSG:4326", geometry='geometry')
gdf['priority'] = gdf['geom_type'].map({'node': 0, 'way': 1, 'relation': 2})
gdf = gdf.sort_values(['name', 'priority'])


# Function to filter nearby duplicates using GeoPandas
def filter_nearby_duplicates(group):
    """Drop rows of a same-name group lying within 100 m of its first row.

    Groups with a single row and the group of unnamed features (name 'N/A')
    are returned unchanged: unnamed stops are unrelated to each other and must
    not be collapsed as duplicates.
    """
    if len(group) == 1 or group['name'].iloc[0] == 'N/A':
        return group

    # Project only for the distance test (EPSG:25832 uses metres); the rows
    # that are returned keep their original, untouched coordinates.
    projected = group.geometry.to_crs(epsg=25832)

    # 100m buffer around the first (preferred) point
    buffer = projected.iloc[0].buffer(100)

    is_duplicate = projected.within(buffer)
    is_duplicate.iloc[0] = False  # the first point itself is always kept
    return group[~is_duplicate]


# Apply the filter group by group. The groups are iterated explicitly instead
# of using groupby.apply, whose handling of the grouping column is deprecated
# in recent pandas versions.
gdf = pd.concat(
    [filter_nearby_duplicates(group) for _, group in gdf.groupby('name')]
)

# Drop the temporary 'priority' column
gdf = gdf.drop(columns=['priority'])

# Write the GeoDataFrame to a GeoJSON file
output_path = Path.cwd() / 'data' / 'pois_hd_transport_osm.geojson'
output_path.parent.mkdir(parents=True, exist_ok=True)
gdf.to_file(output_path, driver='GeoJSON')

print(f"Data saved to {output_path}")

# Print statistics
print("\nStatistics:")
stats = gdf['category'].value_counts()
print(stats)
print(f"\nTotal number of POIs: {len(gdf)}")

Data saved to c:\Users\Johan\PycharmProjects\heal_paper\data\pois_hd_transport_osm.geojson

Statistics:
transport    356
Name: category, dtype: int64

Total number of POIs: 356


**Access senior living facilities in Heidelberg via the Overpass API**

In [7]:
# Build query to retrieve data for senior living facilities in Heidelberg from OpenStreetMap.
# Four tagging variants are covered: amenity=retirement_home, amenity=nursing_home and
# amenity=social_facility combined with social_facility=nursing_home or assisted_living.
api = overpy.Overpass()
result = api.query("""
    [out:json];
    area["name"="Heidelberg"]["admin_level"="6"]->.searchArea;
    (
      node["amenity"="retirement_home"](area.searchArea);
      way["amenity"="retirement_home"](area.searchArea);
      relation["amenity"="retirement_home"](area.searchArea);
      node["amenity"="nursing_home"](area.searchArea);
      way["amenity"="nursing_home"](area.searchArea);
      relation["amenity"="nursing_home"](area.searchArea);
      node["amenity"="social_facility"]["social_facility"="nursing_home"](area.searchArea);
      way["amenity"="social_facility"]["social_facility"="nursing_home"](area.searchArea);
      relation["amenity"="social_facility"]["social_facility"="nursing_home"](area.searchArea);
      node["amenity"="social_facility"]["social_facility"="assisted_living"](area.searchArea);
      way["amenity"="social_facility"]["social_facility"="assisted_living"](area.searchArea);
      relation["amenity"="social_facility"]["social_facility"="assisted_living"](area.searchArea);
    );
    out center;
""")

# Prepare data for GeoDataFrame
data = []

for element in result.nodes + result.ways + result.relations:
    name = element.tags.get('name', 'N/A')
    all_tags = dict(element.tags)

    if isinstance(element, overpy.Node):
        lat, lon = element.lat, element.lon
        geom_type = 'node'
    elif isinstance(element, overpy.Way):
        # Ways only carry the centre point requested with "out center"
        lat, lon = element.center_lat, element.center_lon
        geom_type = 'way'
    else:  # Relation
        lat, lon = element.center_lat, element.center_lon
        geom_type = 'relation'

    data.append({
        # Same "<type>/<id>" form as the other POI datasets in this notebook;
        # a bare numeric id is ambiguous between nodes, ways and relations.
        'osm_id': f"{geom_type}/{element.id}",
        'name': name,
        # The different tagging variants are stored under one common category
        'category': 'senior_facility',
        'tags': all_tags,
        'geom_type': geom_type,
        'geometry': Point(lon, lat)
    })

# Create GeoDataFrame
gdf = gpd.GeoDataFrame(data, crs="EPSG:4326", geometry='geometry')

# Write the GeoDataFrame to a GeoJSON file
output_path = Path.cwd() / 'data' / 'pois_hd_senior_facility_osm.geojson'
output_path.parent.mkdir(parents=True, exist_ok=True)
gdf.to_file(output_path, driver='GeoJSON')

print(f"Data saved to {output_path}")

# Print statistics
print("\nStatistics:")
stats = gdf['category'].value_counts()
print(stats)
print(f"\nTotal number of POIs: {len(gdf)}")

Data saved to c:\Users\Johan\PycharmProjects\heal_paper\data\pois_hd_senior_facility_osm.geojson

Statistics:
senior_facility    14
Name: category, dtype: int64

Total number of POIs: 14


**Add senior living facilities from other sources to the list of senior living facilities from OSM**

Additional senior living facilities were collected from the [Office for Social Affairs and Seniors of the City of Heidelberg](https://www.heidelberg.de/site/Heidelberg2021/get/documents_E261654398/heidelberg/Objektdatenbank/50/PDF/50_pdf_wegweiser_senioren_heidelberg.pdf), the [healthcare and career portal kliniken.de](https://www.kliniken.de/altenheim/deutschland/ort/heidelberg), the database of the [federal representation of interests for elderly and care-dependent people](https://www.biva.de/pflege-adressen/stationaer/baden-wuerttemberg/stadt-heidelberg/heidelberg/) and a Google Maps search using the terms ’Seniorenheim’, ’Altenheim’ and ’Pflegeheim’.

The additional data is stored in the CSV file `seniorenheime_hd.csv` in the `data` folder of the repository.


In [8]:
# Manually collected facilities live next to the OSM extracts in the data folder
data_dir = Path.cwd() / 'data'
csv_path = data_dir / 'seniorenheime_hd.csv'
geojson_path = data_dir / 'pois_hd_senior_facility_osm.geojson'
output_path = data_dir / 'pois_hd_senior_facility_combined.geojson'

# Read the table and parse every WKT string into a shapely geometry
csv_df = pd.read_csv(csv_path)
csv_df['geometry'] = csv_df['WKT'].map(loads)

# Turn the table into a GeoDataFrame that follows the OSM GeoDataFrame schema:
# renamed name/tags columns, a fixed category, no OSM ID (CSV entries have
# none) and 'node' as geometry type (assuming all CSV entries are points)
csv_gdf = (
    gpd.GeoDataFrame(csv_df, geometry='geometry', crs="EPSG:4326")
    .rename(columns={'Name': 'name', 'Beschreibung': 'tags'})
    .assign(category='senior_facility', osm_id=None, geom_type='node')
)

# Facilities previously extracted from OSM
osm_gdf = gpd.read_file(geojson_path)

# Stack OSM rows and CSV rows, then discard the raw WKT text column
stacked = pd.concat([osm_gdf, csv_gdf], ignore_index=True)
combined_gdf = gpd.GeoDataFrame(stacked, crs="EPSG:4326").drop(columns=['WKT'])

# Persist the merged facilities as GeoJSON
combined_gdf.to_file(output_path, driver='GeoJSON')

print(f"Combined data saved to {output_path}")

Combined data saved to c:\Users\Johan\PycharmProjects\heal_paper\data\pois_hd_senior_facility_combined.geojson


**Combine all POIs (except transport) in one dataframe**

In [9]:
def combine_geojson_files(input_files, output_file):
    """
    Combine multiple GeoJSON files into one GeoJSON file.

    All input files are read, their rows are stacked in the given order with a
    fresh index, and the result is written to ``output_file``. The directory
    of ``output_file`` is created if it does not exist. Afterwards the number
    of rows per ``category`` value and the total row count are printed.

    Parameters:
    - input_files (list of str): List of paths to input GeoJSON files.
    - output_file (str): Path to the output GeoJSON file.

    Raises:
    - ValueError: If input_files is empty.
    """
    # Read everything first and concatenate once: growing a frame inside the
    # loop copies all previous rows on every iteration and starts from an
    # empty GeoDataFrame that has neither a geometry column nor a CRS.
    frames = [gpd.read_file(file) for file in input_files]
    if not frames:
        raise ValueError("input_files must contain at least one GeoJSON file")

    combined_gdf = pd.concat(frames, ignore_index=True)

    # Save combined GeoDataFrame to a new GeoJSON file
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)
    combined_gdf.to_file(output_file, driver="GeoJSON")
    print(f"Combined GeoJSON saved to: {output_file}")

    # Print statistics
    print("\nStatistics:")
    stats = combined_gdf['category'].value_counts()
    print(stats)
    print(f"\nTotal number of POIs: {len(combined_gdf)}")
    

# Target file that receives all POIs (transport stops are kept separate)
output_file = "data/pois_hd_osm.geojson"

# Source files: supermarkets, health-related POIs and the combined senior facilities
input_files = [
    "data/pois_hd_supermarket_osm.geojson",
    "data/pois_hd_health_osm.geojson",
    "data/pois_hd_senior_facility_combined.geojson",
]

# Merge the source files into the target file
combine_geojson_files(input_files=input_files, output_file=output_file)

Combined GeoJSON saved to: data/pois_hd_osm.geojson

Statistics:
doctors            172
kindergarten       147
supermarket         54
pharmacy            38
hospital            32
senior_facility     31
clinic              10
Name: category, dtype: int64

Total number of POIs: 484
