# Exposure database

This notebook provides a systematic approach for analyzing and visualizing infrastructure networks across different European countries. It iterates through a list of countries and network types, retrieves the relevant geographic data, processes it to handle missing values, and generates visualizations to provide insights into the infrastructure distribution and characteristics within each country. Raw geospatial data is sourced from multiple databases, including OSM. Integration with OSM data is achieved using the OSMnx Python library, which simplifies downloading, modeling, analyzing, and visualizing OSM data. The integrated data is associated with EU codes representing all European countries (e.g., FR for France, NL for Netherlands). 

In [1]:
# HIDE CODE
# Import necessary libraries
import geopandas as gpd
import os
import re
import shapely
from shapely.geometry import (
    MultiPolygon,
    GeometryCollection,
)
import pandas as pd
import numpy as np
import functools,operator
from pathlib import Path
from tqdm import tqdm
from matplotlib import pyplot as plt

In [2]:
# Folder (inside the user's home directory) where the database will be stored
# TODO: We could set a .miraca folder as a default for all packages
Miraca_Exposure_Database_path = Path.home() / "miraca_exposure_database"

In [3]:
# Define the folder where the Geofabrik data is available.
# The value is wrapped in Path because later cells join file names with `/`,
# which raises TypeError on a plain string. Set the MIRACA_DATA_PATH environment
# variable to point to the data on another machine; otherwise the original
# machine-specific default below is used.
data_path = Path(
    os.environ.get(
        "MIRACA_DATA_PATH",
        r"C:\Users\Paraskevi Tsoumani\OneDrive - Vrije Universiteit Amsterdam\Documenten - MIRACA\General\Deliverables and Milestones\Milestone_Intermediate version of harmonised exposure database\Data",
    )
)

In [4]:
# Input datasets located in the data folder.
# Path(...) keeps the `/` joins valid even when data_path is a plain string
# (str / str raises TypeError).
TENT_roads_path = Path(data_path) / "europe_road_edges_TENT.parquet"
TENT_rail_path = Path(data_path) / "europe_railways_edges_TENT.parquet"
LAU_path = Path(data_path) / "LAU_RG_01M_2024_3035.geojson"
NUTS_path = Path(data_path) / "NUTS_RG_01M_2024_3035.geojson"

In [5]:
# OSM extraction settings per infrastructure (asset) type.
#   osm_keys:  tag columns kept for every extracted feature; extract() renames
#              the FIRST key to `object_type`.
#   osm_query: tag -> accepted values; extract() keeps a feature when ANY of
#              the listed tags has one of its accepted values.
# Every key must be a non-empty tag name: an empty string would make extract()
# create a meaningless column named "".
DICT_CIS_OSM = {
    "Roads": {
        "osm_keys": ["highway", "name", "maxspeed", "lanes", "surface", "bridge", "ref"],
        "osm_query": {
            "highway": [
                "motorway",
                "motorway_link",
                "trunk",
                "trunk_link",
                "primary",
                "primary_link",
                "secondary",
                "secondary_link",
                "tertiary",
                "tertiary_link",
                "residential",
                "road",
                "unclassified",
                "track",
            ]
        },
    },
    "Roadway": {
        "osm_keys": ["highway", "name", "maxspeed", "lanes", "surface"],
        "osm_query": {
            "highway": [
                "primary",
                "primary_link",
                "secondary",
                "secondary_link",
                "tertiary",
                "tertiary_link",
                "trunk",
                "trunk_link",
                "motorway",
                "motorway_link",
            ]
        },
    },
    "Railway": {
        "osm_keys": ["railway", "name", "gauge", "electrified", "voltage"],
        "osm_query": {"railway": ["rail", "narrow_gauge"]},
    },
    "Airports": {
        "osm_keys": ["aeroway", "name"],
        "osm_query": {"aeroway": ["aerodrome", "apron", "terminal", "runway"]},
    },
    "Telecommunication": {
        "osm_keys": ["man_made", "tower:type", "name"],
        "osm_query": {
            "man_made": ["mast", "communications_tower"],
            "tower:type": ["communication"],
        },
    },
    "Water_supply": {
        "osm_keys": ["man_made", "name"],
        "osm_query": {
            "man_made": [
                "water_works",
                "water_well",
                "water_tower",
                "reservoir_covered",
                "storage_tank",
            ]
        },
    },
    "Waste_solid": {
        "osm_keys": ["amenity", "name"],
        "osm_query": {"amenity": ["waste_transfer_station"]},
    },
    "Waste_water": {
        "osm_keys": ["man_made", "name"],
        "osm_query": {"man_made": ["wastewater_plant"]},
    },
    "Education": {
        "osm_keys": ["amenity", "building", "name"],
        "osm_query": {
            "building": ["school", "kindergarten", "college", "university", "library"],
            "amenity": ["school", "kindergarten", "college", "university", "library"],
        },
    },
    "Healthcare": {
        "osm_keys": ["amenity", "building", "healthcare", "name"],
        "osm_query": {
            "amenity": ["hospital", "clinic", "doctors", "dentist", "pharmacy"],
            "building": ["hospital", "clinic"],
            "healthcare": [
                "pharmacy",
                "dentist",
                "physiotherapist",
                "alternative",
                "laboratory",
                "optometrist",
                "rehabilitation",
                "blood_donation",
                "birthing_center",
            ],
        },
    },
    "Power": {
        "osm_keys": ["power", "voltage", "utility", "name", "source"],
        "osm_query": {
            "power": [
                "line",
                "cable",
                "minor_line",
                "plant",
                "generator",
                "substation",
                "transformer",
                "pole",
                "portal",
                "tower",
                "terminal",
                "switch",
                "catenary_mast",
            ]
        },
    },
    "Gas": {
        "osm_keys": ["man_made", "pipeline", "utility", "name", "substance", "content"],
        "osm_query": {
            "man_made": ["pipeline", "storage_tank"],
            "pipeline": ["substation"],
            "utility": ["gas"],
            "substance": ["gas"],
            "content": ["gas"],
        },
    },
    "Food": {
        "osm_keys": ["amenity", "building", "name"],
        "osm_query": {
            "amenity": ["restaurant", "fast_food", "cafe", "pub", "bar"],
            "building": ["restaurant", "fast_food", "cafe", "pub", "bar"],
        },
    },
    "Oil": {
        "osm_keys": ["pipeline", "man_made", "amenity", "name", "substance"],
        "osm_query": {
            "pipeline": ["substation"],
            "man_made": ["pipeline", "petroleum_well", "oil_refinery"],
            "amenity": ["fuel"],
            "substance": ["oil"],
        },
    },
    "Buildings": {
        "osm_keys": ["building", "amenity", "name"],
        "osm_query": {
            "building": [
                "yes",
                "house",
                "residential",
                "detached",
                "hut",
                "industrial",
                "shed",
                "apartments",
            ]
        },
    },
}

# Per asset type: the `object_type` values that read_osm_data() retains in its
# final filter; features with any other object_type are dropped.
OBJECTS_TO_KEEP = {
    "Roads": [
        "motorway", "motorway_link", "trunk", "trunk_link",
        "primary", "primary_link", "secondary", "secondary_link",
        "tertiary", "tertiary_link", "residential", "road",
        "unclassified", "track",
    ],
    "Roadway": [
        "primary", "primary_link", "secondary", "secondary_link",
        "tertiary", "tertiary_link", "trunk", "trunk_link",
        "motorway", "motorway_link",
    ],
    "Railway": ["rail", "narrow_gauge"],
    "Airports": ["aerodrome", "apron", "terminal", "runway"],
    "Telecommunication": ["mast", "communications_tower", "communication"],
    "Water_supply": [
        "water_works", "water_well", "water_tower",
        "reservoir_covered", "storage_tank",
    ],
    "Waste_solid": ["waste_transfer_station"],
    "Waste_water": ["wastewater_plant"],
    "Education": ["school", "kindergarten", "college", "university", "library"],
    "Healthcare": [
        "hospital", "clinic", "doctors", "dentist", "pharmacy",
        "physiotherapist", "alternative", "laboratory", "optometrist",
        "rehabilitation", "blood_donation", "birthing_center",
    ],
    "Power": [
        "line", "cable", "minor_line", "plant", "generator",
        "substation", "transformer", "pole", "portal", "tower",
        "terminal", "switch", "catenary_mast",
    ],
    "Gas": ["pipeline", "storage_tank", "substation", "gas"],
    "Food": ["restaurant", "fast_food", "cafe", "pub", "bar"],
    "Oil": [
        "substation", "pipeline", "petroleum_well",
        "oil_refinery", "fuel", "oil",
    ],
    "Buildings": [
        "yes", "house", "residential", "detached",
        "hut", "industrial", "shed", "apartments",
    ],
}

In [6]:
def _remove_contained_assets(features):
    """
    Drop assets that lie inside another (larger) polygon asset.

    Points contained in a polygon are removed first, then polygons contained
    in another polygon.

    Args:
        features (gpd.GeoDataFrame): GeoDataFrame with point and polygon features.

    Returns:
        gpd.GeoDataFrame: Filtered GeoDataFrame with a fresh 0..n-1 index.
    """
    without_inner_points = _remove_contained_points(features)
    return _remove_contained_polys(without_inner_points)
def _remove_contained_points(gdf_p_mp):
    """
    Drop every Point feature that lies within a (Multi)Polygon of the same frame.

    Args:
        gdf_p_mp (gpd.GeoDataFrame): GeoDataFrame with point and polygon geometries.

    Returns:
        gpd.GeoDataFrame: Frame without the contained points; the index is reset.
    """
    # A clean positional index lets the join result be used directly as drop labels.
    gdf_p_mp = gdf_p_mp.reset_index(drop=True)

    geom_kind = gdf_p_mp.geometry.type
    points = gdf_p_mp[geom_kind == "Point"]
    polygons = gdf_p_mp[geom_kind.isin(["MultiPolygon", "Polygon"])]

    # One row per (point, enclosing polygon) pair; the index holds the point labels.
    points_in_polygons = gpd.sjoin(points, polygons, predicate="within")
    contained_labels = np.unique(points_in_polygons.index)

    return gdf_p_mp.drop(index=contained_labels).reset_index(drop=True)


def _remove_contained_polys(gdf):
    """
    Drop (multi-)polygon entries that are fully contained in another polygon
    entry of the same GeoDataFrame. Non-polygon geometries (e.g. points) are
    left untouched; use _remove_contained_points() for those.

    An entry is dropped when another polygon contains it AND it does not
    itself contain any other polygon entry.

    NOTE(review): because of that second condition, two identical polygons
    (each contains the other) and the middle polygon of a nested chain are
    both kept - confirm this is the intended handling of duplicates.

    Resets the index of the dataframe.

    Args:
        gdf (gpd.GeoDataFrame): GeoDataFrame with polygon geometries.

    Returns:
        gpd.GeoDataFrame: GeoDataFrame without the contained polygons.
    """
    gdf = gdf.reset_index(drop=True)

    polygons = gdf[gdf.geometry.type.isin(["MultiPolygon", "Polygon"])]

    # Rows are (container, contained) pairs: index = container label,
    # index_right = contained label.
    pairs = gpd.sjoin(polygons, polygons, predicate="contains")

    # Every polygon contains itself; discard those trivial pairs.
    pairs = pairs[pairs.index != pairs.index_right]

    containers = set(pairs.index)
    to_drop = {label for label in pairs.index_right if label not in containers}

    return gdf.drop(index=to_drop).reset_index(drop=True)


def extract_first_geom(geom):
    """
    Reduce a non-empty GeometryCollection to its first member.

    Args:
        geom (shapely.Geometry): Shapely geometry object.

    Returns:
        shapely.Geometry: The first member of a non-empty GeometryCollection;
        any other object (including an empty collection) is returned unchanged.
    """
    is_filled_collection = isinstance(geom, GeometryCollection) and len(geom.geoms) > 0
    return geom.geoms[0] if is_filled_collection else geom

def _extract_value(text, key):
    """
    Parse the value of a specific key from a semi-structured OSM tag string.

    Args:
        text (str): Raw OSM `other_tags` string.
        key (str): Key to extract value for.

    Returns:
        str or None: Extracted value or None.
    """
    pattern = rf'"{key}"=>"([^"]+)"'
    try:
        match = re.search(pattern, text)
        if match:
            return match.group(1)
        return None
    except:
        return None

def extract(osm_path, geom_type, osm_keys, osm_query):
    """
    Extract specific infrastructure features from a .pbf file using OSM keys/values.

    A feature is kept when at least one key of `osm_query` holds one of its
    accepted values. Rows keep the order in which they appear in the layer.

    NOTE(review): `object_type` is simply the first entry of `osm_keys`
    renamed. A feature that matched only through another query key therefore
    carries whatever the first key holds (possibly a missing value) as its
    object_type - confirm that this is intended.

    Args:
        osm_path (str or Path): Path to .osm.pbf file.
        geom_type (str): One of 'points', 'lines', 'multipolygons'.
        osm_keys (list): Keys to extract from OSM file. Keys that are not a
            column of the layer are parsed from the `other_tags` column.
        osm_query (dict): Key-value mapping used to filter.

    Returns:
        gpd.GeoDataFrame: Extracted GeoDataFrame with columns `osm_id`,
        `geometry`, `object_type` and the remaining `osm_keys`.
    """
    features = gpd.read_file(osm_path, layer=geom_type, engine="pyogrio")

    # Fall back to the way id for rows without an osm_id, when the layer has one.
    if "osm_way_id" in features.columns:
        features["osm_id"] = features["osm_id"].fillna(features["osm_way_id"])

    # Promote requested tags that are not already columns out of `other_tags`.
    for key in osm_keys:
        if key not in features.columns:
            features[key] = features["other_tags"].apply(
                lambda tags, key=key: _extract_value(tags, key)
            )

    # Union of all query conditions as a boolean mask. Unlike collecting index
    # values in a set, this keeps the row order and does not mix labels with positions.
    selected = pd.Series(False, index=features.index)
    for query_key, accepted_values in osm_query.items():
        selected |= features[query_key].isin(accepted_values)

    # Boolean selection plus a non-inplace rename yields an independent frame,
    # so nothing is written into a slice of `features`.
    return features.loc[selected, ["osm_id", "geometry"] + list(osm_keys)].rename(
        columns={osm_keys[0]: "object_type"}
    )

# Layers of the .osm.pbf file that are read for each asset type. The names
# must match the keys of DICT_CIS_OSM / OBJECTS_TO_KEEP exactly (case-sensitive).
_OSM_LAYERS_BY_ASSET_TYPE = {
    # points and multipolygons
    **dict.fromkeys(
        [
            "Healthcare",
            "Education",
            "Food",
            "Buildings",
            "Telecommunication",
            "Water_supply",
            "Waste_solid",
            "Waste_water",
        ],
        ("points", "multipolygons"),
    ),
    # points, multipolygons and lines
    **dict.fromkeys(["Gas", "Oil", "Power"], ("points", "multipolygons", "lines")),
    # multipolygons and lines
    "Airports": ("multipolygons", "lines"),
    # several data types exist, but only lines are needed
    **dict.fromkeys(["Railway", "Roads", "Roadway"], ("lines",)),
}


def read_osm_data(osm_path, asset_type):
    """
    Load and extract OSM features for a given critical infrastructure type.

    The layers listed for the asset type are extracted and concatenated,
    geometries are made valid, assets contained in larger polygons are
    removed, GeometryCollections are reduced to their first member, and only
    features whose `object_type` is listed in OBJECTS_TO_KEEP are returned.

    Args:
        osm_path (str or Path): Path to .osm.pbf file.
        asset_type (str): One of the keys in DICT_CIS_OSM.

    Returns:
        gpd.GeoDataFrame: Cleaned and validated exposure GeoDataFrame.

    Raises:
        ImportWarning: If asset_type is not supported.
    """
    layers = _OSM_LAYERS_BY_ASSET_TYPE.get(asset_type)
    if layers is None or asset_type not in DICT_CIS_OSM:
        raise ImportWarning(
            f"asset type {asset_type!r} is not supported; "
            f"choose one of {sorted(_OSM_LAYERS_BY_ASSET_TYPE)}"
        )

    osm_config = DICT_CIS_OSM[asset_type]
    gdf = pd.concat(
        [
            extract(osm_path, layer, osm_config["osm_keys"], osm_config["osm_query"])
            for layer in layers
        ]
    )

    # make all geometries valid
    gdf["geometry"] = shapely.make_valid(gdf["geometry"])
    gdf = gdf[gdf.geometry.is_valid]

    # only keep assets with unique geometries
    features = _remove_contained_assets(gdf)

    # remove potential geometrycollections to avoid errors later on
    features["geometry"] = features["geometry"].apply(extract_first_geom)

    # remove features that are not in the asset_type list
    return features[features["object_type"].isin(OBJECTS_TO_KEEP[asset_type])]

In [7]:
def _types_mixed_with_polygons(geom_counts, geom_name, preserve_list):
    """Return object types that occur both as `geom_name` and as (Multi)Polygon."""
    polygon_cols = [col for col in ("Polygon", "MultiPolygon") if col in geom_counts.columns]
    if geom_name not in geom_counts.columns or not polygon_cols:
        return []

    has_both = (geom_counts[geom_name] > 0) & (geom_counts[polygon_cols].sum(axis=1) > 0)
    # Object types whose designated geometry is `geom_name` are never converted.
    return [
        obj_type
        for obj_type in geom_counts.index[has_both]
        if preserve_list.get(obj_type) != geom_name
    ]


def convert_mixed_geometries_to_polygons(features, asset_type, default_area=1000, line_buffer=0.0001):
    """
    Convert point and linestring geometries to polygons for asset types with mixed geometry types.
    Only converts geometries for object types that have at least some polygon representations.
    Respects specific object types that should maintain their original geometry.

    Points become squares centred on the point whose area equals the median
    polygon area (computed in EPSG:3035) of the same object type. Closed
    linestrings become polygons; open linestrings are buffered by `line_buffer`.

    The input frame is not modified; a converted copy is returned. For asset
    types that are not handled, the input object itself is returned unchanged.

    Args:
        features (gpd.GeoDataFrame): Infrastructure features with an
            `object_type` column and a CRS set.
        asset_type (str): Type of infrastructure asset
        default_area (float): Area in square metres used for a point whose
            object type has no median polygon area.
        line_buffer (float): Buffer distance for open linestrings, expressed
            in the units of the CRS of `features`.
            NOTE(review): the default 0.0001 is extremely small when the frame
            is in a metric CRS - confirm the intended unit.

    Returns:
        gpd.GeoDataFrame: Features with consistent polygon geometries where appropriate
    """
    # Only apply for certain asset types
    if asset_type not in ['Education', 'Healthcare', 'Telecommunication','Power','Gas', 'Oil']:
        return features

    # Object types that should NOT be converted when they have the listed geometry
    preserve_geometry = {
        'Power': {
            'line': 'LineString',
            'tower': 'Point',
            'pole': 'Point',
            'catenary_mast': 'Point',
            'cable': 'LineString',
            'minor_line': 'LineString',
        },
        'Gas': {
            'pipeline': 'LineString',
        },
        'Oil': {
            'pipeline': 'LineString',
        },
        'Telecommunication': {
            'mast': 'Point',
            'communications_tower': 'Point',
        },
    }
    preserve_list = preserve_geometry.get(asset_type, {})

    # Work on a copy so the caller's GeoDataFrame is left untouched.
    features = features.copy()

    # Geometry types before any conversion; kept as a separate Series instead
    # of a temporary column on the frame.
    geom_kind = features.geometry.geom_type

    # Features that keep their geometry: listed object type AND the listed geometry type
    preserve_mask = pd.Series(False, index=features.index)
    for obj_type, kept_kind in preserve_list.items():
        preserve_mask |= (features['object_type'] == obj_type) & (geom_kind == kept_kind)

    # Polygon features (non-preserved only) provide the reference areas
    polygon_features = features.loc[
        (~preserve_mask) & geom_kind.isin(['Polygon', 'MultiPolygon'])
    ].to_crs(3035)

    # Without any polygon there is nothing to derive sizes from
    if len(polygon_features) == 0:
        return features

    # Median polygon area (square metres in EPSG:3035) per object type
    median_area = polygon_features.area.groupby(polygon_features['object_type']).median()

    # Number of non-preserved features per object type and geometry type
    geom_counts = (
        pd.DataFrame({'object_type': features['object_type'], 'geom_type': geom_kind})
        .loc[~preserve_mask]
        .groupby(['object_type', 'geom_type'])
        .size()
        .unstack()
        .fillna(0)
    )

    # --- linestrings -> polygons -------------------------------------------
    linestrings_to_polygonize = _types_mixed_with_polygons(geom_counts, 'LineString', preserve_list)
    if linestrings_to_polygonize:
        print(f"Converting linestrings to polygons for {asset_type}: {linestrings_to_polygonize}")

        line_mask = (
            features['object_type'].isin(linestrings_to_polygonize)
            & (geom_kind == 'LineString')
            & (~preserve_mask)
        )

        if line_mask.any():
            def polygonize_linestring(linestring):
                try:
                    # Closed linestrings describe an outline and become its polygon
                    if linestring.is_closed:
                        return shapely.geometry.Polygon(linestring)
                    # Open linestrings get a small buffer
                    return linestring.buffer(line_buffer)
                except Exception:
                    # Fallback: create a small buffer
                    return linestring.buffer(line_buffer)

            features.loc[line_mask, 'geometry'] = (
                features.loc[line_mask].geometry.apply(polygonize_linestring).values
            )

    # --- points -> polygons -------------------------------------------------
    points_to_polygonize = _types_mixed_with_polygons(geom_counts, 'Point', preserve_list)
    if points_to_polygonize:
        point_mask = (
            features['object_type'].isin(points_to_polygonize)
            & (geom_kind == 'Point')
            & (~preserve_mask)
        )
        # Buffer distances are in metres, so the points are moved to EPSG:3035 first
        points_3035 = features.loc[point_mask].to_crs(3035)

        if len(points_3035) > 0:
            print(f"Converting {len(points_3035)} points to polygons for {asset_type}: {points_to_polygonize}")

            # Half the side length of a square with the reference area
            half_side = np.sqrt(
                points_3035['object_type'].map(median_area).fillna(default_area)
            ) / 2

            squares = gpd.GeoSeries(
                [
                    point.buffer(distance, cap_style='square')
                    for point, distance in zip(points_3035.geometry, half_side)
                ],
                index=points_3035.index,
                crs=3035,
            )

            # Write the squares back in the CRS of `features` (this is a no-op
            # when the frame is already in EPSG:3035).
            features.loc[point_mask, 'geometry'] = squares.to_crs(features.crs).values

    return features


This dictionary maps two-letter country codes to the names of European countries: the keys are the codes (str) and the values are the country names (str). The codes follow ISO 3166-1 alpha-2, except for Greece, which uses `EL` (the Eurostat convention) instead of the ISO code `GR`.


In [8]:
# Two-letter country code -> country name (note "EL" for Greece).
EU_code_full = {
    "AL": "Albania", "AT": "Austria", "BE": "Belgium", "BG": "Bulgaria",
    "CH": "Switzerland", "CY": "Cyprus", "CZ": "Czechia", "DE": "Germany",
    "DK": "Denmark", "EE": "Estonia", "EL": "Greece", "ES": "Spain",
    "FI": "Finland", "FR": "France", "HR": "Croatia", "HU": "Hungary",
    "IE": "Ireland", "IS": "Iceland", "IT": "Italy", "LT": "Lithuania",
    "LU": "Luxembourg", "LV": "Latvia", "MK": "North Macedonia", "MT": "Malta",
    "NL": "Netherlands", "NO": "Norway", "PL": "Poland", "PT": "Portugal",
    "RO": "Romania", "SE": "Sweden", "SI": "Slovenia", "SK": "Slovakia",
}

In [9]:
# List of networks (asset types) covered by the exposure database.
# NOTE(review): "Ports" has no entry in DICT_CIS_OSM, so read_osm_data() cannot
# extract it as defined in this notebook.
network_list = [
    "Education", "Healthcare",
    "Airports", "Ports", "Roadway", "Railway",
    "Telecommunication",
    "Power", "Gas", "Oil",
]

In [10]:
# Choose the country by its two-letter code; it is compared with the CNTR_CODE
# column of the LAU regions and used in the output file name.
country = 'PT'

In [48]:
# Choose the asset type to process; it is passed to read_osm_data(), which
# looks it up in DICT_CIS_OSM (case-sensitive).
asset_type = 'Education'

In [49]:
# File name of the OSM extract inside data_path; ".pbf" is appended when the
# path is built. Must be the extract that matches `country`.
pbf_file = "portugal-260119.osm"

In [50]:
%%time
# Path(...) keeps the `/` join valid even when data_path is a plain string
# (str / str raises TypeError).
osm_path = Path(data_path) / f"{pbf_file}.pbf"
features = read_osm_data(osm_path, asset_type=asset_type)
# Spatial index used for the bounding-box queries per LAU region
features_index = features.sindex

  return ogr_read(


CPU times: total: 1min 41s
Wall time: 1min 32s


In [51]:
%%time
# Read the LAU and NUTS boundary files
LAU, NUTS = (
    gpd.read_file(boundary_path, engine="pyogrio")
    for boundary_path in (LAU_path, NUTS_path)
)

CPU times: total: 10.5 s
Wall time: 10.5 s


In [52]:
# Keep the level-2 NUTS regions and build their spatial index
NUTS2 = NUTS[NUTS["LEVL_CODE"] == 2].reset_index(drop=True)
NUTS_index = NUTS2.sindex

In [53]:
def get_NUTS2(LAU_region, NUTS2):
    """
    Find the NUTS2 region that contains the centroid of a LAU region.

    Args:
        LAU_region: Row of the LAU GeoDataFrame; only its `geometry` is used.
        NUTS2 (gpd.GeoDataFrame): NUTS level-2 regions with a `NUTS_ID`
            column, in the same CRS as the LAU geometry.

    Returns:
        str or None: `NUTS_ID` of the first region intersecting the centroid,
        or None when no region does.
    """
    centroid = LAU_region.geometry.centroid

    # Coarse candidates from the spatial index of the frame that was passed in
    # (not a global index that may belong to a different frame) ...
    candidates = NUTS2.iloc[NUTS2.sindex.intersection((centroid.x, centroid.y))]

    # ... followed by the exact geometric test.
    matching_ids = candidates.loc[candidates.intersects(centroid).values, "NUTS_ID"]

    return matching_ids.iloc[0] if len(matching_ids) else None

In [54]:
# LAU regions of the selected country (make sure `country` is changed together
# with the OSM extract). The .copy() makes this an independent frame, so adding
# columns to it later does not trigger a SettingWithCopyWarning.
LAU_country = LAU.loc[LAU.CNTR_CODE == country].copy()

In [55]:
tqdm.pandas()
# assign() returns a new frame instead of writing into a slice of LAU, which
# avoids the SettingWithCopyWarning and keeps the cell safe to re-run.
LAU_country = LAU_country.assign(
    NUTS2=LAU_country.progress_apply(
        lambda LAU_region: get_NUTS2(LAU_region, NUTS2), axis=1
    )
)

100%|████████████████████████████████████████████████████████████████████████████| 3092/3092 [00:00<00:00, 3904.39it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [56]:
# Clip the assets to every LAU region and tag them with the region identifiers.
# An asset that spans several regions is split into one piece per region.
LAU_country_wgs84 = LAU_country.to_crs(4326)

collect_assets = []
for LAU_region in tqdm(LAU_country_wgs84.itertuples(), total=len(LAU_country_wgs84)):
    # Get rough intersection using spatial index
    assets_rough = features.iloc[features_index.intersection(LAU_region.geometry.bounds)].copy()

    # Skip if no assets found in bounding box
    if assets_rough.empty:
        continue

    # Calculate precise intersection with the region outline
    assets_rough['geometry'] = assets_rough.geometry.intersection(LAU_region.geometry)

    # Filter out empty geometries and make a copy to avoid warnings
    assets_precise = assets_rough[~assets_rough.geometry.is_empty].copy()

    # Now safely assign new columns
    assets_precise['LAU'] = LAU_region.GISCO_ID
    assets_precise['NUTS2'] = LAU_region.NUTS2
    assets_precise['CNTR_CODE'] = LAU_region.CNTR_CODE
    collect_assets.append(assets_precise)

# Fail with a clear message instead of pandas' "No objects to concatenate"
if not collect_assets:
    raise ValueError(
        f"No {asset_type} assets intersect the LAU regions of {country}; "
        "check that the OSM extract matches the selected country."
    )

country_assets = pd.concat(collect_assets).to_crs(3035)
# NOTE(review): ids are stored as float64 - confirm this matches the dtype of
# the ids they are later compared with.
country_assets['osm_id'] = country_assets['osm_id'].astype(np.float64)
country_assets = country_assets.loc[country_assets.is_valid].reset_index(drop=True)
country_assets = convert_mixed_geometries_to_polygons(country_assets, asset_type)


100%|████████████████████████████████████████████████████████████████████████████| 3092/3092 [00:01<00:00, 1663.97it/s]


Converting 1412 points to polygons for Education: ['college', 'kindergarten', 'library', 'school', 'university']


In [57]:
# only for roads/railway: add corridor values
if asset_type.lower() in ("roadway", "railway"):

    # pick the TEN-T edge file that belongs to the asset type
    tent_path = {"roadway": TENT_roads_path, "railway": TENT_rail_path}[asset_type.lower()]

    add_path = pd.read_parquet(tent_path)[["osm_way_id", "CORRIDORS"]]

    # way id -> corridor lookup, applied to the assets' osm ids
    corridor_dict = dict(zip(add_path["osm_way_id"], add_path["CORRIDORS"]))
    country_assets["CORRIDOR"] = country_assets["osm_id"].map(corridor_dict)



In [58]:
# Preview the first rows of the processed assets
country_assets.head()

Unnamed: 0,osm_id,geometry,object_type,building,name,LAU,NUTS2,CNTR_CODE
0,185199800.0,"POLYGON ((2772783.864 2240602.702, 2772815.477...",school,,Escola Básica de Perelhal,PT_030260,PT11,PT
1,943235400.0,"POLYGON ((2787534.520 2239153.829, 2787504.074...",school,,Escola Básica de Pousa,PT_030261,PT11,PT
2,134657600.0,"POLYGON ((2778944.922 2234377.318, 2778945.922...",school,,Escola Básica do 1º Ciclo de Remelhe,PT_030263,PT11,PT
3,1267597000.0,"POLYGON ((2783330.562 2244817.087, 2783331.724...",school,,Escola Básica de Bárrio,PT_030264,PT11,PT
4,1267597000.0,"POLYGON ((2780415.719 2238453.277, 2780424.969...",school,,Escola Básica de Santa Eugénia,PT_030265,PT11,PT


In [59]:
# Base output directory (relative to the working directory)
# NOTE(review): Miraca_Exposure_Database_path is defined at the top of the
# notebook but not used here - confirm which output location is intended.
base_out = Path("Exposure_files")

# One sub-folder per asset type, created on demand
asset_folder = base_out.joinpath(asset_type)
asset_folder.mkdir(parents=True, exist_ok=True)

# Save as <asset_type>_<country>.parquet
output_file = asset_folder / f"{asset_type}_{country}.parquet"
country_assets.to_parquet(output_file)