In [None]:
import geoplanar
import geopandas as gpd
import numpy as np

In [None]:
# Load the downloaded buildings of one site.
# Forward slashes keep the relative path valid on Windows, Linux and macOS alike.
# To process another site, comment out the active line and enable one of the others.
buildings = gpd.read_file("overture_data/berlin_buildings.gpkg")
#buildings = gpd.read_file("overture_data/hongkong_buildings.gpkg")
#buildings = gpd.read_file("overture_data/paris_buildings.gpkg")
#buildings = gpd.read_file("overture_data/rome_buildings.gpkg")
#buildings = gpd.read_file("overture_data/saopaulo_buildings.gpkg")

In [None]:
def _keep_polygons(gdf):
    '''Return only the rows of ``gdf`` whose geometry type is "Polygon", re-indexed from 0.'''
    return gdf[gdf.geom_type == "Polygon"].reset_index(drop=True)


def process_region_buildings(
    buildings,
    simplify,
    simplification_tolerance=.1,
    merge_limit=25,
    overlap_limit=0.1,
    max_area=200_000,
    collapse_distance=0.5,
):
    '''Pass the region buildings through the geoplanar simplification pipeline.

    The input is copied first, so the caller's GeoDataFrame is left untouched.
    Steps, in order: repair invalid geometries, explode multi-part geometries
    and keep only polygons, optionally simplify, drop very large footprints,
    merge and trim overlaps with geoplanar, merge buildings that vanish under
    a small negative buffer into a touching neighbour, optionally simplify and
    repair once more, and finally normalise the geometries.

    All area and distance thresholds are expressed in the units of the layer's
    CRS. NOTE(review): the defaults look chosen for a projected CRS in metres;
    confirm the input is projected before relying on them. The geometry column
    is assumed to be named "geometry", because that is the column written to.

    Parameters
    ----------
    buildings : GeoDataFrame
        Building footprints of one region.
    simplify : bool
        Whether to run the two geometry-simplification passes.
    simplification_tolerance : float, default 0.1
        Tolerance passed to ``simplify`` in both passes.
    merge_limit : float, default 25
        Forwarded to ``geoplanar.merge_overlaps``; per geoplanar's
        documentation, overlapping polygons with an area below this value are
        merged into a neighbour regardless of the overlap size.
    overlap_limit : float, default 0.1
        Forwarded to ``geoplanar.merge_overlaps``; per geoplanar's
        documentation, the share (0-1) of a polygon's area that must be
        overlapped for the two polygons to be merged.
    max_area : float, default 200_000
        Footprints with an area equal to or above this value are dropped.
    collapse_distance : float, default 0.5
        Buildings that become empty when buffered inward by this distance are
        handed to ``geoplanar.merge_touching``.

    Returns
    -------
    GeoDataFrame
        Cleaned polygons with a fresh integer index. An empty input is
        returned as an empty copy without further processing.

    Notes
    -----
    Prints the share of polygon geometries after overlap handling and a final
    summary with the row count and the share of rows dropped.
    '''

    initial_count = len(buildings)
    if initial_count == 0:
        # Nothing to clean, and the summary at the end would divide by zero.
        return buildings.copy()

    # Work on a copy: assigning the geometry column below would otherwise
    # modify the caller's GeoDataFrame in place.
    buildings = buildings.copy()

    ## fix invalid geometry
    buildings["geometry"] = buildings.make_valid()

    ## explode multipolygons and keep only polygons
    buildings = _keep_polygons(buildings.explode(ignore_index=True))

    ## simplify geometry - the source data can carry topological issues
    ## (originally noted for eubucco data); one region - 109491 - has an
    ## issue with simplification without normalisation, hence normalize()
    if simplify:
        buildings["geometry"] = buildings.simplify(simplification_tolerance).normalize()

    # drop very large buildings
    buildings = buildings[buildings.area < max_area].reset_index(drop=True)

    ## merge buildings that either 1) overlap by at least `overlap_limit`
    ## (a ratio, 0.1 == 10 %) or 2) are smaller than `merge_limit`
    buildings = geoplanar.merge_overlaps(
        buildings, merge_limit=merge_limit, overlap_limit=overlap_limit
    )

    ## drop remaining overlaps
    buildings = geoplanar.trim_overlaps(buildings, strategy='largest')

    ## fix any multipolygons
    buildings = buildings.explode(ignore_index=True)

    print(
        "Percent polygons: ",
        (buildings.geom_type == "Polygon").sum() / buildings.shape[0],
    )

    # drop non-polygons
    buildings = _keep_polygons(buildings)

    # merge touching collapsing buildings: anything that disappears under a
    # small inward buffer is considered too thin to stand on its own
    shrunk = buildings.buffer(-collapse_distance, resolution=2)
    # The index was just reset, so positions and labels coincide; pass a flat
    # integer array (the documented input type) instead of np.where's tuple.
    collapsed = np.flatnonzero(shrunk.is_empty.to_numpy())
    buildings = geoplanar.merge_touching(buildings, collapsed, largest=True)

    # drop non polygons
    buildings = _keep_polygons(buildings.explode(ignore_index=True))

    ## need one more pass to ensure only valid geometries
    if simplify:
        buildings["geometry"] = buildings.simplify(simplification_tolerance)
        buildings["geometry"] = buildings.make_valid()
        # make_valid may return multi-part or collection geometries; those
        # rows are dropped here rather than exploded.
        buildings = _keep_polygons(buildings)

    # The ratio is relative to the number of input rows; because exploding
    # multi-part geometries adds rows, it can be negative.
    final_count = buildings.shape[0]
    print(
        "Final polygons: ",
        final_count,
        ", dropped: ",
        1 - (final_count / initial_count),
    )

    buildings["geometry"] = buildings.normalize()
    return buildings

In [None]:
# Run the cleaning pipeline on the loaded site, with simplification enabled.
processed_buildings = process_region_buildings(
    buildings=buildings,
    simplify=True,
    simplification_tolerance=0.1,
    merge_limit=25,
)

In [None]:
# Save the preprocessed buildings as a GeoPackage.
# Forward slashes keep the relative path valid on Windows, Linux and macOS alike.
# Keep the active line in sync with the site that was loaded at the top of the notebook.
processed_buildings.to_file("overture_data/berlin_buildings_preprocessed.gpkg", driver="GPKG")
#processed_buildings.to_file("overture_data/hongkong_buildings_preprocessed.gpkg", driver="GPKG")
#processed_buildings.to_file("overture_data/paris_buildings_preprocessed.gpkg", driver="GPKG")
#processed_buildings.to_file("overture_data/rome_buildings_preprocessed.gpkg", driver="GPKG")
#processed_buildings.to_file("overture_data/saopaulo_buildings_preprocessed.gpkg", driver="GPKG")