From 216265bc8da5fc2f616710f6cdedf8da3836f3ab Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Thu, 21 Mar 2024 17:24:05 +0100 Subject: [PATCH 1/5] refactor tessellation --- momepy/__init__.py | 1 + momepy/functional/_elements.py | 180 +++++++++++++++++++++++++++++++++ 2 files changed, 181 insertions(+) create mode 100644 momepy/functional/_elements.py diff --git a/momepy/__init__.py b/momepy/__init__.py index 60f51162..e18a4bd2 100644 --- a/momepy/__init__.py +++ b/momepy/__init__.py @@ -9,6 +9,7 @@ from .elements import * from .functional._dimension import * from .functional._distribution import * +from .functional._elements import * from .functional._shape import * from .graph import * from .intensity import * diff --git a/momepy/functional/_elements.py b/momepy/functional/_elements.py new file mode 100644 index 00000000..caa012d6 --- /dev/null +++ b/momepy/functional/_elements.py @@ -0,0 +1,180 @@ +import warnings + +import geopandas as gpd +import numpy as np +import pandas as pd +import shapely +from joblib import Parallel, delayed +from libpysal.cg import voronoi_frames +from packaging.version import Version + +GPD_GE_013 = Version(gpd.__version__) >= Version("0.13.0") +GPD_GE_10 = Version(gpd.__version__) >= Version("1.0.0") + +__all__ = [ + "morphological_tessellation", + "enclosed_tessellation", + "verify_tessellation", +] + + +def morphological_tessellation( + geometry: gpd.GeoSeries | gpd.GeoDataFrame, + clip: str + | shapely.Geometry + | gpd.GeoSeries + | gpd.GeoDataFrame + | None = "bounding_box", + shrink: float = 0.4, + segment: float = 0.5, +) -> gpd.GeoSeries: + """Generate morphological tessellation. + + Morpohological tessellation is a method to divide space into cells based on + building footprints and Voronoi tessellation. The function wraps + :func:`libpysal.cg.voronoi_frames` and provides customized default parameters + following :cite:`fleischmann2020`. + + + See :cite:`fleischmann2020` for details of implementation. + + + Parameters + ---------- + geometry : gpd.GeoSeries | gpd.GeoDataFrame + A GeoDataFrame or GeoSeries containing buildings to tessellate. + clip : str | shapely.Geometry | gpd.GeoSeries | gpd.GeoDataFrame | None + Polygon used to clip the Voronoi polygons, by default "bounding_box". You can + pass any option accepted by :func:`libpysal.cg.voronoi_frames` or geopandas + object that will be automatically unioned. + shrink : float, optional + The distance for negative buffer to generate space between adjacent polygons). + By default 0.4 + segment : float, optional + The maximum distance between points after discretization. By default 0.5 + + Returns + ------- + gpd.GeoSeries + GeoSeries with an index matching the index of input geometry + """ + if isinstance(clip, gpd.GeoSeries | gpd.GeoDataFrame): + clip = clip.union_all() if GPD_GE_10 else clip.unary_union + + return voronoi_frames( + geometry, + clip=clip, + shrink=shrink, + segment=segment, + return_input=False, + as_gdf=False, + ) + + +def enclosed_tessellation( + geometry, + enclosures: gpd.GeoSeries, + shrink=0.4, + segment=0.5, + threshold=0.05, + n_jobs=-1, +): + enclosures = enclosures.to_frame() + enclosures["position"] = range(len(enclosures)) + index_name = enclosures.index.name + if not index_name: + index_name = "enclosure_index" + enclosures[index_name] = enclosures.index + + if GPD_GE_013: + inp, res = geometry.sindex.query(enclosures.geometry, predicate="intersects") + else: + inp, res = geometry.sindex.query_bulk( + enclosures.geometry, predicate="intersects" + ) + + unique, counts = np.unique(inp, return_counts=True) + splits = unique[counts > 1] + single = unique[counts == 1] + altered = unique[counts > 0] + + tuples = [] + for i in splits: + tuples.append( + ( + enclosures.index[i], + enclosures.geometry.iloc[i], + geometry.iloc[res[inp == i]], + ) + ) + new = Parallel(n_jobs=n_jobs)( + delayed(_tess)(*t, threshold, shrink, segment, index_name) for t in tuples + ) + + # finalise the result + singles = enclosures.iloc[single] + singles.index = singles.position.loc[single].apply( + lambda ix: geometry.iloc[res[inp == ix]].index[0] + ) + clean_blocks = enclosures.drop(altered) + clean_blocks.index = range(-len(clean_blocks), 0, 1) + return pd.concat( + new + [singles.drop(columns="position"), clean_blocks.drop(columns="position")] + ) + + +def _tess(ix, enclosure, blg, threshold, shrink, segment, enclosure_id): + poly = enclosure + within = blg[ + shapely.area(shapely.intersection(blg.geometry.array, poly)) + > (shapely.area(blg.geometry.array) * threshold) + ].copy() + if len(within) > 1: + tess = voronoi_frames( + within, + clip=poly, + shrink=shrink, + segment=segment, + return_input=False, + as_gdf=True, + ) + tess[enclosure_id] = ix + return tess + return gpd.GeoDataFrame( + {enclosure_id: ix}, + geometry=[poly], + index=[-1], + crs=blg.crs, + ) + + +def verify_tessellation(tesselation, geometry): + """Check whether result matches buildings and contains only Polygons.""" + # check against input layer + ids_original = geometry.index + ids_generated = tesselation.index + if len(ids_original) != len(ids_generated): + collapsed = ids_original.difference(ids_generated) + warnings.warn( + message=( + "Tessellation does not fully match buildings. " + f"{len(collapsed)} element(s) collapsed " + f"during generation. Index of the affected elements: {collapsed}." + ), + category=UserWarning, + stacklevel=4, + ) + + # check MultiPolygons - usually caused by error in input geometry + multipolygons = tesselation[tesselation.geometry.geom_type == "MultiPolygon"].index + if len(multipolygons) > 0: + warnings.warn( + message=( + "Tessellation contains MultiPolygon elements. Initial " + "objects should be edited. Index of affected " + f"elements: {list(multipolygons)}." + ), + category=UserWarning, + stacklevel=4, + ) + return collapsed, multipolygons From ee10731e61ca79e7b6dfa71a1647962000855231 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Fri, 22 Mar 2024 15:02:58 +0100 Subject: [PATCH 2/5] docs --- momepy/functional/_elements.py | 170 +++++++++++++++++++++++++++------ 1 file changed, 141 insertions(+), 29 deletions(-) diff --git a/momepy/functional/_elements.py b/momepy/functional/_elements.py index caa012d6..13d91430 100644 --- a/momepy/functional/_elements.py +++ b/momepy/functional/_elements.py @@ -35,14 +35,28 @@ def morphological_tessellation( :func:`libpysal.cg.voronoi_frames` and provides customized default parameters following :cite:`fleischmann2020`. + Tessellation requires data of relatively high level of precision + and there are three particular patterns causing issues: - See :cite:`fleischmann2020` for details of implementation. + 1. Features will collapse into empty polygon - these + do not have tessellation cell in the end. + 2. Features will split into MultiPolygons - in some cases, + features with narrow links between parts split into two + during 'shrinking'. In most cases that is not an issue + and the resulting tessellation is correct anyway, but + sometimes this results in a cell being a MultiPolygon, + which is not correct. + 3. Overlapping features - features which overlap even + after 'shrinking' cause invalid tessellation geometry. + + All three types can be tested using :class:`momepy.CheckTessellationInput`. + See :cite:`fleischmann2020` for details of implementation. Parameters ---------- geometry : gpd.GeoSeries | gpd.GeoDataFrame - A GeoDataFrame or GeoSeries containing buildings to tessellate. + A GeoDataFrame or GeoSeries containing buildings to tessellate the space around. clip : str | shapely.Geometry | gpd.GeoSeries | gpd.GeoDataFrame | None Polygon used to clip the Voronoi polygons, by default "bounding_box". You can pass any option accepted by :func:`libpysal.cg.voronoi_frames` or geopandas @@ -57,6 +71,12 @@ def morphological_tessellation( ------- gpd.GeoSeries GeoSeries with an index matching the index of input geometry + + See also + -------- + momepy.enclosed_tessellation + momepy.CheckTessellationInput + momepy.verify_tessellation """ if isinstance(clip, gpd.GeoSeries | gpd.GeoDataFrame): clip = clip.union_all() if GPD_GE_10 else clip.unary_union @@ -72,20 +92,82 @@ def morphological_tessellation( def enclosed_tessellation( - geometry, + geometry: gpd.GeoSeries | gpd.GeoDataFrame, enclosures: gpd.GeoSeries, - shrink=0.4, - segment=0.5, - threshold=0.05, - n_jobs=-1, -): + shrink: float = 0.4, + segment: float = 0.5, + threshold: float = 0.05, + n_jobs: int = -1, +) -> gpd.GeoDataFrame: + """Generate enclosed tessellation + + Enclosed tessellation is an enhanced :func:`morphological_tessellation`, based on + predefined enclosures and building footprints. We can see enclosed tessellation as + two-step partitioning of space based on building footprints and boundaries (e.g. + street network, railway). Original morphological tessellation is used under the hood + to partition each enclosure. + + Tessellation requires data of relatively high level of precision and there are three + particular patterns causing issues: + + 1. Features will collapse into empty polygon - these + do not have tessellation cell in the end. + 2. Features will split into MultiPolygons - in some cases, + features with narrow links between parts split into two during 'shrinking'. + In most cases that is not an issue and the resulting tessellation is correct + anyway, but sometimes this results in a cell being a MultiPolygon, which is + not correct. + 3. Overlapping features - features which overlap even + after 'shrinking' cause invalid tessellation geometry. + + All three types can be tested using :class:`momepy.CheckTessellationInput`. + + Parameters + ---------- + geometry : gpd.GeoSeries | gpd.GeoDataFrame + A GeoDataFrame or GeoSeries containing buildings to tessellate the space around. + enclosures : gpd.GeoSeries + The enclosures geometry, which can be generated using :func:`momepy.enclosures`. + shrink : float, optional + The distance for negative buffer to generate space between adjacent polygons). + By default 0.4 + segment : float, optional + The maximum distance between points after discretization. By default 0.5 + threshold : float, optional + The minimum threshold for a building to be considered within an enclosure. + Threshold is a ratio of building area which needs to be within an enclosure to + inlude it in the tessellation of that enclosure. Resolves sliver geometry + issues. If None, the check is skipped and all intersecting buildings are + considered. By default 0.05 + n_jobs : int, optional + The number of jobs to run in parallel. -1 means using all available cores. + By default -1 + + Returns + ------- + gpd.GeoDataFrame + GeoDataFrame with an index matching the index of input geometry and a column + matching the index of input enclosures. + + See also + -------- + momepy.enclosures + momepy.morphological_tessellation + momepy.CheckTessellationInput + momepy.verify_tessellation + """ + + # convert to GeoDataFrame and add position (we will need it later) enclosures = enclosures.to_frame() enclosures["position"] = range(len(enclosures)) + + # preserve index name if exists index_name = enclosures.index.name if not index_name: index_name = "enclosure_index" enclosures[index_name] = enclosures.index + # figure out which enlosures contain which buildings if GPD_GE_013: inp, res = geometry.sindex.query(enclosures.geometry, predicate="intersects") else: @@ -93,45 +175,54 @@ def enclosed_tessellation( enclosures.geometry, predicate="intersects" ) + # find out which enclosures contain one and multiple buildings unique, counts = np.unique(inp, return_counts=True) splits = unique[counts > 1] single = unique[counts == 1] altered = unique[counts > 0] - tuples = [] - for i in splits: - tuples.append( - ( - enclosures.index[i], - enclosures.geometry.iloc[i], - geometry.iloc[res[inp == i]], - ) + # prepare input for parallel processing + tuples = [ + ( + enclosures.index[i], # enclosure index + enclosures.geometry.iloc[i], # enclosure geometry + geometry.iloc[res[inp == i]], # buildings within the enclosure ) + for i in splits + ] + + # generate tessellation in parallel new = Parallel(n_jobs=n_jobs)( delayed(_tess)(*t, threshold, shrink, segment, index_name) for t in tuples ) - # finalise the result + # get building index for enclosures with single building singles = enclosures.iloc[single] singles.index = singles.position.loc[single].apply( lambda ix: geometry.iloc[res[inp == ix]].index[0] ) + # combine results clean_blocks = enclosures.drop(altered) - clean_blocks.index = range(-len(clean_blocks), 0, 1) + clean_blocks.index = range( + -len(clean_blocks), 0, 1 + ) # assign negative index to enclosures with no buildings return pd.concat( new + [singles.drop(columns="position"), clean_blocks.drop(columns="position")] ) -def _tess(ix, enclosure, blg, threshold, shrink, segment, enclosure_id): - poly = enclosure - within = blg[ - shapely.area(shapely.intersection(blg.geometry.array, poly)) - > (shapely.area(blg.geometry.array) * threshold) - ].copy() - if len(within) > 1: +def _tess(ix, poly, blg, threshold, shrink, segment, enclosure_id): + """Generate tessellation for a single enclosure. Helper for enclosed_tessellation""" + # check if threshold is set and filter buildings based on the threshold + if threshold: + blg = blg[ + shapely.area(shapely.intersection(blg.geometry.array, poly)) + > (shapely.area(blg.geometry.array) * threshold) + ] + + if len(blg) > 1: tess = voronoi_frames( - within, + blg, clip=poly, shrink=shrink, segment=segment, @@ -140,6 +231,7 @@ def _tess(ix, enclosure, blg, threshold, shrink, segment, enclosure_id): ) tess[enclosure_id] = ix return tess + return gpd.GeoDataFrame( {enclosure_id: ix}, geometry=[poly], @@ -149,7 +241,27 @@ def _tess(ix, enclosure, blg, threshold, shrink, segment, enclosure_id): def verify_tessellation(tesselation, geometry): - """Check whether result matches buildings and contains only Polygons.""" + """Check whether result matches buildings and contains only Polygons. + + Checks if the generated tessellation fully matches the input buildings, i.e. if + there are all building indices present in the tessellation. Also checks if there are + any MultiPolygons present in the tessellation. The former is often caused by + buildings collapsing during the generation process, the latter is usually caused by + errors in the input geometry, overlapping buildings, or narrow links between parts + of the building. + + Parameters + ---------- + tesselation : gpd.GeoSeries | gpd.GeoDataFrame + tessellation geometry + geometry : gpd.GeoSeries | gpd.GeoDataFrame + building geometry used to generate tessellation + + Returns + ------- + tuple(excluded, multipolygons) + Tuple of indices of building IDs not present in tessellations and MultiPolygons. + """ # check against input layer ids_original = geometry.index ids_generated = tesselation.index @@ -162,7 +274,7 @@ def verify_tessellation(tesselation, geometry): f"during generation. Index of the affected elements: {collapsed}." ), category=UserWarning, - stacklevel=4, + stacklevel=2, ) # check MultiPolygons - usually caused by error in input geometry @@ -175,6 +287,6 @@ def verify_tessellation(tesselation, geometry): f"elements: {list(multipolygons)}." ), category=UserWarning, - stacklevel=4, + stacklevel=2, ) return collapsed, multipolygons From 0ab29ea12a383b40ba5a941d42cbcaabf29b7ba8 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Fri, 22 Mar 2024 15:58:03 +0100 Subject: [PATCH 3/5] tests --- momepy/functional/_elements.py | 5 + momepy/functional/tests/test_elements.py | 114 +++++++++++++++++++++++ 2 files changed, 119 insertions(+) create mode 100644 momepy/functional/tests/test_elements.py diff --git a/momepy/functional/_elements.py b/momepy/functional/_elements.py index 13d91430..ebc99533 100644 --- a/momepy/functional/_elements.py +++ b/momepy/functional/_elements.py @@ -122,6 +122,11 @@ def enclosed_tessellation( All three types can be tested using :class:`momepy.CheckTessellationInput`. + The idnex of the resulting GeoDataFrame links the input buildings with the output + geometry. Enclosures with no buildings are also included in the output with negative + index. Ensure that the input geometry has unique non-negative index for this to work + correctly. + Parameters ---------- geometry : gpd.GeoSeries | gpd.GeoDataFrame diff --git a/momepy/functional/tests/test_elements.py b/momepy/functional/tests/test_elements.py new file mode 100644 index 00000000..722cce51 --- /dev/null +++ b/momepy/functional/tests/test_elements.py @@ -0,0 +1,114 @@ +import geopandas as gpd +import numpy as np +import pandas as pd +from geopandas.testing import assert_geodataframe_equal +from packaging.version import Version +from pandas.testing import assert_index_equal +from shapely import affinity +from shapely.geometry import MultiPoint, Polygon + +import momepy as mm + +GPD_GE_013 = Version(gpd.__version__) >= Version("0.13.0") + + +class TestElements: + def setup_method(self): + test_file_path = mm.datasets.get_path("bubenec") + self.df_buildings = gpd.read_file(test_file_path, layer="buildings") + self.df_tessellation = gpd.read_file(test_file_path, layer="tessellation") + self.df_streets = gpd.read_file(test_file_path, layer="streets") + self.limit = mm.buffered_limit(self.df_buildings, 50) + self.enclosures = mm.enclosures( + self.df_streets, gpd.GeoSeries([self.limit.exterior]) + ) + + def test_morphological_tessellation(self): + tessellation = mm.morphological_tessellation( + self.df_buildings, + ) + assert (tessellation.geom_type == "Polygon").all() + assert tessellation.crs == self.df_buildings.crs + assert_index_equal(tessellation.index, self.df_buildings.index) + + clipped = mm.morphological_tessellation( + self.df_buildings, + clip=self.limit, + ) + + assert (tessellation.geom_type == "Polygon").all() + assert tessellation.crs == self.df_buildings.crs + assert_index_equal(tessellation.index, self.df_buildings.index) + assert clipped.area.sum() < tessellation.area.sum() + + sparser = mm.morphological_tessellation( + self.df_buildings, + segment=2, + ) + if GPD_GE_013: + assert ( + sparser.get_coordinates().shape[0] + < tessellation.get_coordinates().shape[0] + ) + + def test_morphological_tessellation_buffer_clip(self): + tessellation = mm.morphological_tessellation( + self.df_buildings, clip=self.df_buildings.buffer(50) + ) + assert (tessellation.geom_type == "Polygon").all() + assert tessellation.crs == self.df_buildings.crs + assert_index_equal(tessellation.index, self.df_buildings.index) + + def test_morphological_tessellation_errors(self): + df = self.df_buildings + b = df.total_bounds + x = np.mean([b[0], b[2]]) + y = np.mean([b[1], b[3]]) + + df = pd.concat( + [ + df, + gpd.GeoDataFrame( + {"uID": [145, 146, 147]}, + geometry=[ + Polygon([(x, y), (x, y + 1), (x + 1, y)]), + MultiPoint([(x, y), (x + 1, y)]).buffer(0.55), + affinity.rotate(df.geometry.iloc[0], 12), + ], + index=[144, 145, 146], + ), + ] + ) + tessellation = mm.morphological_tessellation( + df, + ) + assert (tessellation.geom_type == "Polygon").all() + assert 144 not in tessellation.index + assert len(tessellation) == len(df) - 1 + + def test_enclosed_tessellation(self): + tessellation = mm.enclosed_tessellation( + self.df_buildings, + self.enclosures.geometry, + ) + assert (tessellation.geom_type == "Polygon").all() + assert tessellation.crs == self.df_buildings.crs + assert (self.df_buildings.index.isin(tessellation.index)).all() + assert np.isin(np.array(range(-11, 0, 1)), tessellation.index).all() + + sparser = mm.enclosed_tessellation( + self.df_buildings, + self.enclosures.geometry, + segment=2, + ) + if GPD_GE_013: + assert ( + sparser.get_coordinates().shape[0] + < tessellation.get_coordinates().shape[0] + ) + + no_threshold_check = mm.enclosed_tessellation( + self.df_buildings, self.enclosures.geometry, threshold=None, n_jobs=1 + ) + + assert_geodataframe_equal(tessellation, no_threshold_check) From 1a4bb1b1624954de6f1831d2e469da02257788c8 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Fri, 22 Mar 2024 16:10:20 +0100 Subject: [PATCH 4/5] more tests --- momepy/functional/_elements.py | 2 +- momepy/functional/tests/test_elements.py | 36 ++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/momepy/functional/_elements.py b/momepy/functional/_elements.py index ebc99533..838922c1 100644 --- a/momepy/functional/_elements.py +++ b/momepy/functional/_elements.py @@ -275,7 +275,7 @@ def verify_tessellation(tesselation, geometry): warnings.warn( message=( "Tessellation does not fully match buildings. " - f"{len(collapsed)} element(s) collapsed " + f"{len(collapsed)} element(s) disappeared " f"during generation. Index of the affected elements: {collapsed}." ), category=UserWarning, diff --git a/momepy/functional/tests/test_elements.py b/momepy/functional/tests/test_elements.py index 722cce51..17a894e3 100644 --- a/momepy/functional/tests/test_elements.py +++ b/momepy/functional/tests/test_elements.py @@ -1,6 +1,7 @@ import geopandas as gpd import numpy as np import pandas as pd +import pytest from geopandas.testing import assert_geodataframe_equal from packaging.version import Version from pandas.testing import assert_index_equal @@ -112,3 +113,38 @@ def test_enclosed_tessellation(self): ) assert_geodataframe_equal(tessellation, no_threshold_check) + + def test_verify_tessellation(self): + df = self.df_buildings + b = df.total_bounds + x = np.mean([b[0], b[2]]) + y = np.mean([b[1], b[3]]) + + df = pd.concat( + [ + df, + gpd.GeoDataFrame( + {"uID": [145]}, + geometry=[ + Polygon([(x, y), (x, y + 1), (x + 1, y)]), + ], + index=[144], + ), + ] + ) + tessellation = mm.morphological_tessellation( + df, clip=self.df_streets.buffer(50) + ) + with ( + pytest.warns( + UserWarning, match="Tessellation does not fully match buildings" + ), + pytest.warns( + UserWarning, match="Tessellation contains MultiPolygon elements" + ), + ): + collapsed, multi = mm.verify_tessellation(tessellation, df) + assert_index_equal(collapsed, pd.Index([144])) + assert_index_equal( + multi, pd.Index([1, 46, 57, 62, 103, 105, 129, 130, 134, 136, 137]) + ) From 705eb7f1a2c809acac5c10dd249f63bad315b945 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Fri, 22 Mar 2024 19:44:28 +0100 Subject: [PATCH 5/5] Update _elements.py Co-authored-by: James Gaboardi --- momepy/functional/_elements.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/momepy/functional/_elements.py b/momepy/functional/_elements.py index 838922c1..a1c8cfe8 100644 --- a/momepy/functional/_elements.py +++ b/momepy/functional/_elements.py @@ -122,7 +122,7 @@ def enclosed_tessellation( All three types can be tested using :class:`momepy.CheckTessellationInput`. - The idnex of the resulting GeoDataFrame links the input buildings with the output + The index of the resulting GeoDataFrame links the input buildings with the output geometry. Enclosures with no buildings are also included in the output with negative index. Ensure that the input geometry has unique non-negative index for this to work correctly.