Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: helper functions part 1 #465

Closed
wants to merge 25 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
4143349
including two helper functions
gregmaya Feb 1, 2023
2f666ea
formating string correction
gregmaya Feb 1, 2023
015f558
black formatting on strings
gregmaya Feb 1, 2023
c6d713c
second guessing where the formatting issue is
gregmaya Feb 1, 2023
a0cb0c4
trailing whitespace fix (third time luck)
gregmaya Feb 1, 2023
3b3e2d7
updating test checks
gregmaya Feb 2, 2023
1782891
combined selection of adjacents: shape and area
gregmaya Feb 2, 2023
0abc422
selection function updated
gregmaya Feb 2, 2023
a718939
update to check on 'low_area_threshold'
gregmaya Feb 2, 2023
7a0b1a8
improving selection of adjacents when they touch more than one rab
gregmaya Feb 7, 2023
e6aafde
black formating
gregmaya Feb 7, 2023
0e293fe
black attempt 2
gregmaya Feb 7, 2023
e61f473
white spaces
gregmaya Feb 7, 2023
538bdee
blank: line not containing whitespace?
gregmaya Feb 8, 2023
5822e90
replacing the area mask for a hausdorff_mask
gregmaya Feb 13, 2023
99e78c4
updating roundabout_simplification() to reflect changes in helper fun…
gregmaya Feb 13, 2023
5a3109e
formatting (1)
gregmaya Feb 13, 2023
392750d
updating ASSERTs in tests
gregmaya Feb 13, 2023
997db14
replacing hausdodorff distance with max distance to centroid
gregmaya Feb 16, 2023
88e7e0b
formatting (the wright way)
gregmaya Feb 16, 2023
f84f9d4
fixing np.linalg.norm use
gregmaya Feb 16, 2023
62eab1f
assert update on new test for low diameter_factor
gregmaya Feb 16, 2023
6cc8b8f
updating code based on previous suggestions
gregmaya Mar 16, 2023
7548af0
add esda to CI and import it within fn
martinfleis Mar 21, 2023
f1d67a2
Merge remote-tracking branch 'upstream/main' into pr/gregmaya/465
martinfleis Mar 30, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ci/envs/310-latest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ dependencies:
- inequality
- libpysal>=4.6.0
- mapclassify
- esda
- networkx
- packaging
- pandas!=1.5.0
Expand Down
2 changes: 2 additions & 0 deletions ci/envs/311-dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ dependencies:
- geopandas
- inequality
- libpysal>=4.6.0
- esda
- networkx
- osmnx
- packaging
Expand All @@ -27,3 +28,4 @@ dependencies:
- git+https://github.com/networkx/networkx.git
- git+https://github.com/pygeos/pygeos.git
- git+https://github.com/pysal/mapclassify.git
- git+https://github.com/pysal/esda.git
1 change: 1 addition & 0 deletions ci/envs/39-latest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ dependencies:
- inequality
- libpysal>=4.6.0
- mapclassify
- esda
- networkx
- osmnx
- packaging
Expand Down
110 changes: 78 additions & 32 deletions momepy/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -748,6 +748,43 @@ def _polygonize_ifnone(edges, polys):
return polys


def _create_compactness(gdf):
from esda import shape

# measure area
gdf["area"] = gdf.area

# create index and append to input gdf
gdf["circular_compactness"] = shape.minimum_bounding_circle_ratio(gdf)

return gdf


def _count_edges_per_polygon(lines_gdf, polygons_gdf):
"""
Counts the number of linestrings that formed the polygons
"""
# Check if the polygons are valid geometries and fix them if not
polygons_gdf["geometry"] = polygons_gdf.geometry.make_valid()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will fail for older geopandas (which we still formally support).

Also, we should call make_valid only if there are actually some invalid polygons.


# query bulk to identify the lines that are fully covered by each polygon
_, pol_i = polygons_gdf.sindex.query_bulk(
lines_gdf.geometry, predicate="covered_by"
)

# tracing the actual index from the polygons_gdf
idx_pols = polygons_gdf.index.take(pol_i)

# counting the number of lines per polygon
iss, counts = np.unique(idx_pols, return_counts=True)
count_dict = dict(zip(iss, counts))

# add column to polygons_gdf
polygons_gdf["count_edges"] = polygons_gdf.index.map(count_dict)

return polygons_gdf


def _selecting_rabs_from_poly(
gdf,
area_col="area",
Expand All @@ -764,55 +801,63 @@ def _selecting_rabs_from_poly(
________
GeoDataFrames : roundabouts and adjacent polygons
"""
# calculate parameters
if area_col == "area":
gdf.loc[:, area_col] = gdf.geometry.area
circom_serie = CircularCompactness(gdf, area_col).series
# selecting roundabout polygons based on compactness
mask = circom_serie > circom_threshold
mask = gdf.circular_compactness > circom_threshold
rab = gdf[mask]
# exclude those above the area threshold

# exclude those above the area threshold and larger than 2000sqmt
area_threshold_val = gdf.area.quantile(area_threshold)
rab = rab[rab[area_col] < area_threshold_val]
if area_threshold_val > 2000:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason why 2000? Should this be controllable via a keyword?

rab = rab[rab[area_col] < area_threshold_val]

if include_adjacent is True:
# calculating a pseudo diameter for dimeter metric later
bounds = rab.geometry.bounds
rab = pd.concat([rab, bounds], axis=1)
rab["deltax"] = rab.maxx - rab.minx
rab["deltay"] = rab.maxy - rab.miny
rab["rab_diameter"] = rab[["deltax", "deltay"]].max(axis=1)

# selecting the adjacent areas that are of smaller than itself
# selecting the adjacent areas that have only three formning edges
gdf["savedgeom"] = gdf.geometry
if GPD_10:
rab_adj = gpd.sjoin(gdf, rab, predicate="intersects")
else:
rab_adj = gpd.sjoin(gdf, rab, op="intersects")

area_right = area_col + "_right"
area_left = area_col + "_left"
area_mask = rab_adj[area_right] >= rab_adj[area_left]
rab_adj = rab_adj[area_mask]
rab_adj.index.name = "index"
# remove the adjacent polygons that are selected more than once
rab_adj = rab_adj[~rab_adj.index.duplicated(keep=False)]

# adding a hausdorff_distance threshold
rab_adj["hdist"] = 0
# TODO: (should be a way to vectorize)
for i, group in rab_adj.groupby("index_right"):
for g in group.itertuples():
hdist = g.geometry.hausdorff_distance(rab.loc[i].geometry)
rab_adj.loc[g.Index, "hdist"] = hdist

rab_plus = rab_adj[rab_adj.hdist < (rab_adj.rab_diameter * diameter_factor)]
# mask based on number of forming edges (3)
mask_adjacents = (rab_adj["count_edges_left"] == 3) | (
rab_adj["circular_compactness_left"] > circom_threshold
)
rab_adj = rab_adj[mask_adjacents]

# calculating how far an adjacent polygon stretches to
# determine if it should still be simplified
max_dists = []
for _, row in rab_adj.iterrows():
dists = [
row.savedgeom_right.centroid.distance(Point(x, y))
for x, y in row.geometry.exterior.coords
]
max_dists.append(max(dists))
rab_adj["max_dist"] = max_dists
max_d_mask = (
rab_adj["max_dist"] <= rab_adj["rab_diameter"] * diameter_factor
) | (rab_adj["circular_compactness_left"] > circom_threshold)
rab_plus2 = rab_adj[max_d_mask]

else:
rab["index_right"] = rab.index
rab_plus = rab
rab_plus2 = rab

# only keeping relevant fields
geom_col = rab_plus.geometry.name
rab_plus = rab_plus[[geom_col, "index_right"]]
geom_col = rab_plus2.geometry.name
return_rabs = rab_plus2[[geom_col, "index_right"]]

return rab_plus
return return_rabs


def _rabs_center_points(gdf, center_type="centroid"):
Expand Down Expand Up @@ -1032,9 +1077,9 @@ def roundabout_simplification(
include_adjacent : boolean (default True)
Adjacent polygons to be considered also as part of the simplification.
diameter_factor : float (default 1.5)
The factor to be applied to the diameter of each roundabout that determines
how far an adjacent polygon can stretch until it is no longer considered part
of the overall roundabout group. Only applyies when include_adjacent = True.
The factor to be applied to the diameter area of each roundabouts to
exclude adjacent polygons that extend past the distance * diameter_factor
Only applyies when include_adjacent = True.
center_type : string (default 'centroid')
Method to use for converging the incoming LineStrings.
Current list of options available : 'centroid', 'mean'.
Expand All @@ -1051,8 +1096,7 @@ def roundabout_simplification(
will evaluate which of those
incoming lines should be extended according to their deflection angle.
Segments will only be considered a part of the same street if the deflection
angle
is above the threshold.
angle is above the threshold.

Returns
-------
Expand All @@ -1073,8 +1117,10 @@ def roundabout_simplification(
)

polys = _polygonize_ifnone(edges, polys)
polys_attr_1 = _create_compactness(polys)
polys_attr_2 = _count_edges_per_polygon(edges, polys_attr_1)
rab = _selecting_rabs_from_poly(
polys,
polys_attr_2,
area_col=area_col,
circom_threshold=circom_threshold,
area_threshold=area_threshold,
Expand Down
12 changes: 6 additions & 6 deletions momepy/tests/test_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,23 +148,23 @@ def test_roundabout_simplification_point_error(self):
@pytest.mark.skipif(not GPD_09, reason="requires geopandas 0.9+")
def test_roundabout_simplification_default(self):
check = mm.roundabout_simplification(self.df_streets_rabs)
assert len(check) == 65
assert len(check) == 67
assert len(self.df_streets_rabs) == 88 # checking that nothing has changed

@pytest.mark.skipif(not GPD_09, reason="requires geopandas 0.9+")
def test_roundabout_simplification_high_circom_threshold(self):
check = mm.roundabout_simplification(
self.df_streets_rabs, self.df_rab_polys, circom_threshold=0.97
)
assert len(check) == 77
assert len(check) == 67
assert len(self.df_streets_rabs) == 88

@pytest.mark.skipif(not GPD_09, reason="requires geopandas 0.9+")
def test_roundabout_simplification_low_area_threshold(self):
def test_roundabout_simplification_low_diameter_factor(self):
check = mm.roundabout_simplification(
self.df_streets_rabs, self.df_rab_polys, area_threshold=0.8
self.df_streets_rabs, self.df_rab_polys, diameter_factor=0.4
)
assert len(check) == 67
assert len(check) == 73
assert len(self.df_streets_rabs) == 88

@pytest.mark.skipif(not GPD_09, reason="requires geopandas 0.9+")
Expand All @@ -180,5 +180,5 @@ def test_roundabout_simplification_center_type_mean(self):
check = mm.roundabout_simplification(
self.df_streets_rabs, self.df_rab_polys, center_type="mean"
)
assert len(check) == 65
assert len(check) == 67
assert len(self.df_streets_rabs) == 88