Street profile #593

Merged · 10 commits · Jun 12, 2024
173 changes: 172 additions & 1 deletion momepy/functional/_dimension.py
@@ -1,18 +1,30 @@
import math

import numpy as np
import pandas as pd
import shapely
from geopandas import GeoDataFrame, GeoSeries
from libpysal.graph import Graph
from numpy.typing import NDArray
from pandas import Series
from pandas import DataFrame, Series

__all__ = [
"volume",
"floor_area",
"courtyard_area",
"longest_axis_length",
"perimeter_wall",
"street_profile",
]

try:
from numba import njit

HAS_NUMBA = True
except (ModuleNotFoundError, ImportError):
HAS_NUMBA = False
from libpysal.common import jit as njit


def volume(
area: NDArray[np.float_] | Series,
@@ -157,3 +169,162 @@ def perimeter_wall(
].values

return results


def street_profile(
left: GeoDataFrame,
right: GeoDataFrame,
distance: int = 10,
tick_length: int = 50,
heights: None | Series = None,
) -> DataFrame:
"""
Calculates the street profile characters.
This function returns a DataFrame with widths (w), standard deviation of width (wd),
openness (o), heights (h), standard deviation of height (hd) and
ratio height/width (p). The algorithm generates lines perpendicular to the ``left``
dataframe features every ``distance`` and measures values on intersections with
features of ``right``. If no feature is reached within ``tick_length``, the tick
length is used as the width (being a theoretical maximum).

Derived from :cite:`araldi2019`.

Parameters
----------
left : GeoDataFrame
A GeoDataFrame containing streets to analyse.
right : GeoDataFrame
A GeoDataFrame containing buildings along the streets.
Only Polygon geometries are currently supported.
distance : int (default 10)
The distance between perpendicular ticks.
tick_length : int (default 50)
The length of ticks.
heights : pd.Series (default None)
The ``pd.Series`` where building heights are stored. If set to ``None``,
height and ratio height/width will not be calculated.

Returns
-------
DataFrame

Examples
--------
>>> street_prof = momepy.street_profile(streets_df,
... buildings_df, heights=buildings_df['height'])
>>> streets_df['width'] = street_prof.w
>>> streets_df['deviations'] = street_prof.wd
"""

## generate points for every street at `distance` intervals
segments = left.segmentize(distance)
coords, coord_indxs = shapely.get_coordinates(segments, return_index=True)
starts = ~pd.Series(coord_indxs).duplicated(keep="first")
ends = ~pd.Series(coord_indxs).duplicated(keep="last")
end_markers = starts | ends
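# the first and last point of each street are flagged; generate_ticks gives them degenerate zero-length ticks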

## generate tick linestrings
njit_ticks = generate_ticks(coords, end_markers.values, tick_length)
ticks = shapely.linestrings(njit_ticks.reshape(-1, 2, 2))
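# interior points get two opposite half-ticks (each tick_length / 2 long), one per side of the street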

## find, for every tick, the distance to the nearest intersecting building
inp, res = right.geometry.sindex.query(ticks, predicate="intersects")
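# inp holds tick indices, res the matching building indices, one pair per intersection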
intersections = shapely.intersection(ticks[inp], right.geometry.array[res])
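# inp // 2 maps each tick back to its originating street point (two ticks per point)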
distances = shapely.distance(intersections, shapely.points(coords[inp // 2]))
min_distances = pd.Series(distances).groupby(inp).min()
dists = np.full((len(ticks),), np.nan)
dists[min_distances.index.values] = min_distances.values
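# ticks that hit no building keep NaN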

## generate tick values and groupby street
tick_coords = np.repeat(coord_indxs, 2)
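# street index for every tick (each street point contributes two ticks)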
street_res = (
pd.Series(dists)
.groupby(tick_coords)
.apply(generate_street_tick_values, tick_length)
)
njit_result = np.concatenate(street_res).reshape((-1, 3))
njit_result[np.isnan(njit_result[:, 0]), 0] = tick_length
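# any remaining NaN widths fall back to the theoretical maximum, tick_length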

final_result = pd.DataFrame(np.nan, columns=["w", "o", "wd"], index=left.index)
final_result.loc[street_res.index] = njit_result

## if heights are available add heights stats to the result
if heights is not None:
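# per tick, take the minimum height among intersecting buildings, then aggregate per street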
min_heights = heights[res].groupby(inp).min()
tick_heights = np.full((len(ticks),), np.nan)
tick_heights[min_heights.index.values] = min_heights.values
heights_res = pd.Series(tick_heights).groupby(tick_coords).agg(["mean", "std"])
final_result["h"] = heights_res["mean"]
final_result["hd"] = heights_res["std"]
final_result["p"] = final_result["h"] / final_result["w"].replace(0, np.nan)

return final_result


# angle between two points
@njit
def _get_angle_njit(x1, y1, x2, y2):
"""
Return the angle (in degrees) of the line from (x1, y1) to (x2, y2).
"""
x_diff = x2 - x1
y_diff = y2 - y1
return math.degrees(math.atan2(y_diff, x_diff))


# get the end point of a tick, offset from (x1, y1) by ``dist`` at the given bearing
# (callers pass bearing + 90 to get a direction perpendicular to the street)
@njit
def _get_point_njit(x1, y1, bearing, dist):
bearing = math.radians(bearing)
x = x1 + dist * math.cos(bearing)
y = y1 + dist * math.sin(bearing)
return np.array((x, y))


@njit
def generate_ticks(list_points, end_markers, tick_length):
ticks = np.empty((len(list_points) * 2, 4), dtype=np.float64)

for i in range(len(list_points)):
tick_pos = i * 2
end = end_markers[i]
pt = list_points[i]

if end:
ticks[tick_pos, :] = np.array([pt[0], pt[1], pt[0], pt[1]])
ticks[tick_pos + 1, :] = np.array([pt[0], pt[1], pt[0], pt[1]])
else:
next_pt = list_points[i + 1]
njit_angle1 = _get_angle_njit(pt[0], pt[1], next_pt[0], next_pt[1])
njit_end_1 = _get_point_njit(
pt[0], pt[1], njit_angle1 + 90, tick_length / 2
)
njit_angle2 = _get_angle_njit(njit_end_1[0], njit_end_1[1], pt[0], pt[1])
njit_end_2 = _get_point_njit(
njit_end_1[0], njit_end_1[1], njit_angle2, tick_length
)
ticks[tick_pos, :] = np.array([njit_end_1[0], njit_end_1[1], pt[0], pt[1]])
ticks[tick_pos + 1, :] = np.array(
[njit_end_2[0], njit_end_2[1], pt[0], pt[1]]
)

return ticks


def generate_street_tick_values(group, tick_length):
s = group.values
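# ticks alternate sides of the street: even indices are one side, odd indices the other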

lefts = s[::2]
rights = s[1::2]
left_mean = np.nanmean(lefts) if ~np.isnan(lefts).all() else tick_length / 2
right_mean = np.nanmean(rights) if ~np.isnan(rights).all() else tick_length / 2
width = np.mean([left_mean, right_mean]) * 2
f_sum = s.shape[0]
s_nan = np.isnan(s)

openness_score = np.nan if not f_sum else s_nan.sum() / f_sum  # share of ticks that hit no building

deviation_score = np.nan if s_nan.all() else np.nanstd(s)  # std of distances to buildings across ticks
return [width, openness_score, deviation_score]
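A minimal usage sketch (not part of the diff), mirroring the tests below: it assumes geopandas >= 0.13 for ``segmentize``, loads the bubenec sample dataset shipped with momepy, and attaches synthetic building heights since the sample buildings carry no height column.

import geopandas as gpd
import numpy as np
import momepy as mm

path = mm.datasets.get_path("bubenec")
streets = gpd.read_file(path, layer="streets")
buildings = gpd.read_file(path, layer="buildings")
buildings["height"] = np.linspace(10.0, 30.0, len(buildings))  # synthetic heights, as in the tests

profile = mm.street_profile(
    streets, buildings, distance=10, tick_length=50, heights=buildings["height"]
)
streets["width"] = profile["w"]
streets["openness"] = profile["o"]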
98 changes: 98 additions & 0 deletions momepy/functional/tests/test_dimension.py
@@ -4,10 +4,13 @@
import pytest
from libpysal.graph import Graph
from packaging.version import Version
from pandas.testing import assert_series_equal
from shapely import Polygon

import momepy as mm

from .conftest import assert_result

GPD_013 = Version(gpd.__version__) >= Version("0.13")


@@ -84,3 +87,98 @@ def test_perimeter_wall(self):

pd.testing.assert_series_equal(result, result_given_graph)
assert result[0] == pytest.approx(137.210, rel=1e-3)

@pytest.mark.skipif(not GPD_013, reason="no attribute 'segmentize'")
def test_street_profile(self):
sp = mm.street_profile(
self.df_streets,
self.df_buildings,
tick_length=50,
distance=1,
heights=self.df_buildings["height"],
)

expected_w = {
"count": 35,
"mean": 43.39405649921903,
"min": 31.017731525484447,
"max": 50.0,
}
expected_wd = {
"count": 22,
"mean": 1.1977356963898373,
"min": 0.09706119360586668,
"max": 5.154163996499861,
}
expected_o = {
"count": 35,
"mean": 0.7186966927475066,
"min": 0.15270935960591134,
"max": 1.0,
}
expected_h = {
"count": 22,
"mean": 16.72499857958264,
"min": 10.0,
"max": 28.1969381969382,
}
expected_hd = {
"count": 22,
"mean": 1.2372251098113227,
"min": 0.0,
"max": 7.947097088834963,
}
expected_p = {
"count": 22,
"mean": 0.43257459410448046,
"min": 0.20379941361273096,
"max": 0.7432052069071473,
}

assert_result(sp["w"], expected_w, self.df_streets)
assert_result(sp["wd"], expected_wd, self.df_streets)
assert_result(sp["o"], expected_o, self.df_streets)
assert_result(sp["h"], expected_h, self.df_streets)
assert_result(sp["hd"], expected_hd, self.df_streets)
assert_result(sp["p"], expected_p, self.df_streets)


class TestDimensionEquivalence:
def setup_method(self):
test_file_path = mm.datasets.get_path("bubenec")
self.df_buildings = gpd.read_file(test_file_path, layer="buildings")
self.df_streets = gpd.read_file(test_file_path, layer="streets")
self.df_buildings["height"] = np.linspace(10.0, 30.0, 144)

@pytest.mark.skipif(not GPD_013, reason="no attribute 'segmentize'")
def test_street_profile(self):
sp_new = mm.street_profile(
self.df_streets,
self.df_buildings,
tick_length=50,
distance=1,
heights=self.df_buildings["height"],
)
sp_old = mm.StreetProfile(
self.df_streets,
self.df_buildings,
tick_length=50,
distance=1,
heights=self.df_buildings["height"],
)

## needs tolerance because the generated ticks are different
assert_series_equal(sp_new["w"], sp_old.w, rtol=1e-2, check_names=False)
assert_series_equal(
sp_new["wd"].replace(np.nan, 0), sp_old.wd, rtol=1, check_names=False
)
assert_series_equal(sp_new["o"], sp_old.o, rtol=1e-1, check_names=False)
assert_series_equal(
sp_new["h"].replace(np.nan, 0), sp_old.h, check_names=False, rtol=1e-1
)
assert_series_equal(
sp_new["hd"].replace(np.nan, 0), sp_old.hd, check_names=False, rtol=1e-1
)
assert_series_equal(
sp_new["p"].replace(np.nan, 0), sp_old.p, check_names=False, rtol=1
)