diff --git a/momepy/__init__.py b/momepy/__init__.py index e18a4bd2..8ad653a4 100644 --- a/momepy/__init__.py +++ b/momepy/__init__.py @@ -9,6 +9,7 @@ from .elements import * from .functional._dimension import * from .functional._distribution import * +from .functional._diversity import * from .functional._elements import * from .functional._shape import * from .graph import * diff --git a/momepy/dimension.py b/momepy/dimension.py index 7c0cf021..59feddaa 100644 --- a/momepy/dimension.py +++ b/momepy/dimension.py @@ -445,7 +445,7 @@ def __init__( values_list = data.loc[neighbours] if rng: - values_list = limit_range(values_list, rng=rng) + values_list = limit_range(values_list.values, rng=rng) if "mean" in mode: means.append(np.mean(values_list)) if "median" in mode: diff --git a/momepy/diversity.py b/momepy/diversity.py index a53725b9..f8399c6e 100644 --- a/momepy/diversity.py +++ b/momepy/diversity.py @@ -209,7 +209,7 @@ def __init__(self, gdf, values, spatial_weights, unique_id, rng=None, verbose=Tr values_list = data.loc[neighbours] if rng: - values_list = limit_range(values_list, rng=rng) + values_list = limit_range(values_list.values, rng=rng) results_list.append(Theil(values_list).T) else: results_list.append(np.nan) diff --git a/momepy/functional/_diversity.py b/momepy/functional/_diversity.py new file mode 100644 index 00000000..deac7176 --- /dev/null +++ b/momepy/functional/_diversity.py @@ -0,0 +1,85 @@ +import numpy as np +from libpysal.graph import Graph +from numpy.typing import NDArray +from pandas import DataFrame, Series +from scipy import stats + +from ..utils import limit_range + +__all__ = ["describe"] + + +def describe( + y: NDArray[np.float_] | Series, + graph: Graph, + q: tuple[float, float] | None = None, + include_mode: bool = False, +) -> DataFrame: + """Describe the distribution of values within a set neighbourhood. + + Given the graph, computes the descriptive statisitcs of values within the + neighbourhood of each node. Optionally, the values can be limited to a certain + quantile range before computing the statistics. + + Notes + ----- + The index of ``values`` must match the index along which the ``graph`` is + built. + + Parameters + ---------- + y : NDArray[np.float_] | Series + An 1D array of numeric values to be described. + graph : libpysal.graph.Graph + Graph representing spatial relationships between elements. + q : tuple[float, float] | None, optional + Tuple of percentages for the percentiles to compute. Values must be between 0 + and 100 inclusive. When set, values below and above the percentiles will be + discarded before computation of the average. The percentiles are computed for + each neighborhood. By default None. + include_mode : False + Compute mode along with other statistics. Default is False. Mode is + computationally expensive and not useful for continous variables. + + Returns + ------- + DataFrame + A DataFrame with descriptive statistics. + """ + + def _describe(values, q, include_mode=False): + """Helper function to calculate average.""" + values = limit_range(values.values, q) + + results = [ + np.mean(values), + np.median(values), + np.std(values), + np.min(values), + np.max(values), + np.sum(values), + ] + if include_mode: + results.append(stats.mode(values, keepdims=False)[0]) + return results + + if not isinstance(y, Series): + y = Series(y) + + grouper = y.take(graph._adjacency.index.codes[1]).groupby( + graph._adjacency.index.codes[0] + ) + + if q is None: + stat_ = grouper.agg(["mean", "median", "std", "min", "max", "sum"]) + if include_mode: + stat_["mode"] = grouper.agg(lambda x: stats.mode(x, keepdims=False)[0]) + else: + agg = grouper.agg(_describe, q=q, include_mode=include_mode) + stat_ = DataFrame(zip(*agg, strict=True)).T + cols = ["mean", "median", "std", "min", "max", "sum"] + if include_mode: + cols.append("mode") + stat_.columns = cols + + return stat_ diff --git a/momepy/functional/tests/conftest.py b/momepy/functional/tests/conftest.py new file mode 100644 index 00000000..d8a7b62f --- /dev/null +++ b/momepy/functional/tests/conftest.py @@ -0,0 +1,11 @@ +import pandas as pd +import pytest +from pandas.testing import assert_index_equal + + +def assert_result(result, expected, geometry, **kwargs): + """Check the expected values and types of the result.""" + for key, value in expected.items(): + assert getattr(result, key)() == pytest.approx(value) + assert isinstance(result, pd.Series) + assert_index_equal(result.index, geometry.index, **kwargs) diff --git a/momepy/functional/tests/test_distribution.py b/momepy/functional/tests/test_distribution.py index 04ba73e2..162c8f46 100644 --- a/momepy/functional/tests/test_distribution.py +++ b/momepy/functional/tests/test_distribution.py @@ -4,7 +4,7 @@ import momepy as mm -from .test_shape import assert_result +from .conftest import assert_result class TestDistribution: diff --git a/momepy/functional/tests/test_diversity.py b/momepy/functional/tests/test_diversity.py new file mode 100644 index 00000000..2fe06319 --- /dev/null +++ b/momepy/functional/tests/test_diversity.py @@ -0,0 +1,117 @@ +import geopandas as gpd +import pytest +from libpysal.graph import Graph +from packaging.version import Version +from pandas.testing import assert_frame_equal + +import momepy as mm + +from .conftest import assert_result + +GPD_013 = Version(gpd.__version__) >= Version("0.13") + + +class TestDistribution: + def setup_method(self): + test_file_path = mm.datasets.get_path("bubenec") + self.df_buildings = gpd.read_file(test_file_path, layer="buildings") + self.graph = Graph.build_knn(self.df_buildings.centroid, k=3) + + def test_describe(self): + area = self.df_buildings.area + r = mm.describe(area, self.graph) + + expected_mean = { + "mean": 587.3761020554495, + "sum": 84582.15869598472, + "min": 50.44045729583316, + "max": 1187.2662413659234, + } + assert_result(r["mean"], expected_mean, self.df_buildings, exact=False) + + expected_median = { + "mean": 577.4640489818667, + "sum": 83154.8230533888, + "min": 50.43336175017242, + "max": 1225.8094201694726, + } + assert_result(r["median"], expected_median, self.df_buildings, exact=False) + + expected_std = { + "mean": 255.59307136480083, + "sum": 36805.40227653132, + "min": 0.05050450812944085, + "max": 1092.484902679786, + } + assert_result(r["std"], expected_std, self.df_buildings, exact=False) + + expected_min = { + "mean": 349.53354434499295, + "sum": 50332.830385678986, + "min": 50.39387578315866, + "max": 761.0313042971973, + } + assert_result(r["min"], expected_min, self.df_buildings, exact=False) + + expected_max = { + "mean": 835.1307128394886, + "sum": 120258.82264888636, + "min": 50.49413435416841, + "max": 2127.7522277389035, + } + assert_result(r["max"], expected_max, self.df_buildings, exact=False) + + expected_sum = { + "mean": 1762.128306166348, + "sum": 253746.47608795413, + "min": 151.32137188749948, + "max": 3561.79872409777, + } + assert_result(r["sum"], expected_sum, self.df_buildings, exact=False) + + def test_describe_quantile(self): + graph = Graph.build_knn(self.df_buildings.centroid, k=15) + area = self.df_buildings.area + r = mm.describe(area, graph, q=(25, 75)) + + expected_mean = { + "mean": 601.6960154385389, + "sum": 86644.2262231496, + "min": 250.25984637364323, + "max": 901.0028506943196, + } + assert_result(r["mean"], expected_mean, self.df_buildings, exact=False) + + @pytest.mark.skipif(not GPD_013, reason="get_coordinates() not available") + def test_describe_mode(self): + corners = mm.corners(self.df_buildings) + r = mm.describe(corners, self.graph, include_mode=True) + + expected = { + "mean": 6.152777777777778, + "sum": 886, + "min": 4, + "max": 17, + } + assert_result(r["mode"], expected, self.df_buildings, exact=False) + + @pytest.mark.skipif(not GPD_013, reason="get_coordinates() not available") + def test_describe_quantile_mode(self): + graph = Graph.build_knn(self.df_buildings.centroid, k=15) + corners = mm.corners(self.df_buildings) + r = mm.describe(corners, graph, q=(25, 75), include_mode=True) + + expected = { + "mean": 6.958333333333333, + "sum": 1002.0, + "min": 4.0, + "max": 12, + } + assert_result(r["mode"], expected, self.df_buildings, exact=False) + + def test_describe_array(self): + area = self.df_buildings.area + r = mm.describe(area, self.graph) + r2 = mm.describe(area.values, self.graph) + + assert_frame_equal(r, r2) diff --git a/momepy/functional/tests/test_shape.py b/momepy/functional/tests/test_shape.py index 78ca3003..85d1d97b 100644 --- a/momepy/functional/tests/test_shape.py +++ b/momepy/functional/tests/test_shape.py @@ -3,19 +3,13 @@ import pandas as pd import pytest from packaging.version import Version -from pandas.testing import assert_frame_equal, assert_index_equal, assert_series_equal +from pandas.testing import assert_frame_equal, assert_series_equal import momepy as mm -GPD_013 = Version(gpd.__version__) >= Version("0.13") - +from .conftest import assert_result -def assert_result(result, expected, geometry, **kwargs): - """Check the expected values and types of the result.""" - for key, value in expected.items(): - assert getattr(result, key)() == pytest.approx(value) - assert isinstance(result, pd.Series) - assert_index_equal(result.index, geometry.index, **kwargs) +GPD_013 = Version(gpd.__version__) >= Version("0.13") class TestShape: diff --git a/momepy/tests/test_utils.py b/momepy/tests/test_utils.py index dd289e4b..701d2dc5 100644 --- a/momepy/tests/test_utils.py +++ b/momepy/tests/test_utils.py @@ -196,9 +196,9 @@ def test_nx_to_gdf_osmnx(self): assert len(lines) == 16 def test_limit_range(self): - assert list(mm.limit_range(range(10), rng=(25, 75))) == [2, 3, 4, 5, 6, 7] - assert list(mm.limit_range(range(10), rng=(10, 90))) == [1, 2, 3, 4, 5, 6, 7, 8] - assert list(mm.limit_range([0, 1], rng=(25, 75))) == [0, 1] + assert list(mm.limit_range(np.arange(10), rng=(25, 75))) == [2, 3, 4, 5, 6, 7] + assert list(mm.limit_range(np.arange(10), rng=(10, 90))) == [1, 2, 3, 4, 5, 6, 7, 8] + assert list(mm.limit_range(np.array([0, 1]), rng=(25, 75))) == [0, 1] assert list( mm.limit_range(np.array([0, 1, 2, 3, 4, np.nan]), rng=(25, 75)) ) == [1, 2, 3] diff --git a/momepy/utils.py b/momepy/utils.py index 89d8e3c6..bb437718 100644 --- a/momepy/utils.py +++ b/momepy/utils.py @@ -469,7 +469,6 @@ def limit_range(vals, rng): The limited array. """ - vals = np.asarray(vals) nan_tracker = np.isnan(vals) if (len(vals) > 2) and (not nan_tracker.all()): @@ -479,11 +478,9 @@ def limit_range(vals, rng): method = {"interpolation": "nearest"} rng = sorted(rng) if nan_tracker.any(): - lower = np.nanpercentile(vals, rng[0], **method) - higher = np.nanpercentile(vals, rng[1], **method) + lower, higher = np.nanpercentile(vals, rng, **method) else: - lower = np.percentile(vals, rng[0], **method) - higher = np.percentile(vals, rng[1], **method) + lower, higher = np.percentile(vals, rng, **method) vals = vals[(lower <= vals) & (vals <= higher)] return vals