Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: describe as a replacement of AverageCharacter #570

Merged
merged 11 commits into from
Apr 24, 2024
84 changes: 82 additions & 2 deletions momepy/functional/_distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@
import shapely
from geopandas import GeoDataFrame, GeoSeries
from libpysal.graph import Graph
from numpy.typing import NDArray
from packaging.version import Version
from pandas import Series
from scipy import sparse
from pandas import DataFrame, Series
from scipy import sparse, stats

from ..utils import limit_range

__all__ = [
"orientation",
Expand All @@ -16,6 +19,7 @@
"building_adjacency",
"neighbors",
"street_alignment",
"describe",
]

GPD_GE_013 = Version(gpd.__version__) >= Version("0.13.0")
Expand Down Expand Up @@ -339,3 +343,79 @@
Series
"""
return (building_orientation - street_orientation.loc[street_index].values).abs()


def describe(
    values: NDArray[np.float64] | Series,
    graph: Graph,
    q: tuple[float, float] | None = None,
    include_mode: bool = False,
) -> DataFrame:
    """Describe the distribution of values within a set neighbourhood.

    Given the graph, computes the descriptive statistics of values within the
    neighbourhood of each node. Optionally, the values can be limited to a certain
    percentile range before computing the statistics.

    Notes
    -----
    The index of ``values`` must match the index along which the ``graph`` is
    built.

    Parameters
    ----------
    values : NDArray[np.float64] | Series
        Array of values, one per node of ``graph``. Anything that is not a
        ``Series`` is wrapped in one.
    graph : libpysal.graph.Graph
        Graph representing spatial relationships between elements.
    q : tuple[float, float] | None, optional
        Tuple of percentages for the percentiles to compute. Values must be between
        0 and 100 inclusive. When set, values below and above the percentiles will
        be discarded before computation of the statistics. The percentiles are
        computed for each neighborhood. By default None.
    include_mode : bool, optional
        Compute mode along with other statistics. Default is False. Mode is
        computationally expensive and not useful for continuous variables.

    Returns
    -------
    DataFrame
        A DataFrame with one row per neighbourhood and the columns ``mean``,
        ``median``, ``std``, ``min``, ``max``, ``sum`` and, when ``include_mode``
        is True, ``mode``.
    """
    # Single source of truth for the output columns; both branches below must
    # produce their statistics in exactly this order.
    stat_names = ["mean", "median", "std", "min", "max", "sum"]

    def _describe(values, q, include_mode=False):
        """Compute the statistics of one neighbourhood limited to the range ``q``."""
        values = limit_range(values, q)

        # NOTE(review): np.std defaults to the population formula (ddof=0) while
        # the pandas "std" aggregation used when ``q`` is None defaults to the
        # sample formula (ddof=1), so the "std" column may differ between the two
        # branches -- confirm which convention is intended.
        results = [
            np.mean(values),
            np.median(values),
            np.std(values),
            np.min(values),
            np.max(values),
            np.sum(values),
        ]
        if include_mode:
            results.append(stats.mode(values, keepdims=False)[0])
        return results

    if not isinstance(values, Series):
        values = Series(values)

    if q is None:
        # Fast path: pick the value of every neighbour positionally and group
        # those values by the code of their focal node.
        focal_codes, neighbor_codes = graph._adjacency.index.codes[:2]
        grouper = values.take(neighbor_codes).groupby(focal_codes)

        stat_ = grouper.agg(stat_names)
        if include_mode:
            stat_["mode"] = grouper.agg(lambda x: stats.mode(x, keepdims=False)[0])
    else:
        # The percentile range is evaluated per neighbourhood, so every
        # neighbourhood goes through the helper individually.
        agg = graph.apply(values, _describe, q=q, include_mode=include_mode)
        stat_ = DataFrame(zip(*agg, strict=True)).T
        stat_.columns = [*stat_names, "mode"] if include_mode else stat_names

    return stat_

Check warning on line 421 in momepy/functional/_distribution.py

View check run for this annotation

Codecov / codecov/patch

momepy/functional/_distribution.py#L421

Added line #L421 was not covered by tests
6 changes: 2 additions & 4 deletions momepy/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,11 +479,9 @@ def limit_range(vals, rng):
method = {"interpolation": "nearest"}
rng = sorted(rng)
if nan_tracker.any():
lower = np.nanpercentile(vals, rng[0], **method)
higher = np.nanpercentile(vals, rng[1], **method)
lower, higher = np.nanpercentile(vals, rng, **method)
else:
lower = np.percentile(vals, rng[0], **method)
higher = np.percentile(vals, rng[1], **method)
lower, higher = np.percentile(vals, rng, **method)
vals = vals[(lower <= vals) & (vals <= higher)]

return vals
Expand Down