pysal · jGaboardi · Apr 25, 2024 · Apr 24, 2024 · Apr 24, 2024 · Apr 24, 2024
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
@@ -31,11 +31,13 @@ jobs:
           - ci/envs/312-dev.yaml
         include:
           - environment-file: ci/envs/312-latest.yaml
-            os: macos-latest
+            os: macos-13 # Intel
           - environment-file: ci/envs/312-latest.yaml
             os: macos-14 # Apple Silicon
           - environment-file: ci/envs/312-latest.yaml
             os: windows-latest
+          - environment-file: ci/envs/312-min.yaml
+            os: ubuntu-latest
     defaults:
       run:
         shell: bash -l {0}

diff --git a/ci/envs/310-latest.yaml b/ci/envs/310-latest.yaml
@@ -13,6 +13,7 @@ dependencies:
   - shapely>=2
   - esda
   - tqdm
+  - numba
   # testing
   - codecov
   - pytest

diff --git a/ci/envs/310-oldest.yaml b/ci/envs/310-oldest.yaml
@@ -13,6 +13,7 @@ dependencies:
   - pandas>=1.4.0,!=1.5.0,<2
   - shapely=2.0
   - tqdm=4.63.0
+  - numba
   # testing
   - codecov
   - pytest

diff --git a/ci/envs/311-latest.yaml b/ci/envs/311-latest.yaml
@@ -13,6 +13,7 @@ dependencies:
   - shapely>=2
   - esda
   - tqdm
+  - numba
   # testing
   - codecov
   - pytest

diff --git a/ci/envs/312-dev.yaml b/ci/envs/312-dev.yaml
@@ -13,6 +13,7 @@ dependencies:
   - pandas!=1.5.0
   - shapely>=2
   - tqdm
+  - numba
   # testing
   - codecov
   - pytest

diff --git a/ci/envs/312-latest.yaml b/ci/envs/312-latest.yaml
@@ -14,6 +14,7 @@ dependencies:
   - shapely>=2
   - esda
   - tqdm
+  - numba
   # testing
   - codecov
   - pytest

diff --git a/ci/envs/312-min.yaml b/ci/envs/312-min.yaml
@@ -0,0 +1,32 @@
+name: test
+channels:
+  - conda-forge
+dependencies:
+  - python=3.12
+  - geopandas
+  - inequality
+  - libpysal>=4.10.0
+  - mapclassify
+  - networkx
+  - osmnx
+  - packaging
+  - pandas!=1.5.0
+  - shapely>=2
+  - esda
+  - tqdm
+  # testing
+  - codecov
+  - pytest
+  - pytest-cov
+  # user guide testing
+  - dask
+  - inequality
+  - jupyter
+  - matplotlib
+  - osmnx
+  - clustergram
+  - bokeh
+  - geopy
+  - ipywidgets
+  - Iprogress
+
diff --git a/momepy/functional/_diversity.py b/momepy/functional/_diversity.py
@@ -1,10 +1,9 @@
+import warnings
+
 import numpy as np
 from libpysal.graph import Graph
 from numpy.typing import NDArray
 from pandas import DataFrame, Series
-from scipy import stats
-
-from ..utils import limit_range
 
 __all__ = ["describe"]
 
@@ -26,6 +25,9 @@ def describe(
     The index of ``values`` must match the index along which the ``graph`` is
     built.
 
+    The numba package is used extensively in this function to accelerate the computation
+    of statistics. Without numba, these computations may become slow on large data.
+
     Parameters
     ----------
     y : NDArray[np.float_] | Series
@@ -46,10 +48,42 @@ def describe(
     DataFrame
         A DataFrame with descriptive statistics.
     """
+    try:
+        from numba import njit
+    except (ModuleNotFoundError, ImportError):
+        warnings.warn(
+            "The numba package is used extensively in this function to accelerate the"
+            " computation of statistics but it is not installed or  cannot be imported."
+            " Without numba, these computations may become slow on large data.",
+            UserWarning,
+            stacklevel=2,
+        )
+        from libpysal.common import jit as njit
 
+    @njit
+    def _mode(array):
+        """Custom mode function for numba."""
+        array = np.sort(array.ravel())
+        mask = np.empty(array.shape, dtype=np.bool_)
+        mask[:1] = True
+        mask[1:] = array[1:] != array[:-1]
+        unique = array[mask]
+        idx = np.nonzero(mask)[0]
+        idx = np.append(idx, mask.size)
+        counts = np.diff(idx)
+        return unique[np.argmax(counts)]
+
+    @njit
     def _describe(values, q, include_mode=False):
         """Helper function to calculate average."""
-        values = limit_range(values.values, q)
+        nan_tracker = np.isnan(values)
+
+        if (len(values) > 2) and (not nan_tracker.all()):
+            if nan_tracker.any():
+                lower, higher = np.nanpercentile(values, q)
+            else:
+                lower, higher = np.percentile(values, q)
+            values = values[(lower <= values) & (values <= higher)]
 
         results = [
             np.mean(values),
@@ -60,7 +94,7 @@ def _describe(values, q, include_mode=False):
             np.sum(values),
         ]
         if include_mode:
-            results.append(stats.mode(values, keepdims=False)[0])
+            results.append(_mode(values))
         return results
 
     if not isinstance(y, Series):
@@ -73,9 +107,9 @@ def _describe(values, q, include_mode=False):
     if q is None:
         stat_ = grouper.agg(["mean", "median", "std", "min", "max", "sum"])
         if include_mode:
-            stat_["mode"] = grouper.agg(lambda x: stats.mode(x, keepdims=False)[0])
+            stat_["mode"] = grouper.agg(lambda x: _mode(x.values))
     else:
-        agg = grouper.agg(_describe, q=q, include_mode=include_mode)
+        agg = grouper.agg(lambda x: _describe(x.values, q=q, include_mode=include_mode))
         stat_ = DataFrame(zip(*agg, strict=True)).T
         cols = ["mean", "median", "std", "min", "max", "sum"]
         if include_mode: