From 96953db8a3ce5ed9e522e2120adf7d54b2911e76 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mart=C3=AD=20Bosch?= <marti.bosch@protonmail.com>
Date: Mon, 11 Sep 2023 12:34:53 +0200
Subject: [PATCH] feat: 3.0.0rc0 (#40)

* fix: proper geometry column in zonal analysis

* fix: convert zone_index list to series in zonal analysis

* fix: missing CRS in zonal statistics gdf

* feat: spatiotemporalzonal statistics gdf

* ci: test running pylandstats-notebooks
---
 .github/workflows/tests.yml   | 10 ++++++
 pylandstats/spatiotemporal.py | 58 +++++++++++++++++++++++++++++++
 pylandstats/zonal.py          | 16 +++++++--
 tests/test_pylandstats.py     | 65 +++++++++++++++++++++++++++--------
 4 files changed, 133 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 3eeaddb..9ac8d87 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -43,5 +43,15 @@ jobs:
       - name: upload coverage reports to Codecov
         uses: codecov/codecov-action@v3
 
+      - name: Test notebooks
+        run: |
+          git clone https://github.com/martibosch/pylandstats-notebooks
+          cd pylandstats-notebooks
+          mamba env update -f environment.yml
+          snakemake -c1 register_ipykernel
+          snakemake -c1 lulc_tifs elev_zones
+          snakemake -c1 run_notebooks
+          cd ..
+
       - name: list files
         run: ls -l .
diff --git a/pylandstats/spatiotemporal.py b/pylandstats/spatiotemporal.py
index 820bf01..a287f12 100644
--- a/pylandstats/spatiotemporal.py
+++ b/pylandstats/spatiotemporal.py
@@ -1,6 +1,7 @@
 """Spatio-temporal analysis."""
 import functools
 
+import geopandas as gpd
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
@@ -325,6 +326,63 @@ def compute_landscape_metrics_df(  # noqa: D102
         )
     )
 
+    def compute_zonal_statistics_gdf(
+        self, metrics, *, class_val=None, metrics_kws=None
+    ):
+        """Compute the zonal statistics geo-data frame over the landscape raster.
+
+        Parameters
+        ----------
+        metrics : list-like, optional
+            A list-like of strings with the names of the metrics that should be
+            computed. If `None`, all the implemented class-level metrics will be
+            computed.
+        class_val : int, optional
+            If provided, the zonal statistics will be computed at the level of the
+            corresponding class, otherwise they will be computed at the landscape level.
+        metrics_kws : dict, optional
+            Dictionary mapping the keyword arguments (values) that should be passed to
+            each metric method (key), e.g., to exclude the boundary from the computation
+            of `total_edge`, metric_kws should map the string 'total_edge' (method name)
+            to {'count_boundary': False}. If `None`, each metric will be computed
+            according to FRAGSTATS defaults.
+
+        Returns
+        -------
+        zonal_statistics_gdf : geopandas.GeoDataFrame
+            Geo-data frame with the computed zonal statistics.
+        """
+
+        # TODO: DRY with `ZonalAnalysis.compute_zonal_statistics_gdf`?
+        def _compute_zonal_metrics_df(sta):
+            if class_val is None:
+                zonal_metrics_df = sta.compute_landscape_metrics_df(
+                    metrics=metrics, metrics_kws=metrics_kws
+                )
+            else:
+                zonal_metrics_df = sta.compute_class_metrics_df(
+                    metrics=metrics, classes=[class_val], metrics_kws=metrics_kws
+                )
+            return zonal_metrics_df
+
+        zonal_metrics_df = pd.concat(
+            [
+                _compute_zonal_metrics_df(sta).assign(**{self.attribute_name: zone})
+                for zone, sta in zip(self.zone_gser.index, self.stas)
+            ]
+        )
+
+        return gpd.GeoDataFrame(
+            # first set zone as outermost index
+            zonal_metrics_df.reset_index().set_index(
+                [self.attribute_name] + zonal_metrics_df.index.names
+            ),
+            geometry=zonal_metrics_df.reset_index()[self.attribute_name]
+            .map(self.zone_gser)
+            .values,
+            crs=self.zone_gser.crs,
+        )
+
     def plot_metric(  # noqa: D102
         self,
         metric,
diff --git a/pylandstats/zonal.py b/pylandstats/zonal.py
index 99143e5..f6e1702 100644
--- a/pylandstats/zonal.py
+++ b/pylandstats/zonal.py
@@ -107,7 +107,13 @@ def __init__(
                 if zone_index is not None:
                     # we get the index after calling `set_index` because this will give
                     # us the right index both when `zone_index` is a column name or a
-                    # list-like
+                    # list-like.
+                    # note that if zone_index is a list, pandas will try to interpret
+                    # its values as column names, so we need to convert it to a numpy
+                    # array/pandas series first so that the values are set as index.
+                    # we will convert it to a pandas series so that we can set a name.
+                    if isinstance(zone_index, list):
+                        zone_index = pd.Series(zone_index, name="zone")
                     zone_index = zones.set_index(zone_index).index
                     # we now take just the "geometry" column and treat `zones` as
                     # GeoSeries.
@@ -220,7 +226,13 @@ def compute_zonal_statistics_gdf(
         # ensure that we have numeric types (not strings)
         # metric_ser = pd.to_numeric(metric_ser)
 
-        return gpd.GeoDataFrame(zonal_metrics_df, geometry=self.zone_gser)
+        return gpd.GeoDataFrame(
+            zonal_metrics_df,
+            geometry=zonal_metrics_df.reset_index()[self.attribute_name]
+            .map(self.zone_gser)
+            .values,
+            crs=self.zone_gser.crs,
+        )
 
 
 class BufferAnalysis(ZonalAnalysis):
diff --git a/tests/test_pylandstats.py b/tests/test_pylandstats.py
index a3140a9..0ee65d1 100644
--- a/tests/test_pylandstats.py
+++ b/tests/test_pylandstats.py
@@ -927,20 +927,27 @@ def test_compute_zonal_statistics_gdf(self):
 
         # test that the gdf has the proper shape (number of zones, number of metrics +
         # geometry column)
-        metrics = ["patch_density"]
-        zs_gdf = za.compute_zonal_statistics_gdf(metrics)
-        self.assertEqual(zs_gdf.shape, (len(self.zone_gdf), len(metrics) + 1))
-
-        # test that the zonal statistics when excluding boundaries should be less or
-        # equal than including them
-        metric = "total_edge"
-        metric_kws = {"count_boundary": True}
-        self.assertLessEqual(
-            za.compute_zonal_statistics_gdf([metric])[metric].sum(),
-            za.compute_zonal_statistics_gdf([metric], metrics_kws={metric: metric_kws})[
-                metric
-            ].sum(),
-        )
+        for class_val in [None, za.present_classes[0]]:
+            metrics = ["patch_density"]
+            zs_gdf = za.compute_zonal_statistics_gdf(metrics, class_val=class_val)
+            self.assertEqual(zs_gdf.shape, (len(self.zone_gdf), len(metrics) + 1))
+            # test that the crs is set correctly
+            self.assertEqual(zs_gdf.crs, self.zone_gdf.crs)
+            # test that the geometry column is not None
+            self.assertFalse(zs_gdf.geometry.isna().any())
+
+            # test that the zonal statistics when excluding boundaries should be less or
+            # equal than including them
+            metric = "total_edge"
+            metric_kws = {"count_boundary": True}
+            self.assertLessEqual(
+                za.compute_zonal_statistics_gdf([metric], class_val=class_val)[
+                    metric
+                ].sum(),
+                za.compute_zonal_statistics_gdf(
+                    [metric], class_val=class_val, metrics_kws={metric: metric_kws}
+                )[metric].sum(),
+            )
 
     def test_buffer_init(self):
         naive_gser = gpd.GeoSeries([self.geom])
@@ -1223,6 +1230,36 @@ def test_dataframes(self):
                 )
             )
 
+    def test_compute_zonal_statistics_gdf(self):
+        for _class, init_args, init_kws, _ in self.init_combinations:
+            stza = _class(self.landscape_fps, *init_args, dates=self.dates, **init_kws)
+            # test that the gdf has the proper shape (number of zones, number of metrics
+            # + geometry column)
+            for class_val in [None, stza.present_classes[0]]:
+                metrics = ["patch_density"]
+                zs_gdf = stza.compute_zonal_statistics_gdf(metrics, class_val=class_val)
+                self.assertEqual(
+                    zs_gdf.shape,
+                    (len(stza.zone_gser) * len(self.dates), len(metrics) + 1),
+                )
+                # test that the crs is set correctly
+                self.assertEqual(zs_gdf.crs, self.zone_gser.crs)
+                # test that the geometry column is not None
+                self.assertFalse(zs_gdf.geometry.isna().any())
+
+                # test that the zonal statistics when excluding boundaries should be
+                # less or equal than including them
+                metric = "total_edge"
+                metric_kws = {"count_boundary": True}
+                self.assertLessEqual(
+                    stza.compute_zonal_statistics_gdf([metric], class_val=class_val)[
+                        metric
+                    ].sum(),
+                    stza.compute_zonal_statistics_gdf(
+                        [metric], class_val=class_val, metrics_kws={metric: metric_kws}
+                    )[metric].sum(),
+                )
+
     def test_plot_metric(self):
         for _class, init_args, init_kws, _ in self.init_combinations:
             stza = _class(self.landscape_fps, *init_args, dates=self.dates, **init_kws)