From 96953db8a3ce5ed9e522e2120adf7d54b2911e76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mart=C3=AD=20Bosch?= Date: Mon, 11 Sep 2023 12:34:53 +0200 Subject: [PATCH] feat: 3.0.0rc0 (#40) * fix: proper geometry column in zonal analysis * fix: convert zone_index list to series in zonal analysis * fix: missing CRS in zonal statistics gdf * feat: spatiotemporalzonal statistics gdf * ci: test running pylandstats-notebooks --- .github/workflows/tests.yml | 10 ++++++ pylandstats/spatiotemporal.py | 58 +++++++++++++++++++++++++++++++ pylandstats/zonal.py | 16 +++++++-- tests/test_pylandstats.py | 65 +++++++++++++++++++++++++++-------- 4 files changed, 133 insertions(+), 16 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3eeaddb..9ac8d87 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -43,5 +43,15 @@ jobs: - name: upload coverage reports to Codecov uses: codecov/codecov-action@v3 + - name: Test notebooks + run: | + git clone https://github.com/martibosch/pylandstats-notebooks + cd pylandstats-notebooks + mamba env update -f environment.yml + snakemake -c1 register_ipykernel + snakemake -c1 lulc_tifs elev_zones + snakemake -c1 run_notebooks + cd .. + - name: list files run: ls -l . diff --git a/pylandstats/spatiotemporal.py b/pylandstats/spatiotemporal.py index 820bf01..a287f12 100644 --- a/pylandstats/spatiotemporal.py +++ b/pylandstats/spatiotemporal.py @@ -1,6 +1,7 @@ """Spatio-temporal analysis.""" import functools +import geopandas as gpd import matplotlib.pyplot as plt import numpy as np import pandas as pd @@ -325,6 +326,63 @@ def compute_landscape_metrics_df( # noqa: D102 ) ) + def compute_zonal_statistics_gdf( + self, metrics, *, class_val=None, metrics_kws=None + ): + """Compute the zonal statistics geo-data frame over the landscape raster. + + Parameters + ---------- + metrics : list-like, optional + A list-like of strings with the names of the metrics that should be + computed. If `None`, all the implemented class-level metrics will be + computed. + class_val : int, optional + If provided, the zonal statistics will be computed at the level of the + corresponding class, otherwise they will be computed at the landscape level. + metrics_kws : dict, optional + Dictionary mapping the keyword arguments (values) that should be passed to + each metric method (key), e.g., to exclude the boundary from the computation + of `total_edge`, metric_kws should map the string 'total_edge' (method name) + to {'count_boundary': False}. If `None`, each metric will be computed + according to FRAGSTATS defaults. + + Returns + ------- + zonal_statistics_gdf : geopandas.GeoDataFrame + Geo-data frame with the computed zonal statistics. + """ + + # TODO: DRY with `ZonalAnalysis.compute_zonal_statistics_gdf`? + def _compute_zonal_metrics_df(sta): + if class_val is None: + zonal_metrics_df = sta.compute_landscape_metrics_df( + metrics=metrics, metrics_kws=metrics_kws + ) + else: + zonal_metrics_df = sta.compute_class_metrics_df( + metrics=metrics, classes=[class_val], metrics_kws=metrics_kws + ) + return zonal_metrics_df + + zonal_metrics_df = pd.concat( + [ + _compute_zonal_metrics_df(sta).assign(**{self.attribute_name: zone}) + for zone, sta in zip(self.zone_gser.index, self.stas) + ] + ) + + return gpd.GeoDataFrame( + # first set zone as outermost index + zonal_metrics_df.reset_index().set_index( + [self.attribute_name] + zonal_metrics_df.index.names + ), + geometry=zonal_metrics_df.reset_index()[self.attribute_name] + .map(self.zone_gser) + .values, + crs=self.zone_gser.crs, + ) + def plot_metric( # noqa: D102 self, metric, diff --git a/pylandstats/zonal.py b/pylandstats/zonal.py index 99143e5..f6e1702 100644 --- a/pylandstats/zonal.py +++ b/pylandstats/zonal.py @@ -107,7 +107,13 @@ def __init__( if zone_index is not None: # we get the index after calling `set_index` because this will give # us the right index both when `zone_index` is a column name or a - # list-like + # list-like. + # note that if zone_index is a list, pandas will try to interpret + # its values as column names, so we need to convert it to a numpy + # array/pandas series first so that the values are set as index. + # we will convert it to a pandas series so that we can set a name. + if isinstance(zone_index, list): + zone_index = pd.Series(zone_index, name="zone") zone_index = zones.set_index(zone_index).index # we now take just the "geometry" column and treat `zones` as # GeoSeries. @@ -220,7 +226,13 @@ def compute_zonal_statistics_gdf( # ensure that we have numeric types (not strings) # metric_ser = pd.to_numeric(metric_ser) - return gpd.GeoDataFrame(zonal_metrics_df, geometry=self.zone_gser) + return gpd.GeoDataFrame( + zonal_metrics_df, + geometry=zonal_metrics_df.reset_index()[self.attribute_name] + .map(self.zone_gser) + .values, + crs=self.zone_gser.crs, + ) class BufferAnalysis(ZonalAnalysis): diff --git a/tests/test_pylandstats.py b/tests/test_pylandstats.py index a3140a9..0ee65d1 100644 --- a/tests/test_pylandstats.py +++ b/tests/test_pylandstats.py @@ -927,20 +927,27 @@ def test_compute_zonal_statistics_gdf(self): # test that the gdf has the proper shape (number of zones, number of metrics + # geometry column) - metrics = ["patch_density"] - zs_gdf = za.compute_zonal_statistics_gdf(metrics) - self.assertEqual(zs_gdf.shape, (len(self.zone_gdf), len(metrics) + 1)) - - # test that the zonal statistics when excluding boundaries should be less or - # equal than including them - metric = "total_edge" - metric_kws = {"count_boundary": True} - self.assertLessEqual( - za.compute_zonal_statistics_gdf([metric])[metric].sum(), - za.compute_zonal_statistics_gdf([metric], metrics_kws={metric: metric_kws})[ - metric - ].sum(), - ) + for class_val in [None, za.present_classes[0]]: + metrics = ["patch_density"] + zs_gdf = za.compute_zonal_statistics_gdf(metrics, class_val=class_val) + self.assertEqual(zs_gdf.shape, (len(self.zone_gdf), len(metrics) + 1)) + # test that the crs is set correctly + self.assertEqual(zs_gdf.crs, self.zone_gdf.crs) + # test that the geometry column is not None + self.assertFalse(zs_gdf.geometry.isna().any()) + + # test that the zonal statistics when excluding boundaries should be less or + # equal than including them + metric = "total_edge" + metric_kws = {"count_boundary": True} + self.assertLessEqual( + za.compute_zonal_statistics_gdf([metric], class_val=class_val)[ + metric + ].sum(), + za.compute_zonal_statistics_gdf( + [metric], class_val=class_val, metrics_kws={metric: metric_kws} + )[metric].sum(), + ) def test_buffer_init(self): naive_gser = gpd.GeoSeries([self.geom]) @@ -1223,6 +1230,36 @@ def test_dataframes(self): ) ) + def test_compute_zonal_statistics_gdf(self): + for _class, init_args, init_kws, _ in self.init_combinations: + stza = _class(self.landscape_fps, *init_args, dates=self.dates, **init_kws) + # test that the gdf has the proper shape (number of zones, number of metrics + # + geometry column) + for class_val in [None, stza.present_classes[0]]: + metrics = ["patch_density"] + zs_gdf = stza.compute_zonal_statistics_gdf(metrics, class_val=class_val) + self.assertEqual( + zs_gdf.shape, + (len(stza.zone_gser) * len(self.dates), len(metrics) + 1), + ) + # test that the crs is set correctly + self.assertEqual(zs_gdf.crs, self.zone_gser.crs) + # test that the geometry column is not None + self.assertFalse(zs_gdf.geometry.isna().any()) + + # test that the zonal statistics when excluding boundaries should be + # less or equal than including them + metric = "total_edge" + metric_kws = {"count_boundary": True} + self.assertLessEqual( + stza.compute_zonal_statistics_gdf([metric], class_val=class_val)[ + metric + ].sum(), + stza.compute_zonal_statistics_gdf( + [metric], class_val=class_val, metrics_kws={metric: metric_kws} + )[metric].sum(), + ) + def test_plot_metric(self): for _class, init_args, init_kws, _ in self.init_combinations: stza = _class(self.landscape_fps, *init_args, dates=self.dates, **init_kws)