From e2276c3bbf15995bfc379c2705abbae619e5c489 Mon Sep 17 00:00:00 2001 From: Marco Braun Date: Thu, 6 Jul 2023 18:09:17 -0400 Subject: [PATCH 1/4] fixed inner_mask for locstream case (previously all lat/lon inside inner_mask were returned == bounding box of polygon) --- clisops/core/subset.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clisops/core/subset.py b/clisops/core/subset.py index 7124134d..c5a7a2a1 100644 --- a/clisops/core/subset.py +++ b/clisops/core/subset.py @@ -1134,6 +1134,10 @@ def subset_shape( # inner_mask including the shapes inner_mask = mask_2d.notnull() | inner_mask + # in the locstream case inner_mask remains all True, but all non-polygon values can be dropped, + # so here "outside inner_mask" is everything outside the polygon. + if len(sp_dims) == 1: + inner_mask = mask_2d.notnull() # loop through variables for v in ds_copy.data_vars: From 2e9d8e158804eaf77237dbc802b03c01269e8f67 Mon Sep 17 00:00:00 2001 From: Marco Braun <43412203+vindelico@users.noreply.github.com> Date: Mon, 24 Jul 2023 12:37:52 -0400 Subject: [PATCH 2/4] Update clisops/core/subset.py Co-authored-by: Pascal Bourgault --- clisops/core/subset.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/clisops/core/subset.py b/clisops/core/subset.py index 58868ec2..7101c7b6 100644 --- a/clisops/core/subset.py +++ b/clisops/core/subset.py @@ -1107,11 +1107,12 @@ def subset_shape( # True in the inner zone, False in the outer inner_mask = inner_mask & (left != 0) & (right != 0) - # inner_mask including the shapes - inner_mask = mask_2d.notnull() | inner_mask - # in the locstream case inner_mask remains all True, but all non-polygon values can be dropped, - # so here "outside inner_mask" is everything outside the polygon. - if len(sp_dims) == 1: + if len(sp_dims) > 1: + # inner_mask including the shapes + inner_mask = mask_2d.notnull() | inner_mask + else: + # in the locstream case inner_mask remains all True, but all non-polygon values can be dropped, + # so here "outside inner_mask" is everything outside the polygon. inner_mask = mask_2d.notnull() # loop through variables From 234bbfa987daf8d5300a10c94eeb94be3d1dd888 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Tue, 25 Jul 2023 15:01:56 -0400 Subject: [PATCH 3/4] Fix for when sp dims are not coords - cleaner path for locstream --- clisops/core/subset.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/clisops/core/subset.py b/clisops/core/subset.py index 7101c7b6..572178df 100644 --- a/clisops/core/subset.py +++ b/clisops/core/subset.py @@ -1095,19 +1095,19 @@ def subset_shape( sp_dims = set(mask_2d.dims) # Spatial dimensions - # Find the outer mask. When subsetting unconnected shapes, - # we don't want to drop the inner NaN regions, it may cause problems downstream. - inner_mask = xarray.full_like(mask_2d, True, dtype=bool) - for dim in sp_dims: - # For each dimension, propagate shape indexes in either directions - # Then sum on the other dimension. You get a step function going from 0 to X. - # The non-zero part that left and right have in common is the "inner" zone. - left = mask_2d.bfill(dim).sum(sp_dims - {dim}) - right = mask_2d.ffill(dim).sum(sp_dims - {dim}) - # True in the inner zone, False in the outer - inner_mask = inner_mask & (left != 0) & (right != 0) - if len(sp_dims) > 1: + # Find the outer mask. When subsetting unconnected shapes, + # we don't want to drop the inner NaN regions, it may cause problems downstream. + inner_mask = xarray.full_like(mask_2d, True, dtype=bool) + for dim in sp_dims: + # For each dimension, propagate shape indexes in either directions + # Then sum on the other dimension. You get a step function going from 0 to X. + # The non-zero part that left and right have in common is the "inner" zone. + left = mask_2d.bfill(dim).sum(sp_dims - {dim}) + right = mask_2d.ffill(dim).sum(sp_dims - {dim}) + # True in the inner zone, False in the outer + inner_mask = inner_mask & (left != 0) & (right != 0) + # inner_mask including the shapes inner_mask = mask_2d.notnull() | inner_mask else: @@ -1124,9 +1124,13 @@ def subset_shape( # Remove grid points outside the inner mask # Then extract the coords. # Using a where(inner_mask) on ds_copy triggers warnings with dask, sel seems safer. - mask_2d = mask_2d.where(inner_mask, drop=True) - for dim in sp_dims: - ds_copy = ds_copy.sel({dim: mask_2d[dim]}) + # But this only works if dims have coords. + if set(sp_dims).issubset(ds_copy.coords.keys()): + mask_2d = mask_2d.where(inner_mask, drop=True) + for dim in sp_dims: + ds_copy = ds_copy.sel({dim: mask_2d[dim]}) + else: + ds_copy = ds_copy.where(inner_mask, drop=True) # Add a CRS definition using CF conventions and as a global attribute in CRS_WKT for reference purposes ds_copy.attrs["crs"] = raster_crs.to_string() From e9f1d9e0adacec335c4c4341ee13aed4034fc4b6 Mon Sep 17 00:00:00 2001 From: Marco Braun Date: Tue, 25 Jul 2023 17:56:03 -0400 Subject: [PATCH 4/4] change test for subset_shape, locstream case --- tests/core/test_subset.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/core/test_subset.py b/tests/core/test_subset.py index 5c2624e0..3cc1f124 100644 --- a/tests/core/test_subset.py +++ b/tests/core/test_subset.py @@ -876,17 +876,17 @@ def test_vectorize_touches_polygons(self): def test_locstream(self): da = xr.DataArray( - [1, 2, 3, 4], + [1, 2, 3, 4, 5, 6, 7, 8, 9], dims=("site",), coords={ - "lat": (("site",), [10, 30, 20, 40]), - "lon": (("site",), [-50, -80, -70, -100]), + "lat": (("site",), [55, 55, 55, 40, 40, 40, 25, 25, 25]), + "lon": (("site",), [-80, -70, -60, -80, -70, -60, -80, -70, -60]), }, ) - poly = Polygon([[-90, 15], [-65, 15], [-65, 35], [-90, 35]]) + poly = Polygon([[-90, 40], [-70, 20], [-50, 40], [-70, 60]]) shape = gpd.GeoDataFrame(geometry=[poly]) sub = subset.subset_shape(da, shape=shape) - exp = da.isel(site=[1, 2]) + exp = da.isel(site=[1, 3, 4, 5, 7]) xr.testing.assert_identical(sub, exp)