Skip to content

Commit

Permalink
Merge 4434330 into 434bae6
Browse files Browse the repository at this point in the history
  • Loading branch information
aulemahal committed Apr 16, 2024
2 parents 434bae6 + 4434330 commit 0b21631
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 27 deletions.
5 changes: 5 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ Version History
v0.13.1 (unreleased)
--------------------

New Features
^^^^^^^^^^^^
* Core bbox and shape subsetting accepts a ``mask_outside`` argument. When False, data outside the shape/box but within the subsetted dataset is not masked (#337).
* Added ``shapely.Polygon`` to the types understood by ``core.subset.subset_shape`` (#337).

Other Changes
^^^^^^^^^^^^^
* Internal warnings now consistently use the `clisops` configured `loguru` logger (#335).
Expand Down
62 changes: 39 additions & 23 deletions clisops/core/subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -933,10 +933,11 @@ def create_weight_masks(

def subset_shape(
ds: Union[xarray.DataArray, xarray.Dataset],
shape: Union[str, Path, gpd.GeoDataFrame],
shape: Union[str, Path, gpd.GeoDataFrame, Polygon],
raster_crs: Optional[Union[str, int]] = None,
shape_crs: Optional[Union[str, int]] = None,
buffer: Optional[Union[int, float]] = None,
mask_outside: bool = True,
start_date: Optional[str] = None,
end_date: Optional[str] = None,
first_level: Optional[Union[float, int]] = None,
Expand All @@ -953,15 +954,19 @@ def subset_shape(
----------
ds : Union[xarray.DataArray, xarray.Dataset]
Input values.
shape : Union[str, Path, gpd.GeoDataFrame]
Path to a shape file, or GeoDataFrame directly. Supports GeoPandas-compatible formats.
shape : Union[str, Path, gpd.GeoDataFrame, shapely.Polygon]
Path to a shape file, or GeoDataFrame, or shapely Polygon. Supports GeoPandas-compatible formats.
raster_crs : Optional[Union[str, int]]
EPSG number or PROJ4 string.
shape_crs : Optional[Union[str, int]]
EPSG number or PROJ4 string.
buffer : Optional[Union[int, float]]
Buffer the shape in order to select a larger region stemming from it.
Units are based on the shape degrees/metres.
mask_outside : bool
This controls what happens to data outside the shape but still inside subsetted dataset.
True (default) masks these points. If False, this is the same as calling :py:func:`subset_bbox`
with the shape's ``total_bounds``.
start_date : Optional[str]
Start date of the subset.
        Date string format -- can be year ("%Y"), year-month ("%Y-%m") or year-month-day ("%Y-%m-%d").
Expand Down Expand Up @@ -1024,6 +1029,8 @@ def subset_shape(

if isinstance(shape, gpd.GeoDataFrame):
poly = shape.copy()
elif isinstance(shape, Polygon):
poly = gpd.GeoDataFrame(geometry=[shape])
else:
poly = gpd.GeoDataFrame.from_file(shape)

Expand All @@ -1038,7 +1045,14 @@ def subset_shape(
# If polygon doesn't cross prime meridian, subset bbox first to reduce processing time.
# Only case not implemented is when lon_bnds cross the 0 deg meridian but dataset grid has all positive lons.
try:
ds_copy = subset_bbox(ds_copy, lon_bnds=lon_bnds, lat_bnds=lat_bnds)
ds_copy = subset_bbox(
ds_copy, lon_bnds=lon_bnds, lat_bnds=lat_bnds, mask_outside=mask_outside
)
if not mask_outside:
if isinstance(ds, xarray.DataArray):
ds_copy = list(ds_copy.data_vars.values())[0]
ds_copy.name = ds.name
return ds_copy
except ValueError as e:
raise ValueError(
"No grid cell centroids found within provided polygon bounding box. "
Expand Down Expand Up @@ -1169,6 +1183,7 @@ def subset_bbox(
da: Union[xarray.DataArray, xarray.Dataset],
lon_bnds: Union[np.array, Tuple[Optional[float], Optional[float]]] = None,
lat_bnds: Union[np.array, Tuple[Optional[float], Optional[float]]] = None,
mask_outside: bool = True,
start_date: Optional[str] = None,
end_date: Optional[str] = None,
first_level: Optional[Union[float, int]] = None,
Expand All @@ -1181,9 +1196,6 @@ def subset_bbox(
Return a subset of a DataArray or Dataset for grid points falling within a spatial bounding box
defined by longitude and latitudinal bounds and for dates falling within provided bounds.
    In the case of a lat-lon rectilinear grid, this simply returns the data falling within the
    bounding box. For data on other grids, a rectangular region enclosing the bounding box is
    returned and, when ``mask_outside`` is True, points outside the box are masked.
Parameters
----------
da : Union[xarray.DataArray, xarray.Dataset]
Expand All @@ -1192,6 +1204,9 @@ def subset_bbox(
List of minimum and maximum longitudinal bounds. Optional. Defaults to all longitudes in original data-array.
lat_bnds : Union[np.array, Tuple[Optional[float], Optional[float]]]
List of minimum and maximum latitudinal bounds. Optional. Defaults to all latitudes in original data-array.
    mask_outside : bool
For data on grids other than latitude-longitude, this controls what happens to data outside
the bbox but still inside the subsetted dataset. True (default) masks these points while False keeps them.
start_date : Optional[str]
Start date of the subset.
Date string format -- can be year ("%Y"), year-month ("%Y-%m") or year-month-day("%Y-%m-%d").
Expand Down Expand Up @@ -1306,22 +1321,23 @@ def subset_bbox(
"the area covered by the bounding box."
)

# Recompute condition on cropped coordinates
if lat_bnds is not None:
lat_cond = in_bounds(lat_b, da[lat])

if lon_bnds is not None:
lon_cond = in_bounds(lon_b, da[lon])

# Mask coordinates outside the bounding box
if isinstance(da, xarray.Dataset):
# If da is a xr.DataSet Mask only variables that have the
# same 2d coordinates as lat (or lon)
for var in da.data_vars:
if set(da[lat].dims).issubset(da[var].dims):
da[var] = da[var].where(lon_cond & lat_cond)
else:
da = da.where(lon_cond & lat_cond)
if mask_outside:
# Recompute condition on cropped coordinates
if lat_bnds is not None:
lat_cond = in_bounds(lat_b, da[lat])

if lon_bnds is not None:
lon_cond = in_bounds(lon_b, da[lon])

# Mask coordinates outside the bounding box
if isinstance(da, xarray.Dataset):
# If da is a xr.DataSet Mask only variables that have the
# same 2d coordinates as lat (or lon)
for var in da.data_vars:
if set(da[lat].dims).issubset(da[var].dims):
da[var] = da[var].where(lon_cond & lat_cond)
else:
da = da.where(lon_cond & lat_cond)

else:
raise (
Expand Down
22 changes: 18 additions & 4 deletions tests/test_core_subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,14 @@ def test_irregular(self):
assert np.all(out.lat.values[mask1.values] >= np.min(self.lat))
assert np.all(out.lat.values[mask1.values] <= np.max(self.lat))

# No mask
out_nomask = subset.subset_bbox(
da, lon_bnds=self.lon, lat_bnds=self.lat, mask_outside=False
)
assert (out.lon == out_nomask.lon).all()
assert (out.lat == out_nomask.lat).all()
assert out_nomask.isnull().sum() == 0

def test_irregular_dataset(self):
da = xr.open_dataset(self.nc_2dlonlat)
out = subset.subset_bbox(da, lon_bnds=[-150, 100], lat_bnds=[10, 60])
Expand Down Expand Up @@ -811,6 +819,14 @@ def test_rotated_pole_with_time(self):
not in [str(q.message) for q in record]
)

# No mask
sub_nomask = subset.subset_shape(
ds, self.eastern_canada_geojson, mask_outside=False
)
assert (sub.lon == sub_nomask.lon).all()
assert (sub.lat == sub_nomask.lat).all()
assert sub_nomask.isnull().sum() == 0

def test_small_poly_buffer(self, tmp_netcdf_filename):
ds = xr.open_dataset(self.nc_file)

Expand Down Expand Up @@ -869,14 +885,13 @@ def test_vectorize_touches_polygons(self):
"""Check that points touching the polygon are included in subset."""
# Create simple polygon
poly = Polygon([[0, 0], [1, 0], [1, 1]])
shape = gpd.GeoDataFrame(geometry=[poly])
# Create simple data array
da = xr.DataArray(
data=[[0, 1], [2, 3]],
dims=("lon", "lat"),
coords={"lon": [0, 1], "lat": [0, 1]},
)
sub = subset.subset_shape(da, shape=shape)
sub = subset.subset_shape(da, shape=poly)
assert sub.notnull().sum() == 3

def test_locstream(self):
Expand All @@ -889,8 +904,7 @@ def test_locstream(self):
},
)
poly = Polygon([[-90, 40], [-70, 20], [-50, 40], [-70, 60]])
shape = gpd.GeoDataFrame(geometry=[poly])
sub = subset.subset_shape(da, shape=shape)
sub = subset.subset_shape(da, shape=poly)
exp = da.isel(site=[1, 3, 4, 5, 7])
xr.testing.assert_identical(sub, exp)

Expand Down

0 comments on commit 0b21631

Please sign in to comment.