Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix 224 inconsistent bounds #225

Merged
merged 6 commits into from
May 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 29 additions & 2 deletions clisops/ops/base_operation.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,15 @@ def _remove_redundant_fill_values(self, ds):
"""
Get coordinate variables and remove fill values added by xarray (CF conventions say that coordinate variables cannot have missing values).
Get bounds variables and remove fill values added by xarray.

See issue: https://github.com/roocs/clisops/issues/224
"""
if isinstance(ds, xr.Dataset):
main_var = get_main_variable(ds)
for coord_id in ds[main_var].coords:
# remove fill value from coordinate variables
if ds.coords[coord_id].dims == (coord_id,):
ds[coord_id].encoding["_FillValue"] = None
# if ds.coords[coord_id].dims == (coord_id,):
ds[coord_id].encoding["_FillValue"] = None
# remove fill value from bounds variables if they exist
try:
bnd = ds.cf.get_bounds(coord_id).name
Expand All @@ -86,6 +88,29 @@ def _remove_redundant_fill_values(self, ds):
continue
return ds

def _remove_redundant_coordinates_from_bounds(self, ds):
    """
    Clear the ``coordinates`` encoding entry on every bounds variable.

    Bounds variables can end up carrying a ``coordinates`` entry, e.g.:

        double time_bnds(time, bnds) ;
            time_bnds:coordinates = "height" ;

    Programs like cdo will complain about this:

        Warning (cdf_set_var): Inconsistent variable definition for time_bnds!

    For each coordinate of the main variable that has a bounds variable,
    ``encoding["coordinates"]`` of that bounds variable is set to ``None``.
    Inputs that are not an ``xr.Dataset`` are returned untouched.

    See issue: https://github.com/roocs/clisops/issues/224

    :param ds: dataset (or other object) to clean up; modified in place.
    :return: the same ``ds`` object that was passed in.
    """
    # Guard clause: only datasets have bounds variables to clean up.
    if not isinstance(ds, xr.Dataset):
        return ds

    primary_var = get_main_variable(ds)
    for coord_name in ds[primary_var].coords:
        try:
            bounds_name = ds.cf.get_bounds(coord_name).name
            ds[bounds_name].encoding["coordinates"] = None
        except KeyError:
            # A KeyError here is treated as "nothing to clean for this
            # coordinate"; move on to the next one.
            pass
    return ds

def process(self):
"""
Main processing method used by all sub-classes.
Expand All @@ -108,6 +133,8 @@ def process(self):

# remove fill values from lat/lon/time if required
processed_ds = self._remove_redundant_fill_values(processed_ds)
# remove redundant coordinates from bounds
processed_ds = self._remove_redundant_coordinates_from_bounds(processed_ds)

# Work out how many outputs should be created based on the size
# of the array. Manage this as a list of time slices.
Expand Down
29 changes: 29 additions & 0 deletions tests/ops/test_subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1604,18 +1604,47 @@ def test_subset_nc_no_fill_value(cmip5_tas_file, tmpdir):
ds.to_netcdf(f"{tmpdir}/test_fill_values.nc")
ds = _load_ds(f"{tmpdir}/test_fill_values.nc")

# assert np.isnan(float(ds.time.encoding.get("_FillValue")))
assert np.isnan(float(ds.lat.encoding.get("_FillValue")))
assert np.isnan(float(ds.lon.encoding.get("_FillValue")))
assert np.isnan(float(ds.height.encoding.get("_FillValue")))

assert np.isnan(float(ds.lat_bnds.encoding.get("_FillValue")))
assert np.isnan(float(ds.lon_bnds.encoding.get("_FillValue")))
assert np.isnan(float(ds.time_bnds.encoding.get("_FillValue")))

# check that there is no fill value in encoding for coordinate variables and bounds
res = _load_ds(result)
assert "_FillValue" not in res.time.encoding
assert "_FillValue" not in res.lat.encoding
assert "_FillValue" not in res.lon.encoding
assert "_FillValue" not in res.height.encoding

assert "_FillValue" not in res.lat_bnds.encoding
assert "_FillValue" not in res.lon_bnds.encoding
assert "_FillValue" not in res.time_bnds.encoding


def test_subset_nc_consistent_bounds(cmip5_tas_file, tmpdir):
    """Check encodings of coordinates and bounds after a time subset to netCDF.

    The subset result is reloaded and inspected to make sure that:
    - no bounds variable has ``_FillValue`` in its encoding,
    - no coordinate variable has ``_FillValue`` in its encoding,
    - no bounds variable has ``coordinates`` in its encoding.
    """
    output = subset(
        ds=CMIP5_TAS,
        time=time_interval("2005-01-01T00:00:00", "2020-12-30T00:00:00"),
        output_dir=tmpdir,
        output_type="nc",
        file_namer="simple",
    )
    res = _load_ds(output)

    bounds_names = ("lat_bnds", "lon_bnds", "time_bnds")
    coord_names = ("time", "lat", "lon", "height")

    # check fill value in bounds
    for name in bounds_names:
        assert "_FillValue" not in getattr(res, name).encoding
    # check fill value in coordinates
    for name in coord_names:
        assert "_FillValue" not in getattr(res, name).encoding
    # check coordinates in bounds
    for name in bounds_names:
        assert "coordinates" not in getattr(res, name).encoding