Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CF: also decode time bounds when available #2571

Merged
merged 7 commits into from
Dec 19, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 8 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,14 @@ v0.11.1 (unreleased)
Breaking changes
~~~~~~~~~~~~~~~~

- Time bounds variables are now also decoded according to CF conventions
(:issue:`2565`). The previous behavior was to decode them only if they
had specific time attributes; now these attributes are copied
automatically from the corresponding time coordinate. This might
break downstream code that was relying on these variables not being
decoded.
By `Fabien Maussion <https://github.com/fmaussion>`_.

Enhancements
~~~~~~~~~~~~

Expand All @@ -46,7 +54,6 @@ Enhancements
<https://github.com/spencerkclark>`_.
- Support Dask ``HighLevelGraphs`` by `Matthew Rocklin <https://matthewrocklin.com>`_.


Bug fixes
~~~~~~~~~

Expand Down
34 changes: 33 additions & 1 deletion xarray/conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,11 +320,39 @@ def decode_cf_variable(name, var, concat_characters=True, mask_and_scale=True,
return Variable(dimensions, data, attributes, encoding=encoding)


def _update_bounds_attributes(variables):
"""Adds time attributes to time bounds variables.

Variables handling time bounds ("Cell boundaries" in the CF
conventions) do not necessarily carry the necessary attributes to be
decoded. This copies the attributes from the time variable to the
associated boundaries.

See Also:

http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/
cf-conventions.html#cell-boundaries

https://github.com/pydata/xarray/issues/2565
"""

# For all time variables with bounds
for v in variables.values():
attrs = v.attrs
has_date_units = 'units' in attrs and 'since' in attrs['units']
if has_date_units and 'bounds' in attrs:
if attrs['bounds'] in variables:
bounds_attrs = variables[attrs['bounds']].attrs
bounds_attrs.setdefault('units', attrs['units'])
if 'calendar' in attrs:
bounds_attrs.setdefault('calendar', attrs['calendar'])


def decode_cf_variables(variables, attributes, concat_characters=True,
mask_and_scale=True, decode_times=True,
decode_coords=True, drop_variables=None):
"""
Decode a several CF encoded variables.
Decode several CF encoded variables.

See: decode_cf_variable
"""
Expand All @@ -350,6 +378,10 @@ def stackable(dim):
drop_variables = []
drop_variables = set(drop_variables)

# Time bounds coordinates might miss the decoding attributes
if decode_times:
_update_bounds_attributes(variables)

new_vars = OrderedDict()
for k, v in iteritems(variables):
if k in drop_variables:
Expand Down
36 changes: 36 additions & 0 deletions xarray/tests/test_coding_times.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from xarray import DataArray, Variable, coding, decode_cf
from xarray.coding.times import (_import_cftime, cftime_to_nptime,
decode_cf_datetime, encode_cf_datetime)
from xarray.conventions import _update_bounds_attributes
from xarray.core.common import contains_cftime_datetimes

from . import (
Expand Down Expand Up @@ -624,6 +625,41 @@ def test_decode_cf(calendar):
assert ds.test.dtype == np.dtype('M8[ns]')


def test_decode_cf_time_bounds():
    """Check that time attributes are propagated to time bounds variables.

    Covers three situations: bounds without attributes of their own,
    bounds that already carry attributes, and a ``bounds`` attribute that
    points to a variable missing from the dataset.
    """
    da = DataArray(np.arange(6, dtype='int64').reshape((3, 2)),
                   coords={'time': [1, 2, 3]},
                   dims=('time', 'nbnd'), name='time_bnds')

    attrs = {'units': 'days since 2001-01',
             'calendar': 'standard',
             'bounds': 'time_bnds'}

    def _fresh_dataset():
        # Every scenario starts from a new dataset whose time coordinate
        # carries the CF time attributes.
        dataset = da.to_dataset()
        dataset['time'].attrs.update(attrs)
        return dataset

    # Bounds without attributes inherit them from the time coordinate
    ds = _fresh_dataset()
    _update_bounds_attributes(ds.variables)
    expected = {'units': 'days since 2001-01',
                'calendar': 'standard'}
    assert ds.variables['time_bnds'].attrs == expected
    decoded = decode_cf(ds)
    assert decoded.time_bnds.dtype == np.dtype('M8[ns]')
    undecoded = decode_cf(ds, decode_times=False)
    assert undecoded.time_bnds.dtype == np.dtype('int64')

    # Do not overwrite existing attrs
    ds = _fresh_dataset()
    bnd_attr = {'units': 'hours since 2001-01', 'calendar': 'noleap'}
    ds['time_bnds'].attrs.update(bnd_attr)
    _update_bounds_attributes(ds.variables)
    assert ds.variables['time_bnds'].attrs == bnd_attr

    # If bounds variable not available do not complain
    ds = _fresh_dataset()
    ds['time'].attrs['bounds'] = 'fake_var'
    _update_bounds_attributes(ds.variables)


@pytest.fixture(params=_ALL_CALENDARS)
def calendar(request):
    """Parametrized fixture returning each entry of ``_ALL_CALENDARS``."""
    current = request.param
    return current
Expand Down