CF Writer Improvements #976

Merged · 4 commits · Dec 10, 2019
53 changes: 45 additions & 8 deletions satpy/tests/writer_tests/test_cf.py
@@ -220,8 +220,10 @@ def test_single_time_value(self):
end_time=end_time))
with TempFile() as filename:
scn.save_datasets(filename=filename, writer='cf')
with xr.open_dataset(filename, decode_cf=False) as f:
self.assertTrue(np.all(f['time_bnds'][:] == np.array([-300., 600.])))
with xr.open_dataset(filename, decode_cf=True) as f:
np.testing.assert_array_equal(f['time'], scn['test-array']['time'])
bounds_exp = np.array([[start_time, end_time]], dtype='datetime64[m]')
np.testing.assert_array_equal(f['time_bnds'], bounds_exp)

def test_bounds(self):
"""Test setting time bounds."""
@@ -238,8 +240,23 @@ def test_bounds(self):
end_time=end_time))
with TempFile() as filename:
scn.save_datasets(filename=filename, writer='cf')
# Check decoded time coordinates & bounds
with xr.open_dataset(filename, decode_cf=True) as f:
bounds_exp = np.array([[start_time, end_time]], dtype='datetime64[m]')
np.testing.assert_array_equal(f['time_bnds'], bounds_exp)
self.assertEqual(f['time'].attrs['bounds'], 'time_bnds')

# Check raw time coordinates & bounds
with xr.open_dataset(filename, decode_cf=False) as f:
np.testing.assert_almost_equal(f['time_bnds'], [[-0.0034722, 0.0069444]])

# User-specified time encoding should have preference
with TempFile() as filename:
time_units = 'seconds since 2018-01-01'
scn.save_datasets(filename=filename, encoding={'time': {'units': time_units}},
writer='cf')
with xr.open_dataset(filename, decode_cf=False) as f:
self.assertTrue(np.all(f['time_bnds'][:] == np.array([-300., 600.])))
np.testing.assert_array_equal(f['time_bnds'], [[12909600, 12910500]])
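
(Worked example, not part of the diff: with units of 'seconds since 2018-01-01', the expected raw bounds above follow from plain datetime arithmetic.)

```python
from datetime import datetime

# 2018-05-30 is 149 days after 2018-01-01: 149 * 86400 s + 10 h * 3600 s = 12909600 s
print((datetime(2018, 5, 30, 10, 0) - datetime(2018, 1, 1)).total_seconds())   # 12909600.0
# The end bound is 15 minutes (900 s) later
print((datetime(2018, 5, 30, 10, 15) - datetime(2018, 1, 1)).total_seconds())  # 12910500.0
```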

def test_bounds_minimum(self):
"""Test minimum bounds."""
@@ -264,8 +281,9 @@ def test_bounds_minimum(self):
end_time=end_timeB))
with TempFile() as filename:
scn.save_datasets(filename=filename, writer='cf')
with xr.open_dataset(filename, decode_cf=False) as f:
self.assertTrue(np.all(f['time_bnds'][:] == np.array([-300., 600.])))
with xr.open_dataset(filename, decode_cf=True) as f:
bounds_exp = np.array([[start_timeA, end_timeB]], dtype='datetime64[m]')
np.testing.assert_array_equal(f['time_bnds'], bounds_exp)

def test_bounds_missing_time_info(self):
"""Test time bounds generation in case of missing time."""
@@ -286,11 +304,12 @@ def test_bounds_missing_time_info(self):
coords={'time': [np.datetime64('2018-05-30T10:05:00')]})
with TempFile() as filename:
scn.save_datasets(filename=filename, writer='cf')
with xr.open_dataset(filename, decode_cf=False) as f:
self.assertTrue(np.all(f['time_bnds'][:] == np.array([-300., 600.])))
with xr.open_dataset(filename, decode_cf=True) as f:
bounds_exp = np.array([[start_timeA, end_timeA]], dtype='datetime64[m]')
np.testing.assert_array_equal(f['time_bnds'], bounds_exp)

def test_encoding_kwarg(self):
"""Test encoding of keyword arguments."""
"""Test 'encoding' keyword argument."""
from satpy import Scene
import xarray as xr
scn = Scene()
@@ -312,6 +331,24 @@ def test_encoding_kwarg(self):
# check that dtype behaves as int8
self.assertTrue(np.iinfo(f['test-array'][:].dtype).max == 127)

def test_unlimited_dims_kwarg(self):
"""Test specification of unlimited dimensions."""
from satpy import Scene
import xarray as xr
scn = Scene()
start_time = datetime(2018, 5, 30, 10, 0)
end_time = datetime(2018, 5, 30, 10, 15)
test_array = np.array([[1, 2], [3, 4]])
scn['test-array'] = xr.DataArray(test_array,
dims=['x', 'y'],
coords={'time': np.datetime64('2018-05-30T10:05:00')},
attrs=dict(start_time=start_time,
end_time=end_time))
with TempFile() as filename:
scn.save_datasets(filename=filename, writer='cf', unlimited_dims=['time'])
with xr.open_dataset(filename) as f:
self.assertSetEqual(f.encoding['unlimited_dims'], {'time'})

def test_header_attrs(self):
"""Check master attributes are set."""
from satpy import Scene
48 changes: 29 additions & 19 deletions satpy/writers/cf_writer.py
@@ -106,6 +106,7 @@

from dask.base import tokenize
import xarray as xr
from xarray.coding.times import CFDatetimeCoder
import numpy as np

from pyresample.geometry import AreaDefinition, SwathDefinition
@@ -259,22 +260,14 @@ def area2cf(dataarray, strict=False):
return res


def make_time_bounds(dataarray, start_times, end_times):
def make_time_bounds(start_times, end_times):
"""Create time bounds for the current *dataarray*."""
import numpy as np
start_time = min(start_time for start_time in start_times
if start_time is not None)
end_time = min(end_time for end_time in end_times
if end_time is not None)
try:
dtnp64 = dataarray['time'].data[0]
except IndexError:
dtnp64 = dataarray['time'].data
time_bnds = [(np.datetime64(start_time) - dtnp64),
(np.datetime64(end_time) - dtnp64)]
data = xr.DataArray(np.array(time_bnds)[None, :] / np.timedelta64(1, 's'),
data = xr.DataArray([[np.datetime64(start_time), np.datetime64(end_time)]],
Comment from the Member Author:

I'm not entirely sure why the current method uses the timestamp, but I guess it's just for encoding reasons (bounds relative to the timestamp). By changing the bounds' datatype to datetime64, xarray will handle the encoding.

dims=['time', 'bnds_1d'])
data.encoding['_FillValue'] = None
return data


@@ -546,7 +539,7 @@ def _collect_datasets(self, datasets, epoch=EPOCH, flatten_attrs=False, exclude_
datas = {}
start_times = []
end_times = []
for ds in ds_collection.values():
for ds_name, ds in sorted(ds_collection.items()):
if ds.dtype not in CF_DTYPES:
warnings.warn('Dtype {} not compatible with {}.'.format(str(ds.dtype), CF_VERSION))
try:
@@ -566,20 +559,37 @@ def _collect_datasets(self, datasets, epoch=EPOCH, flatten_attrs=False, exclude_

return datas, start_times, end_times

def update_encoding(self, datasets, to_netcdf_kwargs):
def update_encoding(self, dataset, to_netcdf_kwargs):
"""Update encoding.

Avoid _FillValue attribute being added to coordinate variables (https://github.com/pydata/xarray/issues/1865).
"""
other_to_netcdf_kwargs = to_netcdf_kwargs.copy()
encoding = other_to_netcdf_kwargs.pop('encoding', {}).copy()
coord_vars = []
for data_array in datasets:
for name, data_array in dataset.items():
coord_vars.extend(set(data_array.dims).intersection(data_array.coords))
for coord_var in coord_vars:
encoding.setdefault(coord_var, {})
encoding[coord_var].update({'_FillValue': None})

# Make sure time coordinates and bounds have the same units. Default is xarray's CF datetime
# encoding, which can be overridden by user-defined encoding.
if 'time' in dataset:
try:
dtnp64 = dataset['time'].data[0]
except IndexError:
dtnp64 = dataset['time'].data

default = CFDatetimeCoder().encode(xr.DataArray(dtnp64))
time_enc = {'units': default.attrs['units'], 'calendar': default.attrs['calendar']}
time_enc.update(encoding.get('time', {}))
bounds_enc = {'units': time_enc['units'],
'calendar': time_enc['calendar'],
'_FillValue': None}
encoding['time'] = time_enc
encoding['time_bnds'] = bounds_enc # FUTURE: Not required anymore with xarray-0.14+

return encoding, other_to_netcdf_kwargs

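(Illustration, not part of the diff: the default time encoding used above, derived via xarray's internal CFDatetimeCoder as the new code does; the exact unit string depends on the timestamp.)

```python
import numpy as np
import xarray as xr
from xarray.coding.times import CFDatetimeCoder

default = CFDatetimeCoder().encode(xr.DataArray(np.datetime64('2018-05-30T10:05:00')))
print(default.attrs['units'])     # e.g. 'seconds since 2018-05-30 10:05:00'
print(default.attrs['calendar'])  # 'proleptic_gregorian'
```
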
def save_datasets(self, datasets, filename=None, groups=None, header_attrs=None, engine=None, epoch=EPOCH,
@@ -591,7 +601,7 @@ def save_datasets(self, datasets, filename=None, groups=None, header_attrs=None,

Args:
datasets (list):
Names of datasets to be saved
Datasets to be saved
filename (str):
Output file
groups (dict):
@@ -656,6 +666,7 @@

init_nc_kwargs = to_netcdf_kwargs.copy()
init_nc_kwargs.pop('encoding', None) # No variables to be encoded at this point
init_nc_kwargs.pop('unlimited_dims', None)
written = [root.to_netcdf(filename, engine=engine, mode='w', **init_nc_kwargs)]

# Write datasets to groups (appending to the file; group=None means no group)
@@ -665,17 +676,16 @@ def save_datasets(self, datasets, filename=None, groups=None, header_attrs=None,
group_datasets, epoch=epoch, flatten_attrs=flatten_attrs, exclude_attrs=exclude_attrs,
include_lonlats=include_lonlats, pretty=pretty, compression=compression)
dataset = xr.Dataset(datas)
try:
dataset['time_bnds'] = make_time_bounds(dataset,
start_times,
if 'time' in dataset:
dataset['time_bnds'] = make_time_bounds(start_times,
end_times)
dataset['time'].attrs['bounds'] = "time_bnds"
dataset['time'].attrs['standard_name'] = "time"
except KeyError:
else:
grp_str = ' of group {}'.format(group_name) if group_name is not None else ''
logger.warning('No time dimension in datasets{}, skipping time bounds creation.'.format(grp_str))

encoding, other_to_netcdf_kwargs = self.update_encoding(datasets, to_netcdf_kwargs)
encoding, other_to_netcdf_kwargs = self.update_encoding(dataset, to_netcdf_kwargs)
res = dataset.to_netcdf(filename, engine=engine, group=group_name, mode='a', encoding=encoding,
**other_to_netcdf_kwargs)
written.append(res)