From ff6d6a0429b5601c6fc0fd427bb7ac7d8ec3e0dd Mon Sep 17 00:00:00 2001 From: Stephan Finkensieper Date: Thu, 9 Jul 2020 12:11:04 +0200 Subject: [PATCH 1/7] Preserve chunks in CF Writer --- satpy/tests/writer_tests/test_cf.py | 34 +++++++++++++++++++++++++++++ satpy/writers/cf_writer.py | 16 ++++++++++++-- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/satpy/tests/writer_tests/test_cf.py b/satpy/tests/writer_tests/test_cf.py index d6b3147d23..5ff9162505 100644 --- a/satpy/tests/writer_tests/test_cf.py +++ b/satpy/tests/writer_tests/test_cf.py @@ -960,6 +960,40 @@ def test_area2lonlat(self): self.assertDictContainsSubset({'name': 'longitude', 'standard_name': 'longitude', 'units': 'degrees_east'}, lon.attrs) + def test_update_encoding(self): + import xarray as xr + from satpy.writers.cf_writer import CFWriter + + # Without time dimension + ds = xr.Dataset({'foo': (('y', 'x'), [[1, 2], [3, 4]]), + 'bar': (('y', 'x'), [[3, 4], [5, 6]])}, + coords={'y': [1, 2], 'x': [3, 4]}) + ds = ds.chunk(2) + kwargs = {'encoding': {'bar': {'chunksizes': (1, 1)}}, + 'other': 'kwargs'} + enc, other_kwargs = CFWriter.update_encoding(ds, kwargs) + self.assertDictEqual(enc, {'y': {'_FillValue': None}, + 'x': {'_FillValue': None}, + 'foo': {'chunksizes': (2, 2)}, + 'bar': {'chunksizes': (1, 1)}}) + self.assertDictEqual(other_kwargs, {'other': 'kwargs'}) + + # With time dimension + ds = ds.expand_dims({'time': [datetime(2009, 7, 1, 12, 15)]}) + kwargs = {'encoding': {'bar': {'chunksizes': (1, 1, 1)}}, + 'other': 'kwargs'} + enc, other_kwargs = CFWriter.update_encoding(ds, kwargs) + self.assertDictEqual(enc, {'y': {'_FillValue': None}, + 'x': {'_FillValue': None}, + 'foo': {'chunksizes': (1, 2, 2)}, + 'bar': {'chunksizes': (1, 1, 1)}, + 'time': {'_FillValue': None, + 'calendar': 'proleptic_gregorian', + 'units': 'days since 2009-07-01 12:15:00'}, + 'time_bnds': {'_FillValue': None, + 'calendar': 'proleptic_gregorian', + 'units': 'days since 2009-07-01 12:15:00'}}) + def suite(): """Test suite for this writer's tests.""" diff --git a/satpy/writers/cf_writer.py b/satpy/writers/cf_writer.py index 80e9bc7491..2be311ec58 100644 --- a/satpy/writers/cf_writer.py +++ b/satpy/writers/cf_writer.py @@ -514,13 +514,25 @@ def _collect_datasets(self, datasets, epoch=EPOCH, flatten_attrs=False, exclude_ return datas, start_times, end_times - def update_encoding(self, dataset, to_netcdf_kwargs): + @staticmethod + def update_encoding(dataset, to_netcdf_kwargs): """Update encoding. - Avoid _FillValue attribute being added to coordinate variables (https://github.com/pydata/xarray/issues/1865). + Preserve chunk sizes, avoid fill values in coordinate variables and make sure that + time & time bounds have the same units. """ other_to_netcdf_kwargs = to_netcdf_kwargs.copy() encoding = other_to_netcdf_kwargs.pop('encoding', {}).copy() + + # If not specified otherwise by the user, preserve current chunks. + for var_name, data_var in dataset.data_vars.items(): + if data_var.chunks: + if var_name not in encoding: + encoding[var_name] = {} + encoding[var_name].setdefault('chunksizes', data_var.data.chunksize) + + # Avoid _FillValue attribute being added to coordinate variables + # (https://github.com/pydata/xarray/issues/1865). 
coord_vars = [] for data_array in dataset.values(): coord_vars.extend(set(data_array.dims).intersection(data_array.coords)) From 1951de4d4f610ea5577707a69fcc415c81384727 Mon Sep 17 00:00:00 2001 From: Stephan Finkensieper Date: Fri, 10 Jul 2020 10:04:10 +0200 Subject: [PATCH 2/7] Include all variables, not just data variables --- satpy/tests/writer_tests/test_cf.py | 6 +++++- satpy/writers/cf_writer.py | 9 ++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/satpy/tests/writer_tests/test_cf.py b/satpy/tests/writer_tests/test_cf.py index 5ff9162505..3e52c82354 100644 --- a/satpy/tests/writer_tests/test_cf.py +++ b/satpy/tests/writer_tests/test_cf.py @@ -967,13 +967,16 @@ def test_update_encoding(self): # Without time dimension ds = xr.Dataset({'foo': (('y', 'x'), [[1, 2], [3, 4]]), 'bar': (('y', 'x'), [[3, 4], [5, 6]])}, - coords={'y': [1, 2], 'x': [3, 4]}) + coords={'y': [1, 2], + 'x': [3, 4], + 'lon': (('y', 'x'), [[7, 8], [9, 10]])}) ds = ds.chunk(2) kwargs = {'encoding': {'bar': {'chunksizes': (1, 1)}}, 'other': 'kwargs'} enc, other_kwargs = CFWriter.update_encoding(ds, kwargs) self.assertDictEqual(enc, {'y': {'_FillValue': None}, 'x': {'_FillValue': None}, + 'lon': {'chunksizes': (2, 2)}, 'foo': {'chunksizes': (2, 2)}, 'bar': {'chunksizes': (1, 1)}}) self.assertDictEqual(other_kwargs, {'other': 'kwargs'}) @@ -985,6 +988,7 @@ def test_update_encoding(self): enc, other_kwargs = CFWriter.update_encoding(ds, kwargs) self.assertDictEqual(enc, {'y': {'_FillValue': None}, 'x': {'_FillValue': None}, + 'lon': {'chunksizes': (2, 2)}, 'foo': {'chunksizes': (1, 2, 2)}, 'bar': {'chunksizes': (1, 1, 1)}, 'time': {'_FillValue': None, diff --git a/satpy/writers/cf_writer.py b/satpy/writers/cf_writer.py index 2be311ec58..c4cd8ac13b 100644 --- a/satpy/writers/cf_writer.py +++ b/satpy/writers/cf_writer.py @@ -525,11 +525,10 @@ def update_encoding(dataset, to_netcdf_kwargs): encoding = other_to_netcdf_kwargs.pop('encoding', {}).copy() # If not specified otherwise by the user, preserve current chunks. - for var_name, data_var in dataset.data_vars.items(): - if data_var.chunks: - if var_name not in encoding: - encoding[var_name] = {} - encoding[var_name].setdefault('chunksizes', data_var.data.chunksize) + for var_name, variable in dataset.variables.items(): + if variable.chunks: + encoding.setdefault(var_name, {}) + encoding[var_name].setdefault('chunksizes', variable.data.chunksize) # Avoid _FillValue attribute being added to coordinate variables # (https://github.com/pydata/xarray/issues/1865). 
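
For illustration, a minimal usage sketch of how the chunk-preserving encoding from the two patches above is expected to behave, assuming a satpy checkout with them applied; the dataset, chunk sizes and file name are invented for the example and mirror the expectations in test_update_encoding:

# Minimal sketch, not part of the patch series: user-supplied chunksizes win,
# everything else falls back to the current dask chunks, and non-index coordinate
# variables such as 'lon' are covered as well (PATCH 2/7).
import xarray as xr
from satpy.writers.cf_writer import CFWriter  # update_encoding is still a staticmethod here

ds = xr.Dataset({'foo': (('y', 'x'), [[1, 2], [3, 4]]),
                 'bar': (('y', 'x'), [[3, 4], [5, 6]])},
                coords={'y': [1, 2],
                        'x': [3, 4],
                        'lon': (('y', 'x'), [[7, 8], [9, 10]])}).chunk(1)

kwargs = {'encoding': {'bar': {'chunksizes': (2, 2)}}}
encoding, other_kwargs = CFWriter.update_encoding(ds, kwargs)

assert encoding['foo']['chunksizes'] == (1, 1)  # taken from the current dask chunks
assert encoding['lon']['chunksizes'] == (1, 1)  # coordinate variables included (PATCH 2/7)
assert encoding['bar']['chunksizes'] == (2, 2)  # explicit user choice left untouched
assert encoding['y'] == encoding['x'] == {'_FillValue': None}
ds.to_netcdf('chunked.nc', encoding=encoding, **other_kwargs)
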
From 976c002d770cefdc4271672693a213c60c58cc3c Mon Sep 17 00:00:00 2001 From: Stephan Finkensieper Date: Fri, 10 Jul 2020 15:42:16 +0200 Subject: [PATCH 3/7] Limit chunksizes to shape of the data --- satpy/tests/writer_tests/test_cf.py | 13 +++++++++++++ satpy/writers/cf_writer.py | 8 +++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/satpy/tests/writer_tests/test_cf.py b/satpy/tests/writer_tests/test_cf.py index 3e52c82354..8372fc42bc 100644 --- a/satpy/tests/writer_tests/test_cf.py +++ b/satpy/tests/writer_tests/test_cf.py @@ -981,6 +981,16 @@ def test_update_encoding(self): 'bar': {'chunksizes': (1, 1)}}) self.assertDictEqual(other_kwargs, {'other': 'kwargs'}) + # Chunksize may not exceed shape + ds = ds.chunk(8) + kwargs = {'encoding': {}, 'other': 'kwargs'} + enc, other_kwargs = CFWriter.update_encoding(ds, kwargs) + self.assertDictEqual(enc, {'y': {'_FillValue': None}, + 'x': {'_FillValue': None}, + 'lon': {'chunksizes': (2, 2)}, + 'foo': {'chunksizes': (2, 2)}, + 'bar': {'chunksizes': (2, 2)}}) + # With time dimension ds = ds.expand_dims({'time': [datetime(2009, 7, 1, 12, 15)]}) kwargs = {'encoding': {'bar': {'chunksizes': (1, 1, 1)}}, @@ -998,6 +1008,9 @@ def test_update_encoding(self): 'calendar': 'proleptic_gregorian', 'units': 'days since 2009-07-01 12:15:00'}}) + # User-defined encoding may not be altered + self.assertDictEqual(kwargs['encoding'], {'bar': {'chunksizes': (1, 1, 1)}}) + def suite(): """Test suite for this writer's tests.""" diff --git a/satpy/writers/cf_writer.py b/satpy/writers/cf_writer.py index c4cd8ac13b..87c389b34c 100644 --- a/satpy/writers/cf_writer.py +++ b/satpy/writers/cf_writer.py @@ -99,6 +99,7 @@ """ from collections import OrderedDict, defaultdict +import copy import logging from datetime import datetime import json @@ -527,8 +528,12 @@ def update_encoding(dataset, to_netcdf_kwargs): # If not specified otherwise by the user, preserve current chunks. for var_name, variable in dataset.variables.items(): if variable.chunks: + chunks = tuple( + np.stack([variable.data.chunksize, + variable.shape]).min(axis=0) + ) # Chunksize may not exceed shape encoding.setdefault(var_name, {}) - encoding[var_name].setdefault('chunksizes', variable.data.chunksize) + encoding[var_name].setdefault('chunksizes', chunks) # Avoid _FillValue attribute being added to coordinate variables # (https://github.com/pydata/xarray/issues/1865). 
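
The clipping rule added in this patch is a plain elementwise minimum of the dask chunk size and the variable shape, so the encoded chunks can never exceed the data and are accepted by the netCDF4 backend. A standalone illustration with made-up numbers, mirroring the expression in the hunk above:

import numpy as np

chunksize = (512, 512)  # dask chunk size, e.g. carried over from a reader's chunking
shape = (350, 512)      # actual size of the variable being written
safe = tuple(np.stack([chunksize, shape]).min(axis=0))
# safe is the elementwise minimum, here 350 x 512 -- never larger than the data itself
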
@@ -627,6 +632,7 @@ def save_datasets(self, datasets, filename=None, groups=None, header_attrs=None, root.attrs['Conventions'] = CF_VERSION # Remove satpy-specific kwargs + to_netcdf_kwargs = copy.deepcopy(to_netcdf_kwargs) # may contain dictionaries (encoding) satpy_kwargs = ['overlay', 'decorate', 'config_files'] for kwarg in satpy_kwargs: to_netcdf_kwargs.pop(kwarg, None) From aea0bf4a4334aa20d575c412b811209ae1228483 Mon Sep 17 00:00:00 2001 From: Stephan Finkensieper Date: Fri, 18 Sep 2020 09:32:01 +0200 Subject: [PATCH 4/7] Factorize encoding update & tests --- satpy/tests/writer_tests/test_cf.py | 33 ++++---- satpy/writers/cf_writer.py | 113 ++++++++++++++++------------ 2 files changed, 84 insertions(+), 62 deletions(-) diff --git a/satpy/tests/writer_tests/test_cf.py b/satpy/tests/writer_tests/test_cf.py index 8372fc42bc..b29b1e0c31 100644 --- a/satpy/tests/writer_tests/test_cf.py +++ b/satpy/tests/writer_tests/test_cf.py @@ -960,20 +960,24 @@ def test_area2lonlat(self): self.assertDictContainsSubset({'name': 'longitude', 'standard_name': 'longitude', 'units': 'degrees_east'}, lon.attrs) - def test_update_encoding(self): + +class EncodingUpdateTest(unittest.TestCase): + def setUp(self): import xarray as xr - from satpy.writers.cf_writer import CFWriter + self.ds = xr.Dataset({'foo': (('y', 'x'), [[1, 2], [3, 4]]), + 'bar': (('y', 'x'), [[3, 4], [5, 6]])}, + coords={'y': [1, 2], + 'x': [3, 4], + 'lon': (('y', 'x'), [[7, 8], [9, 10]])}) + + def test_without_time(self): + from satpy.writers.cf_writer import update_encoding # Without time dimension - ds = xr.Dataset({'foo': (('y', 'x'), [[1, 2], [3, 4]]), - 'bar': (('y', 'x'), [[3, 4], [5, 6]])}, - coords={'y': [1, 2], - 'x': [3, 4], - 'lon': (('y', 'x'), [[7, 8], [9, 10]])}) - ds = ds.chunk(2) + ds = self.ds.chunk(2) kwargs = {'encoding': {'bar': {'chunksizes': (1, 1)}}, 'other': 'kwargs'} - enc, other_kwargs = CFWriter.update_encoding(ds, kwargs) + enc, other_kwargs = update_encoding(ds, kwargs) self.assertDictEqual(enc, {'y': {'_FillValue': None}, 'x': {'_FillValue': None}, 'lon': {'chunksizes': (2, 2)}, @@ -982,20 +986,23 @@ def test_update_encoding(self): self.assertDictEqual(other_kwargs, {'other': 'kwargs'}) # Chunksize may not exceed shape - ds = ds.chunk(8) + ds = self.ds.chunk(8) kwargs = {'encoding': {}, 'other': 'kwargs'} - enc, other_kwargs = CFWriter.update_encoding(ds, kwargs) + enc, other_kwargs = update_encoding(ds, kwargs) self.assertDictEqual(enc, {'y': {'_FillValue': None}, 'x': {'_FillValue': None}, 'lon': {'chunksizes': (2, 2)}, 'foo': {'chunksizes': (2, 2)}, 'bar': {'chunksizes': (2, 2)}}) + def test_with_time(self): + from satpy.writers.cf_writer import update_encoding + # With time dimension - ds = ds.expand_dims({'time': [datetime(2009, 7, 1, 12, 15)]}) + ds = self.ds.chunk(8).expand_dims({'time': [datetime(2009, 7, 1, 12, 15)]}) kwargs = {'encoding': {'bar': {'chunksizes': (1, 1, 1)}}, 'other': 'kwargs'} - enc, other_kwargs = CFWriter.update_encoding(ds, kwargs) + enc, other_kwargs = update_encoding(ds, kwargs) self.assertDictEqual(enc, {'y': {'_FillValue': None}, 'x': {'_FillValue': None}, 'lon': {'chunksizes': (2, 2)}, diff --git a/satpy/writers/cf_writer.py b/satpy/writers/cf_writer.py index 87c389b34c..a6595224c6 100644 --- a/satpy/writers/cf_writer.py +++ b/satpy/writers/cf_writer.py @@ -399,6 +399,62 @@ def encode_attrs_nc(attrs): return OrderedDict(encoded_attrs) +def _set_default_chunks(encoding, dataset): + """Update encoding to preserve current dask chunks. 
+ + Existing user-defined chunks take precedence. + """ + for var_name, variable in dataset.variables.items(): + if variable.chunks: + chunks = tuple( + np.stack([variable.data.chunksize, + variable.shape]).min(axis=0) + ) # Chunksize may not exceed shape + encoding.setdefault(var_name, {}) + encoding[var_name].setdefault('chunksizes', chunks) + + +def update_encoding(dataset, to_netcdf_kwargs): + """Update encoding. + + Preserve dask chunks, avoid fill values in coordinate variables and make sure that + time & time bounds have the same units. + """ + other_to_netcdf_kwargs = to_netcdf_kwargs.copy() + encoding = other_to_netcdf_kwargs.pop('encoding', {}).copy() + + # If not specified otherwise by the user, preserve current dask chunks. + _set_default_chunks(encoding, dataset) + + # Avoid _FillValue attribute being added to coordinate variables + # (https://github.com/pydata/xarray/issues/1865). + coord_vars = [] + for data_array in dataset.values(): + coord_vars.extend(set(data_array.dims).intersection(data_array.coords)) + for coord_var in coord_vars: + encoding.setdefault(coord_var, {}) + encoding[coord_var].update({'_FillValue': None}) + + # Make sure time coordinates and bounds have the same units. Default is xarray's CF datetime + # encoding, which can be overridden by user-defined encoding. + if 'time' in dataset: + try: + dtnp64 = dataset['time'].data[0] + except IndexError: + dtnp64 = dataset['time'].data + + default = CFDatetimeCoder().encode(xr.DataArray(dtnp64)) + time_enc = {'units': default.attrs['units'], 'calendar': default.attrs['calendar']} + time_enc.update(encoding.get('time', {})) + bounds_enc = {'units': time_enc['units'], + 'calendar': time_enc['calendar'], + '_FillValue': None} + encoding['time'] = time_enc + encoding['time_bnds'] = bounds_enc # FUTURE: Not required anymore with xarray-0.14+ + + return encoding, other_to_netcdf_kwargs + + class CFWriter(Writer): """Writer producing NetCDF/CF compatible datasets.""" @@ -476,6 +532,13 @@ def da2cf(dataarray, epoch=EPOCH, flatten_attrs=False, exclude_attrs=None, compr return new_data + @staticmethod + def update_encoding(dataset, to_netcdf_kwargs): + warnings.warn('CFWriter.update_encoding is deprecated. ' + 'Use satpy.writers.cf_writer.update_encoding instead.', + DeprecationWarning) + return update_encoding(dataset, to_netcdf_kwargs) + def save_dataset(self, dataset, filename=None, fill_value=None, **kwargs): """Save the *dataset* to a given *filename*.""" return self.save_datasets([dataset], filename, **kwargs) @@ -515,54 +578,6 @@ def _collect_datasets(self, datasets, epoch=EPOCH, flatten_attrs=False, exclude_ return datas, start_times, end_times - @staticmethod - def update_encoding(dataset, to_netcdf_kwargs): - """Update encoding. - - Preserve chunk sizes, avoid fill values in coordinate variables and make sure that - time & time bounds have the same units. - """ - other_to_netcdf_kwargs = to_netcdf_kwargs.copy() - encoding = other_to_netcdf_kwargs.pop('encoding', {}).copy() - - # If not specified otherwise by the user, preserve current chunks. - for var_name, variable in dataset.variables.items(): - if variable.chunks: - chunks = tuple( - np.stack([variable.data.chunksize, - variable.shape]).min(axis=0) - ) # Chunksize may not exceed shape - encoding.setdefault(var_name, {}) - encoding[var_name].setdefault('chunksizes', chunks) - - # Avoid _FillValue attribute being added to coordinate variables - # (https://github.com/pydata/xarray/issues/1865). 
- coord_vars = [] - for data_array in dataset.values(): - coord_vars.extend(set(data_array.dims).intersection(data_array.coords)) - for coord_var in coord_vars: - encoding.setdefault(coord_var, {}) - encoding[coord_var].update({'_FillValue': None}) - - # Make sure time coordinates and bounds have the same units. Default is xarray's CF datetime - # encoding, which can be overridden by user-defined encoding. - if 'time' in dataset: - try: - dtnp64 = dataset['time'].data[0] - except IndexError: - dtnp64 = dataset['time'].data - - default = CFDatetimeCoder().encode(xr.DataArray(dtnp64)) - time_enc = {'units': default.attrs['units'], 'calendar': default.attrs['calendar']} - time_enc.update(encoding.get('time', {})) - bounds_enc = {'units': time_enc['units'], - 'calendar': time_enc['calendar'], - '_FillValue': None} - encoding['time'] = time_enc - encoding['time_bnds'] = bounds_enc # FUTURE: Not required anymore with xarray-0.14+ - - return encoding, other_to_netcdf_kwargs - def save_datasets(self, datasets, filename=None, groups=None, header_attrs=None, engine=None, epoch=EPOCH, flatten_attrs=False, exclude_attrs=None, include_lonlats=True, pretty=False, compression=None, **to_netcdf_kwargs): @@ -658,7 +673,7 @@ def save_datasets(self, datasets, filename=None, groups=None, header_attrs=None, grp_str = ' of group {}'.format(group_name) if group_name is not None else '' logger.warning('No time dimension in datasets{}, skipping time bounds creation.'.format(grp_str)) - encoding, other_to_netcdf_kwargs = self.update_encoding(dataset, to_netcdf_kwargs) + encoding, other_to_netcdf_kwargs = update_encoding(dataset, to_netcdf_kwargs) res = dataset.to_netcdf(filename, engine=engine, group=group_name, mode='a', encoding=encoding, **other_to_netcdf_kwargs) written.append(res) From 6827a0fe10d4f53093df7ef914f279472821255a Mon Sep 17 00:00:00 2001 From: Stephan Finkensieper Date: Fri, 18 Sep 2020 09:39:49 +0200 Subject: [PATCH 5/7] Fix deprecation warnings Replace DataArray.drop with .drop_vars --- satpy/writers/cf_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satpy/writers/cf_writer.py b/satpy/writers/cf_writer.py index 7a78b35e34..158a6535e0 100644 --- a/satpy/writers/cf_writer.py +++ b/satpy/writers/cf_writer.py @@ -531,7 +531,7 @@ def da2cf(dataarray, epoch=EPOCH, flatten_attrs=False, exclude_attrs=None, compr new_data['y'].attrs['units'] = 'm' if 'crs' in new_data.coords: - new_data = new_data.drop('crs') + new_data = new_data.drop_vars('crs') if 'long_name' not in new_data.attrs and 'standard_name' not in new_data.attrs: new_data.attrs['long_name'] = new_data.name From da974972668596dc19c924f8020ce0531ad1daba Mon Sep 17 00:00:00 2001 From: Stephan Finkensieper Date: Fri, 18 Sep 2020 09:53:27 +0200 Subject: [PATCH 6/7] Remove _satpy* attributes --- satpy/tests/writer_tests/test_cf.py | 1 + satpy/writers/cf_writer.py | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/satpy/tests/writer_tests/test_cf.py b/satpy/tests/writer_tests/test_cf.py index 51c427d1b6..87f46ab6ac 100644 --- a/satpy/tests/writer_tests/test_cf.py +++ b/satpy/tests/writer_tests/test_cf.py @@ -499,6 +499,7 @@ def test_da2cf(self): attrs, attrs_expected, attrs_expected_flat = self.get_test_attrs() attrs['area'] = 'some_area' attrs['prerequisites'] = [make_dsq(name='hej')] + attrs['_satpy_id_name'] = 'myname' # Adjust expected attributes expected_prereq = ("DataQuery(name='hej')") diff --git a/satpy/writers/cf_writer.py b/satpy/writers/cf_writer.py index 158a6535e0..41ccf666e3 100644 --- 
a/satpy/writers/cf_writer.py +++ b/satpy/writers/cf_writer.py @@ -496,6 +496,11 @@ def da2cf(dataarray, epoch=EPOCH, flatten_attrs=False, exclude_attrs=None, compr name = new_data.attrs.pop('name') new_data = new_data.rename(name) + # Remove _satpy* attributes + satpy_attrs = [key for key in new_data.attrs if key.startswith('_satpy')] + for satpy_attr in satpy_attrs: + new_data.attrs.pop(satpy_attr) + # Remove area as well as user-defined attributes for key in ['area'] + exclude_attrs: new_data.attrs.pop(key, None) From 9525cc9c87062322159a27b1408539d7bfa9ee59 Mon Sep 17 00:00:00 2001 From: Stephan Finkensieper Date: Fri, 18 Sep 2020 10:53:47 +0200 Subject: [PATCH 7/7] Factorize encoding update --- satpy/writers/cf_writer.py | 39 ++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/satpy/writers/cf_writer.py b/satpy/writers/cf_writer.py index 41ccf666e3..9df5e988fc 100644 --- a/satpy/writers/cf_writer.py +++ b/satpy/writers/cf_writer.py @@ -429,20 +429,12 @@ def _set_default_chunks(encoding, dataset): encoding[var_name].setdefault('chunksizes', chunks) -def update_encoding(dataset, to_netcdf_kwargs): - """Update encoding. +def _set_default_fill_value(encoding, dataset): + """Set default fill values. - Preserve dask chunks, avoid fill values in coordinate variables and make sure that - time & time bounds have the same units. + Avoid _FillValue attribute being added to coordinate variables + (https://github.com/pydata/xarray/issues/1865). """ - other_to_netcdf_kwargs = to_netcdf_kwargs.copy() - encoding = other_to_netcdf_kwargs.pop('encoding', {}).copy() - - # If not specified otherwise by the user, preserve current dask chunks. - _set_default_chunks(encoding, dataset) - - # Avoid _FillValue attribute being added to coordinate variables - # (https://github.com/pydata/xarray/issues/1865). coord_vars = [] for data_array in dataset.values(): coord_vars.extend(set(data_array.dims).intersection(data_array.coords)) @@ -450,8 +442,13 @@ def update_encoding(dataset, to_netcdf_kwargs): encoding.setdefault(coord_var, {}) encoding[coord_var].update({'_FillValue': None}) - # Make sure time coordinates and bounds have the same units. Default is xarray's CF datetime - # encoding, which can be overridden by user-defined encoding. + +def _set_default_time_encoding(encoding, dataset): + """Set default time encoding. + + Make sure time coordinates and bounds have the same units. Default is xarray's CF datetime + encoding, which can be overridden by user-defined encoding. + """ if 'time' in dataset: try: dtnp64 = dataset['time'].data[0] @@ -467,6 +464,20 @@ def update_encoding(dataset, to_netcdf_kwargs): encoding['time'] = time_enc encoding['time_bnds'] = bounds_enc # FUTURE: Not required anymore with xarray-0.14+ + +def update_encoding(dataset, to_netcdf_kwargs): + """Update encoding. + + Preserve dask chunks, avoid fill values in coordinate variables and make sure that + time & time bounds have the same units. + """ + other_to_netcdf_kwargs = to_netcdf_kwargs.copy() + encoding = other_to_netcdf_kwargs.pop('encoding', {}).copy() + + _set_default_chunks(encoding, dataset) + _set_default_fill_value(encoding, dataset) + _set_default_time_encoding(encoding, dataset) + return encoding, other_to_netcdf_kwargs
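
Taken together, the refactored module-level update_encoding now applies the three defaults in sequence. A rough end-to-end sketch, assuming a satpy checkout with the whole series applied; the dataset is invented and the expected values mirror EncodingUpdateTest:

from datetime import datetime

import xarray as xr
from satpy.writers.cf_writer import update_encoding

ds = xr.Dataset({'foo': (('y', 'x'), [[1.0, 2.0], [3.0, 4.0]])},
                coords={'y': [1, 2], 'x': [3, 4]})
ds = ds.chunk(2).expand_dims({'time': [datetime(2009, 7, 1, 12, 15)]})

encoding, other_kwargs = update_encoding(ds, {'encoding': {'foo': {'zlib': True}}})

# _set_default_chunks:        encoding['foo']['chunksizes'] == (1, 2, 2), 'zlib' untouched
# _set_default_fill_value:    encoding['y'] == encoding['x'] == {'_FillValue': None}
# _set_default_time_encoding: 'time' and 'time_bnds' share calendar 'proleptic_gregorian'
#                             and units 'days since 2009-07-01 12:15:00', as in the tests
# save_datasets() creates the matching time_bnds variable (when a time dimension is
# present) before handing the encoding and other_kwargs to Dataset.to_netcdf().
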