Skip to content

Commit

Permalink
Preserve chunks in CF Writer
Browse files: browse the repository at this point in the history
  • Loading branch information
sfinkens committed Jul 9, 2020
1 parent b909413 commit c27beb2
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 2 deletions.
34 changes: 34 additions & 0 deletions satpy/tests/writer_tests/test_cf.py
Expand Up @@ -960,6 +960,40 @@ def test_area2lonlat(self):
self.assertDictContainsSubset({'name': 'longitude', 'standard_name': 'longitude', 'units': 'degrees_east'},
lon.attrs)

def test_update_encoding(self):
    """Test the encoding returned by ``CFWriter.update_encoding``.

    Two scenarios are covered: a dataset without and with a ``time``
    dimension. In both, ``foo`` has no user-defined encoding and is
    expected to receive the dataset's current chunk sizes, whereas the
    user-defined ``chunksizes`` of ``bar`` must be left untouched.
    Keyword arguments other than ``encoding`` are expected to be returned
    unchanged.
    """
    import xarray as xr
    from satpy.writers.cf_writer import CFWriter

    # Without time dimension
    ds = xr.Dataset({'foo': (('y', 'x'), [[1, 2], [3, 4]]),
                     'bar': (('y', 'x'), [[3, 4], [5, 6]])},
                    coords={'y': [1, 2], 'x': [3, 4]})
    # Chunk the data so that there are chunk sizes to preserve.
    ds = ds.chunk(2)
    kwargs = {'encoding': {'bar': {'chunksizes': (1, 1)}},
              'other': 'kwargs'}
    enc, other_kwargs = CFWriter.update_encoding(ds, kwargs)
    self.assertDictEqual(enc, {'y': {'_FillValue': None},
                               'x': {'_FillValue': None},
                               'foo': {'chunksizes': (2, 2)},
                               'bar': {'chunksizes': (1, 1)}})
    self.assertDictEqual(other_kwargs, {'other': 'kwargs'})

    # With time dimension
    # NOTE(review): ``datetime`` is not imported in this method; presumably
    # it is imported at module level - confirm.
    ds = ds.expand_dims({'time': [datetime(2009, 7, 1, 12, 15)]})
    kwargs = {'encoding': {'bar': {'chunksizes': (1, 1, 1)}},
              'other': 'kwargs'}
    enc, other_kwargs = CFWriter.update_encoding(ds, kwargs)
    # ``time`` and ``time_bnds`` are expected to share calendar and units.
    self.assertDictEqual(enc, {'y': {'_FillValue': None},
                               'x': {'_FillValue': None},
                               'foo': {'chunksizes': (1, 2, 2)},
                               'bar': {'chunksizes': (1, 1, 1)},
                               'time': {'_FillValue': None,
                                        'calendar': 'proleptic_gregorian',
                                        'units': 'days since 2009-07-01 12:15:00'},
                               'time_bnds': {'_FillValue': None,
                                             'calendar': 'proleptic_gregorian',
                                             'units': 'days since 2009-07-01 12:15:00'}})
    # Non-encoding kwargs must be passed through in this scenario as well.
    self.assertDictEqual(other_kwargs, {'other': 'kwargs'})


def suite():
"""Test suite for this writer's tests."""
Expand Down
16 changes: 14 additions & 2 deletions satpy/writers/cf_writer.py
Expand Up @@ -514,13 +514,25 @@ def _collect_datasets(self, datasets, epoch=EPOCH, flatten_attrs=False, exclude_

return datas, start_times, end_times

def update_encoding(self, dataset, to_netcdf_kwargs):
@staticmethod
def update_encoding(dataset, to_netcdf_kwargs):
"""Update encoding.
Avoid _FillValue attribute being added to coordinate variables (https://github.com/pydata/xarray/issues/1865).
Preserve chunk sizes, avoid fill values in coordinate variables and make sure that
time & time bounds have the same units.
"""
other_to_netcdf_kwargs = to_netcdf_kwargs.copy()
encoding = other_to_netcdf_kwargs.pop('encoding', {}).copy()

# If not specified otherwise by the user, preserve current chunks.
for var_name, data_var in dataset.data_vars.items():
if data_var.chunks:
if var_name not in encoding:
encoding[var_name] = {}
encoding[var_name].setdefault('chunksizes', data_var.data.chunksize)

# Avoid _FillValue attribute being added to coordinate variables
# (https://github.com/pydata/xarray/issues/1865).
coord_vars = []
for data_array in dataset.values():
coord_vars.extend(set(data_array.dims).intersection(data_array.coords))
Expand Down

0 comments on commit c27beb2

Please sign in to comment.