diff --git a/HISTORY.rst b/HISTORY.rst index cb61ada3..b2420354 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -4,6 +4,7 @@ History X.Y.Z (YYYY-MM-DD) ------------------ +* Fix #199 - do not create spurious fields in zarr writes. (:pr:`200`) * Improve fix for #172 - error out more reliably. (:pr:`198`) * Fix #172 - error out when missing datavars should be written. (:pr:`197`) * Fix #195 - allow non-standard columns to be tiled. (:pr:`196`) diff --git a/daskms/experimental/utils.py b/daskms/experimental/utils.py index eeec4280..5ce7ef56 100644 --- a/daskms/experimental/utils.py +++ b/daskms/experimental/utils.py @@ -83,27 +83,15 @@ def select_vars_and_coords(dataset, columns): f"columns: {column_set}. Some or all of these " f"are not present on the datasets. Aborting.") - data_cols = column_set & data_var_names - coord_cols = column_set & coord_names + data_sel = column_set & data_var_names + coord_sel = column_set & coord_names - for c in coord_cols: - coord_cols.union(*(d for d in coords[c].dims)) + for dv in data_sel: + # Don't unpack: union() would iterate each name's characters. + coord_sel = coord_sel.union(data_vars[dv].coords.keys()) - ret_data_vars = {} - - for c in data_cols: - ret_data_vars[c] = v = data_vars[c] - coord_cols.union(*(d for d in v.dims)) - - ret_coords = {} - - for c in coord_cols: - try: - v = coords[c] - except KeyError: - continue - else: - ret_coords[c] = v + ret_data_vars = {col: data_vars[col] for col in data_sel} + ret_coords = {c: coords[c] for c in coord_sel} return ret_data_vars, ret_coords diff --git a/daskms/experimental/zarr/__init__.py b/daskms/experimental/zarr/__init__.py index 20b5fad0..90773363 100644 --- a/daskms/experimental/zarr/__init__.py +++ b/daskms/experimental/zarr/__init__.py @@ -201,7 +201,8 @@ def xds_to_zarr(xds, store, columns=None): Path to store the data columns : list of str or str or None Columns to store. `None` or `"ALL"` stores all columns on each dataset. - Otherwise, a list of columns should be supplied. + Otherwise, a list of columns should be supplied. 
All coordinates + associated with a specified column will be written automatically. Returns ------- @@ -234,6 +235,9 @@ def xds_to_zarr(xds, store, columns=None): data_vars, coords = select_vars_and_coords(ds, columns) + # Create a new ds which is consistent with what we want to write. + ds = Dataset(data_vars, coords=coords, attrs=ds.attrs) + group = prepare_zarr_group(di, ds, store) data_vars = dict(_gen_writes(data_vars, ds.chunks, group))