Skip to content

Commit

Permalink
Opt-in to skipping coordinate writes with mode='a-'
Browse files Browse the repository at this point in the history
  • Loading branch information
dcherian committed Nov 16, 2023
1 parent 88eb1ec commit f77ca2a
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 18 deletions.
22 changes: 12 additions & 10 deletions xarray/backends/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
from xarray.core.dataset import Dataset, _get_chunk, _maybe_chunk
from xarray.core.indexes import Index
from xarray.core.parallelcompat import guess_chunkmanager
from xarray.core.types import ZarrWriteModesOptional
from xarray.core.utils import is_remote_uri

if TYPE_CHECKING:
Expand Down Expand Up @@ -68,7 +69,6 @@
"NETCDF4", "NETCDF4_CLASSIC", "NETCDF3_64BIT", "NETCDF3_CLASSIC"
]


DATAARRAY_NAME = "__xarray_dataarray_name__"
DATAARRAY_VARIABLE = "__xarray_dataarray_variable__"

Expand Down Expand Up @@ -1524,7 +1524,7 @@ def to_zarr(
dataset: Dataset,
store: MutableMapping | str | os.PathLike[str] | None = None,
chunk_store: MutableMapping | str | os.PathLike | None = None,
mode: Literal["w", "w-", "a", "r+", None] = None,
mode: ZarrWriteModesOptional = None,
synchronizer=None,
group: str | None = None,
encoding: Mapping | None = None,
Expand All @@ -1548,7 +1548,7 @@ def to_zarr(
dataset: Dataset,
store: MutableMapping | str | os.PathLike[str] | None = None,
chunk_store: MutableMapping | str | os.PathLike | None = None,
mode: Literal["w", "w-", "a", "r+", None] = None,
mode: ZarrWriteModesOptional = None,
synchronizer=None,
group: str | None = None,
encoding: Mapping | None = None,
Expand All @@ -1570,7 +1570,7 @@ def to_zarr(
dataset: Dataset,
store: MutableMapping | str | os.PathLike[str] | None = None,
chunk_store: MutableMapping | str | os.PathLike | None = None,
mode: Literal["w", "w-", "a", "r+", None] = None,
mode: ZarrWriteModesOptional = None,
synchronizer=None,
group: str | None = None,
encoding: Mapping | None = None,
Expand Down Expand Up @@ -1627,16 +1627,18 @@ def to_zarr(
else:
mode = "w-"

if mode != "a" and append_dim is not None:
if mode not in ["a", "a-"] and append_dim is not None:
raise ValueError("cannot set append_dim unless mode='a' or mode=None")

if mode not in ["a", "r+"] and region is not None:
raise ValueError("cannot set region unless mode='a', mode='r+' or mode=None")
if mode not in ["a", "a-", "r+"] and region is not None:
raise ValueError(
"cannot set region unless mode='a', mode='a-', mode='r+' or mode=None"
)

if mode not in ["w", "w-", "a", "r+"]:
if mode not in ["w", "w-", "a", "a-", "r+"]:
raise ValueError(
"The only supported options for mode are 'w', "
f"'w-', 'a' and 'r+', but mode={mode!r}"
f"'w-', 'a', 'a-', and 'r+', but mode={mode!r}"
)

# validate Dataset keys, DataArray names
Expand Down Expand Up @@ -1679,7 +1681,7 @@ def to_zarr(
write_empty=write_empty_chunks,
)

if mode in ["a", "r+"]:
if mode in ["a", "a-", "r+"]:
_validate_datatypes_for_zarr_append(zstore, dataset)
if append_dim is not None:
existing_dims = zstore.get_dimensions()
Expand Down
8 changes: 5 additions & 3 deletions xarray/backends/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from xarray.core import indexing
from xarray.core.parallelcompat import guess_chunkmanager
from xarray.core.pycompat import integer_types
from xarray.core.types import ZarrWriteModes
from xarray.core.utils import (
FrozenDict,
HiddenKeyDict,
Expand Down Expand Up @@ -377,7 +378,7 @@ class ZarrStore(AbstractWritableDataStore):
def open_group(
cls,
store,
mode="r",
mode: ZarrWriteModes = "r",
synchronizer=None,
group=None,
consolidated=False,
Expand All @@ -402,7 +403,8 @@ def open_group(
zarr_version = getattr(store, "_store_version", 2)

open_kwargs = dict(
mode=mode,
# mode='a-' is a handcrafted xarray specialty
mode="a" if mode == "a-" else mode,
synchronizer=synchronizer,
path=group,
)
Expand Down Expand Up @@ -632,7 +634,7 @@ def store(
# - new variables not already present, OR
# - variables with the append_dim in their dimensions
# We do NOT overwrite other variables.
if self._append_dim is not None:
if self._mode == "a-" and self._append_dim is not None:
variables_to_set = {
k: v
for k, v in variables_encoded.items()
Expand Down
8 changes: 5 additions & 3 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@
T_Chunks,
T_DataArrayOrSet,
T_Dataset,
ZarrWriteModesOptional,
)
from xarray.core.utils import (
Default,
Expand Down Expand Up @@ -2341,7 +2342,7 @@ def to_zarr(
self,
store: MutableMapping | str | PathLike[str] | None = None,
chunk_store: MutableMapping | str | PathLike | None = None,
mode: Literal["w", "w-", "a", "r+", None] = None,
mode: ZarrWriteModesOptional = None,
synchronizer=None,
group: str | None = None,
encoding: Mapping | None = None,
Expand Down Expand Up @@ -2379,10 +2380,11 @@ def to_zarr(
chunk_store : MutableMapping, str or path-like, optional
Store or path to directory in local or remote file system only for Zarr
array chunks. Requires zarr-python v2.4.0 or later.
mode : {"w", "w-", "a", "r+", None}, optional
mode : {"w", "w-", "a", "a-", "r+", None}, optional
Persistence mode: "w" means create (overwrite if exists);
"w-" means create (fail if exists);
"a" means override existing variables (create if does not exist);
"a" means override all existing variables including dimension coordinates (create if does not exist);
"a-" means only append those variables that have ``append_dim``;
"r+" means modify existing array *values* only (raise an error if
any metadata or shapes would change).
The default mode is "a" if ``append_dim`` is set. Otherwise, it is
Expand Down
4 changes: 4 additions & 0 deletions xarray/core/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,3 +282,7 @@ def copy(
"midpoint",
"nearest",
]


ZarrWriteModes = Literal["w", "w-", "a", "a-", "r+"]
ZarrWriteModesOptional = ZarrWriteModes | None
14 changes: 12 additions & 2 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -2387,14 +2387,24 @@ def test_append_with_append_dim_no_overwrite(self) -> None:
with self.create_zarr_target() as store_target:
ds.to_zarr(store_target, mode="w", **self.version_kwargs)
original = xr.concat([ds, ds_to_append], dim="time")
original2 = xr.concat([original, ds_to_append], dim="time")

# overwrite a coordinate; this will not get written to the store
# overwrite a coordinate;
# for mode='a-', this will not get written to the store
# because it does not have the append_dim as a dim
ds_to_append.lon.data[:] = -999
ds_to_append.to_zarr(store_target, append_dim="time", **self.version_kwargs)
ds_to_append.to_zarr(
store_target, mode="a-", append_dim="time", **self.version_kwargs
)
actual = xr.open_dataset(store_target, engine="zarr", **self.version_kwargs)
assert_identical(original, actual)

# by default, mode="a" will overwrite all coordinates.
ds_to_append.to_zarr(store_target, append_dim="time", **self.version_kwargs)
actual = xr.open_dataset(store_target, engine="zarr", **self.version_kwargs)
original2.lon.data[:] = -999
assert_identical(original2, actual)

@requires_dask
def test_to_zarr_compute_false_roundtrip(self) -> None:
from dask.delayed import Delayed
Expand Down

0 comments on commit f77ca2a

Please sign in to comment.