Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implement Zarr v3 spec support #6475

Merged
merged 26 commits into from Nov 27, 2022
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
f040aa2
make zarr backend compatible with v3 spec
grlee77 Apr 9, 2022
4718376
add tests for Zarr v3 stores
grlee77 Apr 9, 2022
d590b76
add tests for Zarr v3 stores when the store is not a StoreV3 class
grlee77 Sep 21, 2022
dbdf63f
update import path to match Zarr v2.12 and v2.13 experimental API
grlee77 Sep 21, 2022
f9c14e0
flake8 fixes
grlee77 Apr 11, 2022
09ddcff
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 21, 2022
8952a07
Merge branch 'main' of github.com:pydata/xarray into zarr-v3
jhamman Oct 28, 2022
0f14df5
disallow consolidated metadata for zarr v3
jhamman Oct 28, 2022
1556ce0
whats new a + remove more consolidated metadata for v3
jhamman Oct 28, 2022
5885134
activate upstream dev test for zarr v3
jhamman Oct 29, 2022
3ca92f0
better typing
jhamman Oct 29, 2022
adabc52
Merge branch 'main' of github.com:pydata/xarray into zarr-v3
jhamman Nov 4, 2022
3c42a95
untype zarr_version in open_dataset
jhamman Nov 4, 2022
4e29496
update whats new
jhamman Nov 4, 2022
e40658a
Merge branch 'main' into zarr-v3
jhamman Nov 5, 2022
4a6d5c9
[test-upstream]
jhamman Nov 5, 2022
226a51a
Merge branch 'main' of github.com:pydata/xarray into zarr-v3
jhamman Nov 5, 2022
ecd54bb
Merge branch 'zarr-v3' of github.com:grlee77/xarray into zarr-v3
jhamman Nov 5, 2022
4e9f4c3
Merge branch 'main' into zarr-v3
jhamman Nov 17, 2022
0a84857
update comment
jhamman Nov 18, 2022
e402bb8
Merge branch 'zarr-v3' of github.com:grlee77/xarray into zarr-v3
jhamman Nov 18, 2022
c953864
fix whats new
jhamman Nov 19, 2022
507c827
Merge branch 'main' into zarr-v3
jhamman Nov 19, 2022
2d471b3
update whats new
jhamman Nov 19, 2022
b29975b
Merge branch 'zarr-v3' of github.com:grlee77/xarray into zarr-v3
jhamman Nov 19, 2022
730b3ba
Merge branch 'main' into zarr-v3
jhamman Nov 21, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Expand Up @@ -31,6 +31,8 @@ New Features
- Added methods :py:meth:`DataArrayGroupBy.cumprod` and :py:meth:`DatasetGroupBy.cumprod`.
(:pull:`5816`)
By `Patrick Naylor <https://github.com/patrick-naylor>`_
- Add experimental support for Zarr's V3 specification. (:pull:`6475`).
By `Gregory Lee <https://github.com/grlee77>`_ and `Joe Hamman <https://github.com/jhamman>`_.

Breaking changes
~~~~~~~~~~~~~~~~
Expand Down
9 changes: 9 additions & 0 deletions xarray/backends/api.py
Expand Up @@ -1542,6 +1542,7 @@ def to_zarr(
region: Mapping[str, slice] | None = None,
safe_chunks: bool = True,
storage_options: dict[str, str] | None = None,
zarr_version: int | None = None,
) -> backends.ZarrStore | Delayed:
"""This function creates an appropriate datastore for writing a dataset to
a zarr store
Expand Down Expand Up @@ -1608,6 +1609,13 @@ def to_zarr(
f"``region`` with to_zarr(), got {append_dim} in both"
)

if zarr_version is None:
    # default to 2 if store doesn't specify its version (e.g. a path)
zarr_version = getattr(store, "_store_version", 2)

if consolidated is None and zarr_version > 2:
consolidated = False

if mode == "r+":
already_consolidated = consolidated
consolidate_on_close = False
Expand All @@ -1626,6 +1634,7 @@ def to_zarr(
write_region=region,
safe_chunks=safe_chunks,
stacklevel=4, # for Dataset.to_zarr()
zarr_version=zarr_version,
)

if mode in ["a", "r+"]:
Expand Down
36 changes: 36 additions & 0 deletions xarray/backends/zarr.py
Expand Up @@ -353,19 +353,37 @@ def open_group(
write_region=None,
safe_chunks=True,
stacklevel=2,
zarr_version=None,
):
import zarr

# zarr doesn't support pathlib.Path objects yet. zarr-python#601
if isinstance(store, os.PathLike):
store = os.fspath(store)

if zarr_version is None:
            # default to 2 if store doesn't specify its version (e.g. a path)
zarr_version = getattr(store, "_store_version", 2)

open_kwargs = dict(
mode=mode,
synchronizer=synchronizer,
path=group,
)
open_kwargs["storage_options"] = storage_options
if zarr_version > 2:
open_kwargs["zarr_version"] = zarr_version

if consolidated or consolidate_on_close:
raise ValueError(
"consolidated metadata has not been implemented for zarr "
f"version {zarr_version} yet. Set consolidated=False for "
f"zarr version {zarr_version}. See also "
"https://github.com/zarr-developers/zarr-specs/issues/136"
)

if consolidated is None:
consolidated = False

if chunk_store:
open_kwargs["chunk_store"] = chunk_store
Expand Down Expand Up @@ -440,6 +458,12 @@ def open_store_variable(self, name, zarr_array):
zarr_array, DIMENSION_KEY, try_nczarr
)
attributes = dict(attributes)

# TODO: how to properly handle 'filters' for v3 stores
jhamman marked this conversation as resolved.
Show resolved Hide resolved
# currently these use a hack to store 'filters' within attributes
# need to drop this here for V3 store tests to succeed
attributes.pop("filters", None)
jhamman marked this conversation as resolved.
Show resolved Hide resolved

encoding = {
"chunks": zarr_array.chunks,
"preferred_chunks": dict(zip(dimensions, zarr_array.chunks)),
Expand Down Expand Up @@ -668,6 +692,7 @@ def open_zarr(
storage_options=None,
decode_timedelta=None,
use_cftime=None,
zarr_version=None,
**kwargs,
):
"""Load and decode a dataset from a Zarr store.
Expand Down Expand Up @@ -725,6 +750,9 @@ def open_zarr(
capability. Only works for stores that have already been consolidated.
By default (`consolidate=None`), attempts to read consolidated metadata,
falling back to read non-consolidated metadata if that fails.

        When the experimental ``zarr_version=3``, ``consolidated`` must be
        either ``None`` or ``False``.
chunk_store : MutableMapping, optional
A separate Zarr store only for chunk data.
storage_options : dict, optional
Expand All @@ -745,6 +773,10 @@ def open_zarr(
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error.
zarr_version : int or None, optional
The desired zarr spec version to target (currently 2 or 3). The default
of None will attempt to determine the zarr version from ``store`` when
possible, otherwise defaulting to 2.

Returns
-------
Expand Down Expand Up @@ -782,6 +814,7 @@ def open_zarr(
"chunk_store": chunk_store,
"storage_options": storage_options,
"stacklevel": 4,
"zarr_version": zarr_version,
}

ds = open_dataset(
Expand All @@ -798,6 +831,7 @@ def open_zarr(
backend_kwargs=backend_kwargs,
decode_timedelta=decode_timedelta,
use_cftime=use_cftime,
zarr_version=zarr_version,
)
return ds

Expand Down Expand Up @@ -842,6 +876,7 @@ def open_dataset(
chunk_store=None,
storage_options=None,
stacklevel=3,
zarr_version: int | None = None,
):

filename_or_obj = _normalize_path(filename_or_obj)
Expand All @@ -855,6 +890,7 @@ def open_dataset(
chunk_store=chunk_store,
storage_options=storage_options,
stacklevel=stacklevel + 1,
zarr_version=zarr_version,
)

store_entrypoint = StoreBackendEntrypoint()
Expand Down
10 changes: 10 additions & 0 deletions xarray/core/dataset.py
Expand Up @@ -1930,6 +1930,7 @@ def to_zarr(
region: Mapping[str, slice] | None = None,
safe_chunks: bool = True,
storage_options: dict[str, str] | None = None,
zarr_version: int | None = None,
) -> ZarrStore:
...

Expand Down Expand Up @@ -1967,6 +1968,7 @@ def to_zarr(
region: Mapping[str, slice] | None = None,
safe_chunks: bool = True,
storage_options: dict[str, str] | None = None,
zarr_version: int | None = None,
) -> ZarrStore | Delayed:
"""Write dataset contents to a zarr group.

Expand Down Expand Up @@ -2017,6 +2019,9 @@ def to_zarr(
metadata; if False, do not. The default (`consolidated=None`) means
write consolidated metadata and attempt to read consolidated
metadata for existing stores (falling back to non-consolidated).

            When the experimental ``zarr_version=3``, ``consolidated`` must be
            either ``None`` or ``False``.
append_dim : hashable, optional
If set, the dimension along which the data will be appended. All
other dimensions on overridden variables must remain the same size.
Expand Down Expand Up @@ -2048,6 +2053,10 @@ def to_zarr(
storage_options : dict, optional
Any additional parameters for the storage backend (ignored for local
paths).
zarr_version : int or None, optional
The desired zarr spec version to target (currently 2 or 3). The
default of None will attempt to determine the zarr version from
``store`` when possible, otherwise defaulting to 2.

Returns
-------
Expand Down Expand Up @@ -2092,6 +2101,7 @@ def to_zarr(
append_dim=append_dim,
region=region,
safe_chunks=safe_chunks,
zarr_version=zarr_version,
)

def __repr__(self) -> str:
Expand Down