Skip to content

zarr writing doesn't support simple chunks that are Tuple[int] #10936

@hmaarrfk

Description

@hmaarrfk

What happened?

I think that something like `normalize_chunks` from

https://docs.dask.org/en/stable/_modules/dask/array/core.html#normalize_chunks

should be copied into xarray so that arrays whose `chunks` property is a flat tuple of ints, e.g.

def chunks(self) -> tuple[int, ...]:
    """Return the chunk layout as a flat tuple with one entry per dimension.

    The array's own shape is returned, i.e. a plain tuple of ints rather than
    a tuple of per-dimension tuples.
    """
    return self.shape

are supported.

I get tracebacks that look like

In [3]: dataset.to_zarr('z.zarr')
-------------------------------------------------------------------------
TypeError                               Traceback (most recent call last)
Cell In[3], line 1
----> 1 dataset.to_zarr('z.zarr')

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/xarray/core/dataset.py:2371, in Dataset.to_zarr(self, store, chunk_store, mode, synchronizer, group, encoding, compute, consolidated, append_dim, region, safe_chunks, align_chunks, storage_options, zarr_version, zarr_format, write_empty_chunks, chunkmanager_store_kwargs)
   2188 """Write dataset contents to a zarr group.
   2189
   2190 Zarr chunks are determined in the following way:
   (...)   2367     The I/O user guide, with more details and examples.
   2368 """
   2369 from xarray.backends.writers import to_zarr
-> 2371 return to_zarr(  # type: ignore[call-overload,misc]
   2372     self,
   2373     store=store,
   2374     chunk_store=chunk_store,
   2375     storage_options=storage_options,
   2376     mode=mode,
   2377     synchronizer=synchronizer,
   2378     group=group,
   2379     encoding=encoding,
   2380     compute=compute,
   2381     consolidated=consolidated,
   2382     append_dim=append_dim,
   2383     region=region,
   2384     safe_chunks=safe_chunks,
   2385     align_chunks=align_chunks,
   2386     zarr_version=zarr_version,
   2387     zarr_format=zarr_format,
   2388     write_empty_chunks=write_empty_chunks,
   2389     chunkmanager_store_kwargs=chunkmanager_store_kwargs,
   2390 )

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/xarray/backends/writers.py:797, in to_zarr(dataset, store, chunk_store, mode, synchronizer, group, encoding, compute, consolidated, append_dim, region, safe_chunks, align_chunks, storage_options, zarr_version, zarr_format, write_empty_chunks, chunkmanager_store_kwargs)
    795 # TODO: figure out how to properly handle unlimited_dims
    796 try:
--> 797     dump_to_store(dataset, zstore, writer, encoding=encoding)
    798     writes = writer.sync(
    799         compute=compute, chunkmanager_store_kwargs=chunkmanager_store_kwargs
    800     )
    801 finally:

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/xarray/backends/writers.py:491, in dump_to_store(dataset, store, writer, encoder, encoding, unlimited_dims)
    488 if encoder:
    489     variables, attrs = encoder(variables, attrs)
--> 491 store.store(variables, attrs, check_encoding, writer, unlimited_dims=unlimited_dims)

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/xarray/backends/zarr.py:1056, in ZarrStore.store(self, variables, attributes, check_encoding_set, writer, unlimited_dims)
   1053 else:
   1054     variables_to_set = variables_encoded
-> 1056 self.set_variables(
   1057     variables_to_set, check_encoding_set, writer, unlimited_dims=unlimited_dims
   1058 )
   1059 if self._consolidate_on_close:
   1060     kwargs = {}

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/xarray/backends/zarr.py:1231, in ZarrStore.set_variables(self, variables, check_encoding_set, writer, unlimited_dims)
   1223 region = tuple(write_region[dim] for dim in dims)
   1225 # We need to do this for both new and existing variables to ensure we're not
   1226 # writing to a partial chunk, even though we don't use the `encoding` value
   1227 # when writing to an existing variable. See
   1228 # https://github.com/pydata/xarray/issues/8371 for details.
   1229 # Note: Ideally there should be two functions, one for validating the chunks and
   1230 # another one for extracting the encoding.
-> 1231 encoding = extract_zarr_variable_encoding(
   1232     v,
   1233     raise_on_invalid=vn in check_encoding_set,
   1234     name=vn,
   1235     zarr_format=3 if is_zarr_v3_format else 2,
   1236 )
   1238 if self._align_chunks and isinstance(encoding["chunks"], tuple):
   1239     v = grid_rechunk(
   1240         v=v,
   1241         enc_chunks=encoding["chunks"],
   1242         region=region,
   1243     )

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/xarray/backends/zarr.py:473, in extract_zarr_variable_encoding(variable, raise_on_invalid, name, zarr_format)
    470         if k not in valid_encodings:
    471             del encoding[k]
--> 473 chunks = _determine_zarr_chunks(
    474     enc_chunks=encoding.get("chunks"),
    475     var_chunks=variable.chunks,
    476     ndim=variable.ndim,
    477     name=name,
    478 )
    479 if _zarr_v3() and chunks is None:
    480     chunks = "auto"

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/xarray/backends/zarr.py:304, in _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name)
    297 # if there are no chunks in encoding but there are dask chunks, we try to
    298 # use the same chunks in zarr
    299 # However, zarr chunks needs to be uniform for each array
    300 # https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html#chunks
    301 # while dask chunks can be variable sized
    302 # https://dask.pydata.org/en/latest/array-design.html#chunks
    303 if var_chunks and not enc_chunks:
--> 304     if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks):
    305         raise ValueError(
    306             "Zarr requires uniform chunk sizes except for final chunk. "
    307             f"Variable named {name!r} has incompatible dask chunks: {var_chunks!r}. "
    308             "Consider rechunking using `chunk()`."
    309         )
    310     if any((chunks[0] < chunks[-1]) for chunks in var_chunks):

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/xarray/backends/zarr.py:304, in <genexpr>(.0)
    297 # if there are no chunks in encoding but there are dask chunks, we try to
    298 # use the same chunks in zarr
    299 # However, zarr chunks needs to be uniform for each array
    300 # https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html#chunks
    301 # while dask chunks can be variable sized
    302 # https://dask.pydata.org/en/latest/array-design.html#chunks
    303 if var_chunks and not enc_chunks:
--> 304     if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks):
    305         raise ValueError(
    306             "Zarr requires uniform chunk sizes except for final chunk. "
    307             f"Variable named {name!r} has incompatible dask chunks: {var_chunks!r}. "
    308             "Consider rechunking using `chunk()`."
    309         )
    310     if any((chunks[0] < chunks[-1]) for chunks in var_chunks):

TypeError: 'int' object is not subscriptable

What did you expect to happen?

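For `to_zarr` to accept chunks given as a flat tuple of ints (one chunk size per dimension) and write the array using them, instead of raising `TypeError`.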

Minimal Complete Verifiable Example

import numpy as np

import xarray as xr
from xarray.namedarray.pycompat import is_chunked_array
from xarray.tests import assert_identical, requires_zarr


class SimpleChunkedArray(np.ndarray):
    """
    A custom array-like structure that exposes chunks as a simple tuple
    instead of a tuple of tuples.

    Parameters
    ----------
    input_array : array-like
        Data to wrap; it is converted with ``np.asarray`` and viewed as this
        class.
    chunks : optional
        Value reported by the ``chunks`` property (``None`` by default).
    """

    def __new__(cls, input_array, chunks=None):
        obj = np.asarray(input_array).view(cls)
        # ``view`` already ran ``__array_finalize__``; override its default
        # with the value requested by the caller.
        obj._chunks = chunks
        return obj

    def __array_finalize__(self, obj):
        # Instances produced by slicing or view casting never go through
        # ``__new__``, so ``_chunks`` must be initialised here as well;
        # otherwise reading ``.chunks`` on them raises AttributeError.
        # Inherit the parent's value when there is one, else fall back to None.
        self._chunks = getattr(obj, "_chunks", None)

    @property
    def chunks(self):
        """The chunks value supplied at construction (or inherited)."""
        return self._chunks


@requires_zarr
def test_zarr_with_simple_chunks_array_class(tmp_path):
    """Write a dataset backed by a flat-tuple-chunked array to zarr and read it back."""
    expected_chunks = (2, 5)
    wrapped = SimpleChunkedArray(
        np.arange(250).reshape(10, 25), chunks=expected_chunks
    )

    # The wrapper must report the flat chunks and be recognised as chunked.
    assert wrapped.chunks == expected_chunks
    assert is_chunked_array(wrapped)

    ds = xr.Dataset({"test_var": (("x", "y"), wrapped)})
    assert ds["test_var"].variable.chunks == expected_chunks

    target = tmp_path / "test.zarr"
    ds.to_zarr(target)

    with xr.open_zarr(target) as loaded:
        assert_identical(ds.load(), loaded.load())
        assert ds["test_var"].variable.chunks == expected_chunks

MVCE confirmation

  • Minimal example — the example is as focused as reasonably possible to demonstrate the underlying issue in xarray.
  • Complete example — the example is self-contained, including all data and the text of any traceback.
  • Verifiable example — the example copy & pastes into an IPython prompt or Binder notebook, returning the result.
  • New issue — a search of GitHub Issues suggests this is not a duplicate.
  • Recent environment — the issue occurs with the latest version of xarray and its dependencies.

Anything else we need to know?

Happy to provide a PR if "normalizing chunks" is something we want to support.

Environment

xr.show_versions()

INSTALLED VERSIONS

commit: eb01d9c
python: 3.12.12
python-bits: 64
OS: Linux
OS-release: 6.17.0-6-generic
machine: x86_64
processor:
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
LOCALE: ('en_US', 'UTF-8')
libhdf5: 1.14.6
libnetcdf: 4.9.3

xarray: 0.0.0
pandas: 2.3.3
numpy: 2.3.4
scipy: 1.16.3
netCDF4: 1.7.3
pydap: None
h5netcdf: 1.7.3
h5py: 3.15.1
zarr: 3.1.3
cftime: 1.6.4
nc_time_axis: None
iris: None
bottleneck: None
dask: 2025.11.0
distributed: None
matplotlib: 3.10.8
cartopy: None
seaborn: None
numbagg: None
fsspec: 2025.10.0
cupy: None
pint: None
sparse: None
flox: None
numpy_groupies: None
setuptools: 80.9.0
pip: 25.3
conda: None
pytest: 9.0.1
mypy: None
IPython: 9.7.0
sphinx: 8.2.3

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug · needs triage (Issue that has not been reviewed by xarray team member)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions