-
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Description
What happened?
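I think that something like dask's `normalize_chunks`
(https://docs.dask.org/en/stable/_modules/dask/array/core.html#normalize_chunks)
should be copied into xarray so that arrays whose `chunks` attribute is a flat tuple of ints (one entry per dimension) rather than a dask-style tuple of tuples, such as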
def chunks(self) -> tuple[int, ...]:
    """Return the chunk shape as a flat tuple of ints, one per dimension.

    Here the chunk shape is simply the array's own shape, i.e. a plain
    tuple of ints rather than a dask-style tuple of tuples.
    """
    return self.shape
are supported.
I get tracebacks that look like
In [3]: dataset.to_zarr('z.zarr')
-------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[3], line 1
----> 1 dataset.to_zarr('z.zarr')
File ~/miniforge3/envs/dev/lib/python3.12/site-packages/xarray/core/dataset.py:2371, in Dataset.to_zarr(self, store, chunk_store, mode, synchronizer, group, encoding, compute, consolidated, append_dim, region, safe_chunks, align_chunks, storage_options, zarr_version, zarr_format, write_empty_chunks, chunkmanager_store_kwargs)
2188 """Write dataset contents to a zarr group.
2189
2190 Zarr chunks are determined in the following way:
(...) 2367 The I/O user guide, with more details and examples.
2368 """
2369 from xarray.backends.writers import to_zarr
-> 2371 return to_zarr( # type: ignore[call-overload,misc]
2372 self,
2373 store=store,
2374 chunk_store=chunk_store,
2375 storage_options=storage_options,
2376 mode=mode,
2377 synchronizer=synchronizer,
2378 group=group,
2379 encoding=encoding,
2380 compute=compute,
2381 consolidated=consolidated,
2382 append_dim=append_dim,
2383 region=region,
2384 safe_chunks=safe_chunks,
2385 align_chunks=align_chunks,
2386 zarr_version=zarr_version,
2387 zarr_format=zarr_format,
2388 write_empty_chunks=write_empty_chunks,
2389 chunkmanager_store_kwargs=chunkmanager_store_kwargs,
2390 )
File ~/miniforge3/envs/dev/lib/python3.12/site-packages/xarray/backends/writers.py:797, in to_zarr(dataset, store, chunk_store, mode, synchronizer, group, encoding, compute, consolidated, append_dim, region, safe_chunks, align_chunks, storage_options, zarr_version, zarr_format, write_empty_chunks, chunkmanager_store_kwargs)
795 # TODO: figure out how to properly handle unlimited_dims
796 try:
--> 797 dump_to_store(dataset, zstore, writer, encoding=encoding)
798 writes = writer.sync(
799 compute=compute, chunkmanager_store_kwargs=chunkmanager_store_kwargs
800 )
801 finally:
File ~/miniforge3/envs/dev/lib/python3.12/site-packages/xarray/backends/writers.py:491, in dump_to_store(dataset, store, writer, encoder, encoding, unlimited_dims)
488 if encoder:
489 variables, attrs = encoder(variables, attrs)
--> 491 store.store(variables, attrs, check_encoding, writer, unlimited_dims=unlimited_dims)
File ~/miniforge3/envs/dev/lib/python3.12/site-packages/xarray/backends/zarr.py:1056, in ZarrStore.store(self, variables, attributes, check_encoding_set, writer, unlimited_dims)
1053 else:
1054 variables_to_set = variables_encoded
-> 1056 self.set_variables(
1057 variables_to_set, check_encoding_set, writer, unlimited_dims=unlimited_dims
1058 )
1059 if self._consolidate_on_close:
1060 kwargs = {}
File ~/miniforge3/envs/dev/lib/python3.12/site-packages/xarray/backends/zarr.py:1231, in ZarrStore.set_variables(self, variables, check_encoding_set, writer, unlimited_dims)
1223 region = tuple(write_region[dim] for dim in dims)
1225 # We need to do this for both new and existing variables to ensure we're not
1226 # writing to a partial chunk, even though we don't use the `encoding` value
1227 # when writing to an existing variable. See
1228 # https://github.com/pydata/xarray/issues/8371 for details.
1229 # Note: Ideally there should be two functions, one for validating the chunks and
1230 # another one for extracting the encoding.
-> 1231 encoding = extract_zarr_variable_encoding(
1232 v,
1233 raise_on_invalid=vn in check_encoding_set,
1234 name=vn,
1235 zarr_format=3 if is_zarr_v3_format else 2,
1236 )
1238 if self._align_chunks and isinstance(encoding["chunks"], tuple):
1239 v = grid_rechunk(
1240 v=v,
1241 enc_chunks=encoding["chunks"],
1242 region=region,
1243 )
File ~/miniforge3/envs/dev/lib/python3.12/site-packages/xarray/backends/zarr.py:473, in extract_zarr_variable_encoding(variable, raise_on_invalid, name, zarr_format)
470 if k not in valid_encodings:
471 del encoding[k]
--> 473 chunks = _determine_zarr_chunks(
474 enc_chunks=encoding.get("chunks"),
475 var_chunks=variable.chunks,
476 ndim=variable.ndim,
477 name=name,
478 )
479 if _zarr_v3() and chunks is None:
480 chunks = "auto"
File ~/miniforge3/envs/dev/lib/python3.12/site-packages/xarray/backends/zarr.py:304, in _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name)
297 # if there are no chunks in encoding but there are dask chunks, we try to
298 # use the same chunks in zarr
299 # However, zarr chunks needs to be uniform for each array
300 # https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html#chunks
301 # while dask chunks can be variable sized
302 # https://dask.pydata.org/en/latest/array-design.html#chunks
303 if var_chunks and not enc_chunks:
--> 304 if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks):
305 raise ValueError(
306 "Zarr requires uniform chunk sizes except for final chunk. "
307 f"Variable named {name!r} has incompatible dask chunks: {var_chunks!r}. "
308 "Consider rechunking using `chunk()`."
309 )
310 if any((chunks[0] < chunks[-1]) for chunks in var_chunks):
File ~/miniforge3/envs/dev/lib/python3.12/site-packages/xarray/backends/zarr.py:304, in <genexpr>(.0)
297 # if there are no chunks in encoding but there are dask chunks, we try to
298 # use the same chunks in zarr
299 # However, zarr chunks needs to be uniform for each array
300 # https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html#chunks
301 # while dask chunks can be variable sized
302 # https://dask.pydata.org/en/latest/array-design.html#chunks
303 if var_chunks and not enc_chunks:
--> 304 if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks):
305 raise ValueError(
306 "Zarr requires uniform chunk sizes except for final chunk. "
307 f"Variable named {name!r} has incompatible dask chunks: {var_chunks!r}. "
308 "Consider rechunking using `chunk()`."
309 )
310 if any((chunks[0] < chunks[-1]) for chunks in var_chunks):
TypeError: 'int' object is not subscriptable
What did you expect to happen?
I expected `to_zarr` to accept chunks given as a flat tuple of ints (e.g. `(2, 5)`) and write the dataset without raising.
Minimal Complete Verifiable Example
import numpy as np
import xarray as xr
from xarray.namedarray.pycompat import is_chunked_array
from xarray.tests import assert_identical, requires_zarr
class SimpleChunkedArray(np.ndarray):
    """
    A custom array-like structure that exposes chunks as a simple tuple
    instead of a tuple of tuples.

    Parameters
    ----------
    input_array : array-like
        Data to wrap; it is passed through ``np.asarray`` and then viewed
        as this subclass.
    chunks : optional
        Value returned unchanged by the ``chunks`` property. Defaults to
        ``None``.
    """

    def __new__(cls, input_array, chunks=None):
        # ndarray subclasses are built in __new__: view the input data as
        # this class, then attach the chunk specification to the instance.
        obj = np.asarray(input_array).view(cls)
        # NOTE: ``_chunks`` is only assigned here, i.e. on instances created
        # through this constructor.
        obj._chunks = chunks
        return obj

    @property
    def chunks(self):
        """Return the chunk specification given at construction time."""
        return self._chunks
@requires_zarr
def test_zarr_with_simple_chunks_array_class(tmp_path):
    """Round-trip a dataset backed by ``SimpleChunkedArray`` through zarr.

    The wrapped array reports its chunks as a flat tuple ``(2, 5)`` rather
    than a tuple of tuples; writing and re-reading the dataset should work
    and leave the reported chunks unchanged.
    """
    arr = np.arange(250).reshape(10, 25)
    simple_chunked_arr = SimpleChunkedArray(arr, chunks=(2, 5))
    assert simple_chunked_arr.chunks == (2, 5)
    assert is_chunked_array(simple_chunked_arr)

    ds = xr.Dataset({"test_var": (("x", "y"), simple_chunked_arr)})
    assert ds["test_var"].variable.chunks == (2, 5)

    zarr_path = tmp_path / "test.zarr"
    # Per this report, the write currently fails with
    # "TypeError: 'int' object is not subscriptable".
    ds.to_zarr(zarr_path)

    with xr.open_zarr(zarr_path) as loaded:
        assert_identical(ds.load(), loaded.load())
    # The flat chunks tuple is still reported after the round trip.
    assert ds["test_var"].variable.chunks == (2, 5)
MVCE confirmation
- Minimal example — the example is as focused as reasonably possible to demonstrate the underlying issue in xarray.
- Complete example — the example is self-contained, including all data and the text of any traceback.
- Verifiable example — the example copy & pastes into an IPython prompt or Binder notebook, returning the result.
- New issue — a search of GitHub Issues suggests this is not a duplicate.
- Recent environment — the issue occurs with the latest version of xarray and its dependencies.
Anything else we need to know?
Happy to provide a PR if "normalizing chunks" is something we want to support.
Environment
xr.show_versions()
INSTALLED VERSIONS
commit: eb01d9c
python: 3.12.12
python-bits: 64
OS: Linux
OS-release: 6.17.0-6-generic
machine: x86_64
processor:
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
LOCALE: ('en_US', 'UTF-8')
libhdf5: 1.14.6
libnetcdf: 4.9.3
xarray: 0.0.0
pandas: 2.3.3
numpy: 2.3.4
scipy: 1.16.3
netCDF4: 1.7.3
pydap: None
h5netcdf: 1.7.3
h5py: 3.15.1
zarr: 3.1.3
cftime: 1.6.4
nc_time_axis: None
iris: None
bottleneck: None
dask: 2025.11.0
distributed: None
matplotlib: 3.10.8
cartopy: None
seaborn: None
numbagg: None
fsspec: 2025.10.0
cupy: None
pint: None
sparse: None
flox: None
numpy_groupies: None
setuptools: 80.9.0
pip: 25.3
conda: None
pytest: 9.0.1
mypy: None
IPython: 9.7.0
sphinx: 8.2.3