diff --git a/python/benchmarks/single-node-io.py b/python/benchmarks/single-node-io.py index f8255bec46..0492100216 100644 --- a/python/benchmarks/single-node-io.py +++ b/python/benchmarks/single-node-io.py @@ -3,7 +3,6 @@ import argparse import contextlib -import functools import os import os.path import pathlib @@ -193,30 +192,30 @@ def run_posix(args): return read_time, write_time -def run_zarr(store_type, args): +def run_zarr(args): """Use the Zarr API""" import zarr - import zarr.cupy import kvikio.zarr + dir_path = args.dir / "zarr" + + if not hasattr(zarr.Array, "meta_array"): + raise RuntimeError("requires Zarr v2.13+") + a = cupy.arange(args.nbytes, dtype="uint8") - # Retrieve the store and compressor to use based on `store_type` - shutil.rmtree(str(args.dir / store_type), ignore_errors=True) - store, compressor = { - "gds": (kvikio.zarr.GDSStore(args.dir / store_type), None), - "posix": ( - zarr.DirectoryStore(args.dir / store_type), - zarr.cupy.CuPyCPUCompressor(), - ), - }[store_type] + shutil.rmtree(str(dir_path), ignore_errors=True) # Write t0 = clock() z = zarr.array( - a, chunks=False, compressor=compressor, store=store, meta_array=cupy.empty(()) + a, + chunks=False, + compressor=None, + store=kvikio.zarr.GDSStore(dir_path), + meta_array=cupy.empty(()), ) write_time = clock() - t0 @@ -231,8 +230,7 @@ def run_zarr(store_type, args): API = { "cufile": run_cufile, - "zarr-gds": functools.partial(run_zarr, "gds"), - "zarr-posix": functools.partial(run_zarr, "posix"), + "zarr": run_zarr, "posix": run_posix, "cufile-mfma": run_cufile_multiple_files_multiple_arrays, "cufile-mf": run_cufile_multiple_files, diff --git a/python/kvikio/zarr.py b/python/kvikio/zarr.py index 453609cd25..252237d3f9 100644 --- a/python/kvikio/zarr.py +++ b/python/kvikio/zarr.py @@ -1,15 +1,11 @@ # Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. # See file LICENSE for terms. -import errno import os import os.path -import shutil -import uuid import cupy import zarr.storage -from zarr.util import retry_call import kvikio from kvikio._lib.arr import asarray @@ -55,59 +51,3 @@ def _tofile(self, a, fn): assert written == a.nbytes else: super()._tofile(a.obj, fn) - - def __setitem__(self, key, value): - """ - We have to overwrite this because `DirectoryStore.__setitem__` - converts `value` to a NumPy array always - """ - key = self._normalize_key(key) - - # coerce to flat, contiguous buffer (ideally without copying) - arr = asarray(value) - if arr.contiguous: - value = arr - else: - if arr.cuda: - # value = cupy.ascontiguousarray(value) - value = arr.reshape(-1, order="A") - else: - # can flatten without copy - value = arr.reshape(-1, order="A") - - # destination path for key - file_path = os.path.join(self.path, key) - - # ensure there is no directory in the way - if os.path.isdir(file_path): - shutil.rmtree(file_path) - - # ensure containing directory exists - dir_path, file_name = os.path.split(file_path) - if os.path.isfile(dir_path): - raise KeyError(key) - if not os.path.exists(dir_path): - try: - os.makedirs(dir_path) - except OSError as e: - if e.errno != errno.EEXIST: - raise KeyError(key) - - # write to temporary file - # note we're not using tempfile.NamedTemporaryFile to avoid - # restrictive file permissions - temp_name = file_name + "." + uuid.uuid4().hex + ".partial" - temp_path = os.path.join(dir_path, temp_name) - try: - self._tofile(value, temp_path) - - # move temporary file into place; - # make several attempts at writing the temporary file to get past - # potential antivirus file locking issues - retry_call( - os.replace, (temp_path, file_path), exceptions=(PermissionError,) - ) - finally: - # clean up if temp file still exists for whatever reason - if os.path.exists(temp_path): # pragma: no cover - os.remove(temp_path) diff --git a/python/tests/test_benchmarks.py b/python/tests/test_benchmarks.py index 772aa3f92b..c888fdbd19 100644 --- a/python/tests/test_benchmarks.py +++ b/python/tests/test_benchmarks.py @@ -21,21 +21,16 @@ "cufile-mfma", "cufile-mf", "cufile-ma", - "zarr-gds", - "zarr-posix", + "zarr", ], ) def test_single_node_io(run_cmd, tmp_path, api): """Test benchmarks/single-node-io.py""" if "zarr" in api: - pytest.importorskip( - "zarr.cupy", - reason=( - "To use Zarr arrays with GDS directly, Zarr needs CuPy support: " - "" - ), - ) + zarr = pytest.importorskip("zarr") + if not hasattr(zarr.Array, "meta_array"): + pytest.skip("requires Zarr v2.13+") retcode = run_cmd( cmd=[ diff --git a/python/tests/test_zarr.py b/python/tests/test_zarr.py index 6907e3be08..8e1a027430 100644 --- a/python/tests/test_zarr.py +++ b/python/tests/test_zarr.py @@ -8,6 +8,11 @@ zarr = pytest.importorskip("zarr") GDSStore = pytest.importorskip("kvikio.zarr").GDSStore +# To support CuPy arrays, we need the `meta_array` argument introduced in +# Zarr v2.13, see +if not hasattr(zarr.Array, "meta_array"): + pytest.skip("requires Zarr v2.13+", allow_module_level=True) + @pytest.fixture def store(tmp_path): @@ -32,14 +37,6 @@ def test_direct_store_access(store, array_type): def test_array(store): """Test Zarr array""" - pytest.importorskip( - "zarr.cupy", - reason=( - "To use Zarr arrays with GDS directly, Zarr needs CuPy support: " - "" - ), - ) - a = cupy.arange(100) z = zarr.array( a, chunks=10, compressor=None, store=store, meta_array=cupy.empty(()) @@ -53,14 +50,6 @@ def test_array(store): def test_group(store): """Test Zarr group""" - pytest.importorskip( - "zarr.cupy", - reason=( - "To use Zarr arrays with GDS directly, Zarr needs CuPy support: " - "" - ), - ) - g = zarr.open_group(store, meta_array=cupy.empty(())) g.ones("data", shape=(10, 11), dtype=int, compressor=None) a = g["data"]