From 91cb3e80a5157bb3115cf66e6085a81dd9e2e71b Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Fri, 9 Sep 2022 09:36:28 +0200 Subject: [PATCH 1/3] Use Zarr v2.13.0a2 which include https://github.com/zarr-developers/zarr-python/pull/934 --- python/benchmarks/single-node-io.py | 28 +++++++++++++--------------- python/tests/test_benchmarks.py | 13 ++++--------- python/tests/test_zarr.py | 24 ++++++++---------------- 3 files changed, 25 insertions(+), 40 deletions(-) diff --git a/python/benchmarks/single-node-io.py b/python/benchmarks/single-node-io.py index f8255bec46..4b56090f77 100644 --- a/python/benchmarks/single-node-io.py +++ b/python/benchmarks/single-node-io.py @@ -3,7 +3,6 @@ import argparse import contextlib -import functools import os import os.path import pathlib @@ -193,30 +192,30 @@ def run_posix(args): return read_time, write_time -def run_zarr(store_type, args): +def run_zarr(args): """Use the Zarr API""" import zarr - import zarr.cupy import kvikio.zarr + dir_path = args.dir / "zarr" + + if not hasattr(zarr.Array, "meta_array"): + RuntimeError("Requires Zarr v2.13.0+ for CuPy support") + a = cupy.arange(args.nbytes, dtype="uint8") - # Retrieve the store and compressor to use based on `store_type` - shutil.rmtree(str(args.dir / store_type), ignore_errors=True) - store, compressor = { - "gds": (kvikio.zarr.GDSStore(args.dir / store_type), None), - "posix": ( - zarr.DirectoryStore(args.dir / store_type), - zarr.cupy.CuPyCPUCompressor(), - ), - }[store_type] + shutil.rmtree(str(dir_path), ignore_errors=True) # Write t0 = clock() z = zarr.array( - a, chunks=False, compressor=compressor, store=store, meta_array=cupy.empty(()) + a, + chunks=False, + compressor=None, + store=kvikio.zarr.GDSStore(dir_path), + meta_array=cupy.empty(()), ) write_time = clock() - t0 @@ -231,8 +230,7 @@ def run_zarr(store_type, args): API = { "cufile": run_cufile, - "zarr-gds": functools.partial(run_zarr, "gds"), - "zarr-posix": functools.partial(run_zarr, "posix"), + "zarr": run_zarr, "posix": run_posix, "cufile-mfma": run_cufile_multiple_files_multiple_arrays, "cufile-mf": run_cufile_multiple_files, diff --git a/python/tests/test_benchmarks.py b/python/tests/test_benchmarks.py index 772aa3f92b..21ebb2f737 100644 --- a/python/tests/test_benchmarks.py +++ b/python/tests/test_benchmarks.py @@ -21,21 +21,16 @@ "cufile-mfma", "cufile-mf", "cufile-ma", - "zarr-gds", - "zarr-posix", + "zarr", ], ) def test_single_node_io(run_cmd, tmp_path, api): """Test benchmarks/single-node-io.py""" if "zarr" in api: - pytest.importorskip( - "zarr.cupy", - reason=( - "To use Zarr arrays with GDS directly, Zarr needs CuPy support: " - "" - ), - ) + zarr = pytest.importorskip("zarr") + if not hasattr(zarr.Array, "meta_array"): + pytest.skip("Requires Zarr v2.13.0+ for CuPy support") retcode = run_cmd( cmd=[ diff --git a/python/tests/test_zarr.py b/python/tests/test_zarr.py index 6907e3be08..757c2bd22a 100644 --- a/python/tests/test_zarr.py +++ b/python/tests/test_zarr.py @@ -9,6 +9,12 @@ GDSStore = pytest.importorskip("kvikio.zarr").GDSStore +cupy_support = pytest.mark.skipif( + not hasattr(zarr.Array, "meta_array"), + reason="Requires Zarr v2.13.0+ for CuPy support", +) + + @pytest.fixture def store(tmp_path): """Fixture that creates a GDS Store""" @@ -29,17 +35,10 @@ def test_direct_store_access(store, array_type): cupy.testing.assert_array_equal(a, b) +@cupy_support def test_array(store): """Test Zarr array""" - pytest.importorskip( - "zarr.cupy", - reason=( - "To use Zarr arrays with GDS directly, Zarr needs CuPy support: " - "" - ), - ) - a = cupy.arange(100) z = zarr.array( a, chunks=10, compressor=None, store=store, meta_array=cupy.empty(()) @@ -50,17 +49,10 @@ def test_array(store): cupy.testing.assert_array_equal(a, z[:]) +@cupy_support def test_group(store): """Test Zarr group""" - pytest.importorskip( - "zarr.cupy", - reason=( - "To use Zarr arrays with GDS directly, Zarr needs CuPy support: " - "" - ), - ) - g = zarr.open_group(store, meta_array=cupy.empty(())) g.ones("data", shape=(10, 11), dtype=int, compressor=None) a = g["data"] From a4703df2da016166308829c5861c833a08ed3393 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Fri, 9 Sep 2022 09:59:50 +0200 Subject: [PATCH 2/3] Overwriting DirectoryStore.__setitem__ is not needed anymore --- python/kvikio/zarr.py | 60 ------------------------------------------- 1 file changed, 60 deletions(-) diff --git a/python/kvikio/zarr.py b/python/kvikio/zarr.py index 453609cd25..252237d3f9 100644 --- a/python/kvikio/zarr.py +++ b/python/kvikio/zarr.py @@ -1,15 +1,11 @@ # Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. # See file LICENSE for terms. -import errno import os import os.path -import shutil -import uuid import cupy import zarr.storage -from zarr.util import retry_call import kvikio from kvikio._lib.arr import asarray @@ -55,59 +51,3 @@ def _tofile(self, a, fn): assert written == a.nbytes else: super()._tofile(a.obj, fn) - - def __setitem__(self, key, value): - """ - We have to overwrite this because `DirectoryStore.__setitem__` - converts `value` to a NumPy array always - """ - key = self._normalize_key(key) - - # coerce to flat, contiguous buffer (ideally without copying) - arr = asarray(value) - if arr.contiguous: - value = arr - else: - if arr.cuda: - # value = cupy.ascontiguousarray(value) - value = arr.reshape(-1, order="A") - else: - # can flatten without copy - value = arr.reshape(-1, order="A") - - # destination path for key - file_path = os.path.join(self.path, key) - - # ensure there is no directory in the way - if os.path.isdir(file_path): - shutil.rmtree(file_path) - - # ensure containing directory exists - dir_path, file_name = os.path.split(file_path) - if os.path.isfile(dir_path): - raise KeyError(key) - if not os.path.exists(dir_path): - try: - os.makedirs(dir_path) - except OSError as e: - if e.errno != errno.EEXIST: - raise KeyError(key) - - # write to temporary file - # note we're not using tempfile.NamedTemporaryFile to avoid - # restrictive file permissions - temp_name = file_name + "." + uuid.uuid4().hex + ".partial" - temp_path = os.path.join(dir_path, temp_name) - try: - self._tofile(value, temp_path) - - # move temporary file into place; - # make several attempts at writing the temporary file to get past - # potential antivirus file locking issues - retry_call( - os.replace, (temp_path, file_path), exceptions=(PermissionError,) - ) - finally: - # clean up if temp file still exists for whatever reason - if os.path.exists(temp_path): # pragma: no cover - os.remove(temp_path) From 59703b4860db8bc25d1c04c8258f78fac4d75d9a Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Fri, 9 Sep 2022 12:39:08 +0200 Subject: [PATCH 3/3] requires Zarr v2.13+ --- python/benchmarks/single-node-io.py | 2 +- python/tests/test_benchmarks.py | 2 +- python/tests/test_zarr.py | 11 ++++------- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/python/benchmarks/single-node-io.py b/python/benchmarks/single-node-io.py index 4b56090f77..0492100216 100644 --- a/python/benchmarks/single-node-io.py +++ b/python/benchmarks/single-node-io.py @@ -202,7 +202,7 @@ def run_zarr(args): dir_path = args.dir / "zarr" if not hasattr(zarr.Array, "meta_array"): - RuntimeError("Requires Zarr v2.13.0+ for CuPy support") + raise RuntimeError("requires Zarr v2.13+") a = cupy.arange(args.nbytes, dtype="uint8") diff --git a/python/tests/test_benchmarks.py b/python/tests/test_benchmarks.py index 21ebb2f737..c888fdbd19 100644 --- a/python/tests/test_benchmarks.py +++ b/python/tests/test_benchmarks.py @@ -30,7 +30,7 @@ def test_single_node_io(run_cmd, tmp_path, api): if "zarr" in api: zarr = pytest.importorskip("zarr") if not hasattr(zarr.Array, "meta_array"): - pytest.skip("Requires Zarr v2.13.0+ for CuPy support") + pytest.skip("requires Zarr v2.13+") retcode = run_cmd( cmd=[ diff --git a/python/tests/test_zarr.py b/python/tests/test_zarr.py index 757c2bd22a..8e1a027430 100644 --- a/python/tests/test_zarr.py +++ b/python/tests/test_zarr.py @@ -8,11 +8,10 @@ zarr = pytest.importorskip("zarr") GDSStore = pytest.importorskip("kvikio.zarr").GDSStore - -cupy_support = pytest.mark.skipif( - not hasattr(zarr.Array, "meta_array"), - reason="Requires Zarr v2.13.0+ for CuPy support", -) +# To support CuPy arrays, we need the `meta_array` argument introduced in +# Zarr v2.13, see +if not hasattr(zarr.Array, "meta_array"): + pytest.skip("requires Zarr v2.13+", allow_module_level=True) @pytest.fixture @@ -35,7 +34,6 @@ def test_direct_store_access(store, array_type): cupy.testing.assert_array_equal(a, b) -@cupy_support def test_array(store): """Test Zarr array""" @@ -49,7 +47,6 @@ def test_array(store): cupy.testing.assert_array_equal(a, z[:]) -@cupy_support def test_group(store): """Test Zarr group"""