Skip to content

Commit

Permalink
Use Zarr v2.13.0a2 (#129)
Browse files Browse the repository at this point in the history
which include zarr-developers/zarr-python#934

Authors:
  - Mads R. B. Kristensen (https://github.com/madsbk)

Approvers:
  - Benjamin Zaitlen (https://github.com/quasiben)

URL: #129
  • Loading branch information
madsbk committed Sep 12, 2022
1 parent f8f4063 commit 0ad73c8
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 100 deletions.
28 changes: 13 additions & 15 deletions python/benchmarks/single-node-io.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

import argparse
import contextlib
import functools
import os
import os.path
import pathlib
Expand Down Expand Up @@ -193,30 +192,30 @@ def run_posix(args):
return read_time, write_time


def run_zarr(store_type, args):
def run_zarr(args):
"""Use the Zarr API"""

import zarr
import zarr.cupy

import kvikio.zarr

dir_path = args.dir / "zarr"

if not hasattr(zarr.Array, "meta_array"):
raise RuntimeError("requires Zarr v2.13+")

a = cupy.arange(args.nbytes, dtype="uint8")

# Retrieve the store and compressor to use based on `store_type`
shutil.rmtree(str(args.dir / store_type), ignore_errors=True)
store, compressor = {
"gds": (kvikio.zarr.GDSStore(args.dir / store_type), None),
"posix": (
zarr.DirectoryStore(args.dir / store_type),
zarr.cupy.CuPyCPUCompressor(),
),
}[store_type]
shutil.rmtree(str(dir_path), ignore_errors=True)

# Write
t0 = clock()
z = zarr.array(
a, chunks=False, compressor=compressor, store=store, meta_array=cupy.empty(())
a,
chunks=False,
compressor=None,
store=kvikio.zarr.GDSStore(dir_path),
meta_array=cupy.empty(()),
)
write_time = clock() - t0

Expand All @@ -231,8 +230,7 @@ def run_zarr(store_type, args):

API = {
"cufile": run_cufile,
"zarr-gds": functools.partial(run_zarr, "gds"),
"zarr-posix": functools.partial(run_zarr, "posix"),
"zarr": run_zarr,
"posix": run_posix,
"cufile-mfma": run_cufile_multiple_files_multiple_arrays,
"cufile-mf": run_cufile_multiple_files,
Expand Down
60 changes: 0 additions & 60 deletions python/kvikio/zarr.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
# See file LICENSE for terms.

import errno
import os
import os.path
import shutil
import uuid

import cupy
import zarr.storage
from zarr.util import retry_call

import kvikio
from kvikio._lib.arr import asarray
Expand Down Expand Up @@ -55,59 +51,3 @@ def _tofile(self, a, fn):
assert written == a.nbytes
else:
super()._tofile(a.obj, fn)

def __setitem__(self, key, value):
"""
We have to overwrite this because `DirectoryStore.__setitem__`
converts `value` to a NumPy array always
"""
key = self._normalize_key(key)

# coerce to flat, contiguous buffer (ideally without copying)
arr = asarray(value)
if arr.contiguous:
value = arr
else:
if arr.cuda:
# value = cupy.ascontiguousarray(value)
value = arr.reshape(-1, order="A")
else:
# can flatten without copy
value = arr.reshape(-1, order="A")

# destination path for key
file_path = os.path.join(self.path, key)

# ensure there is no directory in the way
if os.path.isdir(file_path):
shutil.rmtree(file_path)

# ensure containing directory exists
dir_path, file_name = os.path.split(file_path)
if os.path.isfile(dir_path):
raise KeyError(key)
if not os.path.exists(dir_path):
try:
os.makedirs(dir_path)
except OSError as e:
if e.errno != errno.EEXIST:
raise KeyError(key)

# write to temporary file
# note we're not using tempfile.NamedTemporaryFile to avoid
# restrictive file permissions
temp_name = file_name + "." + uuid.uuid4().hex + ".partial"
temp_path = os.path.join(dir_path, temp_name)
try:
self._tofile(value, temp_path)

# move temporary file into place;
# make several attempts at writing the temporary file to get past
# potential antivirus file locking issues
retry_call(
os.replace, (temp_path, file_path), exceptions=(PermissionError,)
)
finally:
# clean up if temp file still exists for whatever reason
if os.path.exists(temp_path): # pragma: no cover
os.remove(temp_path)
13 changes: 4 additions & 9 deletions python/tests/test_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,16 @@
"cufile-mfma",
"cufile-mf",
"cufile-ma",
"zarr-gds",
"zarr-posix",
"zarr",
],
)
def test_single_node_io(run_cmd, tmp_path, api):
"""Test benchmarks/single-node-io.py"""

if "zarr" in api:
pytest.importorskip(
"zarr.cupy",
reason=(
"To use Zarr arrays with GDS directly, Zarr needs CuPy support: "
"<https://github.com/zarr-developers/zarr-python/pull/934>"
),
)
zarr = pytest.importorskip("zarr")
if not hasattr(zarr.Array, "meta_array"):
pytest.skip("requires Zarr v2.13+")

retcode = run_cmd(
cmd=[
Expand Down
21 changes: 5 additions & 16 deletions python/tests/test_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
zarr = pytest.importorskip("zarr")
GDSStore = pytest.importorskip("kvikio.zarr").GDSStore

# To support CuPy arrays, we need the `meta_array` argument introduced in
# Zarr v2.13, see <https://github.com/zarr-developers/zarr-python/pull/934>
if not hasattr(zarr.Array, "meta_array"):
pytest.skip("requires Zarr v2.13+", allow_module_level=True)


@pytest.fixture
def store(tmp_path):
Expand All @@ -32,14 +37,6 @@ def test_direct_store_access(store, array_type):
def test_array(store):
"""Test Zarr array"""

pytest.importorskip(
"zarr.cupy",
reason=(
"To use Zarr arrays with GDS directly, Zarr needs CuPy support: "
"<https://github.com/zarr-developers/zarr-python/pull/934>"
),
)

a = cupy.arange(100)
z = zarr.array(
a, chunks=10, compressor=None, store=store, meta_array=cupy.empty(())
Expand All @@ -53,14 +50,6 @@ def test_array(store):
def test_group(store):
"""Test Zarr group"""

pytest.importorskip(
"zarr.cupy",
reason=(
"To use Zarr arrays with GDS directly, Zarr needs CuPy support: "
"<https://github.com/zarr-developers/zarr-python/pull/934>"
),
)

g = zarr.open_group(store, meta_array=cupy.empty(()))
g.ones("data", shape=(10, 11), dtype=int, compressor=None)
a = g["data"]
Expand Down

0 comments on commit 0ad73c8

Please sign in to comment.