Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use Zarr v2.13.0a2 #129

Merged
merged 3 commits into from
Sep 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
28 changes: 13 additions & 15 deletions python/benchmarks/single-node-io.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

import argparse
import contextlib
import functools
import os
import os.path
import pathlib
Expand Down Expand Up @@ -193,30 +192,30 @@ def run_posix(args):
return read_time, write_time


def run_zarr(store_type, args):
def run_zarr(args):
"""Use the Zarr API"""

import zarr
import zarr.cupy

import kvikio.zarr

dir_path = args.dir / "zarr"

if not hasattr(zarr.Array, "meta_array"):
raise RuntimeError("requires Zarr v2.13+")

a = cupy.arange(args.nbytes, dtype="uint8")

# Retrieve the store and compressor to use based on `store_type`
shutil.rmtree(str(args.dir / store_type), ignore_errors=True)
store, compressor = {
"gds": (kvikio.zarr.GDSStore(args.dir / store_type), None),
"posix": (
zarr.DirectoryStore(args.dir / store_type),
zarr.cupy.CuPyCPUCompressor(),
),
}[store_type]
shutil.rmtree(str(dir_path), ignore_errors=True)

# Write
t0 = clock()
z = zarr.array(
a, chunks=False, compressor=compressor, store=store, meta_array=cupy.empty(())
a,
chunks=False,
compressor=None,
store=kvikio.zarr.GDSStore(dir_path),
meta_array=cupy.empty(()),
)
write_time = clock() - t0

Expand All @@ -231,8 +230,7 @@ def run_zarr(store_type, args):

API = {
"cufile": run_cufile,
"zarr-gds": functools.partial(run_zarr, "gds"),
"zarr-posix": functools.partial(run_zarr, "posix"),
"zarr": run_zarr,
"posix": run_posix,
"cufile-mfma": run_cufile_multiple_files_multiple_arrays,
"cufile-mf": run_cufile_multiple_files,
Expand Down
60 changes: 0 additions & 60 deletions python/kvikio/zarr.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
# See file LICENSE for terms.

import errno
import os
import os.path
import shutil
import uuid

import cupy
import zarr.storage
from zarr.util import retry_call

import kvikio
from kvikio._lib.arr import asarray
Expand Down Expand Up @@ -55,59 +51,3 @@ def _tofile(self, a, fn):
assert written == a.nbytes
else:
super()._tofile(a.obj, fn)

def __setitem__(self, key, value):
"""
We have to overwrite this because `DirectoryStore.__setitem__`
converts `value` to a NumPy array always
"""
key = self._normalize_key(key)

# coerce to flat, contiguous buffer (ideally without copying)
arr = asarray(value)
if arr.contiguous:
value = arr
else:
if arr.cuda:
# value = cupy.ascontiguousarray(value)
value = arr.reshape(-1, order="A")
else:
# can flatten without copy
value = arr.reshape(-1, order="A")

# destination path for key
file_path = os.path.join(self.path, key)

# ensure there is no directory in the way
if os.path.isdir(file_path):
shutil.rmtree(file_path)

# ensure containing directory exists
dir_path, file_name = os.path.split(file_path)
if os.path.isfile(dir_path):
raise KeyError(key)
if not os.path.exists(dir_path):
try:
os.makedirs(dir_path)
except OSError as e:
if e.errno != errno.EEXIST:
raise KeyError(key)

# write to temporary file
# note we're not using tempfile.NamedTemporaryFile to avoid
# restrictive file permissions
temp_name = file_name + "." + uuid.uuid4().hex + ".partial"
temp_path = os.path.join(dir_path, temp_name)
try:
self._tofile(value, temp_path)

# move temporary file into place;
# make several attempts at writing the temporary file to get past
# potential antivirus file locking issues
retry_call(
os.replace, (temp_path, file_path), exceptions=(PermissionError,)
)
finally:
# clean up if temp file still exists for whatever reason
if os.path.exists(temp_path): # pragma: no cover
os.remove(temp_path)
13 changes: 4 additions & 9 deletions python/tests/test_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,16 @@
"cufile-mfma",
"cufile-mf",
"cufile-ma",
"zarr-gds",
"zarr-posix",
"zarr",
],
)
def test_single_node_io(run_cmd, tmp_path, api):
"""Test benchmarks/single-node-io.py"""

if "zarr" in api:
pytest.importorskip(
"zarr.cupy",
reason=(
"To use Zarr arrays with GDS directly, Zarr needs CuPy support: "
"<https://github.com/zarr-developers/zarr-python/pull/934>"
),
)
zarr = pytest.importorskip("zarr")
if not hasattr(zarr.Array, "meta_array"):
pytest.skip("requires Zarr v2.13+")

retcode = run_cmd(
cmd=[
Expand Down
21 changes: 5 additions & 16 deletions python/tests/test_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
zarr = pytest.importorskip("zarr")
GDSStore = pytest.importorskip("kvikio.zarr").GDSStore

# To support CuPy arrays, we need the `meta_array` argument introduced in
# Zarr v2.13, see <https://github.com/zarr-developers/zarr-python/pull/934>
if not hasattr(zarr.Array, "meta_array"):
pytest.skip("requires Zarr v2.13+", allow_module_level=True)


@pytest.fixture
def store(tmp_path):
Expand All @@ -32,14 +37,6 @@ def test_direct_store_access(store, array_type):
def test_array(store):
"""Test Zarr array"""

pytest.importorskip(
"zarr.cupy",
reason=(
"To use Zarr arrays with GDS directly, Zarr needs CuPy support: "
"<https://github.com/zarr-developers/zarr-python/pull/934>"
),
)

a = cupy.arange(100)
z = zarr.array(
a, chunks=10, compressor=None, store=store, meta_array=cupy.empty(())
Expand All @@ -53,14 +50,6 @@ def test_array(store):
def test_group(store):
"""Test Zarr group"""

pytest.importorskip(
"zarr.cupy",
reason=(
"To use Zarr arrays with GDS directly, Zarr needs CuPy support: "
"<https://github.com/zarr-developers/zarr-python/pull/934>"
),
)

g = zarr.open_group(store, meta_array=cupy.empty(()))
g.ones("data", shape=(10, 11), dtype=int, compressor=None)
a = g["data"]
Expand Down