In [23]:
"""
In this tutorial, we will show how to use KvikIO to read and write GPU memory directly to/from Zarr files.
"""
import json
import shutil
import numpy
import cupy
import zarr
import kvikio
import kvikio.zarr
from kvikio.nvcomp_codec import NvCompBatchCodec
from numcodecs import LZ4

We need to set three Zarr arguments: 
 - `meta_array`: in order to make Zarr read into GPU memory (instead of CPU memory), we set the `meta_array` argument to an empty CuPy array. 
 - `store`: we need to use a GPU compatible Zarr Store, which will be KvikIO’s GDS store in our case. 
 - `compressor`: finally, we need to use a GPU compatible compressor (or `None`). KvikIO provides an nvCOMP compressor, `kvikio.nvcomp_codec.NvCompBatchCodec`, that we will use.

In [24]:
# Write a CuPy array into a new Zarr array on disk, going through
# KvikIO's GDS store and compressing with nvCOMP's LZ4
gds_store = kvikio.zarr.GDSStore("my-zarr-file.zarr")
lz4_gpu = NvCompBatchCodec("lz4")
z = zarr.array(
    cupy.arange(10),
    chunks=2,
    store=gds_store,
    compressor=lz4_gpu,
    meta_array=cupy.empty(()),  # an empty CuPy array makes Zarr read into GPU memory
    overwrite=True,
)
z, z.compressor, z.store

(<zarr.core.Array (10,) int64>,
 NvCompBatchCodec(algorithm='lz4', options={}),
 <kvikio.zarr.GDSStore at 0x7fd42021ac20>)

In [25]:
# Because `meta_array` was set to a CuPy array when `z` was created,
# reading from `z` returns a CuPy array (GPU memory) rather than a NumPy array
type(z[:])

cupy.ndarray

From this point onwards, `z` can be used just like any other Zarr array.

In [26]:
# Read a slice of the array: elements 1 through 8
z[1:9]

array([1, 2, 3, 4, 5, 6, 7, 8])

In [27]:
# Read the whole array and add 42 to every element
z[:] + 42

array([42, 43, 44, 45, 46, 47, 48, 49, 50, 51])

### GPU compression / CPU decompression

In order to read a GPU-written Zarr file into a NumPy array, we simply open that file **without** setting the `meta_array` argument:

In [28]:
# Re-open the array through the GDS store, this time without `meta_array`
gds_store = kvikio.zarr.GDSStore("my-zarr-file.zarr")
z = zarr.open_array(gds_store)
type(z[:]), type(z.compressor), z[:]

(numpy.ndarray,
 kvikio.nvcomp_codec.NvCompBatchCodec,
 array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))

And we don't need to use `kvikio.zarr.GDSStore` either:

In [29]:
# Open the array by its path string instead of passing a KvikIO store object
z = zarr.open_array("my-zarr-file.zarr")
type(z[:]), type(z.compressor), z[:]

(numpy.ndarray,
 kvikio.nvcomp_codec.NvCompBatchCodec,
 array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))

However, the examples above still use `NvCompBatchCodec("lz4")` for decompression. In the following, we will show how to read, on the CPU, a Zarr file that was written and compressed on the GPU.

Some algorithms, such as LZ4, can be used interchangeably on CPU and GPU but Zarr will always use the compressor used to write the Zarr file. We are working with the Zarr team to fix this shortcoming but for now, we will use a workaround where we _patch_ the metadata manually.

In [30]:
# Swap the compressor recorded in the Zarr metadata for a CPU implementation of LZ4
store = zarr.DirectoryStore("my-zarr-file.zarr")  # kvikio.zarr.GDSStore would work as well
cpu_codec_config = LZ4().get_config()
# "compressor" already exists in the metadata, so the merge only replaces its value
patched_meta = {**json.loads(store[".zarray"]), "compressor": cpu_codec_config}
# NB: this changes the Zarr metadata on disk
store[".zarray"] = json.dumps(patched_meta).encode()

# And then open the file as usual
z = zarr.open_array(store)
type(z[:]), type(z.compressor), z[:]

(numpy.ndarray, numcodecs.lz4.LZ4, array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))

### CPU compression / GPU decompression

Now, let's try the other way around.

In [31]:
# Let's create a new Zarr array that is compressed on the CPU with LZ4.
# `LZ4` is the numcodecs codec imported at the top of the notebook, so no
# extra import or registry lookup is needed here.
z = zarr.array(
    numpy.arange(10),
    chunks=2,
    store="my-zarr-file.zarr",
    overwrite=True,
    # The (CPU) numcodecs implementation of the LZ4 codec with its default settings.
    compressor=LZ4(),
)
z, z.compressor, z.store

(<zarr.core.Array (10,) int64>,
 LZ4(acceleration=1),
 <zarr.storage.DirectoryStore at 0x7fd351e7a9b0>)

Again, we will use a workaround where we _patch_ the metadata manually.

In [32]:
# Swap the compressor recorded in the Zarr metadata for a GPU implementation of LZ4
store = kvikio.zarr.GDSStore("my-zarr-file.zarr")  # zarr.DirectoryStore would work as well
gpu_codec_config = NvCompBatchCodec("lz4").get_config()
# "compressor" already exists in the metadata, so the merge only replaces its value
patched_meta = {**json.loads(store[".zarray"]), "compressor": gpu_codec_config}
# NB: this changes the Zarr metadata on disk
store[".zarray"] = json.dumps(patched_meta).encode()

# And then open the file as usual, passing `meta_array` so reads return CuPy arrays
z = zarr.open_array(store, meta_array=cupy.empty(()))
type(z[:]), type(z.compressor), z[:]

(cupy.ndarray,
 kvikio.nvcomp_codec.NvCompBatchCodec,
 array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))

In [33]:
# Clean up: remove the Zarr directory used in this tutorial.
# `ignore_errors=True` keeps this cell from raising if the removal fails.
shutil.rmtree("my-zarr-file.zarr", ignore_errors=True)