In [1]:
import json

import numcodecs

import numpy as np

import zarr

from IPython.display import display

# Keep printed arrays compact: 4 digits of precision, fixed-point notation instead of scientific.
np.set_printoptions(suppress=True, precision=4)

### Basic usage

Get the nvCOMP codec from the numcodecs registry:

In [2]:
NVCOMP_CODEC_ID = "nvcomp_batch"

# Currently supported algorithms.
LZ4_ALGO = "LZ4"
GDEFLATE_ALGO = "Gdeflate"
SNAPPY_ALGO = "snappy"
ZSTD_ALGO = "zstd"

# Look the codec up in the numcodecs registry by its id, selecting the LZ4 algorithm.
codec = numcodecs.registry.get_codec({"id": NVCOMP_CODEC_ID, "algorithm": LZ4_ALGO})
# To pass algorithm-specific options, add an "options" entry
# (note that the key is "algorithm", the same one used above):
# codec = numcodecs.registry.get_codec(
#     {"id": NVCOMP_CODEC_ID, "algorithm": LZ4_ALGO, "options": {"data_type": 1}}
# )

display(codec)

NvCompBatchCodec(algorithm='lz4', options={})

Create data:

In [3]:
shape = (100, 100)
chunks = (10, 10)

# Fix the seed so the generated values are the same on every run.
np.random.seed(1)

# Random float32 samples wrapped in a chunked zarr array that uses the nvCOMP codec.
samples = np.random.randn(*shape).astype(np.float32)
x = zarr.array(samples, chunks=chunks, compressor=codec)

# Show the decoded contents followed by the array summary.
display(x[:], x.info)

array([[ 1.6243, -0.6118, -0.5282, ...,  0.0436, -0.62  ,  0.698 ],
       [-0.4471,  1.2245,  0.4035, ...,  0.4203,  0.811 ,  1.0444],
       [-0.4009,  0.824 , -0.5623, ...,  0.7848, -0.9554,  0.5859],
       ...,
       [ 1.3797,  0.1387,  1.2255, ...,  1.8051,  0.3722,  0.1253],
       [ 0.7348, -0.7115, -0.1248, ..., -1.9533, -0.7684, -0.5345],
       [ 0.2183, -0.8654,  0.8886, ..., -1.0141, -0.0627, -1.4379]],
      dtype=float32)

0,1
Type,zarr.core.Array
Data type,float32
Shape,"(100, 100)"
Chunk shape,"(10, 10)"
Order,C
Read-only,False
Compressor,"NvCompBatchCodec(algorithm='lz4', options={})"
Store type,zarr.storage.KVStore
No. bytes,40000 (39.1K)
No. bytes stored,41006 (40.0K)


Store and load back the data:

In [4]:
# A plain dict serves as an in-memory store; see the zarr documentation for other store types.
zarr_store = {}
zarr.save_array(zarr_store, x, compressor=codec)

# The array metadata is kept as JSON under the ".zarray" key; decode it for inspection.
raw_meta = zarr_store[".zarray"]
meta_info = json.loads(raw_meta)
display(meta_info)

{'chunks': [10, 10],
 'compressor': {'algorithm': 'lz4', 'id': 'nvcomp_batch', 'options': {}},
 'dtype': '<f4',
 'fill_value': 0.0,
 'filters': None,
 'order': 'C',
 'shape': [100, 100],
 'zarr_format': 2}

In [5]:
# Re-open the array from the store that was just written and show its summary.
y = zarr.open_array(store=zarr_store)
display(y.info)

0,1
Type,zarr.core.Array
Data type,float32
Shape,"(100, 100)"
Chunk shape,"(10, 10)"
Order,C
Read-only,False
Compressor,"NvCompBatchCodec(algorithm='lz4', options={})"
Store type,zarr.storage.KVStore
No. bytes,40000 (39.1K)
No. bytes stored,41006 (40.0K)


In [6]:
# Roundtrip check: data read back from the store must equal the original array.
np.testing.assert_equal(actual=y[:], desired=x[:])

### CPU compression / GPU decompression

Some algorithms, such as LZ4, can be used interchangeably on the CPU and the GPU. For example, data can be compressed with the CPU LZ4 codec and then decompressed with the GPU version of the LZ4 codec.

In [7]:
# Fetch the default (CPU) LZ4 codec from the numcodecs registry.
cpu_codec = numcodecs.registry.get_codec({"id": "lz4"})

# Fresh random float32 data, compressed on the CPU.
cpu_samples = np.random.randn(*shape).astype(np.float32)
x = zarr.array(cpu_samples, chunks=chunks, compressor=cpu_codec)

# In-memory dict store for this demo; a real workflow would use a filesystem or another persistent store.
store = {}
zarr.save_array(store, x, compressor=cpu_codec)

# Confirm that the stored metadata names the CPU codec as the compressor.
meta = json.loads(store[".zarray"])
display(meta)
assert meta["compressor"]["id"] == "lz4"

# Swap only the compressor entry so the same chunks are read through the GPU/nvCOMP codec.
meta.update(compressor={"id": NVCOMP_CODEC_ID, "algorithm": LZ4_ALGO})
store[".zarray"] = json.dumps(meta).encode()

y = zarr.open_array(store, compressor=codec)

display(x.info, y.info)

# The CPU-written array and its GPU-decoded view must hold identical data.
np.testing.assert_equal(x[:], y[:])


{'chunks': [10, 10],
 'compressor': {'acceleration': 1, 'id': 'lz4'},
 'dtype': '<f4',
 'fill_value': 0.0,
 'filters': None,
 'order': 'C',
 'shape': [100, 100],
 'zarr_format': 2}

0,1
Type,zarr.core.Array
Data type,float32
Shape,"(100, 100)"
Chunk shape,"(10, 10)"
Order,C
Read-only,False
Compressor,LZ4(acceleration=1)
Store type,zarr.storage.KVStore
No. bytes,40000 (39.1K)
No. bytes stored,40973 (40.0K)


0,1
Type,zarr.core.Array
Data type,float32
Shape,"(100, 100)"
Chunk shape,"(10, 10)"
Order,C
Read-only,False
Compressor,"NvCompBatchCodec(algorithm='lz4', options={})"
Store type,zarr.storage.KVStore
No. bytes,40000 (39.1K)
No. bytes stored,40883 (39.9K)
