# Zarr-Python 3 Demo

<img src="https://raw.githubusercontent.com/zarr-developers/zarr-logo/main/zarr-pink-stacked.svg" alt="Zarr logo" width="250"/>




In [None]:
import asyncio
import numpy as np
import tempfile
from directory_tree import display_tree
from pprint import pprint

import zarr
from zarr.buffer import default_buffer_prototype

In [None]:
# show which zarr-python version this notebook is running against
zarr.__version__

## The basics

Most things will feel the same.

- As much as possible, we're keeping the top-level API the same.
- Expect some deprecations to parts of the v2 API.
- Some folks will notice changes to the Store API.

In [None]:
# create a store backed by a fresh temporary directory
store = zarr.store.LocalStore(
    root=tempfile.mkdtemp(),
    mode='w',                              # new in 3.0: store open modes
)

# create a root group
root = zarr.group(
    store=store,                           # new in 3.0: transition to keyword only constructors (wip)
    zarr_format=3,                         # new in 3.0: support for v2 and v3 specs
    attributes={"name": "demo root group"} # new in 3.0: pass attributes at creation time
)

In [None]:
# create a store backed by a fresh temporary directory
store = zarr.store.LocalStore(
    root=tempfile.mkdtemp(),
    mode='w',                               # new in 3.0: stores are opened in read or write mode
)

# create a root group (rebinds `store` and `root`, so later cells use this hierarchy)
root = zarr.group(
    store=store,                            # new in 3.0: transition to keyword only constructors (wip)
    zarr_format=3,                          # new in 3.0: support for v2 and v3 specs
    attributes={"name": "demo root group"}  # new in 3.0: pass attributes at group creation time
)

In [None]:
# create an array inside the root group
arr = root.create_array(
    name="foo",
    shape=(10, 10),
    chunks=(5, 5),               # 5x5 chunks over a 10x10 shape: a 2x2 grid of chunks
    dtype='i4',
    dimension_names=('x', 'y'),  # new in 3.0: support array dimension names (v3 only)
    attributes={'units': 'foo'}  # new in 3.0: pass attributes at array creation time
)

# update an attribute after creation
arr.attrs['name'] = 'foo-array'

# and write data to it: random integers in [0, 10) covering the whole array
arr[:] = np.random.randint(0, 10, size=arr.shape)

In [None]:
# v3 spec store layout / metadata keys / chunk key encoding
# (prints the directory tree found under the store's root path)
display_tree(store.root)

## Some new things

In [None]:
# create a sharded array
from zarr.codecs import ShardingCodec, TransposeCodec, BytesCodec, BloscCodec

In [None]:
# Create a sharded array. The codec classes used below are the ones imported
# in the `from zarr.codecs import ...` cell above.
sharded_arr = root.create_array(
    name="bar",
    shape=(100, 100),
    chunks=(50, 50),               # outer (shard) shape: 2x2 = 4 shards for a 100x100 array
    dtype='i4',
    dimension_names=('x', 'y'),    # new in 3.0: support array dimension names (v3 only)
    attributes={'units': 'bar'},   # new in 3.0: pass attributes at array creation time
    codecs=[                       # new in 3.0: codec pipelines
        ShardingCodec(             # new in 3.0: sharding codec
            chunk_shape=(5, 5),    # inner chunk shape: 20x20 = 400 chunks across the array
            codecs=[               # pipeline applied to each inner chunk
                TransposeCodec(order=(0, 1)),  # (0, 1) leaves the axis order unchanged
                BytesCodec(),
                BloscCodec(cname="lz4"),
            ],
            index_location="start",
        )
    ],
)

# and write data to it: random integers in [0, 10) covering the whole array
sharded_arr[:] = np.random.randint(0, 10, size=sharded_arr.shape)

In [None]:
# bar/c only has 4 objects (one per 50x50 shard) despite there being 400 chunks!
display_tree(store.root)

In [None]:
# the store interface is entirely async... `await` everything
# (top-level `await` works here because the notebook runs under IPython/Jupyter)
# get a metadata object out of the store and view its raw bytes
(await store.get("foo/zarr.json", prototype=default_buffer_prototype)).to_bytes()

In [None]:
# new: metadata is modelled as dataclasses, exposed through the `.metadata` property
pprint(root.metadata)         # group metadata
pprint(arr.metadata)          # metadata of the plain array "foo"
pprint(sharded_arr.metadata)  # metadata of the sharded array "bar"

In [None]:
# 100% type hint coverage
# (the trailing `?` is IPython help syntax: it shows the signature and docstring)
zarr.Array.create?

In [None]:
# new global config (uses donfig)
# `set` is used as a context manager here, so the override is scoped to the `with` body
with zarr.config.set({'codec_pipeline.batch_size': 4}):
    zarr.config.pprint()

## New Async API

Zarr-Python 3 will include an `asyncio` interface. Why? Because we want to take advantage of concurrency everywhere we can.

In [None]:
# create a store backed by a fresh temporary directory
store = zarr.store.LocalStore(
    root=tempfile.mkdtemp(),
    mode='w'
)

# create a root group through the async API: same arguments as `zarr.group`, but awaited
root = await zarr.api.asynchronous.group(
    store=store,                            # new in 3.0: transition to keyword only constructors (wip)
    zarr_format=3,                          # new in 3.0: support for v2 and v3 specs
    attributes={"name": "demo root group"}  # new in 3.0: pass attributes at group creation time
)
# display the resulting group object
root

In [None]:
# create 5 arrays concurrently
awaitables = []
for name in ["foo", "bar", "spam", "baz", "qux"]:
    awaitables.append(
        root.create_array(
            path=name,
            shape=(10, 10),
            chunks=(5, 5),
            dtype='i4',
            attributes={'title': f'{name} demo'}
        )
    )
arrays = await asyncio.gather(*awaitables)

In [None]:
# `array_keys()` is consumed with `async for`, so collect it with an async comprehension
keys = [k async for k in root.array_keys()]
keys

In [None]:
# now load all these arrays concurrently
# (one `getitem` awaitable per key, all awaited together via `gather`)
await asyncio.gather(*[root.getitem(k) for k in keys])

In [None]:
# we can read/write data using the asyncio interface too
# NOTE(review): "foo" was already created as an array in this group, so "foo/bar"
# places an array under an array's path - confirm this nesting is intended
arr = await root.create_array('foo/bar', shape=(10, 10), chunks=(5, 5), dtype='i4')

data = np.random.randint(0, 10, size=(10, 10))
# `slice(None)` is the method-call spelling of `arr[:] = data`
await arr.setitem(slice(None), data)

# rather than the __getitem__ syntax, we use the getitem method
# `(slice(5), slice(3))` is the method-call spelling of `arr[:5, :3]`
part = await arr.getitem((slice(5), slice(3)))
part

In [None]:
# show the directory layout of the store written through the async API
display_tree(store.root)