# Openstack Swift Object Store backend for zarr

This backend enables direct access to Swift Object Storage to read and write zarr files.

In [1]:
import os
import zarr
from zarrswift import SwiftStore

Authenticating with `authurl`, `user` and `key`, read from the environment variables `ST_AUTH`, `ST_USER` and `ST_KEY`

In [2]:
# Collect the Swift credentials from the environment. A variable that is
# not set yields None for that option rather than raising.
getenv = os.environ.get
auth = dict(
    authurl=getenv("ST_AUTH"),
    user=getenv("ST_USER"),
    key=getenv("ST_KEY"),
)

## zarr example

In [3]:
# Store rooted at the `zarr-demo` prefix inside the `demo` container.
store = SwiftStore(
    container='demo',
    prefix='zarr-demo',
    storage_options=auth,
)

# Root group on that store; overwrite=True is passed so the group is
# created afresh.
root = zarr.group(store=store, overwrite=True)

# 100x100 int32 array of zeros, stored as 50x50 chunks (four chunks total).
z = root.zeros(
    'foo/bar',
    shape=(100, 100),
    chunks=(50, 50),
    dtype='i4',
)

# Fill the whole array, then display its repr as the cell output.
z[:] = 42
z

<zarr.core.Array '/foo/bar' (100, 100) int32>

check array contents

In [4]:
# Read back only the top-left 5x5 corner of the array.
z[:5, :5]

array([[42, 42, 42, 42, 42],
       [42, 42, 42, 42, 42],
       [42, 42, 42, 42, 42],
       [42, 42, 42, 42, 42],
       [42, 42, 42, 42, 42]], dtype=int32)

Listing store contents

In [5]:
# List the entries directly under the root of the store.
store.listdir()

['.zgroup', 'foo']

In [6]:
# List every key in the store; keys are reported relative to the store's prefix.
store.keys()

['.zgroup',
 'foo/.zgroup',
 'foo/bar/.zarray',
 'foo/bar/0.0',
 'foo/bar/0.1',
 'foo/bar/1.0',
 'foo/bar/1.1']

Listing store contents using swift cli

In [7]:
# Shell escape: list the object names in the `demo` container with the swift CLI.
! swift list demo

zarr-demo/.zgroup
zarr-demo/foo/.zgroup
zarr-demo/foo/bar/.zarray
zarr-demo/foo/bar/0.0
zarr-demo/foo/bar/0.1
zarr-demo/foo/bar/1.0
zarr-demo/foo/bar/1.1


## xarray example

In [8]:
import xarray as xr
import numpy as np

# Use a seeded generator so the demo dataset holds the same values on every
# run (Restart Kernel -> Run All reproduces the same data).
rng = np.random.default_rng(42)

# One 4x5 variable `foo` of uniform [0, 1) samples over the `x` and `y`
# coordinates.
ds = xr.Dataset(
    {"foo": (('x', 'y'), rng.random((4, 5)))},
    coords={
        'x': [10, 20, 30, 40],
        'y': [1, 2, 3, 4, 5],
    },
)

# Set up a store for the dataset under its own `xarray-demo` prefix in the
# `demo` container, then list its top-level entries.
store = SwiftStore(
    container='demo',
    prefix='xarray-demo',
    storage_options=auth,
)
store.listdir()

[]

saving dataset to store

In [9]:
# Write the dataset into the Swift-backed store. With consolidated=True the
# store also ends up with a `.zmetadata` key, as the key listing shows.
ds.to_zarr(store=store, mode='w', consolidated=True)
store.keys()

['.zattrs',
 '.zgroup',
 '.zmetadata',
 'foo/.zarray',
 'foo/.zattrs',
 'foo/0.0',
 'x/.zarray',
 'x/.zattrs',
 'x/0',
 'y/.zarray',
 'y/.zattrs',
 'y/0']

Loading dataset from store

In [10]:
# Re-open the dataset from the Swift store. This rebinds `ds`, replacing the
# in-memory dataset that was written out.
ds = xr.open_zarr(store, consolidated=True)
ds

## storage_options

These options are required by `swiftclient.Connection` to establish a connection to Swift Object Store. Depending on the Swift authentication version, these options may vary.

For Swift authentication version 1.0, these options can be either (`authurl`, `user`, `key`) or the pre-authenticated (`preauthurl`, `preauthtoken`) values.
For the swift cli to work, set the corresponding environment variables: (`ST_AUTH`, `ST_USER`, `ST_KEY`) or (`OS_STORAGE_URL`, `OS_AUTH_TOKEN`).

Initial authentication using (`authurl`, `user`, `key`) generates a `token` and a `storage_url`. These pre-authenticated values can then be used to create further store objects. Some convenience functions for this are available in `zarrswift.utils`.

In [11]:
from zarrswift.utils import getenv_auth, acquire_token

Use `getenv_auth` if the necessary authentication information is already available in environment variables

In [12]:
# Build the auth options from the environment and show which options were found.
auth = getenv_auth()
list(auth.keys())

['authurl', 'user', 'key']

otherwise use `acquire_token`

In [13]:
# Exchange the credentials for pre-authenticated values.
# NOTE: this rebinds `auth`. The result no longer carries `authurl`, `user`
# or `key`, so re-run the `getenv_auth()` cell before re-running this one.
auth = acquire_token(
    authurl=auth['authurl'],
    user=auth['user'],
    key=auth['key'],
    update_env=True,
)
list(auth.keys())

Token expires in: 22:33:41


['preauthurl', 'preauthtoken']

using `preauthurl` and `preauthtoken` to connect to the store

In [14]:
# Same container and prefix as the xarray example, this time connecting with
# the pre-authenticated values held in `auth`.
store = SwiftStore(container='demo', prefix='xarray-demo', storage_options=auth)
store.listdir()

['.zattrs', '.zgroup', '.zmetadata', 'foo', 'x', 'y']

## loading data using fsspec

Making a container public allows sharing data with a wider audience, and the easiest way to access such data is via the fsspec package. In this case, no access token is required.

For containers with limited access rights (controlled via read and write ACLs), an access token may be required.

In [15]:
from zarrswift.utils import is_public, toggle_public
import fsspec

check if store is public

In [16]:
# Check whether the container behind `store` is currently public.
is_public(store)

False

Since the store (container) is not public, an auth token is required to access it. Whether a given token is accepted depends on the container's ACL settings; for example, members of the same project may have been granted read permission.

In [17]:
# Send the pre-authenticated token with every HTTP request.
token_headers = {'X-Auth-Token': auth['preauthtoken']}
http = fsspec.filesystem('http', headers=token_headers)

# Expose the store's URL as a key/value mapping and open the dataset from it.
fs = http.get_mapper(store.url)
ds = xr.open_zarr(fs, consolidated=True)
ds

In [18]:
# Make the container public only if it is not public already. A bare toggle
# would flip an already-public container back to private when this cell is
# re-run, and the anonymous read that follows would then fail.
if not is_public(store):
    toggle_public(store)
is_public(store)

True

Now that the store is public, anyone can read the data without an auth token

In [19]:
# No token needed: map the public store URL directly and open the dataset.
public_mapper = fsspec.get_mapper(store.url)
ds = xr.open_zarr(public_mapper, consolidated=True)
ds

In [20]:
# Clean up: make the container private again, but only if it is currently
# public. A bare toggle would re-expose the data if this cell were run twice.
if is_public(store):
    toggle_public(store)
is_public(store)

False