In [1]:
import zarr
import numpy as np

In [2]:
z = zarr.zeros((10000, 10000), chunks=(1000, 1000), dtype="i4")
z

<zarr.core.Array (10000, 10000) int32>

In [3]:
z[:] = 42 # full colon represents from where to where, "a slice"

In [4]:
z[:]

array([[42, 42, 42, ..., 42, 42, 42],
       [42, 42, 42, ..., 42, 42, 42],
       [42, 42, 42, ..., 42, 42, 42],
       ...,
       [42, 42, 42, ..., 42, 42, 42],
       [42, 42, 42, ..., 42, 42, 42],
       [42, 42, 42, ..., 42, 42, 42]], dtype=int32)

In [5]:
print(z[0][0])
print(z[0,0])

42
42


In [6]:
z[:].shape

(10000, 10000)

In [7]:
a = np.arange(10000)
b = np.arange(10000)
b

array([   0,    1,    2, ..., 9997, 9998, 9999])

In [8]:
z[0, :] # [,] similar to accessing multi dim array [][]

array([42, 42, 42, ..., 42, 42, 42], dtype=int32)

In [9]:
z[0, :] = a
z[:, 0] = b

In [10]:
z[:]

array([[   0,    1,    2, ..., 9997, 9998, 9999],
       [   1,   42,   42, ...,   42,   42,   42],
       [   2,   42,   42, ...,   42,   42,   42],
       ...,
       [9997,   42,   42, ...,   42,   42,   42],
       [9998,   42,   42, ...,   42,   42,   42],
       [9999,   42,   42, ...,   42,   42,   42]], dtype=int32)

In [11]:
z[-1, 0]

9999

## GROUP

For hierarchical organization of arrays

Idea taken from groups in HDF5

tldr, Its similar to Dictonaries in python

In [12]:
root = zarr.group()
root

<zarr.hierarchy.Group '/'>

In [13]:
# group can contain other groups
foo = root.create_group("foo")
bar = root.create_group("bar")

In [14]:
foo

<zarr.hierarchy.Group '/foo'>

In [15]:
z1 = bar.zeros('baz', shape = (10000, 10000), chunks= (1000, 1000), dtype = "i4")
z1

<zarr.core.Array '/bar/baz' (10000, 10000) int32>

In [16]:
root.bar.baz[:]

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int32)

In HDF5 arrays are called DATASET
so is it in here.

In [17]:
z = bar.create_dataset('quux', shape=(10000, 10000), chunk=(1000, 1000), dtype='i4')
z

  warn('ignoring keyword argument %r' % k)


<zarr.core.Array '/bar/quux' (10000, 10000) int32>

In [18]:
root.tree()

Tree(nodes=(Node(disabled=True, name='/', nodes=(Node(disabled=True, name='bar', nodes=(Node(disabled=True, ic…

NOTE:

IN POINT CLOUD TILESET generation,
generation is done on a zarr file type

i.e. any input file is converted into zarr type // "Ingested"
Then the zarr file type is converted into 3d tiles.

The intermediate zarr file contains various Groups. Eg.
Time
Location
value
chunk_id

Each of the groups has several datasets(array), named numerically. \
I guess each dataset has some chunk data for the Group. \
Eg. 1.0 file has some chunk of dataset (array) for Time Group \
&&  9.0 file has some more chunk of dataset (array) for Time Group. 

### ROUGH

In [30]:
store1 = zarr.DirectoryStore("/tmp/crs_olympex/zarr")
root1 = zarr.group(store=store1)

In [31]:
root1.tree()

Tree(nodes=(Node(disabled=True, name='/', nodes=(Node(disabled=True, name='20151110', nodes=(Node(disabled=Tru…

In [32]:
root1['20151110']["chunk_id"][:]

array([[         0, 1447176496],
       [    262144, 1447177273],
       [    524288, 1447178079]])

In [33]:
root1['20151110']["location"][:]

array([[ -118.1554  ,    35.18708 , 19257.947   ],
       [ -118.15517 ,    35.186512, 18473.303   ],
       [ -118.155174,    35.18652 , 18510.703   ],
       ...,
       [ -119.918884,    34.246967, 17638.7     ],
       [ -119.9189  ,    34.24694 , 17601.469   ],
       [ -119.91896 ,    34.24685 , 17489.777   ]], dtype=float32)

In [34]:
root1['20151110']["time"][:]

array([   0,    0,    0, ..., 1969, 1969, 1969], dtype=int32)

In [35]:
root1['20151110']["value"]["ref"][:]

array([-1101.3055  ,   -24.597084,   -26.546768, ...,   -38.94864 ,
         -36.910416,   -38.653282], dtype=float32)

FOR CPL

In [37]:
store2 = zarr.DirectoryStore("/tmp/cpl_olympex/zarr")
root2 = zarr.group(store=store2)

In [38]:
root2.tree()

Tree(nodes=(Node(disabled=True, name='/', nodes=(Node(disabled=True, name='CPL', nodes=(Node(disabled=True, ic…

In [39]:
root2['CPL']["chunk_id"][:]

array([[      0,   69920],
       [ 262144,   70265],
       [ 524288,   70594],
       [ 786432,   70912],
       [1048576,   71231],
       [1310720,   71549],
       [1572864,   71870],
       [1835008,   72190],
       [2097152,   72510],
       [2359296,   72826],
       [2621440,   73143],
       [2883584,   73464],
       [3145728,   73784],
       [3407872,   74105],
       [3670016,   74423],
       [3932160,   74743],
       [4194304,   75062]])

In [40]:
root2['CPL']["location"][:]

array([[ -118.18983 ,    34.908337, 12611.171   ],
       [ -118.18981 ,    34.908337, 12641.127   ],
       [ -118.189804,    34.908333, 12671.086   ],
       ...,
       [ -118.58397 ,    34.709915,  6686.8984  ],
       [ -118.583916,    34.709904,  7586.284   ],
       [ -118.58288 ,    34.709705, 24494.723   ]], dtype=float32)

In [41]:
root2['CPL']["time"][:]

array([   0,    0,    0, ..., 5401, 5401, 5401], dtype=int32)

In [42]:
root2['CPL']["value"]["ref"][:]

array([ 7.7530061e-04, -2.5145346e-04, -7.0659170e-04, ...,
        2.0124032e-06,  1.4174973e-04, -6.7999674e-04], dtype=float32)

In [43]:
root2['CPL']["time"][1405001:1405009]

array([1744, 1744, 1744, 1744, 1744, 1744, 1744, 1744], dtype=int32)