## The time to read zarr consolidated metadata depends on the number of chunks

In [4]:
import os
import glob
import numpy as np
import xarray as xr
import dask.array as da
import timeit

### Functions to write and read dummy zarr stores

In [2]:
# Length of each of the three dimensions (time, lat, lon) of every variable.
NT = 20
NY = 20
NX = 20
# Number of data variables written to each store.
NV = 8

def create_store(chunksize, store):
    """Write a dummy dataset to a zarr store with consolidated metadata.

    The dataset holds ``NV`` variables (``v1``, ``v2``, ...) that all share
    one zero-filled dask array of shape ``(NT, NY, NX)``, chunked with
    ``chunksize`` along every dimension. The dataset is printed after writing.

    Parameters
    ----------
    chunksize
        Chunk length used for each of the three dimensions.
    store
        Target handed to ``Dataset.to_zarr``; written with ``mode="w"``.
    """
    print(f"\nWriting zarr store with chunksize={chunksize}")

    dims = ("time", "lat", "lon")
    sizes = (NT, NY, NX)

    # A single array is reused for every variable in the dataset.
    zeros = da.zeros(sizes, chunks=(chunksize, chunksize, chunksize))
    template = xr.DataArray(
        data=zeros,
        coords={dim: np.arange(size) for dim, size in zip(dims, sizes)},
        dims=dims,
    )

    dset = xr.Dataset()
    names = [f"v{n}" for n in range(1, NV + 1)]
    for name in names:
        dset[name] = template

    dset.to_zarr(store, consolidated=True, mode="w")
    print(dset)


def read_metadata(store):
    """Open ``store`` with ``xr.open_zarr`` using its consolidated metadata.

    Parameters
    ----------
    store
        Store to open; forwarded unchanged to ``xr.open_zarr``.

    Returns
    -------
    The dataset object returned by ``xr.open_zarr(store, consolidated=True)``.
    """
    return xr.open_zarr(store, consolidated=True)

### Create zarr stores with varying chunksizes

In [27]:
# Chunk length (applied along every dimension) of each store to compare.
chunksizes = [1, 2, 20]
# One store directory per chunk size, named with a zero-padded suffix.
stores = [f"store-chunk-{chunksize:02.0f}" for chunksize in chunksizes]

# Stores already on disk are not rewritten, so re-running this cell is cheap.
for chunksize, store in zip(chunksizes, stores):
    if not os.path.isdir(store):
        create_store(chunksize, store)

# Show the first and last store. The names are taken from `stores` instead of
# being spelled out by hand so they cannot drift from `chunksizes`.
print(read_metadata(stores[0]))
print()
print(read_metadata(stores[-1]))

<xarray.Dataset>
Dimensions:  (lat: 20, lon: 20, time: 20)
Coordinates:
  * lat      (lat) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
  * lon      (lon) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
  * time     (time) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
Data variables:
    v1       (time, lat, lon) float64 dask.array<chunksize=(1, 1, 1), meta=np.ndarray>
    v2       (time, lat, lon) float64 dask.array<chunksize=(1, 1, 1), meta=np.ndarray>
    v3       (time, lat, lon) float64 dask.array<chunksize=(1, 1, 1), meta=np.ndarray>
    v4       (time, lat, lon) float64 dask.array<chunksize=(1, 1, 1), meta=np.ndarray>
    v5       (time, lat, lon) float64 dask.array<chunksize=(1, 1, 1), meta=np.ndarray>
    v6       (time, lat, lon) float64 dask.array<chunksize=(1, 1, 1), meta=np.ndarray>
    v7       (time, lat, lon) float64 dask.array<chunksize=(1, 1, 1), meta=np.ndarray>
    v8       (time, lat, lon) float64 dask.array<chunksize=(1, 1, 1), meta=np

### Read metadata from each store 50 times

In [10]:
# Total chunk count per store: NV variables, each split into
# ceil(N / chunksize) chunks along every dimension. The ceiling division
# -(-a // b) keeps the count correct when chunksize does not divide the
# dimension length evenly (a trailing partial chunk is still a chunk).
nchunks = [
    NV * -(-NT // chunksize) * -(-NY // chunksize) * -(-NX // chunksize)
    for chunksize in chunksizes
]

# Map "<n> chunks" -> total wall time of 50 consecutive metadata reads.
results = {}
for store, nchunk in zip(stores, nchunks):
    # Passing a callable avoids building source code from the store name
    # (which breaks on names containing quotes) and importing from __main__.
    result = timeit.timeit(lambda: read_metadata(store), number=50)
    results[f"{nchunk:0.0f} chunks"] = f"{result:0.2f}s"
results

{'64000 chunks': '1.15s', '8000 chunks': '0.29s', '8 chunks': '0.20s'}

## Profiling

### Fast store

In [18]:
# Profile a single read_metadata call on the chunksize-20 store;
# `-l 10` restricts the printed report to 10 entries.
%prun -l 10 read_metadata("store-chunk-20")

 

         17461 function calls (17117 primitive calls) in 0.020 seconds

   Ordered by: internal time
   List reduced from 472 to 10 due to restriction <10>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      262    0.001    0.000    0.002    0.000 indexing.py:556(shape)
2931/2906    0.001    0.000    0.001    0.000 {built-in method builtins.isinstance}
      135    0.000    0.000    0.003    0.000 variable.py:308(__init__)
        6    0.000    0.000    0.000    0.000 {built-in method posix.stat}
       98    0.000    0.000    0.001    0.000 util.py:277(normalize_storage_path)
        4    0.000    0.000    0.000    0.000 {built-in method io.open}
       24    0.000    0.000    0.001    0.000 core.py:2434(normalize_chunks)
       11    0.000    0.000    0.004    0.000 conventions.py:262(decode_cf_variable)
1619/1577    0.000    0.000    0.000    0.000 {built-in method builtins.len}
       16    0.000    0.000    0.001    0.000 inspect.py:2112(_signature_from_

### Slow store

In [20]:
# Profile a single read_metadata call on the chunksize-1 store;
# `-l 10` restricts the printed report to 10 entries.
%prun -l 10 read_metadata("store-chunk-01")


 

         20621 function calls (20277 primitive calls) in 0.185 seconds

   Ordered by: internal time
   List reduced from 472 to 10 due to restriction <10>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        8    0.116    0.015    0.139    0.017 core.py:222(getem)
        8    0.012    0.002    0.012    0.002 core.py:259(<listcomp>)
        8    0.008    0.001    0.009    0.001 core.py:203(slices_from_chunks)
        1    0.005    0.005    0.185    0.185 <string>:1(<module>)
      262    0.003    0.000    0.004    0.000 indexing.py:556(shape)
2931/2906    0.001    0.000    0.002    0.000 {built-in method builtins.isinstance}
       98    0.001    0.000    0.002    0.000 util.py:277(normalize_storage_path)
        6    0.001    0.000    0.001    0.000 {built-in method posix.stat}
        8    0.001    0.000    0.144    0.018 core.py:2760(from_array)
      135    0.001    0.000    0.007    0.000 variable.py:308(__init__)