In [1]:
import zarr
import xarray as xr
import os
from config import token_path, bucket_name

# obstore==0.7.1, zarr bumped to 3.1.0 (tested on 26.07.2025, pangeo-notebook:2025.07.09)
## ObjectStore(GCSStore)

In [2]:
import obstore
from obstore.auth.google import GoogleCredentialProvider
from google.oauth2 import service_account
from obstore.store import GCSStore, S3Store
from zarr.storage import ObjectStore, FsspecStore

In [3]:
# Record the obstore and zarr versions this section's timings were taken with.
print(f"{obstore.__version__} {zarr.__version__}")

0.7.1 3.1.0


In [4]:
# Path of the Zarr dataset within the bucket; used below as the GCS prefix.
datafile = '/velocity_product/version_0/plevel20.zarr'

# Service-account credentials read from the key file at `token_path`.
creds = service_account.Credentials.from_service_account_file(
    filename=token_path,
    scopes=["https://www.googleapis.com/auth/cloud-platform"],
)
credential_provider = GoogleCredentialProvider(credentials=creds)

# Read-only zarr store wrapping an obstore GCSStore rooted at `datafile`.
store = ObjectStore(
    store=GCSStore(
        bucket=bucket_name,
        prefix=datafile,
        credential_provider=credential_provider,
    ),
    read_only=True,
)

In [5]:
# Plain open outside the benchmark cells; `ds` is not referenced again in the cells shown here.
# NOTE(review): presumably a warm-up / sanity check before timing — confirm.
ds = xr.open_zarr(store=store)

In [6]:
%%timeit
# Benchmark: open through the obstore-backed ObjectStore with default open_zarr arguments.
ds = xr.open_zarr(store=store)

2.61 s ± 178 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [7]:
%%timeit
# Benchmark: same ObjectStore, with consolidated=True passed explicitly.
ds = xr.open_zarr(store=store, consolidated=True)

2.67 s ± 147 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [8]:
%%timeit
# Benchmark: same ObjectStore, consolidated=True and the Zarr format pinned with zarr_format=2.
# NOTE(review): presumably the dataset was written as Zarr v2 — confirm.
ds = xr.open_zarr(store=store, consolidated=True, zarr_format=2)

2.18 s ± 636 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# obstore==0.7.1, zarr==3.0.10 (tested on 25.07.2025, pangeo-notebook:2025.07.09)
## ObjectStore(GCSStore)

In [2]:
import obstore
from obstore.auth.google import GoogleCredentialProvider
from google.oauth2 import service_account
from obstore.store import GCSStore, S3Store
from zarr.storage import ObjectStore, FsspecStore

In [3]:
# Path of the Zarr dataset within the bucket; used below as the GCS prefix.
datafile = '/velocity_product/version_0/plevel20.zarr'

# Service-account credentials read from the key file at `token_path`.
creds = service_account.Credentials.from_service_account_file(
    filename=token_path,
    scopes=["https://www.googleapis.com/auth/cloud-platform"],
)
credential_provider = GoogleCredentialProvider(credentials=creds)

# Read-only zarr store wrapping an obstore GCSStore rooted at `datafile`.
store = ObjectStore(
    store=GCSStore(
        bucket=bucket_name,
        prefix=datafile,
        credential_provider=credential_provider,
    ),
    read_only=True,
)

In [4]:
# Plain open outside the benchmark cells; `ds` is not referenced again in the cells shown here.
# NOTE(review): presumably a warm-up / sanity check before timing — confirm.
ds = xr.open_zarr(store=store)

In [6]:
%%timeit
# Benchmark: open through the obstore-backed ObjectStore with default open_zarr arguments.
ds = xr.open_zarr(store=store)

2.7 s ± 493 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [7]:
%%timeit
# Benchmark: same ObjectStore, with consolidated=True passed explicitly.
ds = xr.open_zarr(store=store, consolidated=True)

2.69 s ± 548 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [8]:
%%timeit
# Benchmark: same ObjectStore, consolidated=True and the Zarr format pinned with zarr_format=2.
ds = xr.open_zarr(store=store, consolidated=True, zarr_format=2)

2.23 s ± 336 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# obstore==0.7.0, zarr==3.0.10 (tested on 23.07.2025, pangeo-notebook:2025.07.09)
## ObjectStore(GCSStore)

In [2]:
# Show the zarr version used for this section's timings.
zarr.__version__

'3.0.10'

In [3]:
import obstore
from obstore.auth.google import GoogleCredentialProvider
from google.oauth2 import service_account
from obstore.store import GCSStore, S3Store
from zarr.storage import ObjectStore, FsspecStore

In [4]:
# Path of the Zarr dataset within the bucket; used below as the GCS prefix.
datafile = '/velocity_product/version_0/plevel20.zarr'

# Service-account credentials read from the key file at `token_path`.
creds = service_account.Credentials.from_service_account_file(
    filename=token_path,
    scopes=["https://www.googleapis.com/auth/cloud-platform"],
)
credential_provider = GoogleCredentialProvider(credentials=creds)

# Read-only zarr store wrapping an obstore GCSStore rooted at `datafile`.
store = ObjectStore(
    store=GCSStore(
        bucket=bucket_name,
        prefix=datafile,
        credential_provider=credential_provider,
    ),
    read_only=True,
)

In [5]:
# Plain open outside the benchmark cells; `ds` is not referenced again in the cells shown here.
# NOTE(review): presumably a warm-up / sanity check before timing — confirm.
ds = xr.open_zarr(store=store)

In [6]:
%%timeit
# Benchmark: open through the obstore-backed ObjectStore with default open_zarr arguments.
ds = xr.open_zarr(store=store)

2.59 s ± 183 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [7]:
%%timeit
# Benchmark: same ObjectStore, with consolidated=True passed explicitly.
ds = xr.open_zarr(store=store, consolidated=True)

2.51 s ± 522 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [8]:
%%timeit
# Benchmark: same ObjectStore, consolidated=True and the Zarr format pinned with zarr_format=2.
ds = xr.open_zarr(store=store, consolidated=True, zarr_format=2)

2.45 s ± 379 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# zarr==3.0.10 (tested on 20.07.2025, pangeo-notebook:2025.07.09)
## gcsfs

In [17]:
import gcsfs
import json

In [29]:
# Parse the JSON token file at `token_path` and hand it to gcsfs for authentication.
with open(token_path) as token_file:
    token = json.load(token_file)
gcs = gcsfs.GCSFileSystem(token=token)

In [30]:
# Mapper over the dataset location (bucket name followed by the dataset path); passed to open_zarr as `store`.
mapper = gcs.get_mapper(bucket_name + datafile)

In [31]:
%%timeit
# Benchmark: open through the gcsfs mapper with default open_zarr arguments.
ds = xr.open_zarr(store=mapper)

1.55 s ± 17.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [32]:
%%timeit
# Benchmark: same gcsfs mapper, with consolidated=True passed explicitly.
ds = xr.open_zarr(store=mapper, consolidated=True)

1.53 s ± 13.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [33]:
%%timeit
# Benchmark: same gcsfs mapper, consolidated=True and the Zarr format pinned with zarr_format=2.
ds = xr.open_zarr(store=mapper, consolidated=True, zarr_format=2)

1.43 s ± 121 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### FsspecStore(fs=gcsfs.GCSFileSystem, ...)

In [34]:
# NOTE(review): `gcs` at this point is the filesystem created without asynchronous=True.
# The indented echo of this line in the output below looks like a warning — presumably zarr
# objecting to a synchronous filesystem; the next cell rebuilds `gcs` with asynchronous=True.
# This also rebinds `store`, which earlier sections used for the obstore ObjectStore.
store = FsspecStore(fs=gcs, path=bucket_name+datafile)

  store = FsspecStore(fs=gcs, path=bucket_name+datafile)


In [35]:
# Rebuild the gcsfs filesystem in asynchronous mode, then wrap it in zarr's FsspecStore.
# Both `gcs` and `store` are rebound here for the benchmark cells that follow.
with open(token_path) as token_file:
    token = json.load(token_file)
gcs = gcsfs.GCSFileSystem(token=token, asynchronous=True)
store = FsspecStore(fs=gcs, path=bucket_name + datafile)

In [36]:
%%timeit
# Benchmark: open through the FsspecStore (asynchronous gcsfs filesystem) with default open_zarr arguments.
ds = xr.open_zarr(store=store)

1.54 s ± 28.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [37]:
%%timeit
# Benchmark: same FsspecStore, with consolidated=True passed explicitly.
ds = xr.open_zarr(store=store, consolidated=True)

1.59 s ± 127 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [38]:
%%timeit
# Benchmark: same FsspecStore, consolidated=True and the Zarr format pinned with zarr_format=2.
ds = xr.open_zarr(store=store, consolidated=True, zarr_format=2)

1.41 s ± 70.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# zarr==2.18.3, pangeo-notebook:2024.11.11

In [10]:
import zarr

# Display the zarr version active in this environment.
zarr.__version__

'2.18.3'

In [11]:
import gcsfs
import json
from config import token_path, bucket_name

In [12]:
# Path of the Zarr dataset within the bucket.
datafile = '/velocity_product/version_0/plevel20.zarr'

# Parse the JSON token file at `token_path` and hand it to gcsfs for authentication.
with open(token_path) as token_file:
    token = json.load(token_file)
gcs = gcsfs.GCSFileSystem(token=token)

# Mapper over the dataset location; passed to open_zarr as `store` in the cells below.
mapper = gcs.get_mapper(bucket_name + datafile)

In [13]:
%%timeit
# Benchmark (zarr 2.18.3 environment): open through the gcsfs mapper with default open_zarr arguments.
ds = xr.open_zarr(store=mapper)

2.52 s ± 293 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [15]:
%%timeit
# Benchmark (zarr 2.18.3 environment): same mapper, with consolidated=True passed explicitly.
ds = xr.open_zarr(store=mapper, consolidated=True)

2.08 s ± 649 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [18]:
%%timeit
# Benchmark (zarr 2.18.3 environment): same mapper, consolidated=True and zarr_format=2.
ds = xr.open_zarr(store=mapper, consolidated=True, zarr_format=2)

1.92 s ± 414 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
