Skip to content

Commit

Permalink
Use Zarr v3 stores for all Formats. Use zarr.open for ZarrLocation init
Browse files Browse the repository at this point in the history
  • Loading branch information
will-moore committed Jun 12, 2024
1 parent 767b642 commit d5b37ac
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 61 deletions.
79 changes: 42 additions & 37 deletions ome_zarr/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@

from zarr.v2.storage import FSStore

from zarr.store import LocalStore, RemoteStore
from zarr.store import StoreLike, StorePath
from zarr.abc.store import Store

LOGGER = logging.getLogger("ome_zarr.format")

NGFF_URL_0_5 = "https://ngff.openmicroscopy.org/0.5"
Expand Down Expand Up @@ -62,7 +66,7 @@ def matches(self, metadata: dict) -> bool: # pragma: no cover
raise NotImplementedError()

@abstractmethod
def init_store(self, path: str, mode: str = "r") -> FSStore:
def init_store(self, path: str, mode: str = "r") -> Store:
raise NotImplementedError()

# @abstractmethod
Expand Down Expand Up @@ -136,6 +140,7 @@ def matches(self, metadata: dict) -> bool:
LOGGER.debug("%s matches %s?", self.version, version)
return version == self.version

# TODO Fix to return v3 Store
def init_store(self, path: str, mode: str = "r") -> FSStore:
store = FSStore(path, mode=mode, dimension_separator=".")
LOGGER.debug("Created legacy flat FSStore(%s, %s)", path, mode)
Expand Down Expand Up @@ -182,30 +187,51 @@ class FormatV02(FormatV01):
def version(self) -> str:
return "0.2"

def init_store(self, path: str, mode: str = "r") -> FSStore:
# def init_store(self, path: str, mode: str = "r") -> FSStore:
# """
# Not ideal. Stores should remain hidden
# TODO: could also check dimension_separator
# """

# kwargs = {
# "dimension_separator": "/",
# "normalize_keys": False,
# }

# mkdir = True


# if "r" in mode or path.startswith(("http", "s3")):
# # Could be simplified on the fsspec side
# mkdir = False
# if mkdir:
# kwargs["auto_mkdir"] = True

# store = FSStore(
# path,
# mode=mode,
# **kwargs,
# ) # TODO: open issue for using Path
# LOGGER.debug("Created nested FSStore(%s, %s, %s)", path, mode, kwargs)
# return store

def init_store(self, path: str, mode: str = "r") -> Store:
"""
Not ideal. Stores should remain hidden
TODO: could also check dimension_separator
Returns a Zarr v3 Store: a LocalStore, or a RemoteStore for http/s3 paths
"""

kwargs = {
"dimension_separator": "/",
"normalize_keys": False,
}
cls = LocalStore
kwargs = {}

mkdir = True
if "r" in mode or path.startswith(("http", "s3")):
# Could be simplified on the fsspec side
mkdir = False
if mkdir:
kwargs["auto_mkdir"] = True
if path.startswith(("http", "s3")):
cls = RemoteStore

store = FSStore(
store = cls(
path,
mode=mode,
**kwargs,
) # TODO: open issue for using Path
LOGGER.debug("Created nested FSStore(%s, %s, %s)", path, mode, kwargs)
print("Created %s store %s(%s, %s, %s)" % (self.version, cls, path, mode, kwargs))
return store


Expand Down Expand Up @@ -358,27 +384,6 @@ def version(self) -> str:
def version_key(self) -> str:
return NGFF_URL_0_5

def init_store(self, path: str, mode: str = "r") -> FSStore:
"""
Returns a Zarr v3 PathStore
"""

from zarr.store import LocalStore, RemoteStore

cls = LocalStore
kwargs = {}

if path.startswith(("http", "s3")):
cls = RemoteStore

store = cls(
path,
mode=mode,
**kwargs,
) # TODO: open issue for using Path
print("Created v0.5 store %s(%s, %s, %s)" % (cls, path, mode, kwargs))
return store

def matches(self, metadata: dict) -> bool:
"""Version 0.5+ defined by version_key (URL)"""
version = self._get_metadata_version(metadata)
Expand Down
57 changes: 38 additions & 19 deletions ome_zarr/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,9 @@ def __init__(
self.__init_metadata()
print("ZarrLocation init self.__metadata", self.__metadata)
detected = detect_format(self.__metadata, loader)
LOGGER.debug("ZarrLocation.__init__ %s detected: %s", self.__path, detected)
print("ZarrLocation.__init__ %s detected: %s", self.__path, detected)
if detected != self.__fmt:
LOGGER.warning(
print(
"version mismatch: detected: %s, requested: %s", detected, self.__fmt
)
self.__fmt = detected
Expand Down Expand Up @@ -109,18 +109,31 @@ def __init_metadata(self) -> None:
try:
# NB: zarr_format not supported in Group.open() or Array.open() yet
# We want to use zarr_format=None to handle v2 or v3
zarr_group = Group.open(self.__store) #, zarr_format=None)
self.zgroup = zarr_group.metadata.to_dict()
self.__metadata = self.zgroup
except FileNotFoundError:
# group doesn't exist yet, try array
try:
zarr_array = Array.open(self.__store) #, zarr_format=None)
self.zarray = zarr_array.metadata.to_dict()
self.__metadata = self.zarray
except (ValueError, KeyError, FileNotFoundError):
# exceptions raised may change here?
self.__exists = False
print("ZarrLocation __init_metadata: TRY to open group...")
# zarr_group = Group.open(self.__store) #, zarr_format=None)

# NB: If the store is writable, open() will fail IF the location doesn't exist because
# zarr v3 will try to create an Array (instead of looking for a Group)
# and fails because 'shape' is not provided - see TypeError below.
# NB: we need zarr_format here to open V2 groups
# see https://github.com/zarr-developers/zarr-python/issues/1958
array_or_group = open(store=self.__store, zarr_format=2)
print("ZarrLocation __init metadata array_or_group", array_or_group)

self.__metadata = array_or_group.metadata.to_dict()
if isinstance(array_or_group, Group):
# {'attributes': {'_creator': {'name': 'omero-zarr', 'version': '0.3.1.dev10+geab4dde'}, 'multiscales': [{'name': 'My_i
# Need to "unwrap" the 'attributes' to get group metadata
self.zgroup = self.__metadata["attributes"]
self.__metadata = self.zgroup
else:
self.zarray = self.__metadata
except (ValueError, KeyError, FileNotFoundError):
# exceptions raised may change here?
self.__exists = False
except TypeError:
# open() tried to open_array() but we didn't supply 'shape' argument
self.__exists = False

# self.zarray: JSONDict = await self.get_json(".zarray")
# self.zgroup: JSONDict = await self.get_json(".zgroup")
Expand Down Expand Up @@ -174,7 +187,10 @@ def root_attrs(self) -> JSONDict:

def load(self, subpath: str = "") -> da.core.Array:
"""Use dask.array.from_zarr to load the subpath."""
return da.from_zarr(self.__store, subpath)
# return da.from_zarr(self.__store, subpath)
from zarr import load
# NB: zarr.load() returns an in-memory NumPy array (no chunks) instead of a lazy Dask array
return load(store=self.__store, path=subpath)

def __eq__(self, rhs: object) -> bool:
if type(self) is not type(rhs):
Expand All @@ -201,6 +217,7 @@ def create(self, path: str) -> "ZarrLocation":
"""Create a new Zarr location for the given path."""
subpath = self.subpath(path)
LOGGER.debug("open(%s(%s))", self.__class__.__name__, subpath)
print("ZarrLocation.create() subpath", subpath)
return self.__class__(subpath, mode=self.__mode, fmt=self.__fmt)

async def get_json(self, subpath: str) -> JSONDict:
Expand Down Expand Up @@ -254,10 +271,12 @@ def _isfile(self) -> bool:
Return whether the current underlying implementation
points to a local file or not.
"""
return self.__store.fs.protocol == "file" or self.__store.fs.protocol == (
"file",
"local",
)
# TODO: TEMP!
return True
# return self.__store.fs.protocol == "file" or self.__store.fs.protocol == (
# "file",
# "local",
# )

def _ishttp(self) -> bool:
"""
Expand Down
19 changes: 14 additions & 5 deletions ome_zarr/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ def matches(zarr: ZarrLocation) -> bool:
def __init__(self, node: Node) -> None:
super().__init__(node)
label_names = self.lookup("labels", [])
print("Labels Spec __init__ label_names", label_names)
for name in label_names:
child_zarr = self.zarr.create(name)
if child_zarr.exists():
Expand Down Expand Up @@ -269,6 +270,7 @@ def __init__(self, node: Node) -> None:
)
if properties:
node.metadata.update({"properties": properties})
print("Label Spec __init__ END")


class Multiscales(Spec):
Expand Down Expand Up @@ -307,25 +309,32 @@ def __init__(self, node: Node) -> None:

for resolution in self.datasets:
data: da.core.Array = self.array(resolution, version)
chunk_sizes = [
str(c[0]) + (" (+ %s)" % c[-1] if c[-1] != c[0] else "")
for c in data.chunks
]
# TODO: TEMP ignore chunks since data is numpy array not Dask Array
# (Dask not working with Zarr v3 just yet)
# chunk_sizes = [
# str(c[0]) + (" (+ %s)" % c[-1] if c[-1] != c[0] else "")
# for c in data.chunks
# ]
LOGGER.info("resolution: %s", resolution)
axes_names = None
if axes is not None:
axes_names = tuple(
axis if isinstance(axis, str) else axis["name"] for axis in axes
)
LOGGER.info(" - shape %s = %s", axes_names, data.shape)
LOGGER.info(" - chunks = %s", chunk_sizes)
# LOGGER.info(" - chunks = %s", chunk_sizes)
LOGGER.info(" - dtype = %s", data.dtype)
node.data.append(data)

# Load possible node data
# When this Multiscales is itself a Labels image, this child_zarr won't exist
# e.g. 6001240.zarr/labels/0/labels doesn't exist
# BUT calling this with zarr v3 fails since
child_zarr = self.zarr.create("labels")
print("Multiscales child_zarr 'labels' exists??", child_zarr, child_zarr.exists())
if child_zarr.exists():
node.add(child_zarr, visibility=False)
print("Multiscales __init__ END")

def array(self, resolution: str, version: str) -> da.core.Array:
# data.shape is (t, c, z, y, x) by convention
Expand Down

0 comments on commit d5b37ac

Please sign in to comment.