Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix IO error reporting #1273

Merged
merged 13 commits into from Jan 4, 2024
Merged
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -5,6 +5,7 @@
# Caches for compiled and downloaded files
__pycache__/
/*cache/
/node_modules/
/data/

# Distribution / packaging
Expand Down
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Expand Up @@ -13,6 +13,8 @@ repos:
rev: v4.0.0-alpha.4
hooks:
- id: prettier
exclude_types:
- markdown
Zethson marked this conversation as resolved.
Show resolved Hide resolved
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
hooks:
Expand Down
53 changes: 32 additions & 21 deletions anndata/_io/utils.py
@@ -1,20 +1,21 @@
from __future__ import annotations

from functools import wraps
from typing import Callable, Literal
from typing import Callable, Literal, Union, cast
from warnings import warn

import h5py
from packaging.version import Version

from anndata.compat import H5Group, ZarrGroup, add_note

from .._core.sparse_dataset import BaseCompressedSparseDataset
from ..compat import H5Array, H5Group, ZarrArray, ZarrGroup, add_note

# For allowing h5py v3
# https://github.com/scverse/anndata/issues/442
H5PY_V3 = Version(h5py.__version__).major >= 3

Elem = Union[ZarrGroup, ZarrArray, H5Group, H5Array, BaseCompressedSparseDataset]

# -------------------------------------------------------------------------------
# Type conversion
# -------------------------------------------------------------------------------
Expand Down Expand Up @@ -151,31 +152,32 @@ class AnnDataReadError(OSError):
pass


def _get_parent(elem):
def _get_path(elem: Elem) -> str:
try:
import zarr
except ImportError:
zarr = None
if zarr and isinstance(elem, (zarr.Group, zarr.Array)):
parent = elem.store # Not sure how to always get a name out of this
elif isinstance(elem, BaseCompressedSparseDataset):
parent = elem.group.file.name
path = elem.path
if isinstance(elem, BaseCompressedSparseDataset):
path = elem.group.name
else:
parent = elem.file.name
return parent
path = elem.name
return f'/{path.removeprefix("/")}'


def add_key_note(e: BaseException, elem, key, op=Literal["read", "writ"]) -> None:
def add_key_note(
e: BaseException, elem: Elem, path: str, key: str, op: Literal["read", "writ"]
) -> None:
if any(
f"Error raised while {op}ing key" in note
for note in getattr(e, "__notes__", [])
):
return
parent = _get_parent(elem)
add_note(
e,
f"Error raised while {op}ing key {key!r} of {type(elem)} to " f"{parent}",
)

dir = "to" if op == "writ" else "from"
msg = f"Error raised while {op}ing key {key!r} of {type(elem)} {dir} {path}"
add_note(e, msg)


def report_read_key_on_error(func):
Expand All @@ -198,13 +200,18 @@ def func_wrapper(*args, **kwargs):
from anndata._io.specs import Reader

# Figure out signature (method vs function) by going through args
for elem in args:
if not isinstance(elem, Reader):
elem: Elem
for arg in args:
if not isinstance(arg, Reader):
elem = cast(Elem, arg)
break
else:
raise ValueError("No element found in args.")
try:
return func(*args, **kwargs)
except Exception as e:
add_key_note(e, elem, elem.name, "read")
path, key = _get_path(elem).rsplit("/", 1)
add_key_note(e, elem, path or "/", key, "read")
raise

return func_wrapper
Expand All @@ -231,14 +238,18 @@ def func_wrapper(*args, **kwargs):

# Figure out signature (method vs function) by going through args
for i in range(len(args)):
elem = args[i]
arg = args[i]
key = args[i + 1]
if not isinstance(elem, Writer):
if not isinstance(arg, Writer):
elem = cast(Elem, arg)
break
else:
raise ValueError("No element found in args.")
try:
return func(*args, **kwargs)
except Exception as e:
add_key_note(e, elem, key, "writ")
path = _get_path(elem)
add_key_note(e, elem, path, key, "writ")
raise

return func_wrapper
Expand Down
37 changes: 27 additions & 10 deletions anndata/tests/test_io_utils.py
@@ -1,6 +1,7 @@
from __future__ import annotations

from contextlib import suppress
from contextlib import AbstractContextManager, suppress
from typing import TYPE_CHECKING

import h5py
import pandas as pd
Expand All @@ -9,13 +10,15 @@

import anndata as ad
from anndata._io.specs.registry import IORegistryError
from anndata._io.utils import (
report_read_key_on_error,
)
from anndata._io.utils import report_read_key_on_error
from anndata.compat import _clean_uns
from anndata.experimental import read_elem, write_elem
from anndata.tests.helpers import pytest_8_raises

if TYPE_CHECKING:
from collections.abc import Callable
from pathlib import Path


@pytest.fixture(params=["h5ad", "zarr"])
def diskfmt(request):
Expand All @@ -29,20 +32,32 @@ def diskfmt(request):
pytest.param(lambda p: h5py.File(p / "test.h5", mode="a"), id="h5py"),
],
)
def test_key_error(tmp_path, group_fn):
@pytest.mark.parametrize("nested", [True, False], ids=["nested", "root"])
def test_key_error(
tmp_path, group_fn: Callable[[Path], zarr.Group | h5py.Group], nested: bool
):
@report_read_key_on_error
def read_attr(_):
raise NotImplementedError()

group = group_fn(tmp_path)
with group if hasattr(group, "__enter__") else suppress():
with group if isinstance(group, AbstractContextManager) else suppress():
if nested:
group = group.create_group("nested")
path = "/nested"
else:
path = "/"
group["X"] = [1, 2, 3]
group.create_group("group")

with pytest_8_raises(NotImplementedError, match=r"/X"):
with pytest_8_raises(
NotImplementedError, match=rf"reading key 'X'.*from {path}$"
):
read_attr(group["X"])

with pytest_8_raises(NotImplementedError, match=r"/group"):
with pytest_8_raises(
NotImplementedError, match=rf"reading key 'group'.*from {path}$"
):
read_attr(group["group"])


Expand All @@ -53,7 +68,9 @@ def test_write_error_info(diskfmt, tmp_path):
# Assuming we don't define a writer for tuples
a = ad.AnnData(uns={"a": {"b": {"c": (1, 2, 3)}}})

with pytest_8_raises(IORegistryError, match=r"Error raised while writing key 'c'"):
with pytest_8_raises(
IORegistryError, match=r"Error raised while writing key 'c'.*to /uns/a/b"
):
write(a)


Expand Down Expand Up @@ -89,7 +106,7 @@ class Foo:
# (?!...) is a negative lookahead
# (?s) enables the dot to match newlines
# https://stackoverflow.com/a/406408/130164 <- copilot suggested lol
pattern = r"(?s)((?!Error raised while writing key '/?a').)*$"
pattern = r"(?s)^((?!Error raised while writing key '/?a').)*$"

with pytest_8_raises(IORegistryError, match=pattern):
write_elem(group, "/", {"a": {"b": Foo()}})
Expand Down
1 change: 1 addition & 0 deletions docs/release-notes/0.10.4.md
Expand Up @@ -7,6 +7,7 @@
* `adata[:, []]` now returns an `AnnData` object empty on the appropriate dimensions instead of erroring {pr}`1243` {user}`ilan-gold`
* `adata.X[mask]` works in newer `numpy` versions when `X` is `backed` {pr}`1255` {user}`ilan-gold`
* `adata.X[...]` fixed for `X` as a `BaseCompressedSparseDataset` with `zarr` backend {pr}`1265` {user}`ilan-gold`
* Improve read/write error reporting {pr}`1273` {user}`flying-sheep`

```{rubric} Documentation
```
Expand Down