From f0a5deacaf17654d43287736d5d928d2d9c9e4f4 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Wed, 20 Dec 2023 12:24:31 +0100 Subject: [PATCH 01/12] Fix error reporting --- anndata/_io/utils.py | 31 ++++++++++++++++++------------- anndata/tests/helpers.py | 3 ++- anndata/tests/test_io_utils.py | 4 +++- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/anndata/_io/utils.py b/anndata/_io/utils.py index cd90be473..3d6d64edc 100644 --- a/anndata/_io/utils.py +++ b/anndata/_io/utils.py @@ -7,7 +7,7 @@ import h5py from packaging.version import Version -from anndata.compat import H5Group, ZarrGroup, add_note +from anndata.compat import H5Array, H5Group, ZarrArray, ZarrGroup, add_note from .._core.sparse_dataset import BaseCompressedSparseDataset @@ -151,31 +151,36 @@ class AnnDataReadError(OSError): pass -def _get_parent(elem): +def _get_path( + elem: ZarrGroup | ZarrArray | H5Group | H5Array | BaseCompressedSparseDataset, +) -> str: try: import zarr except ImportError: zarr = None if zarr and isinstance(elem, (zarr.Group, zarr.Array)): - parent = elem.store # Not sure how to always get a name out of this - elif isinstance(elem, BaseCompressedSparseDataset): - parent = elem.group.file.name + path = elem.path + if isinstance(elem, BaseCompressedSparseDataset): + path = elem.group.name else: - parent = elem.file.name - return parent + path = elem.name + return f'/{path.removeprefix("/")}' -def add_key_note(e: BaseException, elem, key, op=Literal["read", "writ"]) -> None: +def add_key_note( + e: BaseException, + elem: ZarrGroup | ZarrArray | H5Group | H5Array | BaseCompressedSparseDataset, + key: str, + op: Literal["read", "writ"], +) -> None: if any( f"Error raised while {op}ing key" in note for note in getattr(e, "__notes__", []) ): return - parent = _get_parent(elem) - add_note( - e, - f"Error raised while {op}ing key {key!r} of {type(elem)} to " f"{parent}", - ) + + msg = f"Error raised while {op}ing key {key!r} of {type(elem)} to {_get_path(elem)}" + add_note(e, msg) def report_read_key_on_error(func): diff --git a/anndata/tests/helpers.py b/anndata/tests/helpers.py index 316fc991e..fc4b57c80 100644 --- a/anndata/tests/helpers.py +++ b/anndata/tests/helpers.py @@ -651,7 +651,8 @@ def check_error_or_notes_match(e: pytest.ExceptionInfo, pattern: str | re.Patter import traceback message = "".join(traceback.format_exception_only(e.type, e.value)) - assert re.search( + # To match pytest semantics, this must be `match`, not `search`. + assert re.match( pattern, message ), f"Could not find pattern: '{pattern}' in error:\n\n{message}\n" diff --git a/anndata/tests/test_io_utils.py b/anndata/tests/test_io_utils.py index c70091474..df5d70c84 100644 --- a/anndata/tests/test_io_utils.py +++ b/anndata/tests/test_io_utils.py @@ -53,7 +53,9 @@ def test_write_error_info(diskfmt, tmp_path): # Assuming we don't define a writer for tuples a = ad.AnnData(uns={"a": {"b": {"c": (1, 2, 3)}}}) - with pytest_8_raises(IORegistryError, match=r"Error raised while writing key 'c'"): + with pytest_8_raises( + IORegistryError, match=r"Error raised while writing key 'c'.*to /uns/a/b" + ): write(a) From 143141ab161ed2567bf2ae5390d8bc85815b77fd Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Wed, 20 Dec 2023 12:38:43 +0100 Subject: [PATCH 02/12] correct fix --- anndata/tests/helpers.py | 3 +-- anndata/tests/test_io_utils.py | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/anndata/tests/helpers.py b/anndata/tests/helpers.py index fc4b57c80..316fc991e 100644 --- a/anndata/tests/helpers.py +++ b/anndata/tests/helpers.py @@ -651,8 +651,7 @@ def check_error_or_notes_match(e: pytest.ExceptionInfo, pattern: str | re.Patter import traceback message = "".join(traceback.format_exception_only(e.type, e.value)) - # To match pytest semantics, this must be `match`, not `search`. - assert re.match( + assert re.search( pattern, message ), f"Could not find pattern: '{pattern}' in error:\n\n{message}\n" diff --git a/anndata/tests/test_io_utils.py b/anndata/tests/test_io_utils.py index df5d70c84..453beaccd 100644 --- a/anndata/tests/test_io_utils.py +++ b/anndata/tests/test_io_utils.py @@ -39,10 +39,10 @@ def read_attr(_): group["X"] = [1, 2, 3] group.create_group("group") - with pytest_8_raises(NotImplementedError, match=r"/X"): + with pytest_8_raises(NotImplementedError, match=r".*/X$"): read_attr(group["X"]) - with pytest_8_raises(NotImplementedError, match=r"/group"): + with pytest_8_raises(NotImplementedError, match=r".*/group$"): read_attr(group["group"]) @@ -91,7 +91,7 @@ class Foo: # (?!...) is a negative lookahead # (?s) enables the dot to match newlines # https://stackoverflow.com/a/406408/130164 <- copilot suggested lol - pattern = r"(?s)((?!Error raised while writing key '/?a').)*$" + pattern = r"(?s)^((?!Error raised while writing key '/?a').)*$" with pytest_8_raises(IORegistryError, match=pattern): write_elem(group, "/", {"a": {"b": Foo()}}) From b4961021ee32f28c32278c923211ca9aef8403d5 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Wed, 20 Dec 2023 17:05:47 +0100 Subject: [PATCH 03/12] Fix read --- .gitignore | 1 + anndata/_io/utils.py | 40 +++++++++++++++++++--------------- anndata/tests/test_io_utils.py | 31 +++++++++++++++++++------- 3 files changed, 47 insertions(+), 25 deletions(-) diff --git a/.gitignore b/.gitignore index dded609a6..88f0e90c3 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ # Caches for compiled and downloaded files __pycache__/ /*cache/ +/node_modules/ /data/ # Distribution / packaging diff --git a/anndata/_io/utils.py b/anndata/_io/utils.py index 3d6d64edc..4c9c41791 100644 --- a/anndata/_io/utils.py +++ b/anndata/_io/utils.py @@ -1,20 +1,21 @@ from __future__ import annotations from functools import wraps -from typing import Callable, Literal +from typing import Callable, Literal, Union, cast from warnings import warn import h5py from packaging.version import Version -from anndata.compat import H5Array, H5Group, ZarrArray, ZarrGroup, add_note - from .._core.sparse_dataset import BaseCompressedSparseDataset +from ..compat import H5Array, H5Group, ZarrArray, ZarrGroup, add_note # For allowing h5py v3 # https://github.com/scverse/anndata/issues/442 H5PY_V3 = Version(h5py.__version__).major >= 3 +Elem = Union[ZarrGroup, ZarrArray, H5Group, H5Array, BaseCompressedSparseDataset] + # ------------------------------------------------------------------------------- # Type conversion # ------------------------------------------------------------------------------- @@ -151,9 +152,7 @@ class AnnDataReadError(OSError): pass -def _get_path( - elem: ZarrGroup | ZarrArray | H5Group | H5Array | BaseCompressedSparseDataset, -) -> str: +def _get_path(elem: Elem) -> str: try: import zarr except ImportError: @@ -168,10 +167,7 @@ def _get_path( def add_key_note( - e: BaseException, - elem: ZarrGroup | ZarrArray | H5Group | H5Array | BaseCompressedSparseDataset, - key: str, - op: Literal["read", "writ"], + e: BaseException, elem: Elem, path: str, key: str, op: Literal["read", "writ"] ) -> None: if any( f"Error raised while {op}ing key" in note @@ -179,7 +175,8 @@ def add_key_note( ): return - msg = f"Error raised while {op}ing key {key!r} of {type(elem)} to {_get_path(elem)}" + dir = "to" if op == "writ" else "from" + msg = f"Error raised while {op}ing key {key!r} of {type(elem)} {dir} {path}" add_note(e, msg) @@ -203,13 +200,18 @@ def func_wrapper(*args, **kwargs): from anndata._io.specs import Reader # Figure out signature (method vs function) by going through args - for elem in args: - if not isinstance(elem, Reader): + elem: Elem + for arg in args: + if not isinstance(arg, Reader): + elem = cast(Elem, arg) break + else: + raise ValueError("No element found in args.") try: return func(*args, **kwargs) except Exception as e: - add_key_note(e, elem, elem.name, "read") + path, key = _get_path(elem).rsplit("/", 1) + add_key_note(e, elem, path or "/", key, "read") raise return func_wrapper @@ -236,14 +238,18 @@ def func_wrapper(*args, **kwargs): # Figure out signature (method vs function) by going through args for i in range(len(args)): - elem = args[i] + arg = args[i] key = args[i + 1] - if not isinstance(elem, Writer): + if not isinstance(arg, Writer): + elem = cast(Elem, arg) break + else: + raise ValueError("No element found in args.") try: return func(*args, **kwargs) except Exception as e: - add_key_note(e, elem, key, "writ") + path = _get_path(elem) + add_key_note(e, elem, path, key, "writ") raise return func_wrapper diff --git a/anndata/tests/test_io_utils.py b/anndata/tests/test_io_utils.py index 453beaccd..803a4ad72 100644 --- a/anndata/tests/test_io_utils.py +++ b/anndata/tests/test_io_utils.py @@ -1,6 +1,7 @@ from __future__ import annotations -from contextlib import suppress +from contextlib import AbstractContextManager, suppress +from typing import TYPE_CHECKING import h5py import pandas as pd @@ -9,13 +10,15 @@ import anndata as ad from anndata._io.specs.registry import IORegistryError -from anndata._io.utils import ( - report_read_key_on_error, -) +from anndata._io.utils import report_read_key_on_error from anndata.compat import _clean_uns from anndata.experimental import read_elem, write_elem from anndata.tests.helpers import pytest_8_raises +if TYPE_CHECKING: + from collections.abc import Callable + from pathlib import Path + @pytest.fixture(params=["h5ad", "zarr"]) def diskfmt(request): @@ -29,20 +32,32 @@ def diskfmt(request): pytest.param(lambda p: h5py.File(p / "test.h5", mode="a"), id="h5py"), ], ) -def test_key_error(tmp_path, group_fn): +@pytest.mark.parametrize("nested", [True, False], ids=["nested", "root"]) +def test_key_error( + tmp_path, group_fn: Callable[[Path], zarr.Group | h5py.Group], nested: bool +): @report_read_key_on_error def read_attr(_): raise NotImplementedError() group = group_fn(tmp_path) - with group if hasattr(group, "__enter__") else suppress(): + with group if isinstance(group, AbstractContextManager) else suppress(): + if nested: + group = group.create_group("nested") + path = "/nested" + else: + path = "/" group["X"] = [1, 2, 3] group.create_group("group") - with pytest_8_raises(NotImplementedError, match=r".*/X$"): + with pytest_8_raises( + NotImplementedError, match=rf"reading key 'X'.*from {path}$" + ): read_attr(group["X"]) - with pytest_8_raises(NotImplementedError, match=r".*/group$"): + with pytest_8_raises( + NotImplementedError, match=rf"reading key 'group'.*from {path}$" + ): read_attr(group["group"]) From 05321aca0a008afcc66ef62cfed4b7214550eeb4 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Wed, 20 Dec 2023 17:15:56 +0100 Subject: [PATCH 04/12] relnotes --- .pre-commit-config.yaml | 2 ++ docs/release-notes/0.10.4.md | 1 + 2 files changed, 3 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 67ab9d1f4..16fefdf71 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,6 +13,8 @@ repos: rev: v4.0.0-alpha.4 hooks: - id: prettier + exclude_types: + - markdown - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.5.0 hooks: diff --git a/docs/release-notes/0.10.4.md b/docs/release-notes/0.10.4.md index 2739f5c7f..ee1560ebe 100644 --- a/docs/release-notes/0.10.4.md +++ b/docs/release-notes/0.10.4.md @@ -7,6 +7,7 @@ * `adata[:, []]` now returns an `AnnData` object empty on the appropriate dimensions instead of erroring {pr}`1243` {user}`ilan-gold` * `adata.X[mask]` works in newer `numpy` versions when `X` is `backed` {pr}`1255` {user}`ilan-gold` * `adata.X[...]` fixed for `X` as a `BaseCompressedSparseDataset` with `zarr` backend {pr}`1265` {user}`ilan-gold` +* Improve read/write error reporting {pr}`1273` {user}`flying-sheep` ```{rubric} Documentation ``` From 2ffc11af59d5a97e1758d786fa6b554e1d764818 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 2 Jan 2024 14:49:35 +0100 Subject: [PATCH 05/12] Fix test regex --- anndata/tests/test_readwrite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anndata/tests/test_readwrite.py b/anndata/tests/test_readwrite.py index 22b1aaffc..67341fe57 100644 --- a/anndata/tests/test_readwrite.py +++ b/anndata/tests/test_readwrite.py @@ -277,7 +277,7 @@ def test_read_full_io_error(tmp_path, name, read, write): store["obs"].attrs["encoding-type"] = "invalid" with pytest_8_raises( IORegistryError, - match=r"raised while reading key '/obs'", + match=r"raised while reading key 'obs'.*from /$", ) as exc_info: read(path) assert re.search( From 36c8e2166c1e1e5bc3fac6485ac22e3074d8916b Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 2 Jan 2024 15:07:56 +0100 Subject: [PATCH 06/12] more defensive --- anndata/_core/anndata.py | 2 +- anndata/_io/utils.py | 18 ++++++------------ 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/anndata/_core/anndata.py b/anndata/_core/anndata.py index b37e33177..a2b010bdf 100644 --- a/anndata/_core/anndata.py +++ b/anndata/_core/anndata.py @@ -74,7 +74,7 @@ class StorageType(Enum): DaskArray = DaskArray CupyArray = CupyArray CupySparseMatrix = CupySparseMatrix - BackedSparseMAtrix = BaseCompressedSparseDataset + BackedSparseMatrix = BaseCompressedSparseDataset @classmethod def classes(cls): diff --git a/anndata/_io/utils.py b/anndata/_io/utils.py index 4c9c41791..0429137cf 100644 --- a/anndata/_io/utils.py +++ b/anndata/_io/utils.py @@ -1,7 +1,7 @@ from __future__ import annotations from functools import wraps -from typing import Callable, Literal, Union, cast +from typing import TYPE_CHECKING, Callable, Literal, Union, cast from warnings import warn import h5py @@ -10,12 +10,13 @@ from .._core.sparse_dataset import BaseCompressedSparseDataset from ..compat import H5Array, H5Group, ZarrArray, ZarrGroup, add_note +if TYPE_CHECKING: + Elem = Union[ZarrGroup, ZarrArray, H5Group, H5Array, BaseCompressedSparseDataset] + # For allowing h5py v3 # https://github.com/scverse/anndata/issues/442 H5PY_V3 = Version(h5py.__version__).major >= 3 -Elem = Union[ZarrGroup, ZarrArray, H5Group, H5Array, BaseCompressedSparseDataset] - # ------------------------------------------------------------------------------- # Type conversion # ------------------------------------------------------------------------------- @@ -153,16 +154,9 @@ class AnnDataReadError(OSError): def _get_path(elem: Elem) -> str: - try: - import zarr - except ImportError: - zarr = None - if zarr and isinstance(elem, (zarr.Group, zarr.Array)): - path = elem.path if isinstance(elem, BaseCompressedSparseDataset): - path = elem.group.name - else: - path = elem.name + elem = elem.group + path = elem.name or "??" # can be None return f'/{path.removeprefix("/")}' From 34b445c6b24c18470dfbf8aad1c31091b11a7cd8 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 2 Jan 2024 15:36:16 +0100 Subject: [PATCH 07/12] Fix tests --- anndata/_io/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/anndata/_io/utils.py b/anndata/_io/utils.py index 0429137cf..651b7293c 100644 --- a/anndata/_io/utils.py +++ b/anndata/_io/utils.py @@ -154,6 +154,7 @@ class AnnDataReadError(OSError): def _get_path(elem: Elem) -> str: + """Return an absolute path of an element (always starts with “/”).""" if isinstance(elem, BaseCompressedSparseDataset): elem = elem.group path = elem.name or "??" # can be None @@ -197,7 +198,7 @@ def func_wrapper(*args, **kwargs): elem: Elem for arg in args: if not isinstance(arg, Reader): - elem = cast(Elem, arg) + elem = cast("Elem", arg) break else: raise ValueError("No element found in args.") @@ -235,7 +236,7 @@ def func_wrapper(*args, **kwargs): arg = args[i] key = args[i + 1] if not isinstance(arg, Writer): - elem = cast(Elem, arg) + elem = cast("Elem", arg) break else: raise ValueError("No element found in args.") From 9dad3498bb9aa6113c78c451c405cfcb9fb555d4 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 2 Jan 2024 18:05:07 +0100 Subject: [PATCH 08/12] Pytest 8.0.0rc1 --- anndata/tests/test_awkward.py | 13 +++++-------- anndata/tests/test_io_warnings.py | 4 +++- pyproject.toml | 2 +- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/anndata/tests/test_awkward.py b/anndata/tests/test_awkward.py index 9e780c8a8..2e996bfc6 100644 --- a/anndata/tests/test_awkward.py +++ b/anndata/tests/test_awkward.py @@ -1,7 +1,7 @@ """Tests related to awkward arrays""" from __future__ import annotations -from contextlib import nullcontext +import warnings import numpy as np import numpy.testing as npt @@ -381,15 +381,12 @@ def test_concat_mixed_types(key, arrays, expected, join): to_concat.append(tmp_adata) if isinstance(expected, type) and issubclass(expected, Exception): - ctx = ( - pytest.warns( + with pytest.raises(expected), warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + r"The behavior of DataFrame concatenation with empty or all-NA entries is deprecated", FutureWarning, - match=r"The behavior of DataFrame concatenation with empty or all-NA entries is deprecated", ) - if any(df.empty for df in arrays if isinstance(df, pd.DataFrame)) - else nullcontext() - ) - with pytest.raises(expected), ctx: anndata.concat(to_concat, axis=axis, join=join) else: result_adata = anndata.concat(to_concat, axis=axis, join=join) diff --git a/anndata/tests/test_io_warnings.py b/anndata/tests/test_io_warnings.py index ac704c249..c6a089158 100644 --- a/anndata/tests/test_io_warnings.py +++ b/anndata/tests/test_io_warnings.py @@ -14,7 +14,9 @@ def test_old_format_warning_thrown(): import scanpy as sc - with pytest.warns(ad._warnings.OldFormatWarning): + with pytest.warns(ad._warnings.OldFormatWarning), pytest.warns( + FutureWarning, match=r"Moving element from \.uns.*to \.obsp" + ): pth = Path(sc.datasets.__file__).parent / "10x_pbmc68k_reduced.h5ad" ad.read_h5ad(pth) diff --git a/pyproject.toml b/pyproject.toml index c705aff43..ffb275bde 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,7 +80,7 @@ doc = [ ] test = [ "loompy>=3.0.5", - "pytest>=6.0", + "pytest >=6.0, !=8.0.0rc1", # https://github.com/pytest-dev/pytest/issues/11759 "pytest-cov>=2.10", "zarr", "matplotlib", From 7f5a02738077346c7c1dad1cac83de9b2de9c32d Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 2 Jan 2024 18:16:08 +0100 Subject: [PATCH 09/12] hm --- anndata/tests/test_io_warnings.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/anndata/tests/test_io_warnings.py b/anndata/tests/test_io_warnings.py index c6a089158..ac704c249 100644 --- a/anndata/tests/test_io_warnings.py +++ b/anndata/tests/test_io_warnings.py @@ -14,9 +14,7 @@ def test_old_format_warning_thrown(): import scanpy as sc - with pytest.warns(ad._warnings.OldFormatWarning), pytest.warns( - FutureWarning, match=r"Moving element from \.uns.*to \.obsp" - ): + with pytest.warns(ad._warnings.OldFormatWarning): pth = Path(sc.datasets.__file__).parent / "10x_pbmc68k_reduced.h5ad" ad.read_h5ad(pth) From a70fc16b259bf9d0685198009100b5a3bee3584d Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Thu, 4 Jan 2024 13:44:48 +0100 Subject: [PATCH 10/12] clarifications --- anndata/_io/h5ad.py | 17 +++++--- anndata/_io/specs/registry.py | 81 ++++++++++++++++------------------- anndata/_io/utils.py | 36 ++++++++-------- anndata/_io/zarr.py | 2 +- 4 files changed, 66 insertions(+), 70 deletions(-) diff --git a/anndata/_io/h5ad.py b/anndata/_io/h5ad.py index 337f13f92..faacdab6e 100644 --- a/anndata/_io/h5ad.py +++ b/anndata/_io/h5ad.py @@ -112,7 +112,12 @@ def write_h5ad( @report_write_key_on_error @write_spec(IOSpec("array", "0.2.0")) -def write_sparse_as_dense(f, key, value, dataset_kwargs=MappingProxyType({})): +def write_sparse_as_dense( + f: h5py.Group, + key: str, + value: sparse.spmatrix | BaseCompressedSparseDataset, + dataset_kwargs=MappingProxyType({}), +): real_key = None # Flag for if temporary key was used if key in f: if isinstance(value, BaseCompressedSparseDataset) and ( @@ -269,7 +274,7 @@ def callback(func, elem_name: str, elem, iospec): def _read_raw( f: h5py.File | AnnDataFileManager, as_sparse: Collection[str] = (), - rdasp: Callable[[h5py.Dataset], sparse.spmatrix] = None, + rdasp: Callable[[h5py.Dataset], sparse.spmatrix] | None = None, *, attrs: Collection[str] = ("X", "var", "varm"), ) -> dict: @@ -286,7 +291,7 @@ def _read_raw( @report_read_key_on_error -def read_dataframe_legacy(dataset) -> pd.DataFrame: +def read_dataframe_legacy(dataset: h5py.Dataset) -> pd.DataFrame: """Read pre-anndata 0.7 dataframes.""" warn( f"'{dataset.name}' was written with a very old version of AnnData. " @@ -305,7 +310,7 @@ def read_dataframe_legacy(dataset) -> pd.DataFrame: return df -def read_dataframe(group) -> pd.DataFrame: +def read_dataframe(group: h5py.Group | h5py.Dataset) -> pd.DataFrame: """Backwards compat function""" if not isinstance(group, h5py.Group): return read_dataframe_legacy(group) @@ -352,7 +357,7 @@ def read_dense_as_sparse( raise ValueError(f"Cannot read dense array as type: {sparse_format}") -def read_dense_as_csr(dataset, axis_chunk=6000): +def read_dense_as_csr(dataset: h5py.Dataset, axis_chunk: int = 6000): sub_matrices = [] for idx in idx_chunks_along_axis(dataset.shape, 0, axis_chunk): dense_chunk = dataset[idx] @@ -361,7 +366,7 @@ def read_dense_as_csr(dataset, axis_chunk=6000): return sparse.vstack(sub_matrices, format="csr") -def read_dense_as_csc(dataset, axis_chunk=6000): +def read_dense_as_csc(dataset: h5py.Dataset, axis_chunk: int = 6000): sub_matrices = [] for idx in idx_chunks_along_axis(dataset.shape, 1, axis_chunk): sub_matrix = sparse.csc_matrix(dataset[idx]) diff --git a/anndata/_io/specs/registry.py b/anndata/_io/specs/registry.py index 1f0b137f4..a8357295d 100644 --- a/anndata/_io/specs/registry.py +++ b/anndata/_io/specs/registry.py @@ -1,6 +1,6 @@ from __future__ import annotations -from collections.abc import Callable, Iterable, Mapping +from collections.abc import Mapping from dataclasses import dataclass from functools import singledispatch, wraps from types import MappingProxyType @@ -10,12 +10,13 @@ from anndata.compat import _read_attr if TYPE_CHECKING: + from collections.abc import Callable, Generator, Iterable + from anndata._types import GroupStorageType, StorageType + # TODO: This probably should be replaced by a hashable Mapping due to conversion b/w "_" and "-" # TODO: Should filetype be included in the IOSpec if it changes the encoding? Or does the intent that these things be "the same" overrule that? - - @dataclass(frozen=True) class IOSpec: encoding_type: str @@ -25,7 +26,9 @@ class IOSpec: # TODO: Should this subclass from LookupError? class IORegistryError(Exception): @classmethod - def _from_write_parts(cls, dest_type, typ, modifiers) -> IORegistryError: + def _from_write_parts( + cls, dest_type: type, typ: type, modifiers: frozenset[str] + ) -> IORegistryError: msg = f"No method registered for writing {typ} into {dest_type}" if modifiers: msg += f" with {modifiers}" @@ -36,7 +39,7 @@ def _from_read_parts( cls, method: str, registry: Mapping, - src_typ: StorageType, + src_typ: type[StorageType], spec: IOSpec, ) -> IORegistryError: # TODO: Improve error message if type exists, but version does not @@ -50,7 +53,7 @@ def _from_read_parts( def write_spec(spec: IOSpec): def decorator(func: Callable): @wraps(func) - def wrapper(g, k, *args, **kwargs): + def wrapper(g: GroupStorageType, k: str, *args, **kwargs): result = func(g, k, *args, **kwargs) g[k].attrs.setdefault("encoding-type", spec.encoding_type) g[k].attrs.setdefault("encoding-version", spec.encoding_version) @@ -193,12 +196,12 @@ def proc_spec(spec) -> IOSpec: @proc_spec.register(IOSpec) -def proc_spec_spec(spec) -> IOSpec: +def proc_spec_spec(spec: IOSpec) -> IOSpec: return spec @proc_spec.register(Mapping) -def proc_spec_mapping(spec) -> IOSpec: +def proc_spec_mapping(spec: Mapping[str, str]) -> IOSpec: return IOSpec(**{k.replace("-", "_"): v for k, v in spec.items()}) @@ -213,7 +216,9 @@ def get_spec( ) -def _iter_patterns(elem): +def _iter_patterns( + elem, +) -> Generator[tuple[type, type | str] | tuple[type, type, str], None, None]: """Iterates over possible patterns for an element in order of precedence.""" from anndata.compat import DaskArray @@ -236,40 +241,27 @@ def __init__(self, registry: IORegistry, callback: Callable | None = None) -> No def read_elem( self, elem: StorageType, - modifiers: frozenset(str) = frozenset(), + modifiers: frozenset[str] = frozenset(), ) -> Any: """Read an element from a store. See exported function for more details.""" from functools import partial - read_func = self.registry.get_reader( - type(elem), get_spec(elem), frozenset(modifiers) + iospec = get_spec(elem) + read_func = partial( + self.registry.get_reader(type(elem), iospec, modifiers), + _reader=self, ) - read_func = partial(read_func, _reader=self) - if self.callback is not None: - return self.callback(read_func, elem.name, elem, iospec=get_spec(elem)) - else: + if self.callback is None: return read_func(elem) + return self.callback(read_func, elem.name, elem, iospec=iospec) class Writer: - def __init__( - self, - registry: IORegistry, - callback: Callable[ - [ - GroupStorageType, - str, - StorageType, - dict, - ], - None, - ] - | None = None, - ): + def __init__(self, registry: IORegistry, callback: Callable | None = None): self.registry = registry self.callback = callback - def find_writer(self, dest_type, elem, modifiers): + def find_writer(self, dest_type: type, elem, modifiers: frozenset[str]): for pattern in _iter_patterns(elem): if self.registry.has_writer(dest_type, pattern, modifiers): return self.registry.get_writer(dest_type, pattern, modifiers) @@ -281,10 +273,10 @@ def write_elem( self, store: GroupStorageType, k: str, - elem, + elem: Any, *, - dataset_kwargs=MappingProxyType({}), - modifiers=frozenset(), + dataset_kwargs: Mapping[str, Any] = MappingProxyType({}), + modifiers: frozenset[str] = frozenset(), ): from functools import partial from pathlib import PurePosixPath @@ -313,17 +305,16 @@ def write_elem( _writer=self, ) - if self.callback is not None: - return self.callback( - write_func, - store, - k, - elem, - dataset_kwargs=dataset_kwargs, - iospec=self.registry.get_spec(elem), - ) - else: + if self.callback is None: return write_func(store, k, elem, dataset_kwargs=dataset_kwargs) + return self.callback( + write_func, + store, + k, + elem, + dataset_kwargs=dataset_kwargs, + iospec=self.registry.get_spec(elem), + ) def read_elem(elem: StorageType) -> Any: @@ -346,7 +337,7 @@ def write_elem( k: str, elem: Any, *, - dataset_kwargs: Mapping = MappingProxyType({}), + dataset_kwargs: Mapping[str, Any] = MappingProxyType({}), ) -> None: """ Write an element to a storage group using anndata encoding. diff --git a/anndata/_io/utils.py b/anndata/_io/utils.py index 651b7293c..ed5d4673b 100644 --- a/anndata/_io/utils.py +++ b/anndata/_io/utils.py @@ -1,6 +1,7 @@ from __future__ import annotations from functools import wraps +from itertools import pairwise from typing import TYPE_CHECKING, Callable, Literal, Union, cast from warnings import warn @@ -8,10 +9,12 @@ from packaging.version import Version from .._core.sparse_dataset import BaseCompressedSparseDataset -from ..compat import H5Array, H5Group, ZarrArray, ZarrGroup, add_note +from ..compat import H5Group, ZarrGroup, add_note if TYPE_CHECKING: - Elem = Union[ZarrGroup, ZarrArray, H5Group, H5Array, BaseCompressedSparseDataset] + from .._types import StorageType + + Storage = Union[StorageType, BaseCompressedSparseDataset] # For allowing h5py v3 # https://github.com/scverse/anndata/issues/442 @@ -153,16 +156,16 @@ class AnnDataReadError(OSError): pass -def _get_path(elem: Elem) -> str: +def _get_display_path(store: Storage) -> str: """Return an absolute path of an element (always starts with “/”).""" - if isinstance(elem, BaseCompressedSparseDataset): - elem = elem.group - path = elem.name or "??" # can be None + if isinstance(store, BaseCompressedSparseDataset): + store = store.group + path = store.name or "??" # can be None return f'/{path.removeprefix("/")}' def add_key_note( - e: BaseException, elem: Elem, path: str, key: str, op: Literal["read", "writ"] + e: BaseException, store: Storage, path: str, key: str, op: Literal["read", "writ"] ) -> None: if any( f"Error raised while {op}ing key" in note @@ -171,7 +174,7 @@ def add_key_note( return dir = "to" if op == "writ" else "from" - msg = f"Error raised while {op}ing key {key!r} of {type(elem)} {dir} {path}" + msg = f"Error raised while {op}ing key {key!r} of {type(store)} {dir} {path}" add_note(e, msg) @@ -195,18 +198,17 @@ def func_wrapper(*args, **kwargs): from anndata._io.specs import Reader # Figure out signature (method vs function) by going through args - elem: Elem for arg in args: if not isinstance(arg, Reader): - elem = cast("Elem", arg) + store = cast("Storage", arg) break else: raise ValueError("No element found in args.") try: return func(*args, **kwargs) except Exception as e: - path, key = _get_path(elem).rsplit("/", 1) - add_key_note(e, elem, path or "/", key, "read") + path, key = _get_display_path(store).rsplit("/", 1) + add_key_note(e, store, path or "/", key, "read") raise return func_wrapper @@ -232,19 +234,17 @@ def func_wrapper(*args, **kwargs): from anndata._io.specs import Writer # Figure out signature (method vs function) by going through args - for i in range(len(args)): - arg = args[i] - key = args[i + 1] + for arg, key in pairwise(args): if not isinstance(arg, Writer): - elem = cast("Elem", arg) + store = cast("Storage", arg) break else: raise ValueError("No element found in args.") try: return func(*args, **kwargs) except Exception as e: - path = _get_path(elem) - add_key_note(e, elem, path, key, "writ") + path = _get_display_path(store) + add_key_note(e, store, path, key, "writ") raise return func_wrapper diff --git a/anndata/_io/zarr.py b/anndata/_io/zarr.py index 00f9766f0..864475848 100644 --- a/anndata/_io/zarr.py +++ b/anndata/_io/zarr.py @@ -133,7 +133,7 @@ def read_dataframe_legacy(dataset: zarr.Array) -> pd.DataFrame: @report_read_key_on_error -def read_dataframe(group) -> pd.DataFrame: +def read_dataframe(group: zarr.Group | zarr.Array) -> pd.DataFrame: # Fast paths if isinstance(group, zarr.Array): return read_dataframe_legacy(group) From e68f268fb2ced5f9f6083218cc2c2d72ab2dc1e3 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Thu, 4 Jan 2024 13:47:49 +0100 Subject: [PATCH 11/12] more types --- anndata/_io/h5ad.py | 4 +++- anndata/experimental/_dispatch_io.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/anndata/_io/h5ad.py b/anndata/_io/h5ad.py index faacdab6e..5f31da04a 100644 --- a/anndata/_io/h5ad.py +++ b/anndata/_io/h5ad.py @@ -6,6 +6,7 @@ from types import MappingProxyType from typing import ( TYPE_CHECKING, + Any, Callable, Literal, TypeVar, @@ -116,7 +117,8 @@ def write_sparse_as_dense( f: h5py.Group, key: str, value: sparse.spmatrix | BaseCompressedSparseDataset, - dataset_kwargs=MappingProxyType({}), + *, + dataset_kwargs: Mapping[str, Any] = MappingProxyType({}), ): real_key = None # Flag for if temporary key was used if key in f: diff --git a/anndata/experimental/_dispatch_io.py b/anndata/experimental/_dispatch_io.py index 4df4d417a..2a399d540 100644 --- a/anndata/experimental/_dispatch_io.py +++ b/anndata/experimental/_dispatch_io.py @@ -4,6 +4,8 @@ from typing import TYPE_CHECKING, Any, Callable if TYPE_CHECKING: + from collections.abc import Mapping + from anndata._io.specs import IOSpec from anndata._types import GroupStorageType, StorageType @@ -55,7 +57,7 @@ def write_dispatched( None, ], *, - dataset_kwargs=MappingProxyType({}), + dataset_kwargs: Mapping[str, Any] = MappingProxyType({}), ) -> None: """ Write elem to store, recursively calling callback at each sub-element. From 0a51b5408beeeae5353a1c78920a3e0988f32c4e Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Thu, 4 Jan 2024 13:53:31 +0100 Subject: [PATCH 12/12] compat --- anndata/_io/utils.py | 3 +-- anndata/compat/__init__.py | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/anndata/_io/utils.py b/anndata/_io/utils.py index ed5d4673b..6f46ad7eb 100644 --- a/anndata/_io/utils.py +++ b/anndata/_io/utils.py @@ -1,7 +1,6 @@ from __future__ import annotations from functools import wraps -from itertools import pairwise from typing import TYPE_CHECKING, Callable, Literal, Union, cast from warnings import warn @@ -9,7 +8,7 @@ from packaging.version import Version from .._core.sparse_dataset import BaseCompressedSparseDataset -from ..compat import H5Group, ZarrGroup, add_note +from ..compat import H5Group, ZarrGroup, add_note, pairwise if TYPE_CHECKING: from .._types import StorageType diff --git a/anndata/compat/__init__.py b/anndata/compat/__init__.py index 24f3875b3..39323d73a 100644 --- a/anndata/compat/__init__.py +++ b/anndata/compat/__init__.py @@ -1,6 +1,7 @@ from __future__ import annotations import os +import sys from codecs import decode from collections.abc import Mapping from contextlib import AbstractContextManager @@ -35,9 +36,9 @@ class Empty: ############################# -try: +if sys.version_info >= (3, 11): from contextlib import chdir -except ImportError: # Python < 3.11 +else: @dataclass class chdir(AbstractContextManager): @@ -52,6 +53,18 @@ def __exit__(self, *_exc_info) -> None: os.chdir(self._old_cwd.pop()) +if sys.version_info >= (3, 10): + from itertools import pairwise +else: + + def pairwise(iterable): + from itertools import tee + + a, b = tee(iterable) + next(b, None) + return zip(a, b) + + ############################# # Optional deps #############################