diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2244b1d..bda02e9 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -42,10 +42,16 @@ jobs:
   test:
     name: Tests
     needs: get-environments
-    runs-on: ubuntu-latest
+    runs-on: ${{ matrix.os }}
     strategy:
       matrix:
         env: ${{ fromJSON(needs.get-environments.outputs.envs) }}
+        os: [ubuntu-latest]
+        include:
+          - env:
+              name: hatch-test.py3.13-full
+              python: "3.13"
+            os: macos-latest
     steps:
       - uses: actions/checkout@v4
         with:
diff --git a/.vscode/settings.json b/.vscode/settings.json
index f6c77c2..ebc3a5d 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -15,6 +15,6 @@
       "source.organizeImports": "explicit",
     },
   },
-  "python.testing.pytestArgs": ["-vv", "--color=yes"],
+  "python.testing.pytestArgs": ["-vv", "--color=yes", "-m", "not benchmark"],
   "python.testing.pytestEnabled": true,
 }
diff --git a/pyproject.toml b/pyproject.toml
index 1e33cd6..ac18001 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -100,7 +100,7 @@
 overrides.matrix.resolution.features = [
   { if = [ "lowest" ], value = "min-reqs" }, # feature added by hatch-min-requirements
 ]
 overrides.matrix.resolution.dependencies = [
-  # TODO: move to min dep once this is fixed: https://github.com/tlambert03/hatch-min-requirements/issues/5
+  # TODO: move to min dep once this is fixed: https://github.com/tlambert03/hatch-min-requirements/issues/11
   { if = [ "lowest" ], value = "dask==2023.6.1" },
 ]
diff --git a/src/fast_array_utils/stats/_mean.py b/src/fast_array_utils/stats/_mean.py
index 93c5a82..9588a77 100644
--- a/src/fast_array_utils/stats/_mean.py
+++ b/src/fast_array_utils/stats/_mean.py
@@ -26,4 +26,4 @@ def mean_(
 ) -> NDArray[np.number[Any]] | np.number[Any] | types.DaskArray:
     total = sum_(x, axis=axis, dtype=dtype)
     n = np.prod(x.shape) if axis is None else x.shape[axis]
-    return total / n  # type: ignore[call-overload,operator,return-value]
+    return total / n  # type: ignore[operator,return-value]
diff --git a/src/fast_array_utils/stats/_mean_var.py b/src/fast_array_utils/stats/_mean_var.py
index df2d127..9037567 100644
--- a/src/fast_array_utils/stats/_mean_var.py
+++ b/src/fast_array_utils/stats/_mean_var.py
@@ -37,7 +37,7 @@ def mean_var_(
         mean_, var = _sparse_mean_var(x, axis=axis)
     else:
         mean_ = mean(x, axis=axis, dtype=np.float64)
-        mean_sq = mean(power(x, 2), axis=axis, dtype=np.float64)
+        mean_sq = mean(power(x, 2, dtype=np.float64), axis=axis) if isinstance(x, types.DaskArray) else mean(power(x, 2), axis=axis, dtype=np.float64)
         var = mean_sq - mean_**2
     if correction:  # R convention == 1 (unbiased estimator)
         n = np.prod(x.shape) if axis is None else x.shape[axis]
diff --git a/src/fast_array_utils/stats/_power.py b/src/fast_array_utils/stats/_power.py
index ac1f6c1..43dada2 100644
--- a/src/fast_array_utils/stats/_power.py
+++ b/src/fast_array_utils/stats/_power.py
@@ -4,38 +4,45 @@
 
 from functools import singledispatch
 from typing import TYPE_CHECKING
 
+import numpy as np
+
 from .. import types
 
 
 if TYPE_CHECKING:
     from typing import TypeAlias, TypeVar
 
+    from numpy.typing import DTypeLike
+
     from fast_array_utils.typing import CpuArray, GpuArray
 
     # All supported array types except for disk ones and CSDataset
     Array: TypeAlias = CpuArray | GpuArray | types.DaskArray
     _Arr = TypeVar("_Arr", bound=Array)
+    _Mat = TypeVar("_Mat", bound=types.CSBase | types.CupyCSMatrix)
 
 
-def power(x: _Arr, n: int, /) -> _Arr:
+def power(x: _Arr, n: int, /, dtype: DTypeLike | None = None) -> _Arr:
     """Take array or matrix to a power."""
     # This wrapper is necessary because TypeVars can’t be used in `singledispatch` functions
-    return _power(x, n)  # type: ignore[return-value]
+    return _power(x, n, dtype=dtype)  # type: ignore[return-value]
 
 
 @singledispatch
-def _power(x: Array, n: int, /) -> Array:
+def _power(x: Array, n: int, /, dtype: DTypeLike | None = None) -> Array:
     if TYPE_CHECKING:
-        assert not isinstance(x, types.DaskArray | types.CSMatrix)
-    return x**n  # type: ignore[operator]
+        assert not isinstance(x, types.DaskArray | types.CSBase | types.CupyCSMatrix)
+    return x**n if dtype is None else np.power(x, n, dtype=dtype)  # type: ignore[operator]
 
 
-@_power.register(types.CSMatrix | types.CupyCSMatrix)
-def _power_cs(x: types.CSMatrix | types.CupyCSMatrix, n: int, /) -> types.CSMatrix | types.CupyCSMatrix:
-    return x.power(n)
+@_power.register(types.CSBase | types.CupyCSMatrix)
+def _power_cs(x: _Mat, n: int, /, dtype: DTypeLike | None = None) -> _Mat:
+    new_data = power(x.data, n, dtype=dtype)
+    return type(x)((new_data, x.indices, x.indptr), shape=x.shape, dtype=new_data.dtype)  # type: ignore[call-overload,return-value]
 
 
 @_power.register(types.DaskArray)
-def _power_dask(x: types.DaskArray, n: int, /) -> types.DaskArray:
-    return x.map_blocks(lambda c: power(c, n))  # type: ignore[type-var]
+def _power_dask(x: types.DaskArray, n: int, /, dtype: DTypeLike | None = None) -> types.DaskArray:
+    meta = x._meta.astype(dtype or x.dtype)  # noqa: SLF001
+    return x.map_blocks(lambda c: power(c, n, dtype=dtype), dtype=dtype, meta=meta)  # type: ignore[type-var,arg-type]
diff --git a/tests/data/pbmc68k_reduced_raw_csr.npz b/tests/data/pbmc68k_reduced_raw_csr.npz
new file mode 100644
index 0000000..bed5cc6
Binary files /dev/null and b/tests/data/pbmc68k_reduced_raw_csr.npz differ
diff --git a/tests/test_stats.py b/tests/test_stats.py
index 9a2ed90..ffde919 100644
--- a/tests/test_stats.py
+++ b/tests/test_stats.py
@@ -2,16 +2,21 @@
 from __future__ import annotations
 
 from importlib.util import find_spec
+from pathlib import Path
 from typing import TYPE_CHECKING, cast
 
 import numpy as np
 import pytest
+import scipy.sparse as sps
 from numpy.exceptions import AxisError
 
 from fast_array_utils import stats, types
 from testing.fast_array_utils import SUPPORTED_TYPES, Flags
 
 
+DATA_DIR = Path(__file__).parent / "data"
+
+
 if TYPE_CHECKING:
     from collections.abc import Callable
     from typing import Any, Literal, Protocol, TypeAlias
@@ -126,6 +131,21 @@ def to_np_dense_checked(
     return stat
 
 
+@pytest.fixture(scope="session")
+def pbmc64k_reduced_raw() -> sps.csr_array[np.float32]:
+    """Scanpy’s pbmc68k_reduced raw data.
+
+    Data was created using:
+    >>> if not find_spec("scanpy"):
+    ...     pytest.skip()
+    >>> import scanpy as sc
+    >>> import scipy.sparse as sps
+    >>> arr = sps.csr_array(sc.datasets.pbmc68k_reduced().raw.X)
+    >>> sps.save_npz("pbmc68k_reduced_raw_csr.npz", arr)
+    """
+    return cast("sps.csr_array[np.float32]", sps.load_npz(DATA_DIR / "pbmc68k_reduced_raw_csr.npz"))
+
+
 @pytest.mark.array_type(skip={*ATS_SPARSE_DS, Flags.Matrix})
 @pytest.mark.parametrize("func", STAT_FUNCS)
 @pytest.mark.parametrize(("ndim", "axis"), [(1, 0), (2, 3), (2, -1)], ids=["1d-ax0", "2d-ax3", "2d-axneg"])
@@ -273,6 +293,23 @@ def test_mean_var_sparse_32(array_type: ArrayType[types.CSArray]) -> None:
     assert resid_fau < resid_skl
 
 
+@pytest.mark.array_type({at for at in SUPPORTED_TYPES if at.flags & Flags.Sparse and at.flags & Flags.Dask})
+def test_mean_var_pbmc_dask(array_type: ArrayType[types.DaskArray], pbmc64k_reduced_raw: sps.csr_array[np.float32]) -> None:
+    """Test float32 precision for bigger data.
+
+    This test is flaky for sparse-in-dask for some reason.
+    """
+    mat = pbmc64k_reduced_raw
+    arr = array_type(mat)
+
+    mean_mat, var_mat = stats.mean_var(mat, axis=0, correction=1)
+    mean_arr, var_arr = (to_np_dense_checked(a, 0, arr) for a in stats.mean_var(arr, axis=0, correction=1))
+
+    rtol = 1.0e-5 if array_type.flags & Flags.Gpu else 1.0e-7
+    np.testing.assert_allclose(mean_arr, mean_mat, rtol=rtol)
+    np.testing.assert_allclose(var_arr, var_mat, rtol=rtol)
+
+
 @pytest.mark.array_type(skip={Flags.Disk, *ATS_CUPY_SPARSE})
 @pytest.mark.parametrize(
     ("axis", "expected"),
diff --git a/typings/cupy/_core/core.pyi b/typings/cupy/_core/core.pyi
index e1a5231..7d1bf96 100644
--- a/typings/cupy/_core/core.pyi
+++ b/typings/cupy/_core/core.pyi
@@ -29,6 +29,9 @@ class ndarray:
     def __power__(self, other: int) -> Self: ...
 
     # methods
+    def astype(
+        self, dtype: DTypeLike | None, order: Literal["C", "F", "A", "K"] = "K", casting: None = None, subok: None = None, copy: bool = True
+    ) -> Self: ...
     @property
     def T(self) -> Self: ...  # noqa: N802
     @overload
diff --git a/typings/cupyx/scipy/sparse/_compressed.pyi b/typings/cupyx/scipy/sparse/_compressed.pyi
index b697183..4f1035d 100644
--- a/typings/cupyx/scipy/sparse/_compressed.pyi
+++ b/typings/cupyx/scipy/sparse/_compressed.pyi
@@ -8,6 +8,9 @@ from ._base import spmatrix
 
 class _compressed_sparse_matrix(spmatrix):
     format: Literal["csr", "csc"]
+    data: ndarray
+    indices: ndarray
+    indptr: ndarray
 
     @overload
     def __init__(self, arg1: ndarray | spmatrix) -> None: ...
@@ -19,5 +22,6 @@ class _compressed_sparse_matrix(spmatrix):
     def __init__(self, arg1: tuple[ndarray, ndarray, ndarray], shape: tuple[int, int] | None = None) -> None: ...
 
     # methods
+    def astype(self, dtype: DTypeLike | None) -> Self: ...
     def power(self, n: int, dtype: DTypeLike | None = None) -> Self: ...
     def sum(self, axis: Literal[0, 1] | None = None, dtype: DTypeLike | None = None, out: Self | None = None) -> ndarray: ...
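
Illustration (not part of the diff): the dtype parameter threaded through power() exists so that _mean_var can square float32 dask blocks in float64 before averaging. The standalone NumPy sketch below, using hypothetical names (naive_var, sq_dtype) that do not appear in the repo, shows the kind of cancellation error in E[x^2] - E[x]^2 that squaring in float64 avoids when the variance is small relative to the squared mean:

    # Illustrative sketch only; naive_var and sq_dtype are hypothetical, not part of fast-array-utils.
    import numpy as np

    rng = np.random.default_rng(0)
    x = (1000.0 + rng.standard_normal(1_000_000)).astype(np.float32)  # variance ~1, mean**2 ~1e6

    def naive_var(values: np.ndarray, sq_dtype: type) -> np.float64:
        # E[x^2] - E[x]^2, with the squares computed in sq_dtype (the role of power(..., dtype=...))
        mean = np.mean(values, dtype=np.float64)
        mean_sq = np.mean(np.power(values, 2, dtype=sq_dtype), dtype=np.float64)
        return mean_sq - mean**2

    ref = np.var(x, dtype=np.float64)  # two-pass float64 reference
    for sq_dtype in (np.float32, np.float64):
        rel_err = abs(naive_var(x, sq_dtype) - ref) / ref
        print(sq_dtype.__name__, rel_err)  # float32 squaring typically drifts around 1e-5 here; float64 stays far closer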