Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,16 @@ jobs:
test:
name: Tests
needs: get-environments
runs-on: ubuntu-latest
runs-on: ${{ matrix.os }}
strategy:
matrix:
env: ${{ fromJSON(needs.get-environments.outputs.envs) }}
os: [ubuntu-latest]
include:
- env:
name: hatch-test.py3.13-full
python: "3.13"
os: macos-latest
steps:
- uses: actions/checkout@v4
with:
Expand Down
2 changes: 1 addition & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@
"source.organizeImports": "explicit",
},
},
"python.testing.pytestArgs": ["-vv", "--color=yes"],
"python.testing.pytestArgs": ["-vv", "--color=yes", "-m", "not benchmark"],
"python.testing.pytestEnabled": true,
}
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ overrides.matrix.resolution.features = [
{ if = [ "lowest" ], value = "min-reqs" }, # feature added by hatch-min-requirements
]
overrides.matrix.resolution.dependencies = [
# TODO: move to min dep once this is fixed: https://github.com/tlambert03/hatch-min-requirements/issues/5
# TODO: move to min dep once this is fixed: https://github.com/tlambert03/hatch-min-requirements/issues/11
{ if = [ "lowest" ], value = "dask==2023.6.1" },
]

Expand Down
2 changes: 1 addition & 1 deletion src/fast_array_utils/stats/_mean.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,4 @@ def mean_(
) -> NDArray[np.number[Any]] | np.number[Any] | types.DaskArray:
total = sum_(x, axis=axis, dtype=dtype)
n = np.prod(x.shape) if axis is None else x.shape[axis]
return total / n # type: ignore[call-overload,operator,return-value]
return total / n # type: ignore[operator,return-value]
2 changes: 1 addition & 1 deletion src/fast_array_utils/stats/_mean_var.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def mean_var_(
mean_, var = _sparse_mean_var(x, axis=axis)
else:
mean_ = mean(x, axis=axis, dtype=np.float64)
mean_sq = mean(power(x, 2), axis=axis, dtype=np.float64)
mean_sq = mean(power(x, 2, dtype=np.float64), axis=axis) if isinstance(x, types.DaskArray) else mean(power(x, 2), axis=axis, dtype=np.float64)
var = mean_sq - mean_**2
if correction: # R convention == 1 (unbiased estimator)
n = np.prod(x.shape) if axis is None else x.shape[axis]
Expand Down
27 changes: 17 additions & 10 deletions src/fast_array_utils/stats/_power.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,38 +4,45 @@
from functools import singledispatch
from typing import TYPE_CHECKING

import numpy as np

from .. import types


if TYPE_CHECKING:
from typing import TypeAlias, TypeVar

from numpy.typing import DTypeLike

from fast_array_utils.typing import CpuArray, GpuArray

# All supported array types except for disk ones and CSDataset
Array: TypeAlias = CpuArray | GpuArray | types.DaskArray

_Arr = TypeVar("_Arr", bound=Array)
_Mat = TypeVar("_Mat", bound=types.CSBase | types.CupyCSMatrix)


def power(x: _Arr, n: int, /) -> _Arr:
def power(x: _Arr, n: int, /, dtype: DTypeLike | None = None) -> _Arr:
"""Take array or matrix to a power."""
# This wrapper is necessary because TypeVars can’t be used in `singledispatch` functions
return _power(x, n) # type: ignore[return-value]
return _power(x, n, dtype=dtype) # type: ignore[return-value]


@singledispatch
def _power(x: Array, n: int, /) -> Array:
def _power(x: Array, n: int, /, dtype: DTypeLike | None = None) -> Array:
if TYPE_CHECKING:
assert not isinstance(x, types.DaskArray | types.CSMatrix)
return x**n # type: ignore[operator]
assert not isinstance(x, types.DaskArray | types.CSBase | types.CupyCSMatrix)
return x**n if dtype is None else np.power(x, n, dtype=dtype) # type: ignore[operator]


@_power.register(types.CSMatrix | types.CupyCSMatrix)
def _power_cs(x: types.CSMatrix | types.CupyCSMatrix, n: int, /) -> types.CSMatrix | types.CupyCSMatrix:
return x.power(n)
@_power.register(types.CSBase | types.CupyCSMatrix)
def _power_cs(x: _Mat, n: int, /, dtype: DTypeLike | None = None) -> _Mat:
new_data = power(x.data, n, dtype=dtype)
return type(x)((new_data, x.indices, x.indptr), shape=x.shape, dtype=new_data.dtype) # type: ignore[call-overload,return-value]


@_power.register(types.DaskArray)
def _power_dask(x: types.DaskArray, n: int, /) -> types.DaskArray:
return x.map_blocks(lambda c: power(c, n)) # type: ignore[type-var]
def _power_dask(x: types.DaskArray, n: int, /, dtype: DTypeLike | None = None) -> types.DaskArray:
meta = x._meta.astype(dtype or x.dtype) # noqa: SLF001
return x.map_blocks(lambda c: power(c, n, dtype=dtype), dtype=dtype, meta=meta) # type: ignore[type-var,arg-type]
Binary file added tests/data/pbmc68k_reduced_raw_csr.npz
Binary file not shown.
37 changes: 37 additions & 0 deletions tests/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,21 @@
from __future__ import annotations

from importlib.util import find_spec
from pathlib import Path
from typing import TYPE_CHECKING, cast

import numpy as np
import pytest
import scipy.sparse as sps
from numpy.exceptions import AxisError

from fast_array_utils import stats, types
from testing.fast_array_utils import SUPPORTED_TYPES, Flags


DATA_DIR = Path(__file__).parent / "data"


if TYPE_CHECKING:
from collections.abc import Callable
from typing import Any, Literal, Protocol, TypeAlias
Expand Down Expand Up @@ -126,6 +131,21 @@ def to_np_dense_checked(
return stat


@pytest.fixture(scope="session")
def pbmc64k_reduced_raw() -> sps.csr_array[np.float32]:
"""Scanpy’s pbmc68k_reduced raw data.

Data was created using:
>>> if not find_spec("scanpy"):
... pytest.skip()
>>> import scanpy as sc
>>> import scipy.sparse as sps
>>> arr = sps.csr_array(sc.datasets.pbmc68k_reduced().raw.X)
>>> sps.save_npz("pbmc68k_reduced_raw_csr.npz", arr)
"""
return cast("sps.csr_array[np.float32]", sps.load_npz(DATA_DIR / "pbmc68k_reduced_raw_csr.npz"))


@pytest.mark.array_type(skip={*ATS_SPARSE_DS, Flags.Matrix})
@pytest.mark.parametrize("func", STAT_FUNCS)
@pytest.mark.parametrize(("ndim", "axis"), [(1, 0), (2, 3), (2, -1)], ids=["1d-ax0", "2d-ax3", "2d-axneg"])
Expand Down Expand Up @@ -273,6 +293,23 @@ def test_mean_var_sparse_32(array_type: ArrayType[types.CSArray]) -> None:
assert resid_fau < resid_skl


@pytest.mark.array_type({at for at in SUPPORTED_TYPES if at.flags & Flags.Sparse and at.flags & Flags.Dask})
def test_mean_var_pbmc_dask(array_type: ArrayType[types.DaskArray], pbmc64k_reduced_raw: sps.csr_array[np.float32]) -> None:
"""Test float32 precision for bigger data.

This test is flaky for sparse-in-dask for some reason.
"""
mat = pbmc64k_reduced_raw
arr = array_type(mat)

mean_mat, var_mat = stats.mean_var(mat, axis=0, correction=1)
mean_arr, var_arr = (to_np_dense_checked(a, 0, arr) for a in stats.mean_var(arr, axis=0, correction=1))

rtol = 1.0e-5 if array_type.flags & Flags.Gpu else 1.0e-7
np.testing.assert_allclose(mean_arr, mean_mat, rtol=rtol)
np.testing.assert_allclose(var_arr, var_mat, rtol=rtol)


@pytest.mark.array_type(skip={Flags.Disk, *ATS_CUPY_SPARSE})
@pytest.mark.parametrize(
("axis", "expected"),
Expand Down
3 changes: 3 additions & 0 deletions typings/cupy/_core/core.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ class ndarray:
def __power__(self, other: int) -> Self: ...

# methods
def astype(
self, dtype: DTypeLike | None, order: Literal["C", "F", "A", "K"] = "K", casting: None = None, subok: None = None, copy: bool = True
) -> Self: ...
@property
def T(self) -> Self: ... # noqa: N802
@overload
Expand Down
4 changes: 4 additions & 0 deletions typings/cupyx/scipy/sparse/_compressed.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ from ._base import spmatrix

class _compressed_sparse_matrix(spmatrix):
format: Literal["csr", "csc"]
data: ndarray
indices: ndarray
indptr: ndarray

@overload
def __init__(self, arg1: ndarray | spmatrix) -> None: ...
Expand All @@ -19,5 +22,6 @@ class _compressed_sparse_matrix(spmatrix):
def __init__(self, arg1: tuple[ndarray, ndarray, ndarray], shape: tuple[int, int] | None = None) -> None: ...

# methods
def astype(self, dtype: DTypeLike | None) -> Self: ...
def power(self, n: int, dtype: DTypeLike | None = None) -> Self: ...
def sum(self, axis: Literal[0, 1] | None = None, dtype: DTypeLike | None = None, out: Self | None = None) -> ndarray: ...
Loading