diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2244b1d..bda02e9 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -42,10 +42,16 @@ jobs:
   test:
     name: Tests
     needs: get-environments
-    runs-on: ubuntu-latest
+    runs-on: ${{ matrix.os }}
     strategy:
       matrix:
         env: ${{ fromJSON(needs.get-environments.outputs.envs) }}
+        os: [ubuntu-latest]
+        include:
+          - env:
+              name: hatch-test.py3.13-full
+              python: "3.13"
+            os: macos-latest
     steps:
       - uses: actions/checkout@v4
         with:
diff --git a/.vscode/settings.json b/.vscode/settings.json
index f6c77c2..ebc3a5d 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -15,6 +15,6 @@
       "source.organizeImports": "explicit",
     },
   },
-  "python.testing.pytestArgs": ["-vv", "--color=yes"],
+  "python.testing.pytestArgs": ["-vv", "--color=yes", "-m", "not benchmark"],
   "python.testing.pytestEnabled": true,
 }
diff --git a/pyproject.toml b/pyproject.toml
index 1e33cd6..ac18001 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -100,7 +100,7 @@
 overrides.matrix.resolution.features = [
   { if = [ "lowest" ], value = "min-reqs" }, # feature added by hatch-min-requirements
 ]
 overrides.matrix.resolution.dependencies = [
-  # TODO: move to min dep once this is fixed: https://github.com/tlambert03/hatch-min-requirements/issues/5
+  # TODO: move to min dep once this is fixed: https://github.com/tlambert03/hatch-min-requirements/issues/11
   { if = [ "lowest" ], value = "dask==2023.6.1" },
 ]
diff --git a/src/fast_array_utils/stats/_mean.py b/src/fast_array_utils/stats/_mean.py
index 93c5a82..9588a77 100644
--- a/src/fast_array_utils/stats/_mean.py
+++ b/src/fast_array_utils/stats/_mean.py
@@ -26,4 +26,4 @@ def mean_(
 ) -> NDArray[np.number[Any]] | np.number[Any] | types.DaskArray:
     total = sum_(x, axis=axis, dtype=dtype)
     n = np.prod(x.shape) if axis is None else x.shape[axis]
-    return total / n  # type: ignore[call-overload,operator,return-value]
+    return total / n  # type: ignore[operator,return-value]
diff --git a/src/fast_array_utils/stats/_mean_var.py b/src/fast_array_utils/stats/_mean_var.py
index df2d127..9037567 100644
--- a/src/fast_array_utils/stats/_mean_var.py
+++ b/src/fast_array_utils/stats/_mean_var.py
@@ -37,7 +37,7 @@ def mean_var_(
         mean_, var = _sparse_mean_var(x, axis=axis)
     else:
         mean_ = mean(x, axis=axis, dtype=np.float64)
-        mean_sq = mean(power(x, 2), axis=axis, dtype=np.float64)
+        mean_sq = mean(power(x, 2, dtype=np.float64), axis=axis) if isinstance(x, types.DaskArray) else mean(power(x, 2), axis=axis, dtype=np.float64)
         var = mean_sq - mean_**2
     if correction:  # R convention == 1 (unbiased estimator)
         n = np.prod(x.shape) if axis is None else x.shape[axis]
diff --git a/src/fast_array_utils/stats/_power.py b/src/fast_array_utils/stats/_power.py
index ac1f6c1..43dada2 100644
--- a/src/fast_array_utils/stats/_power.py
+++ b/src/fast_array_utils/stats/_power.py
@@ -4,38 +4,45 @@
 
 from functools import singledispatch
 from typing import TYPE_CHECKING
 
+import numpy as np
+
 from .. import types
 
 
 if TYPE_CHECKING:
     from typing import TypeAlias, TypeVar
 
+    from numpy.typing import DTypeLike
+
     from fast_array_utils.typing import CpuArray, GpuArray
 
     # All supported array types except for disk ones and CSDataset
     Array: TypeAlias = CpuArray | GpuArray | types.DaskArray
     _Arr = TypeVar("_Arr", bound=Array)
+    _Mat = TypeVar("_Mat", bound=types.CSBase | types.CupyCSMatrix)
 
 
-def power(x: _Arr, n: int, /) -> _Arr:
+def power(x: _Arr, n: int, /, dtype: DTypeLike | None = None) -> _Arr:
     """Take array or matrix to a power."""
     # This wrapper is necessary because TypeVars can’t be used in `singledispatch` functions
-    return _power(x, n)  # type: ignore[return-value]
+    return _power(x, n, dtype=dtype)  # type: ignore[return-value]
 
 
 @singledispatch
-def _power(x: Array, n: int, /) -> Array:
+def _power(x: Array, n: int, /, dtype: DTypeLike | None = None) -> Array:
     if TYPE_CHECKING:
-        assert not isinstance(x, types.DaskArray | types.CSMatrix)
-    return x**n  # type: ignore[operator]
+        assert not isinstance(x, types.DaskArray | types.CSBase | types.CupyCSMatrix)
+    return x**n if dtype is None else np.power(x, n, dtype=dtype)  # type: ignore[operator]
 
 
-@_power.register(types.CSMatrix | types.CupyCSMatrix)
-def _power_cs(x: types.CSMatrix | types.CupyCSMatrix, n: int, /) -> types.CSMatrix | types.CupyCSMatrix:
-    return x.power(n)
+@_power.register(types.CSBase | types.CupyCSMatrix)
+def _power_cs(x: _Mat, n: int, /, dtype: DTypeLike | None = None) -> _Mat:
+    new_data = power(x.data, n, dtype=dtype)
+    return type(x)((new_data, x.indices, x.indptr), shape=x.shape, dtype=new_data.dtype)  # type: ignore[call-overload,return-value]
 
 
 @_power.register(types.DaskArray)
-def _power_dask(x: types.DaskArray, n: int, /) -> types.DaskArray:
-    return x.map_blocks(lambda c: power(c, n))  # type: ignore[type-var]
+def _power_dask(x: types.DaskArray, n: int, /, dtype: DTypeLike | None = None) -> types.DaskArray:
+    meta = x._meta.astype(dtype or x.dtype)  # noqa: SLF001
+    return x.map_blocks(lambda c: power(c, n, dtype=dtype), dtype=dtype, meta=meta)  # type: ignore[type-var,arg-type]
diff --git a/tests/data/pbmc68k_reduced_raw_csr.npz b/tests/data/pbmc68k_reduced_raw_csr.npz
new file mode 100644
index 0000000..bed5cc6
Binary files /dev/null and b/tests/data/pbmc68k_reduced_raw_csr.npz differ
diff --git a/tests/test_stats.py b/tests/test_stats.py
index 9a2ed90..ffde919 100644
--- a/tests/test_stats.py
+++ b/tests/test_stats.py
@@ -2,16 +2,21 @@
 from __future__ import annotations
 
 from importlib.util import find_spec
+from pathlib import Path
 from typing import TYPE_CHECKING, cast
 
 import numpy as np
 import pytest
+import scipy.sparse as sps
 from numpy.exceptions import AxisError
 
 from fast_array_utils import stats, types
 from testing.fast_array_utils import SUPPORTED_TYPES, Flags
 
 
+DATA_DIR = Path(__file__).parent / "data"
+
+
 if TYPE_CHECKING:
     from collections.abc import Callable
     from typing import Any, Literal, Protocol, TypeAlias
@@ -126,6 +131,21 @@ def to_np_dense_checked(
     return stat
 
 
+@pytest.fixture(scope="session")
+def pbmc64k_reduced_raw() -> sps.csr_array[np.float32]:
+    """Scanpy’s pbmc68k_reduced raw data.
+
+    Data was created using:
+    >>> if not find_spec("scanpy"):
+    ...     pytest.skip()
+    >>> import scanpy as sc
+    >>> import scipy.sparse as sps
+    >>> arr = sps.csr_array(sc.datasets.pbmc68k_reduced().raw.X)
+    >>> sps.save_npz("pbmc68k_reduced_raw_csr.npz", arr)
+    """
+    return cast("sps.csr_array[np.float32]", sps.load_npz(DATA_DIR / "pbmc68k_reduced_raw_csr.npz"))
+
+
 @pytest.mark.array_type(skip={*ATS_SPARSE_DS, Flags.Matrix})
 @pytest.mark.parametrize("func", STAT_FUNCS)
 @pytest.mark.parametrize(("ndim", "axis"), [(1, 0), (2, 3), (2, -1)], ids=["1d-ax0", "2d-ax3", "2d-axneg"])
@@ -273,6 +293,23 @@ def test_mean_var_sparse_32(array_type: ArrayType[types.CSArray]) -> None:
     assert resid_fau < resid_skl
 
 
+@pytest.mark.array_type({at for at in SUPPORTED_TYPES if at.flags & Flags.Sparse and at.flags & Flags.Dask})
+def test_mean_var_pbmc_dask(array_type: ArrayType[types.DaskArray], pbmc64k_reduced_raw: sps.csr_array[np.float32]) -> None:
+    """Test float32 precision for bigger data.
+
+    This test is flaky for sparse-in-dask for some reason.
+    """
+    mat = pbmc64k_reduced_raw
+    arr = array_type(mat)
+
+    mean_mat, var_mat = stats.mean_var(mat, axis=0, correction=1)
+    mean_arr, var_arr = (to_np_dense_checked(a, 0, arr) for a in stats.mean_var(arr, axis=0, correction=1))
+
+    rtol = 1.0e-5 if array_type.flags & Flags.Gpu else 1.0e-7
+    np.testing.assert_allclose(mean_arr, mean_mat, rtol=rtol)
+    np.testing.assert_allclose(var_arr, var_mat, rtol=rtol)
+
+
 @pytest.mark.array_type(skip={Flags.Disk, *ATS_CUPY_SPARSE})
 @pytest.mark.parametrize(
     ("axis", "expected"),
diff --git a/typings/cupy/_core/core.pyi b/typings/cupy/_core/core.pyi
index e1a5231..7d1bf96 100644
--- a/typings/cupy/_core/core.pyi
+++ b/typings/cupy/_core/core.pyi
@@ -29,6 +29,9 @@ class ndarray:
     def __power__(self, other: int) -> Self: ...
 
     # methods
+    def astype(
+        self, dtype: DTypeLike | None, order: Literal["C", "F", "A", "K"] = "K", casting: None = None, subok: None = None, copy: bool = True
+    ) -> Self: ...
     @property
     def T(self) -> Self: ...  # noqa: N802
     @overload
diff --git a/typings/cupyx/scipy/sparse/_compressed.pyi b/typings/cupyx/scipy/sparse/_compressed.pyi
index b697183..4f1035d 100644
--- a/typings/cupyx/scipy/sparse/_compressed.pyi
+++ b/typings/cupyx/scipy/sparse/_compressed.pyi
@@ -8,6 +8,9 @@ from ._base import spmatrix
 
 class _compressed_sparse_matrix(spmatrix):
     format: Literal["csr", "csc"]
+    data: ndarray
+    indices: ndarray
+    indptr: ndarray
 
     @overload
     def __init__(self, arg1: ndarray | spmatrix) -> None: ...
@@ -19,5 +22,6 @@ class _compressed_sparse_matrix(spmatrix):
     def __init__(self, arg1: tuple[ndarray, ndarray, ndarray], shape: tuple[int, int] | None = None) -> None: ...
 
     # methods
+    def astype(self, dtype: DTypeLike | None) -> Self: ...
     def power(self, n: int, dtype: DTypeLike | None = None) -> Self: ...
     def sum(self, axis: Literal[0, 1] | None = None, dtype: DTypeLike | None = None, out: Self | None = None) -> ndarray: ...
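
Illustration (not part of the diff): the dtype parameter threaded through power() exists so that _mean_var can square float32 dask blocks in float64 before averaging. The standalone NumPy sketch below, using hypothetical names (naive_var, sq_dtype) that do not appear in the repo, shows the kind of cancellation error in E[x^2] - E[x]^2 that squaring in float64 avoids when the variance is small relative to the squared mean:

    # Illustrative sketch only; naive_var and sq_dtype are hypothetical, not part of fast-array-utils.
    import numpy as np

    rng = np.random.default_rng(0)
    x = (1000.0 + rng.standard_normal(1_000_000)).astype(np.float32)  # variance ~1, mean**2 ~1e6

    def naive_var(values: np.ndarray, sq_dtype: type) -> np.float64:
        # E[x^2] - E[x]^2, with the squares computed in sq_dtype (the role of power(..., dtype=...))
        mean = np.mean(values, dtype=np.float64)
        mean_sq = np.mean(np.power(values, 2, dtype=sq_dtype), dtype=np.float64)
        return mean_sq - mean**2

    ref = np.var(x, dtype=np.float64)  # two-pass float64 reference
    for sq_dtype in (np.float32, np.float64):
        rel_err = abs(naive_var(x, sq_dtype) - ref) / ref
        print(sq_dtype.__name__, rel_err)  # float32 squaring typically drifts around 1e-5 here; float64 stays far closer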