diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 938e222..ea4591d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -25,7 +25,8 @@ repos:
     rev: v1.18.2
     hooks:
       - id: mypy
-        args: [--config-file=pyproject.toml]
+        args: [--config-file=pyproject.toml, .]
+        pass_filenames: false
         additional_dependencies:
           - pytest
           - pytest-codspeed!=4.0.0  # https://github.com/CodSpeedHQ/pytest-codspeed/pull/84
diff --git a/src/fast_array_utils/stats/_sum.py b/src/fast_array_utils/stats/_sum.py
index a0a14f4..d3f09c8 100644
--- a/src/fast_array_utils/stats/_sum.py
+++ b/src/fast_array_utils/stats/_sum.py
@@ -63,12 +63,18 @@ def _sum_cs(
     del keep_cupy_as_array
     import scipy.sparse as sp
 
-    if isinstance(x, types.CSMatrix):
-        x = sp.csr_array(x) if x.format == "csr" else sp.csc_array(x)
+    # TODO(flying-sheep): once scipy fixes this issue, instead of all this,
+    # just convert to sparse array, then `return x.sum(dtype=dtype)`
+    # https://github.com/scipy/scipy/issues/23768
 
     if axis is None:
-        return cast("np.number[Any]", x.data.sum(dtype=dtype))
-    return cast("NDArray[Any] | np.number[Any]", x.sum(axis=axis, dtype=dtype))
+        return cast("NDArray[Any] | np.number[Any]", x.data.sum(dtype=dtype))
+
+    if TYPE_CHECKING:  # scipy-stubs thinks e.g. "int64" is invalid, which isn’t true
+        assert isinstance(dtype, np.dtype | type | None)
+    # convert to array so dimensions collapse as expected
+    x = (sp.csr_array if x.format == "csr" else sp.csc_array)(x, dtype=dtype)
+    return cast("NDArray[Any] | np.number[Any]", x.sum(axis=axis))
 
 
 @sum_.register(types.DaskArray)
@@ -92,7 +98,7 @@ def _sum_dask(
 
     rv = da.reduction(
         x,
-        sum_dask_inner,  # type: ignore[arg-type]
+        partial(sum_dask_inner, dtype=dtype),  # pyright: ignore[reportArgumentType]
         partial(sum_dask_inner, dtype=dtype),  # pyright: ignore[reportArgumentType]
         axis=axis,
         dtype=dtype,
diff --git a/tests/test_stats.py b/tests/test_stats.py
index a33b1d3..9a2ed90 100644
--- a/tests/test_stats.py
+++ b/tests/test_stats.py
@@ -90,7 +90,7 @@ def dtype_in(request: pytest.FixtureRequest, array_type: ArrayType) -> type[DTyp
     return dtype
 
 
-@pytest.fixture(scope="session", params=[np.float32, np.float64, None])
+@pytest.fixture(scope="session", params=[np.float32, np.float64, np.int64, None])
 def dtype_arg(request: pytest.FixtureRequest) -> type[DTypeOut] | None:
     return cast("type[DTypeOut] | None", request.param)
 
@@ -98,12 +98,34 @@ def dtype_arg(request: pytest.FixtureRequest) -> type[DTypeOut] | None:
 @pytest.fixture
 def np_arr(dtype_in: type[DTypeIn], ndim: Literal[1, 2]) -> NDArray[DTypeIn]:
     np_arr = cast("NDArray[DTypeIn]", np.array([[1, 0], [3, 0], [5, 6]], dtype=dtype_in))
+    if np.dtype(dtype_in).kind == "f":
+        np_arr /= 4  # type: ignore[misc]
     np_arr.flags.writeable = False
     if ndim == 1:
         np_arr = np_arr.flatten()
     return np_arr
 
 
+def to_np_dense_checked(  # assert that `stat` has the container type expected for `arr` and `axis`, then return it
+    stat: NDArray[DTypeOut] | np.number[Any] | types.DaskArray, axis: Literal[0, 1] | None, arr: CpuArray | GpuArray | DiskArray | types.DaskArray
+) -> NDArray[DTypeOut] | np.number[Any]:
+    match axis, arr:  # dispatch on the reduction axis and on the type of the input container
+        case _, types.DaskArray():  # Dask input: the statistic has to be a Dask array too
+            assert isinstance(stat, types.DaskArray), type(stat)
+            stat = stat.compute()  # type: ignore[assignment]
+            return to_np_dense_checked(stat, axis, arr.compute())  # re-check the computed pair
+        case None, _:  # no axis given: expect a NumPy floating/integer scalar
+            assert isinstance(stat, np.floating | np.integer), type(stat)
+        case 0 | 1, types.CupyArray() | types.CupyCSRMatrix() | types.CupyCSCMatrix() | types.CupyCOOMatrix():
+            assert isinstance(stat, types.CupyArray), type(stat)  # Cupy input with an axis: expect a Cupy array
+            return to_np_dense_checked(stat.get(), axis, arr.get())  # re-check using the `.get()` results of both
+        case 0 | 1, _:  # any other input with an axis: expect a NumPy array
+            assert isinstance(stat, np.ndarray), type(stat)
+        case _:  # no pattern above matched: fail the test with the offending types
+            pytest.fail(f"Unhandled case axis {axis} for {type(arr)}: {type(stat)}")
+    return stat  # reached from the branches that neither recurse nor fail
+
+
 @pytest.mark.array_type(skip={*ATS_SPARSE_DS, Flags.Matrix})
 @pytest.mark.parametrize("func", STAT_FUNCS)
 @pytest.mark.parametrize(("ndim", "axis"), [(1, 0), (2, 3), (2, -1)], ids=["1d-ax0", "2d-ax3", "2d-axneg"])
@@ -127,26 +149,13 @@ def test_sum(
     axis: Literal[0, 1] | None,
     np_arr: NDArray[DTypeIn],
 ) -> None:
+    if np.dtype(dtype_arg).kind in "iu" and (array_type.flags & Flags.Gpu) and (array_type.flags & Flags.Sparse):
+        pytest.skip("GPU sparse matrices don’t support int dtypes")
     arr = array_type(np_arr.copy())
     assert arr.dtype == dtype_in
 
     sum_ = stats.sum(arr, axis=axis, dtype=dtype_arg)
-
-    match axis, arr:
-        case _, types.DaskArray():
-            assert isinstance(sum_, types.DaskArray), type(sum_)
-            sum_ = sum_.compute()  # type: ignore[assignment]
-            if isinstance(sum_, types.CupyArray):
-                sum_ = sum_.get()
-        case None, _:
-            assert isinstance(sum_, np.floating | np.integer), type(sum_)
-        case 0 | 1, types.CupyArray() | types.CupyCSRMatrix() | types.CupyCSCMatrix():
-            assert isinstance(sum_, types.CupyArray), type(sum_)
-            sum_ = sum_.get()
-        case 0 | 1, _:
-            assert isinstance(sum_, np.ndarray), type(sum_)
-        case _:
-            pytest.fail(f"Unhandled case axis {axis} for {type(arr)}: {type(sum_)}")
+    sum_ = to_np_dense_checked(sum_, axis, arr)  # type: ignore[arg-type]
 
     assert sum_.shape == () if axis is None else arr.shape[axis], (sum_.shape, arr.shape)
 
@@ -161,6 +170,19 @@ def test_sum(
     np.testing.assert_array_equal(sum_, expected)
 
 
+@pytest.mark.array_type(skip={*ATS_SPARSE_DS, Flags.Gpu})
+def test_sum_to_int(array_type: ArrayType[CpuArray | DiskArray | types.DaskArray], axis: Literal[0, 1] | None) -> None:
+    # all inputs lie in [0, 1); the all-zero expectation below means each element must be truncated to 0 before summing
+    np_arr = np.random.default_rng(0).random((100, 100))
+    arr = array_type(np_arr)
+
+    sum_ = stats.sum(arr, axis=axis, dtype=np.int64)
+    sum_ = to_np_dense_checked(sum_, axis, arr)  # also asserts the container type of the result
+
+    expected = np.zeros(() if axis is None else arr.shape[1 - axis], dtype=np.int64)  # reducing `axis` leaves the other dimension
+    np.testing.assert_array_equal(sum_, expected)
+
+
 @pytest.mark.parametrize(
     "data",
     [
diff --git a/typings/cupy/_core/core.pyi b/typings/cupy/_core/core.pyi
index f8d459e..e1a5231 100644
--- a/typings/cupy/_core/core.pyi
+++ b/typings/cupy/_core/core.pyi
@@ -5,7 +5,7 @@ from typing import Any, Literal, Self, overload
 import numpy as np
 from cupy.cuda import Stream
 from numpy._core.multiarray import flagsobj
-from numpy.typing import NDArray
+from numpy.typing import DTypeLike, NDArray
 
 class ndarray:
     dtype: np.dtype[Any]
@@ -41,6 +41,7 @@ class ndarray:
     def flatten(self, order: Literal["C", "F", "A", "K"] = "C") -> Self: ...
     @property
     def flat(self) -> _FlatIter: ...
+    def sum(self, axis: int | None = None, dtype: DTypeLike | None = None, out: ndarray | None = None, keepdims: bool = False) -> ndarray: ...
 
 class _FlatIter:
     def __next__(self) -> np.float32 | np.float64: ...
diff --git a/typings/cupyx/scipy/sparse/_compressed.pyi b/typings/cupyx/scipy/sparse/_compressed.pyi
index a53c190..b697183 100644
--- a/typings/cupyx/scipy/sparse/_compressed.pyi
+++ b/typings/cupyx/scipy/sparse/_compressed.pyi
@@ -20,3 +20,4 @@ class _compressed_sparse_matrix(spmatrix):
 
     # methods
     def power(self, n: int, dtype: DTypeLike | None = None) -> Self: ...
+    def sum(self, axis: Literal[0, 1] | None = None, dtype: DTypeLike | None = None, out: Self | None = None) -> ndarray: ...