From 4b1f39801542f60a0017201542e2dfd954de36b5 Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Fri, 21 Nov 2025 13:23:14 +1300 Subject: [PATCH 01/10] :bug: use generic return type for DisjointSet.__getitem__ --- scipy-stubs/_lib/_disjoint_set.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scipy-stubs/_lib/_disjoint_set.pyi b/scipy-stubs/_lib/_disjoint_set.pyi index c71e0179..206bfd29 100644 --- a/scipy-stubs/_lib/_disjoint_set.pyi +++ b/scipy-stubs/_lib/_disjoint_set.pyi @@ -13,7 +13,7 @@ class DisjointSet(Generic[_T]): def __iter__(self, /) -> Iterator[_T]: ... def __len__(self, /) -> int: ... def __contains__(self, x: object, /) -> bool: ... - def __getitem__(self, x: _T, /) -> int: ... + def __getitem__(self, x: _T, /) -> _T: ... def add(self, /, x: _T) -> None: ... def merge(self, /, x: _T, y: _T) -> bool: ... def connected(self, /, x: _T, y: _T) -> bool: ... From b3d04a6b655934e415f0cbad72501337e7f7fb36 Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Fri, 21 Nov 2025 13:48:47 +1300 Subject: [PATCH 02/10] :bug: `cluster.hierarchy`: `__init__` should accept `Iterable[_T]` not `_T` --- scipy-stubs/_lib/_disjoint_set.pyi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scipy-stubs/_lib/_disjoint_set.pyi b/scipy-stubs/_lib/_disjoint_set.pyi index 206bfd29..b96eeacc 100644 --- a/scipy-stubs/_lib/_disjoint_set.pyi +++ b/scipy-stubs/_lib/_disjoint_set.pyi @@ -1,4 +1,4 @@ -from collections.abc import Iterator +from collections.abc import Iterable, Iterator from typing import Generic from typing_extensions import TypeVar @@ -9,7 +9,7 @@ _T = TypeVar("_T", bound=op.CanHash, default=object) class DisjointSet(Generic[_T]): n_subsets: int - def __init__(self, /, elements: _T | None = None) -> None: ... + def __init__(self, /, elements: Iterable[_T] | None = None) -> None: ... def __iter__(self, /) -> Iterator[_T]: ... def __len__(self, /) -> int: ... def __contains__(self, x: object, /) -> bool: ... 
From 9a9988e1e62ff32148c800dea1ca1e7d198c2727 Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Fri, 21 Nov 2025 15:10:09 +1300 Subject: [PATCH 03/10] :bug: `cluster.hierarchy`: `__contains__` should use type `_T` --- scipy-stubs/_lib/_disjoint_set.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scipy-stubs/_lib/_disjoint_set.pyi b/scipy-stubs/_lib/_disjoint_set.pyi index b96eeacc..36e63967 100644 --- a/scipy-stubs/_lib/_disjoint_set.pyi +++ b/scipy-stubs/_lib/_disjoint_set.pyi @@ -12,7 +12,7 @@ class DisjointSet(Generic[_T]): def __init__(self, /, elements: Iterable[_T] | None = None) -> None: ... def __iter__(self, /) -> Iterator[_T]: ... def __len__(self, /) -> int: ... - def __contains__(self, x: object, /) -> bool: ... + def __contains__(self, x: _T, /) -> bool: ... def __getitem__(self, x: _T, /) -> _T: ... def add(self, /, x: _T) -> None: ... def merge(self, /, x: _T, y: _T) -> bool: ... From 830c8692db6fda99ac2b567e0e351f8da684c4e3 Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Fri, 21 Nov 2025 15:14:05 +1300 Subject: [PATCH 04/10] :recycle: `cluster.hierarchy`: do not infer `object` when initialising `DisjointSet` without arguments --- scipy-stubs/_lib/_disjoint_set.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scipy-stubs/_lib/_disjoint_set.pyi b/scipy-stubs/_lib/_disjoint_set.pyi index 36e63967..fbc43ee9 100644 --- a/scipy-stubs/_lib/_disjoint_set.pyi +++ b/scipy-stubs/_lib/_disjoint_set.pyi @@ -4,7 +4,7 @@ from typing_extensions import TypeVar import optype as op -_T = TypeVar("_T", bound=op.CanHash, default=object) +_T = TypeVar("_T", bound=op.CanHash) class DisjointSet(Generic[_T]): n_subsets: int From 203f9423880daa5334d6e44402d32126284ed9c9 Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Fri, 21 Nov 2025 15:15:05 +1300 Subject: [PATCH 05/10] :white_check_mark: `cluster.hierarchy`: add test suite for `DisjointSet` --- tests/cluster/test_hierarchy.pyi | 76 ++++++++++++++++++++++++++++++++ 1 file 
changed, 76 insertions(+) create mode 100644 tests/cluster/test_hierarchy.pyi diff --git a/tests/cluster/test_hierarchy.pyi b/tests/cluster/test_hierarchy.pyi new file mode 100644 index 00000000..ec406557 --- /dev/null +++ b/tests/cluster/test_hierarchy.pyi @@ -0,0 +1,76 @@ +from collections.abc import Iterator +from typing import Any, assert_type + +import numpy as np +import optype.numpy as onp + +from scipy.cluster.hierarchy import DisjointSet + +### +# DisjointSet + +py_str_1d: list[str] +py_int_1d: list[int] + +i32_1d: onp.Array1D[np.int32] +i64_1d: onp.Array1D[np.int64] +# DisjointSet(Iterable[T]) produces a DisjointSet[T] with universal set of type T. +assert_type(DisjointSet(py_str_1d), DisjointSet[str]) +assert_type(DisjointSet(py_int_1d), DisjointSet[int]) +assert_type(DisjointSet(i32_1d), DisjointSet[np.int32]) # pyrefly: ignore[assert-type] +assert_type(DisjointSet(i64_1d), DisjointSet[np.int64]) # pyrefly: ignore[assert-type] +# DisjointSet() produces a DisjointSet[Any] in pyrefly and pyright because T is unbound. +# mypy instead returns a bottom consistent with its treatment of containers, so we expect +# >>> x = DisjointSet() +# to fail without an annotation +# >>> x: DisjointSet[str] = DisjointSet() +assert_type(DisjointSet(), DisjointSet[Any]) # type: ignore[assert-type] + +disjoint_set_str: DisjointSet[str] +disjoint_set_i64: DisjointSet[np.int64] + +# __iter__ produces an iterator over the universal set. 
+assert_type(iter(disjoint_set_str), Iterator[str]) +assert_type(iter(disjoint_set_i64), Iterator[np.int64]) + +# __len__ returns the length of the universal set +assert_type(len(disjoint_set_str), int) + +# __contains__ accepts an element of the universal set and returns a boolean +assert_type("a" in disjoint_set_str, bool) +_ = 1 in disjoint_set_str # type: ignore[operator] # pyright: ignore[reportOperatorIssue] +assert_type(np.int64(2) in disjoint_set_i64, bool) + +# __getitem__ returns an element of the universal set +assert_type(disjoint_set_str["a"], str) +disjoint_set_str[1] # type: ignore[index] # pyright: ignore[reportArgumentType] +assert_type(disjoint_set_i64[np.int64(1)], np.int64) + +# add accepts an element of type T and adds it to the data structure (i.e. returns None) +assert_type(disjoint_set_str.add("a"), None) +disjoint_set_str.add(1) # type: ignore[arg-type] # pyright: ignore[reportArgumentType] +assert_type(disjoint_set_i64.add(np.int64(1)), None) + +# merge accepts two elements of type T and returns a boolean indicating if they were in different subsets (i.e. whether a merge took place) +assert_type(disjoint_set_str.merge("a", "b"), bool) +disjoint_set_str.merge(1, 2) # type: ignore[arg-type] # pyright: ignore[reportArgumentType] +assert_type(disjoint_set_i64.merge(np.int64(1), np.int64(2)), bool) + +# connected accepts two elements of type T and returns a boolean indicating if they belonged to the same subset +assert_type(disjoint_set_str.connected("a", "b"), bool) +disjoint_set_str.connected(1, 2) # type: ignore[arg-type] # pyright: ignore[reportArgumentType] +assert_type(disjoint_set_i64.connected(np.int64(1), np.int64(2)), bool) + +# subset accepts one element of type T and returns its containing subset. 
+assert_type(disjoint_set_str.subset("a"), set[str]) +disjoint_set_str.subset(1) # type: ignore[arg-type] # pyright: ignore[reportArgumentType] +assert_type(disjoint_set_i64.subset(np.int64(1)), set[np.int64]) + +# subset_size accepts one element of type T and returns the *size* of its subset. +assert_type(disjoint_set_str.subset_size("a"), int) +disjoint_set_str.subset_size(1) # type: ignore[arg-type] # pyright: ignore[reportArgumentType] +assert_type(disjoint_set_i64.subset_size(np.int64(1)), int) + +# subsets returns a list of all subsets of type T +assert_type(disjoint_set_str.subsets(), list[set[str]]) +assert_type(disjoint_set_i64.subsets(), list[set[np.int64]]) From c700eaa8e666c7a1e3f179f1bfb38115b329a18e Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Fri, 21 Nov 2025 16:22:16 +1300 Subject: [PATCH 06/10] :bug: `cluster.hierarchy`: appropriately set `_T` to resolve `__hash__` errors --- scipy-stubs/_lib/_disjoint_set.pyi | 5 +++-- tests/cluster/test_hierarchy.pyi | 8 ++------ 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/scipy-stubs/_lib/_disjoint_set.pyi b/scipy-stubs/_lib/_disjoint_set.pyi index fbc43ee9..f3e4e043 100644 --- a/scipy-stubs/_lib/_disjoint_set.pyi +++ b/scipy-stubs/_lib/_disjoint_set.pyi @@ -1,10 +1,11 @@ from collections.abc import Iterable, Iterator -from typing import Generic +from typing import Any, Generic from typing_extensions import TypeVar +import numpy as np import optype as op -_T = TypeVar("_T", bound=op.CanHash) +_T = TypeVar("_T", bound=op.CanHash | np.generic, default=Any) class DisjointSet(Generic[_T]): n_subsets: int diff --git a/tests/cluster/test_hierarchy.pyi b/tests/cluster/test_hierarchy.pyi index ec406557..f6ff0ce6 100644 --- a/tests/cluster/test_hierarchy.pyi +++ b/tests/cluster/test_hierarchy.pyi @@ -19,12 +19,8 @@ assert_type(DisjointSet(py_str_1d), DisjointSet[str]) assert_type(DisjointSet(py_int_1d), DisjointSet[int]) assert_type(DisjointSet(i32_1d), DisjointSet[np.int32]) # pyrefly: 
ignore[assert-type] assert_type(DisjointSet(i64_1d), DisjointSet[np.int64]) # pyrefly: ignore[assert-type] -# DisjointSet() produces a DisjointSet[Any] in pyrefly and pyright because T is unbound. -# mypy instead returns a bottom consistent with its treatment of containers, so we expect -# >>> x = DisjointSet() -# to fail without an annotation -# >>> x: DisjointSet[str] = DisjointSet() -assert_type(DisjointSet(), DisjointSet[Any]) # type: ignore[assert-type] +# DisjointSet() produces a DisjointSet[Any] because T is unsolved and defaults to Any. +assert_type(DisjointSet(), DisjointSet[Any]) disjoint_set_str: DisjointSet[str] disjoint_set_i64: DisjointSet[np.int64] From 582e22ce224f4e270a6e6d0f87bcb1bb32f67a6d Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Fri, 21 Nov 2025 16:39:04 +1300 Subject: [PATCH 07/10] :white_check_mark: `cluster.hierarchy`: use assignment statements for numpy initialisation --- tests/cluster/test_hierarchy.pyi | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/cluster/test_hierarchy.pyi b/tests/cluster/test_hierarchy.pyi index f6ff0ce6..22b5739e 100644 --- a/tests/cluster/test_hierarchy.pyi +++ b/tests/cluster/test_hierarchy.pyi @@ -17,8 +17,9 @@ i64_1d: onp.Array1D[np.int64] # DisjointSet(Iterable[T]) produces a DisjointSet[T] with universal set of type T. assert_type(DisjointSet(py_str_1d), DisjointSet[str]) assert_type(DisjointSet(py_int_1d), DisjointSet[int]) -assert_type(DisjointSet(i32_1d), DisjointSet[np.int32]) # pyrefly: ignore[assert-type] -assert_type(DisjointSet(i64_1d), DisjointSet[np.int64]) # pyrefly: ignore[assert-type] +# NOTE: Directly using assert_type fails with numpy arrays for all numpy<=2.0. Instead, use assignment statements. +_10: DisjointSet[np.int32] = DisjointSet(i32_1d) +_11: DisjointSet[np.int64] = DisjointSet(i64_1d) # DisjointSet() produces a DisjointSet[Any] because T is unsolved and defaults to Any. 
assert_type(DisjointSet(), DisjointSet[Any]) From a2f97c34da6a6ab00420fb1ac25cbd238b5756de Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Mon, 24 Nov 2025 08:59:47 +1300 Subject: [PATCH 08/10] Revert ":bug: `custer.hierarchy`: `__contains__` should use type `_T`" This reverts commit 9a9988e1e62ff32148c800dea1ca1e7d198c2727. --- scipy-stubs/_lib/_disjoint_set.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scipy-stubs/_lib/_disjoint_set.pyi b/scipy-stubs/_lib/_disjoint_set.pyi index f3e4e043..16f33dae 100644 --- a/scipy-stubs/_lib/_disjoint_set.pyi +++ b/scipy-stubs/_lib/_disjoint_set.pyi @@ -13,7 +13,7 @@ class DisjointSet(Generic[_T]): def __init__(self, /, elements: Iterable[_T] | None = None) -> None: ... def __iter__(self, /) -> Iterator[_T]: ... def __len__(self, /) -> int: ... - def __contains__(self, x: _T, /) -> bool: ... + def __contains__(self, x: object, /) -> bool: ... def __getitem__(self, x: _T, /) -> _T: ... def add(self, /, x: _T) -> None: ... def merge(self, /, x: _T, y: _T) -> bool: ... From 3a937d7a4e4fc982ccb606a08387af5c64310a46 Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Mon, 24 Nov 2025 09:09:27 +1300 Subject: [PATCH 09/10] :memo: `cluster.hierarchy`: Document `np.generic` upper bound for `DisjointSet` type parameter --- scipy-stubs/_lib/_disjoint_set.pyi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scipy-stubs/_lib/_disjoint_set.pyi b/scipy-stubs/_lib/_disjoint_set.pyi index 16f33dae..5aeeda93 100644 --- a/scipy-stubs/_lib/_disjoint_set.pyi +++ b/scipy-stubs/_lib/_disjoint_set.pyi @@ -5,6 +5,9 @@ from typing_extensions import TypeVar import numpy as np import optype as op +# Only the existence of `__hash__` is required. However, in numpy < 2.1 the +# `__hash__` method is missing from numpy stubs on scalar values. Allowing +# `np.generic` fixes this for older numpy versions. 
_T = TypeVar("_T", bound=op.CanHash | np.generic, default=Any) class DisjointSet(Generic[_T]): From b8f7f510dc250e8796e158ffa70b62a07bdad86c Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Mon, 24 Nov 2025 09:11:32 +1300 Subject: [PATCH 10/10] :white_check_mark: `cluster.hierarchy`: remove `__contains__` test --- tests/cluster/test_hierarchy.pyi | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/cluster/test_hierarchy.pyi b/tests/cluster/test_hierarchy.pyi index 22b5739e..fd7cb462 100644 --- a/tests/cluster/test_hierarchy.pyi +++ b/tests/cluster/test_hierarchy.pyi @@ -35,7 +35,6 @@ assert_type(len(disjoint_set_str), int) # __contains__ accepts an element of the universal set and returns a boolean assert_type("a" in disjoint_set_str, bool) -_ = 1 in disjoint_set_str # type: ignore[operator] # pyright: ignore[reportOperatorIssue] assert_type(np.int64(2) in disjoint_set_i64, bool) # __getitem__ returns an element of the universal set