Skip to content

Commit

Permalink
Merge pull request #1043 from hamelin/spectral-init-macos
Browse files Browse the repository at this point in the history
Fix unit tests involving spectral initialization
  • Loading branch information
lmcinnes committed Aug 22, 2023
2 parents 8e230d3 + 32030ec commit 0a8686a
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 27 deletions.
2 changes: 1 addition & 1 deletion azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ jobs:
displayName: 'Run tests'
- script: |
export COVERALLS_REPO_TOKEN=$(COVERALLS_REPO_TOKEN)
export COVERALLS_REPO_TOKEN=$COVERALLS_TOKEN
coveralls
displayName: 'Publish to coveralls'
Expand Down
63 changes: 42 additions & 21 deletions umap/spectral.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,7 @@ def tswspectral_layout(
random_state,
metric="euclidean",
metric_kwds={},
method=None,
tol=0.0,
maxiter=0
):
Expand Down Expand Up @@ -358,6 +359,13 @@ def tswspectral_layout(
Used only if multiple connected components are found in the
graph.
method: str (optional, default None, values either 'eigsh' or 'lobpcg')
Name of the eigenvalue computation method to use to compute the spectral
embedding. If left as None (or an empty string), which is the default, the
method is chosen based on the number of vectors in play: larger vector
collections are handled with lobpcg, smaller collections with eigsh. Method
names correspond to SciPy routines in scipy.sparse.linalg.
tol: float, default chosen by implementation
Stopping tolerance for the numerical algorithm computing the embedding.
Expand All @@ -378,6 +386,7 @@ def tswspectral_layout(
metric=metric,
metric_kwds=metric_kwds,
init="tsvd",
method=method,
tol=tol,
maxiter=maxiter
)
Expand All @@ -391,6 +400,7 @@ def _spectral_layout(
metric="euclidean",
metric_kwds={},
init="random",
method=None,
tol=0.0,
maxiter=0
):
Expand Down Expand Up @@ -431,6 +441,13 @@ def _spectral_layout(
singular values. The eigensolver itself is selected through the ``method``
parameter; when that is left unset, ARPACK's solver ``eigsh`` is used for smaller
Laplacians and LOBPCG for larger ones.
method: string -- either "eigsh" or "lobpcg" -- or None
Name of the eigenvalue computation method to use to compute the spectral
embedding. If left as None (or an empty string), which is the default, the
method is chosen based on the number of vectors in play: larger vector
collections are handled with lobpcg, smaller collections with eigsh. Method
names correspond to SciPy routines in scipy.sparse.linalg.
tol: float, default chosen by implementation
Stopping tolerance for the numerical algorithm computing the embedding.
Expand Down Expand Up @@ -478,8 +495,29 @@ def _spectral_layout(
if isinstance(random_state, (np.random.Generator, np.random.RandomState))
else np.random.default_rng(seed=random_state)
)
if not method:
method = "eigsh" if L.shape[0] < 2000000 else "lobpcg"

try:
if L.shape[0] < 2000000 and init == "random":
if init == "random":
X = gen.normal(size=(L.shape[0], k))
elif init == "tsvd":
X = TruncatedSVD(
n_components=k,
random_state=random_state,
# algorithm="arpack"
).fit_transform(L)
else:
raise ValueError(
"The init parameter must be either 'random' or 'tsvd': "
f"{init} is invalid."
)
# For such a normalized Laplacian, the first eigenvector is always
# proportional to sqrt(degrees). We thus replace the first column of the
# initial guess (random or t-SVD) with the exact value.
X[:, 0] = sqrt_deg / np.linalg.norm(sqrt_deg)

if method == "eigsh":
eigenvalues, eigenvectors = scipy.sparse.linalg.eigsh(
L,
k,
Expand All @@ -489,26 +527,7 @@ def _spectral_layout(
v0=np.ones(L.shape[0]),
maxiter=maxiter or graph.shape[0] * 5,
)
else:
gen = np.random.default_rng(seed=random_state)
if init == "random":
X = gen.normal(size=(L.shape[0], k))
elif init == "tsvd":
X = TruncatedSVD(
n_components=k,
random_state=random_state,
# algorithm="arpack"
).fit_transform(L)
# For such a normalized Laplacian, the first eigenvector is always
# proportional to sqrt(degrees). We thus replace the first t-SVD guess
# with the exact value.
X[:, 0] = sqrt_deg / np.linalg.norm(sqrt_deg)
else:
raise ValueError(
"The init parameter must be either 'random' or 'tsvd': "
f"{init} is invalid."
)

elif method == "lobpcg":
with warnings.catch_warnings():
warnings.filterwarnings(
category=UserWarning,
Expand All @@ -522,6 +541,8 @@ def _spectral_layout(
tol=tol or 1e-4,
maxiter=maxiter or 5 * graph.shape[0]
)
else:
raise ValueError("Method should either be None, 'eigsh' or 'lobpcg'")

order = np.argsort(eigenvalues)[1:k]
return eigenvectors[:, order]
Expand Down
10 changes: 5 additions & 5 deletions umap/tests/test_spectral.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@ def test_tsw_spectral_init(iris):
graph = rng.standard_normal(n * n).reshape((n, n)) ** 2
graph = graph.T * graph

spec = spectral_layout(None, graph, 2, random_state=seed)
tsw_spec = tswspectral_layout(None, graph, 2, random_state=seed, tol=1e-8)
spec = spectral_layout(None, graph, 2, random_state=seed ** 2)
tsw_spec = tswspectral_layout(None, graph, 2, random_state=seed ** 2, tol=1e-8)

# make sure the two methods produce matrices that are close in values
rmsd = np.sqrt(np.mean(np.sum((np.abs(spec) - np.abs(tsw_spec)) ** 2, axis=1)))
# Make sure the two methods produce similar embeddings.
rmsd = np.mean(np.sum((spec - tsw_spec) ** 2, axis=1))
assert (
rmsd < 1e-6
), "tsvd-warmed spectral init insufficiently close to standard spectral init"
Expand All @@ -48,4 +48,4 @@ def test_ensure_fallback_to_random_on_spectral_failure():
UserWarning,
match="Spectral initialisation failed!"
):
tswspectral_layout(u, graph, k, random_state=42, maxiter=2)
tswspectral_layout(u, graph, k, random_state=42, maxiter=2, method="lobpcg")

0 comments on commit 0a8686a

Please sign in to comment.