Skip to content

Commit

Permalink
ENH Add verbose option to SpectralClustering (#18052)
Browse files Browse the repository at this point in the history
Co-authored-by: Thomas J. Fan <thomasjpfan@gmail.com>
  • Loading branch information
sstalley and thomasjpfan committed Aug 7, 2020
1 parent eff1bdf commit 603d05b
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 4 deletions.
5 changes: 5 additions & 0 deletions doc/whats_new/v0.24.rst
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,11 @@ Changelog
:user:`Emilie Delattre <EmilieDel>`, and
:user:`Francesco Casalegno <FrancescoCasalegno>`.

- |Enhancement| :class:`cluster.SpectralClustering` and
:func:`cluster.spectral_clustering` have a new keyword argument `verbose`.
When set to `True`, additional messages will be displayed which can aid with
debugging. :pr:`18052` by :user:`Sean O. Stalley <sstalley>`.

- |API| :class:`cluster.MiniBatchKMeans` attributes, `counts_` and
`init_size_`, are deprecated and will be removed in 0.26. :pr:`17864` by
:user:`Jérémie du Boisberranger <jeremiedbb>`.
Expand Down
24 changes: 20 additions & 4 deletions sklearn/cluster/_spectral.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,8 @@ def discretize(vectors, *, copy=True, max_svd_restarts=30, n_iter_max=20,
@_deprecate_positional_args
def spectral_clustering(affinity, *, n_clusters=8, n_components=None,
eigen_solver=None, random_state=None, n_init=10,
eigen_tol=0.0, assign_labels='kmeans'):
eigen_tol=0.0, assign_labels='kmeans',
verbose=False):
"""Apply clustering to a projection of the normalized Laplacian.
In practice Spectral Clustering is very useful when the structure of
Expand Down Expand Up @@ -222,6 +223,11 @@ def spectral_clustering(affinity, *, n_clusters=8, n_components=None,
the 'Multiclass spectral clustering' paper referenced below for
more details on the discretization approach.
verbose : bool, default=False
Verbosity mode.
.. versionadded:: 0.24
Returns
-------
labels : array of integers, shape: n_samples
Expand Down Expand Up @@ -265,10 +271,12 @@ def spectral_clustering(affinity, *, n_clusters=8, n_components=None,
eigen_solver=eigen_solver,
random_state=random_state,
eigen_tol=eigen_tol, drop_first=False)
if verbose:
print(f'Computing label assignment using {assign_labels}')

if assign_labels == 'kmeans':
_, labels, _ = k_means(maps, n_clusters, random_state=random_state,
n_init=n_init)
n_init=n_init, verbose=verbose)
else:
labels = discretize(maps, random_state=random_state)

Expand Down Expand Up @@ -381,6 +389,11 @@ class SpectralClustering(ClusterMixin, BaseEstimator):
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
verbose : bool, default=False
Verbosity mode.
.. versionadded:: 0.24
Attributes
----------
affinity_matrix_ : array-like of shape (n_samples, n_samples)
Expand Down Expand Up @@ -443,7 +456,8 @@ class SpectralClustering(ClusterMixin, BaseEstimator):
def __init__(self, n_clusters=8, *, eigen_solver=None, n_components=None,
random_state=None, n_init=10, gamma=1., affinity='rbf',
n_neighbors=10, eigen_tol=0.0, assign_labels='kmeans',
degree=3, coef0=1, kernel_params=None, n_jobs=None):
degree=3, coef0=1, kernel_params=None, n_jobs=None,
verbose=False):
self.n_clusters = n_clusters
self.eigen_solver = eigen_solver
self.n_components = n_components
Expand All @@ -458,6 +472,7 @@ def __init__(self, n_clusters=8, *, eigen_solver=None, n_components=None,
self.coef0 = coef0
self.kernel_params = kernel_params
self.n_jobs = n_jobs
self.verbose = verbose

def fit(self, X, y=None):
"""Perform spectral clustering from features, or affinity matrix.
Expand Down Expand Up @@ -523,7 +538,8 @@ def fit(self, X, y=None):
random_state=random_state,
n_init=self.n_init,
eigen_tol=self.eigen_tol,
assign_labels=self.assign_labels)
assign_labels=self.assign_labels,
verbose=self.verbose)
return self

def fit_predict(self, X, y=None):
Expand Down
18 changes: 18 additions & 0 deletions sklearn/cluster/tests/test_spectral.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Testing for Spectral Clustering methods"""
import re

import numpy as np
from scipy import sparse
Expand Down Expand Up @@ -248,3 +249,20 @@ def test_n_components():
labels_diff_ncomp = SpectralClustering(n_components=2,
random_state=0).fit(X).labels_
assert not np.array_equal(labels, labels_diff_ncomp)


@pytest.mark.parametrize('assign_labels', ('kmeans', 'discretize'))
def test_verbose(assign_labels, capsys):
    """Check that ``verbose`` makes SpectralClustering report its progress.

    Parameters
    ----------
    assign_labels : {'kmeans', 'discretize'}
        Label assignment strategy under test (supplied by ``parametrize``).
    capsys : fixture
        pytest fixture used to capture what the estimator prints to stdout.
    """
    # Two tight, well separated blobs keep the clustering problem trivial;
    # only the printed output matters here, so the true labels are dropped.
    X, _ = make_blobs(n_samples=20, random_state=0,
                      centers=[[1, 1], [-1, -1]], cluster_std=0.01)

    # Forward the parametrized strategy to the estimator. Without it every
    # case silently runs the default 'kmeans' path and the 'discretize'
    # branch is never exercised.
    SpectralClustering(n_clusters=2, random_state=42, verbose=1,
                       assign_labels=assign_labels).fit(X)

    captured = capsys.readouterr()

    # The message names the strategy in use, so matching on it also proves
    # that the requested strategy is the one that actually ran.
    assert re.search(
        rf"Computing label assignment using {assign_labels}", captured.out)

    if assign_labels == "kmeans":
        # ``verbose`` is passed on to k_means, which prints its own messages.
        assert re.search(r"Initialization complete", captured.out)
        assert re.search(r"Iteration [0-9]+, inertia", captured.out)

0 comments on commit 603d05b

Please sign in to comment.