ENH Add verbose option to SpectralClustering (#18052)

Co-authored-by: Thomas J. Fan <thomasjpfan@gmail.com>
scikit-learn · Aug 7, 2020 · 603d05b
1 parent eff1bdf
commit 603d05b
Show file tree

Hide file tree

Showing 3 changed files with 43 additions and 4 deletions.
diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst
@@ -72,6 +72,11 @@ Changelog
   :user:`Emilie Delattre <EmilieDel>`, and
   :user:`Francesco Casalegno <FrancescoCasalegno>`.
 
+- |Enhancement| :class:`cluster.SpectralClustering` and
+  :func:`cluster.spectral_clustering` have a new keyword argument `verbose`.
+  When set to `True`, additional messages will be displayed which can aid with
+  debugging. :pr:`18052` by :user:`Sean O. Stalley <sstalley>`.
+
 - |API| :class:`cluster.MiniBatchKMeans` attributes, `counts_` and
   `init_size_`, are deprecated and will be removed in 0.26. :pr:`17864` by
   :user:`Jérémie du Boisberranger <jeremiedbb>`.

diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py
@@ -160,7 +160,8 @@ def discretize(vectors, *, copy=True, max_svd_restarts=30, n_iter_max=20,
 @_deprecate_positional_args
 def spectral_clustering(affinity, *, n_clusters=8, n_components=None,
                         eigen_solver=None, random_state=None, n_init=10,
-                        eigen_tol=0.0, assign_labels='kmeans'):
+                        eigen_tol=0.0, assign_labels='kmeans',
+                        verbose=False):
     """Apply clustering to a projection of the normalized Laplacian.
 
     In practice Spectral Clustering is very useful when the structure of
@@ -222,6 +223,11 @@ def spectral_clustering(affinity, *, n_clusters=8, n_components=None,
         the 'Multiclass spectral clustering' paper referenced below for
         more details on the discretization approach.
 
+    verbose : bool, default=False
+        Verbosity mode.
+
+        .. versionadded:: 0.24
+
     Returns
     -------
     labels : array of integers, shape: n_samples
@@ -265,10 +271,12 @@ def spectral_clustering(affinity, *, n_clusters=8, n_components=None,
                               eigen_solver=eigen_solver,
                               random_state=random_state,
                               eigen_tol=eigen_tol, drop_first=False)
+    if verbose:
+        print(f'Computing label assignment using {assign_labels}')
 
     if assign_labels == 'kmeans':
         _, labels, _ = k_means(maps, n_clusters, random_state=random_state,
-                               n_init=n_init)
+                               n_init=n_init, verbose=verbose)
     else:
         labels = discretize(maps, random_state=random_state)
 
@@ -381,6 +389,11 @@ class SpectralClustering(ClusterMixin, BaseEstimator):
         ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
         for more details.
 
+    verbose : bool, default=False
+        Verbosity mode.
+
+        .. versionadded:: 0.24
+
     Attributes
     ----------
     affinity_matrix_ : array-like of shape (n_samples, n_samples)
@@ -443,7 +456,8 @@ class SpectralClustering(ClusterMixin, BaseEstimator):
     def __init__(self, n_clusters=8, *, eigen_solver=None, n_components=None,
                  random_state=None, n_init=10, gamma=1., affinity='rbf',
                  n_neighbors=10, eigen_tol=0.0, assign_labels='kmeans',
-                 degree=3, coef0=1, kernel_params=None, n_jobs=None):
+                 degree=3, coef0=1, kernel_params=None, n_jobs=None,
+                 verbose=False):
         self.n_clusters = n_clusters
         self.eigen_solver = eigen_solver
         self.n_components = n_components
@@ -458,6 +472,7 @@ def __init__(self, n_clusters=8, *, eigen_solver=None, n_components=None,
         self.coef0 = coef0
         self.kernel_params = kernel_params
         self.n_jobs = n_jobs
+        self.verbose = verbose
 
     def fit(self, X, y=None):
         """Perform spectral clustering from features, or affinity matrix.
@@ -523,7 +538,8 @@ def fit(self, X, y=None):
                                            random_state=random_state,
                                            n_init=self.n_init,
                                            eigen_tol=self.eigen_tol,
-                                           assign_labels=self.assign_labels)
+                                           assign_labels=self.assign_labels,
+                                           verbose=self.verbose)
         return self
 
     def fit_predict(self, X, y=None):

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
@@ -1,4 +1,5 @@
 """Testing for Spectral Clustering methods"""
+import re
 
 import numpy as np
 from scipy import sparse
@@ -248,3 +249,20 @@ def test_n_components():
     labels_diff_ncomp = SpectralClustering(n_components=2,
                                            random_state=0).fit(X).labels_
     assert not np.array_equal(labels, labels_diff_ncomp)
+
+
+@pytest.mark.parametrize('assign_labels', ('kmeans', 'discretize'))
+def test_verbose(assign_labels, capsys):
+    # verbose must log the assignment step; 'kmeans' also logs k_means output.
+    X, y = make_blobs(n_samples=20, random_state=0,
+                      centers=[[1, 1], [-1, -1]], cluster_std=0.01)
+
+    SpectralClustering(n_clusters=2, random_state=42, verbose=1,
+                       assign_labels=assign_labels).fit(X)
+
+    captured = capsys.readouterr()
+
+    assert re.search(r"Computing label assignment using", captured.out)
+    if assign_labels == "kmeans":
+        assert re.search(r"Initialization complete", captured.out)
+        assert re.search(r"Iteration [0-9]+, inertia", captured.out)