scikit-learn · ogrisel · Aug 29, 2019 · Apr 24, 2019 · Apr 24, 2019 · Apr 24, 2019
diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst
@@ -428,21 +428,24 @@ given sample.
 Spectral clustering
 ===================
 
-:class:`SpectralClustering` does a low-dimension embedding of the
-affinity matrix between samples, followed by a KMeans in the low
-dimensional space. It is especially efficient if the affinity matrix is
-sparse and the `pyamg <https://github.com/pyamg/pyamg>`_ module is installed.
-SpectralClustering requires the number of clusters to be specified. It
-works well for a small number of clusters but is not advised when using
-many clusters.
-
-For two clusters, it solves a convex relaxation of the `normalised
-cuts <https://people.eecs.berkeley.edu/~malik/papers/SM-ncut.pdf>`_ problem on
-the similarity graph: cutting the graph in two so that the weight of the
-edges cut is small compared to the weights of the edges inside each
-cluster. This criteria is especially interesting when working on images:
-graph vertices are pixels, and edges of the similarity graph are a
-function of the gradient of the image.
+:class:`SpectralClustering` performs a low-dimensional embedding of the
+affinity matrix between samples, followed by clustering, e.g., by KMeans,
+of the components of the eigenvectors in the low-dimensional space.
+It is especially computationally efficient if the affinity matrix is sparse
+and the ``amg`` solver is used for the eigenvalue problem (note that ``amg``
+requires the `pyamg <https://github.com/pyamg/pyamg>`_ module to be installed).
+
+The present version of SpectralClustering requires the number of clusters
+to be specified in advance. It works well for a small number of clusters,
+but is not advised for many clusters.
+
+For two clusters, SpectralClustering solves a convex relaxation of the
+`normalised cuts <https://people.eecs.berkeley.edu/~malik/papers/SM-ncut.pdf>`_
+problem on the similarity graph: cutting the graph in two so that the weight of
+the edges cut is small compared to the weights of the edges inside each
+cluster. This criterion is especially interesting when working on images, where
+graph vertices are pixels, and weights of the edges of the similarity graph are
+computed using a function of a gradient of the image.
 
 
 .. |noisy_img| image:: ../auto_examples/cluster/images/sphx_glr_plot_segmentation_toy_001.png
@@ -489,12 +492,11 @@ Different label assignment strategies
 
 Different label assignment strategies can be used, corresponding to the
 ``assign_labels`` parameter of :class:`SpectralClustering`.
-The ``"kmeans"`` strategy can match finer details of the data, but it can be
-more unstable. In particular, unless you control the ``random_state``, it
-may not be reproducible from run-to-run, as it depends on a random
-initialization. On the other hand, the ``"discretize"`` strategy is 100%
-reproducible, but it tends to create parcels of fairly even and
-geometrical shape.
+The ``"kmeans"`` strategy can match finer details, but can be unstable.
+In particular, unless you control the ``random_state``, it may not be
+reproducible from run-to-run, as it depends on random initialization.
+The alternative ``"discretize"`` strategy is 100% reproducible, but tends
+to create parcels of fairly even and geometrical shape.
 
 =====================================  =====================================
  ``assign_labels="kmeans"``              ``assign_labels="discretize"``
@@ -505,7 +507,7 @@ geometrical shape.
 Spectral Clustering Graphs
 --------------------------
 
-Spectral Clustering can also be used to cluster graphs by their spectral
+Spectral Clustering can also be used to partition graphs via their spectral
 embeddings.  In this case, the affinity matrix is the adjacency matrix of the
 graph, and SpectralClustering is initialized with `affinity='precomputed'`::
 
@@ -532,6 +534,10 @@ graph, and SpectralClustering is initialized with `affinity='precomputed'`::
    <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.19.8100>`_
    Andrew Y. Ng, Michael I. Jordan, Yair Weiss, 2001
 
+ * `"Preconditioned Spectral Clustering for Stochastic
+   Block Partition Streaming Graph Challenge"
+   <https://arxiv.org/abs/1708.07481>`_
+   David Zhuzhunashvili, Andrew Knyazev, 2017
 
 .. _hierarchical_clustering:
 

diff --git a/sklearn/manifold/spectral_embedding_.py b/sklearn/manifold/spectral_embedding_.py
@@ -288,11 +288,12 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None,
         laplacian = check_array(laplacian, dtype=np.float64,
                                 accept_sparse=True)
         laplacian = _set_diag(laplacian, 1, norm_laplacian)
+        laplacian = laplacian + 1e-5 * sparse.eye(laplacian.shape[0])
         ml = smoothed_aggregation_solver(check_array(laplacian, 'csr'))
         M = ml.aspreconditioner()
         X = random_state.rand(laplacian.shape[0], n_components + 1)
         X[:, 0] = dd.ravel()
-        lambdas, diffusion_map = lobpcg(laplacian, X, M=M, tol=1.e-12,
+        lambdas, diffusion_map = lobpcg(laplacian, X, M=M, tol=1.e-5,
                                         largest=False)
         embedding = diffusion_map.T
         if norm_laplacian:
@@ -320,7 +321,7 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None,
             # doesn't behave well in low dimension
             X = random_state.rand(laplacian.shape[0], n_components + 1)
             X[:, 0] = dd.ravel()
-            lambdas, diffusion_map = lobpcg(laplacian, X, tol=1e-15,
+            lambdas, diffusion_map = lobpcg(laplacian, X, tol=1e-5,
                                             largest=False, maxiter=2000)
             embedding = diffusion_map.T[:n_components]
             if norm_laplacian:

diff --git a/sklearn/manifold/tests/test_spectral_embedding.py b/sklearn/manifold/tests/test_spectral_embedding.py
@@ -181,6 +181,32 @@ def test_spectral_embedding_amg_solver(seed=36):
     assert _check_with_col_sign_flipping(embed_amg, embed_arpack, 0.05)
 
 
+def test_spectral_embedding_amg_solver_failure(seed=36):
+    # Two amg fits differing only in seed must match up to column sign flips
+    try:
+        from pyamg import smoothed_aggregation_solver  # noqa
+    except ImportError:
+        raise SkipTest("pyamg not available.")
+
+    # The generated graph below is NOT fully connected if n_neighbors=3
+    n_samples = 200
+    n_clusters = 3
+    n_features = 3
+    centers = np.eye(n_clusters, n_features)
+    S, true_labels = make_blobs(n_samples=n_samples, centers=centers,
+                                cluster_std=1., random_state=42)
+
+    se_amg0 = SpectralEmbedding(n_components=3, affinity="nearest_neighbors",
+                                eigen_solver="amg", n_neighbors=3,
+                                random_state=np.random.RandomState(seed))
+    se_amg1 = SpectralEmbedding(n_components=3, affinity="nearest_neighbors",
+                                eigen_solver="amg", n_neighbors=3,
+                                random_state=np.random.RandomState(seed+1))
+    embed_amg0 = se_amg0.fit_transform(S)
+    embed_amg1 = se_amg1.fit_transform(S)
+    assert _check_with_col_sign_flipping(embed_amg0, embed_amg1, 0.05)
+
+
 @pytest.mark.filterwarnings("ignore:the behavior of nmi will "
                             "change in version 0.22")
 def test_pipeline_spectral_clustering(seed=36):