Navigation Menu

Skip to content

Commit

Permalink
ENH add n_components kwarg to SpectralClustering. See #13698 (#13726)
Browse files Browse the repository at this point in the history
  • Loading branch information
fdas3213 authored and jnothman committed May 27, 2019
1 parent f3a6a1a commit db48ebc
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 8 deletions.
10 changes: 10 additions & 0 deletions doc/whats_new/v0.22.rst
Expand Up @@ -57,6 +57,16 @@ Changelog
``decision_function_shape='ovr'``, and the number of target classes > 2.
:pr:`12557` by `Adrin Jalali`_.


:mod:`sklearn.cluster`
......................

- |Enhancement| :class:`cluster.SpectralClustering` now accepts an
``n_components`` parameter, which sets the number of eigenvectors used for the
spectral embedding. This brings the class in line with the functionality of
:func:`cluster.spectral_clustering`.
:pr:`13726` by :user:`Shuzhe Xiao <fdas3213>`.


Miscellaneous
.............

Expand Down
17 changes: 11 additions & 6 deletions sklearn/cluster/spectral.py
Expand Up @@ -307,6 +307,9 @@ class SpectralClustering(BaseEstimator, ClusterMixin):
to be installed. It can be faster on very large, sparse problems,
but may also lead to instabilities.
n_components : integer, optional, default=n_clusters
Number of eigenvectors to use for the spectral embedding. If None
(the default), ``n_clusters`` is used.
random_state : int, RandomState instance or None (default)
A pseudo random number generator used for the initialization of the
lobpcg eigen vectors decomposition when ``eigen_solver='amg'`` and by
Expand Down Expand Up @@ -387,8 +390,8 @@ class SpectralClustering(BaseEstimator, ClusterMixin):
>>> clustering # doctest: +NORMALIZE_WHITESPACE
SpectralClustering(affinity='rbf', assign_labels='discretize', coef0=1,
degree=3, eigen_solver=None, eigen_tol=0.0, gamma=1.0,
kernel_params=None, n_clusters=2, n_init=10, n_jobs=None,
n_neighbors=10, random_state=0)
kernel_params=None, n_clusters=2, n_components=None, n_init=10,
n_jobs=None, n_neighbors=10, random_state=0)
Notes
-----
Expand Down Expand Up @@ -425,12 +428,13 @@ class SpectralClustering(BaseEstimator, ClusterMixin):
https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf
"""

def __init__(self, n_clusters=8, eigen_solver=None, random_state=None,
n_init=10, gamma=1., affinity='rbf', n_neighbors=10,
eigen_tol=0.0, assign_labels='kmeans', degree=3, coef0=1,
kernel_params=None, n_jobs=None):
def __init__(self, n_clusters=8, eigen_solver=None, n_components=None,
random_state=None, n_init=10, gamma=1., affinity='rbf',
n_neighbors=10, eigen_tol=0.0, assign_labels='kmeans',
degree=3, coef0=1, kernel_params=None, n_jobs=None):
self.n_clusters = n_clusters
self.eigen_solver = eigen_solver
self.n_components = n_components
self.random_state = random_state
self.n_init = n_init
self.gamma = gamma
Expand Down Expand Up @@ -486,6 +490,7 @@ def fit(self, X, y=None):
random_state = check_random_state(self.random_state)
self.labels_ = spectral_clustering(self.affinity_matrix_,
n_clusters=self.n_clusters,
n_components=self.n_components,
eigen_solver=self.eigen_solver,
random_state=random_state,
n_init=self.n_init,
Expand Down
23 changes: 21 additions & 2 deletions sklearn/cluster/tests/test_spectral.py
Expand Up @@ -107,8 +107,7 @@ def test_affinities():
# a dataset that yields a stable eigen decomposition both when built
# on OSX and Linux
X, y = make_blobs(n_samples=20, random_state=0,
centers=[[1, 1], [-1, -1]], cluster_std=0.01
)
centers=[[1, 1], [-1, -1]], cluster_std=0.01)
# nearest neighbors affinity
sp = SpectralClustering(n_clusters=2, affinity='nearest_neighbors',
random_state=0)
Expand Down Expand Up @@ -204,3 +203,23 @@ def test_spectral_clustering_with_arpack_amg_solvers():
assert_raises(
ValueError, spectral_clustering,
graph, n_clusters=2, eigen_solver='amg', random_state=0)


def test_n_components():
    """Check the ``n_components`` parameter of ``SpectralClustering``.

    Two things are asserted:

    * leaving ``n_components`` unset gives the same labels as setting it
      explicitly to ``n_clusters``;
    * a model with the default ``n_clusters`` and ``n_components=2`` does not
      reproduce the labels of the ``n_clusters=2`` baseline.
    """
    # Two tight blobs; the targets returned by make_blobs are not needed.
    data, _ = make_blobs(n_samples=20, random_state=0,
                         centers=[[1, 1], [-1, -1]], cluster_std=0.01)

    # Baseline: n_components is left at its default.
    default_model = SpectralClustering(n_clusters=2, random_state=0)
    default_labels = default_model.fit(data).labels_

    # Same configuration, but with n_components spelled out as n_clusters.
    explicit_model = SpectralClustering(n_clusters=2, n_components=2,
                                        random_state=0)
    explicit_labels = explicit_model.fit(data).labels_
    assert_array_equal(default_labels, explicit_labels)

    # n_clusters falls back to its default of 8 here while n_components=2.
    # NOTE(review): this model differs from the baseline in n_clusters as
    # well as in n_components, so the inequality below does not isolate the
    # effect of n_components alone.
    reduced_model = SpectralClustering(n_components=2, random_state=0)
    reduced_labels = reduced_model.fit(data).labels_
    assert not np.array_equal(default_labels, reduced_labels)

0 comments on commit db48ebc

Please sign in to comment.