DOC Usage examples added to sklearn.cluster classes (#11508)
adrinjalali authored and qinhanmin2014 committed Jul 25, 2018
1 parent 3a0b009 commit c2b7478
Showing 7 changed files with 125 additions and 1 deletion.
33 changes: 33 additions & 0 deletions sklearn/cluster/bicluster.py
@@ -258,6 +258,22 @@ class SpectralCoclustering(BaseSpectral):
column_labels_ : array-like, shape (n_cols,)
The bicluster label of each column.

Examples
--------
>>> from sklearn.cluster import SpectralCoclustering
>>> import numpy as np
>>> X = np.array([[1, 1], [2, 1], [1, 0],
...               [4, 7], [3, 5], [3, 6]])
>>> clustering = SpectralCoclustering(n_clusters=2, random_state=0).fit(X)
>>> clustering.row_labels_
array([0, 1, 1, 0, 0, 0], dtype=int32)
>>> clustering.column_labels_
array([0, 0], dtype=int32)
>>> clustering # doctest: +NORMALIZE_WHITESPACE
SpectralCoclustering(init='k-means++', mini_batch=False, n_clusters=2,
                     n_init=10, n_jobs=1, n_svd_vecs=None, random_state=0,
                     svd_method='randomized')
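
A possible follow-up, not part of this commit: the fitted estimator also
inherits the BiclusterMixin helpers (get_indices, get_submatrix), so the rows
and columns of an individual bicluster can be inspected directly. A minimal
sketch under that assumption:

# Sketch only: inspect one bicluster of the model fitted in the example above.
import numpy as np
from sklearn.cluster import SpectralCoclustering

X = np.array([[1, 1], [2, 1], [1, 0],
              [4, 7], [3, 5], [3, 6]])
model = SpectralCoclustering(n_clusters=2, random_state=0).fit(X)
rows, cols = model.get_indices(0)       # row/column indices of bicluster 0
print(rows, cols)
print(model.get_submatrix(0, X).shape)  # shape of the corresponding submatrix
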
References
----------
@@ -389,6 +405,23 @@ class SpectralBiclustering(BaseSpectral):
column_labels_ : array-like, shape (n_cols,)
Column partition labels.

Examples
--------
>>> from sklearn.cluster import SpectralBiclustering
>>> import numpy as np
>>> X = np.array([[1, 1], [2, 1], [1, 0],
...               [4, 7], [3, 5], [3, 6]])
>>> clustering = SpectralBiclustering(n_clusters=2, random_state=0).fit(X)
>>> clustering.row_labels_
array([1, 1, 1, 0, 0, 0], dtype=int32)
>>> clustering.column_labels_
array([0, 1], dtype=int32)
>>> clustering # doctest: +NORMALIZE_WHITESPACE
SpectralBiclustering(init='k-means++', method='bistochastic',
                     mini_batch=False, n_best=3, n_clusters=2, n_components=6,
                     n_init=10, n_jobs=1, n_svd_vecs=None, random_state=0,
                     svd_method='randomized')
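
A hedged extension, not included in this commit: beyond the label arrays, the
boolean bicluster membership matrices and per-bicluster shapes are reachable
through the BiclusterMixin interface. A sketch under that assumption:

# Sketch only: inspect the biclusters found in the example above.
import numpy as np
from sklearn.cluster import SpectralBiclustering

X = np.array([[1, 1], [2, 1], [1, 0],
              [4, 7], [3, 5], [3, 6]])
model = SpectralBiclustering(n_clusters=2, random_state=0).fit(X)
rows, columns = model.biclusters_   # boolean membership matrices
print(rows.shape, columns.shape)
print(model.get_shape(0))           # (n_rows, n_cols) of the first bicluster
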
References
----------
2 changes: 1 addition & 1 deletion sklearn/cluster/birch.py
@@ -394,7 +394,7 @@ class Birch(BaseEstimator, TransformerMixin, ClusterMixin):
>>> X = [[0, 1], [0.3, 1], [-0.3, 1], [0, -1], [0.3, -1], [-0.3, -1]]
>>> brc = Birch(branching_factor=50, n_clusters=None, threshold=0.5,
... compute_labels=True)
- >>> brc.fit(X)
+ >>> brc.fit(X) # doctest: +NORMALIZE_WHITESPACE
Birch(branching_factor=50, compute_labels=True, copy=True, n_clusters=None,
      threshold=0.5)
>>> brc.predict(X)
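
Not part of the commit, but a natural companion to the Birch example above:
the estimator also supports incremental fitting, so the same data can be fed
in chunks through partial_fit. A minimal sketch:

# Sketch only: stream the toy data into Birch in two chunks.
from sklearn.cluster import Birch

X = [[0, 1], [0.3, 1], [-0.3, 1], [0, -1], [0.3, -1], [-0.3, -1]]
brc = Birch(branching_factor=50, n_clusters=None, threshold=0.5)
brc.partial_fit(X[:3])   # first chunk
brc.partial_fit(X[3:])   # second chunk
print(brc.predict(X))
print(brc.subcluster_centers_)
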
13 changes: 13 additions & 0 deletions sklearn/cluster/dbscan_.py
@@ -244,6 +244,19 @@ class DBSCAN(BaseEstimator, ClusterMixin):
Cluster labels for each point in the dataset given to fit().
Noisy samples are given the label -1.

Examples
--------
>>> from sklearn.cluster import DBSCAN
>>> import numpy as np
>>> X = np.array([[1, 2], [2, 2], [2, 3],
...               [8, 7], [8, 8], [25, 80]])
>>> clustering = DBSCAN(eps=3, min_samples=2).fit(X)
>>> clustering.labels_
array([ 0,  0,  0,  1,  1, -1])
>>> clustering # doctest: +NORMALIZE_WHITESPACE
DBSCAN(algorithm='auto', eps=3, leaf_size=30, metric='euclidean',
       metric_params=None, min_samples=2, n_jobs=1, p=None)
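
For reference, and not part of this commit: the same clustering can be done in
one step with fit_predict, which also makes it easy to count the points DBSCAN
flags as noise (label -1). A sketch:

# Sketch only: one-step clustering plus cluster and noise counts.
import numpy as np
from sklearn.cluster import DBSCAN

X = np.array([[1, 2], [2, 2], [2, 3],
              [8, 7], [8, 8], [25, 80]])
labels = DBSCAN(eps=3, min_samples=2).fit_predict(X)
n_noise = int(np.sum(labels == -1))
n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
print(labels, n_clusters, n_noise)
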
See also
--------
OPTICS
16 changes: 16 additions & 0 deletions sklearn/cluster/hierarchical.py
@@ -917,6 +917,22 @@ class FeatureAgglomeration(AgglomerativeClustering, AgglomerationTransform):
node and has children `children_[i - n_features]`. Alternatively
at the i-th iteration, children[i][0] and children[i][1]
are merged to form node `n_features + i`

Examples
--------
>>> import numpy as np
>>> from sklearn import datasets, cluster
>>> digits = datasets.load_digits()
>>> images = digits.images
>>> X = np.reshape(images, (len(images), -1))
>>> agglo = cluster.FeatureAgglomeration(n_clusters=32)
>>> agglo.fit(X) # doctest: +ELLIPSIS
FeatureAgglomeration(affinity='euclidean', compute_full_tree='auto',
                     connectivity=None, linkage='ward', memory=None, n_clusters=32,
                     pooling_func=...)
>>> X_reduced = agglo.transform(X)
>>> X_reduced.shape
(1797, 32)
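
A possible continuation, not part of the committed example: the reduced
representation can be mapped back to the original 64-pixel space with
inverse_transform, which assigns each feature the pooled value of its cluster.
A sketch:

# Sketch only: round-trip the digits data through the agglomeration.
import numpy as np
from sklearn import datasets, cluster

digits = datasets.load_digits()
X = digits.images.reshape((len(digits.images), -1))
agglo = cluster.FeatureAgglomeration(n_clusters=32).fit(X)
X_reduced = agglo.transform(X)
X_restored = agglo.inverse_transform(X_reduced)
print(X_reduced.shape, X_restored.shape)   # (1797, 32) and (1797, 64)
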
"""

def __init__(self, n_clusters=2, affinity="euclidean",
30 changes: 30 additions & 0 deletions sklearn/cluster/k_means_.py
@@ -1404,6 +1404,36 @@ class MiniBatchKMeans(KMeans):
defined as the sum of square distances of samples to their nearest
neighbor.

Examples
--------
>>> from sklearn.cluster import MiniBatchKMeans
>>> import numpy as np
>>> X = np.array([[1, 2], [1, 4], [1, 0],
...               [4, 2], [4, 0], [4, 4],
...               [4, 5], [0, 1], [2, 2],
...               [3, 2], [5, 5], [1, -1]])
>>> # manually fit on batches
>>> kmeans = MiniBatchKMeans(n_clusters=2,
...                          random_state=0,
...                          batch_size=6)
>>> kmeans = kmeans.partial_fit(X[0:6,:])
>>> kmeans = kmeans.partial_fit(X[6:12,:])
>>> kmeans.cluster_centers_
array([[1, 1],
       [3, 4]])
>>> kmeans.predict([[0, 0], [4, 4]])
array([0, 1], dtype=int32)
>>> # fit on the whole data
>>> kmeans = MiniBatchKMeans(n_clusters=2,
...                          random_state=0,
...                          batch_size=6,
...                          max_iter=10).fit(X)
>>> kmeans.cluster_centers_
array([[3.95918367, 2.40816327],
       [1.12195122, 1.3902439 ]])
>>> kmeans.predict([[0, 0], [4, 4]])
array([1, 0], dtype=int32)
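
Not part of this commit, but worth noting next to the partial_fit example
above: the same pattern scales to an arbitrary stream of mini-batches by
looping over them. A sketch, with a random generator standing in for a real
data source:

# Sketch only: incremental clustering over a stream of mini-batches.
import numpy as np
from sklearn.cluster import MiniBatchKMeans

rng = np.random.RandomState(0)
stream = (rng.rand(6, 2) for _ in range(20))   # stand-in for a real stream
kmeans = MiniBatchKMeans(n_clusters=2, random_state=0, batch_size=6)
for batch in stream:
    kmeans = kmeans.partial_fit(batch)
print(kmeans.cluster_centers_)
print(kmeans.predict([[0.1, 0.1], [0.9, 0.9]]))
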
See also
--------
15 changes: 15 additions & 0 deletions sklearn/cluster/mean_shift_.py
@@ -351,6 +351,21 @@ class MeanShift(BaseEstimator, ClusterMixin):
labels_ :
Labels of each point.

Examples
--------
>>> from sklearn.cluster import MeanShift
>>> import numpy as np
>>> X = np.array([[1, 1], [2, 1], [1, 0],
...               [4, 7], [3, 5], [3, 6]])
>>> clustering = MeanShift(bandwidth=2).fit(X)
>>> clustering.labels_
array([0, 0, 0, 1, 1, 1])
>>> clustering.predict([[0, 0], [5, 5]])
array([0, 1])
>>> clustering # doctest: +NORMALIZE_WHITESPACE
MeanShift(bandwidth=2, bin_seeding=False, cluster_all=True, min_bin_freq=1,
          n_jobs=1, seeds=None)
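
As an aside that is not part of this commit: when no sensible bandwidth is
known a priori, sklearn.cluster.estimate_bandwidth can derive one from the
data before fitting. A sketch:

# Sketch only: estimate the bandwidth instead of hard-coding it.
import numpy as np
from sklearn.cluster import MeanShift, estimate_bandwidth

X = np.array([[1, 1], [2, 1], [1, 0],
              [4, 7], [3, 5], [3, 6]])
bandwidth = estimate_bandwidth(X, quantile=0.5)
clustering = MeanShift(bandwidth=bandwidth).fit(X)
print(bandwidth)
print(clustering.cluster_centers_)
print(clustering.labels_)
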
Notes
-----
17 changes: 17 additions & 0 deletions sklearn/cluster/spectral.py
@@ -371,6 +371,23 @@ class SpectralClustering(BaseEstimator, ClusterMixin):
labels_ :
Labels of each point

Examples
--------
>>> from sklearn.cluster import SpectralClustering
>>> import numpy as np
>>> X = np.array([[1, 1], [2, 1], [1, 0],
...               [4, 7], [3, 5], [3, 6]])
>>> clustering = SpectralClustering(n_clusters=2,
...                                 assign_labels="discretize",
...                                 random_state=0).fit(X)
>>> clustering.labels_
array([1, 1, 1, 0, 0, 0])
>>> clustering # doctest: +NORMALIZE_WHITESPACE
SpectralClustering(affinity='rbf', assign_labels='discretize', coef0=1,
                   degree=3, eigen_solver=None, eigen_tol=0.0, gamma=1.0,
                   kernel_params=None, n_clusters=2, n_init=10, n_jobs=1,
                   n_neighbors=10, random_state=0)
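
A hedged sketch of the precomputed-affinity route mentioned in the Notes
below, not included in this commit: build a similarity matrix yourself and
pass it with affinity='precomputed'.

# Sketch only: spectral clustering on a user-supplied affinity matrix.
import numpy as np
from sklearn.cluster import SpectralClustering
from sklearn.metrics.pairwise import rbf_kernel

X = np.array([[1, 1], [2, 1], [1, 0],
              [4, 7], [3, 5], [3, 6]])
affinity = rbf_kernel(X, gamma=1.0)   # dense pairwise similarity matrix
clustering = SpectralClustering(n_clusters=2, affinity='precomputed',
                                assign_labels='discretize',
                                random_state=0).fit(affinity)
print(clustering.labels_)
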
Notes
-----
If you have an affinity matrix, such as a distance matrix,
