DOC Some see alsos between dbscan and optics (#11616)
jnothman authored and qinhanmin2014 committed Jul 22, 2018
1 parent b448b3a commit 6e11386
Showing 3 changed files with 34 additions and 8 deletions.
7 changes: 4 additions & 3 deletions doc/modules/clustering.rst
@@ -802,9 +802,10 @@ by black points below.
be used (e.g. with sparse matrices). This matrix will consume n^2 floats.
A couple of mechanisms for getting around this are:

- Use OPTICS clustering in conjunction with the `extract_dbscan` method. OPTICS
clustering also calculates the full pairwise matrix, but only keeps one row in
memory at a time (memory complexity n).
- Use :ref:`OPTICS <optics>` clustering in conjunction with the
`extract_dbscan` method. OPTICS clustering also calculates the full
pairwise matrix, but only keeps one row in memory at a time (memory
complexity n).

- A sparse radius neighborhood graph (where missing entries are presumed to
  be out of eps) can be precomputed in a memory-efficient way and dbscan
  can be run over this with ``metric='precomputed'``.
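The second mechanism above can be sketched with the released neighbors and
clustering APIs; the synthetic data, the ``eps`` value and the use of
``radius_neighbors_graph`` are illustrative and not part of this commit::

    import numpy as np
    from sklearn.cluster import DBSCAN
    from sklearn.datasets import make_blobs
    from sklearn.neighbors import radius_neighbors_graph

    X, _ = make_blobs(n_samples=1000, centers=3, random_state=0)
    eps = 0.5

    # Sparse matrix of pairwise distances, kept only for pairs within `eps`;
    # absent entries are implicitly "further than eps", so memory scales with
    # the number of neighbor pairs rather than n^2.
    D = radius_neighbors_graph(X, radius=eps, mode='distance')

    # DBSCAN accepts the precomputed sparse graph directly.
    labels = DBSCAN(eps=eps, metric='precomputed').fit_predict(D)
    print(np.unique(labels))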
20 changes: 20 additions & 0 deletions sklearn/cluster/dbscan_.py
@@ -87,6 +87,14 @@ def dbscan(X, eps=0.5, min_samples=5, metric='minkowski', metric_params=None,
labels : array [n_samples]
Cluster labels for each point. Noisy samples are given the label -1.
See also
--------
DBSCAN
An estimator interface for this clustering algorithm.
optics
A similar clustering at multiple values of eps. Our implementation
is optimized for memory usage.
Notes
-----
For an example, see :ref:`examples/cluster/plot_dbscan.py
@@ -107,6 +115,9 @@ def dbscan(X, eps=0.5, min_samples=5, metric='minkowski', metric_params=None,
Another way to reduce memory and computation time is to remove
(near-)duplicate points and use ``sample_weight`` instead.
:func:`cluster.optics` provides a similar clustering with lower memory
usage.
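The (near-)duplicate trick mentioned above can be sketched with the released
``dbscan`` function and ``numpy.unique``; the toy data and parameter values
are illustrative, not part of this change::

    import numpy as np
    from sklearn.cluster import dbscan

    # Collapse exact duplicates and carry their multiplicity as a weight, so
    # the density computation sees the same mass while the distance matrix
    # shrinks. Near-duplicates would first need rounding or binning.
    X = np.array([[0.0, 0.0], [0.0, 0.0], [0.1, 0.0], [5.0, 5.0], [5.0, 5.1]])
    X_unique, counts = np.unique(X, axis=0, return_counts=True)

    core_samples, labels = dbscan(X_unique, eps=0.5, min_samples=2,
                                  sample_weight=counts)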
References
----------
Ester, M., H. P. Kriegel, J. Sander, and X. Xu, "A Density-Based
@@ -233,6 +244,12 @@ class DBSCAN(BaseEstimator, ClusterMixin):
Cluster labels for each point in the dataset given to fit().
Noisy samples are given the label -1.
See also
--------
OPTICS
A similar clustering at multiple values of eps. Our implementation
is optimized for memory usage.
Notes
-----
For an example, see :ref:`examples/cluster/plot_dbscan.py
@@ -253,6 +270,9 @@ class DBSCAN(BaseEstimator, ClusterMixin):
Another way to reduce memory and computation time is to remove
(near-)duplicate points and use ``sample_weight`` instead.
:class:`cluster.OPTICS` provides a similar clustering with lower memory
usage.
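For context, a minimal sketch of the estimator interface this class provides,
using synthetic data (the data and parameter values are illustrative)::

    from sklearn.cluster import DBSCAN
    from sklearn.datasets import make_blobs

    X, _ = make_blobs(n_samples=500, centers=3, random_state=42)

    db = DBSCAN(eps=0.5, min_samples=5).fit(X)
    labels = db.labels_             # -1 marks noisy samples
    core = db.core_sample_indices_  # indices of core samples
    n_clusters = len(set(labels)) - (1 if -1 in labels else 0)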
References
----------
Ester, M., H. P. Kriegel, J. Sander, and X. Xu, "A Density-Based
15 changes: 10 additions & 5 deletions sklearn/cluster/optics_.py
@@ -127,6 +127,14 @@ def optics(X, min_samples=5, max_bound=np.inf, metric='euclidean',
labels_ : array, shape (n_samples,)
The estimated labels.
See also
--------
OPTICS
An estimator interface for this clustering algorithm.
dbscan
A similar clustering for a specified neighborhood radius (eps).
Our implementation is optimized for runtime.
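The contrast drawn here can be made concrete: with dbscan, each neighborhood
radius requires a separate run, whereas OPTICS orders the data once for all
eps values up to its bound. A sketch using only the released ``dbscan``
function (data and radii are illustrative)::

    import numpy as np
    from sklearn.cluster import dbscan
    from sklearn.datasets import make_blobs

    X, _ = make_blobs(n_samples=300, centers=3, random_state=0)

    # Exploring several radii with dbscan means one full clustering per eps;
    # OPTICS computes a single reachability ordering covering all of them.
    for eps in (0.3, 0.5, 1.0):
        _, labels = dbscan(X, eps=eps, min_samples=5)
        n_clusters = len(set(labels)) - (1 if -1 in set(labels) else 0)
        print(eps, n_clusters)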
References
----------
Ankerst, Mihael, Markus M. Breunig, Hans-Peter Kriegel, and Jörg Sander.
@@ -256,11 +264,8 @@ class OPTICS(BaseEstimator, ClusterMixin):
--------
DBSCAN
CPU optimized algorithm that clusters at specified neighborhood
radius (eps).
HDBSCAN
Related clustering algorithm that calculates the minimum spanning tree
across mutual reachability space.
A similar clustering for a specified neighborhood radius (eps).
Our implementation is optimized for runtime.
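A heavily hedged sketch of the relationship described above, assuming the
OPTICS estimator in this branch mirrors the ``optics`` function signature
shown earlier in this diff (``min_samples``, ``max_bound``); released
versions later renamed the bound parameter (``max_eps``)::

    from sklearn.cluster import DBSCAN, OPTICS
    from sklearn.datasets import make_blobs

    X, _ = make_blobs(n_samples=300, centers=3, random_state=0)

    # DBSCAN: one clustering at a fixed radius, optimized for runtime.
    db_labels = DBSCAN(eps=0.5, min_samples=5).fit_predict(X)

    # OPTICS: orders points by reachability for all radii up to the bound,
    # keeping only one row of pairwise distances in memory at a time.
    # NOTE: `max_bound` is an assumption carried over from the function
    # signature shown in this diff, not a confirmed class parameter.
    opt = OPTICS(min_samples=5, max_bound=2.0).fit(X)
    opt_labels = opt.labels_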
References
----------
