Expose random_state in leiden (#102)

scverse · Dec 13, 2023 · 32c940c · 32c940c
1 parent fe8156d
commit 32c940c
Show file tree

Hide file tree

Showing 3 changed files with 45 additions and 17 deletions.
diff --git a/docs/release-notes/0.9.4.md b/docs/release-notes/0.9.4.md
@@ -0,0 +1,5 @@
+### 0.9.4
+
+```{rubric} Features
+```
+* {func}`~rapids_singlecell.tl.leiden` now provides `random_state` parameter for initializing the optimization {pr}`102` {smaller}`J Pintar & S Dicks`
diff --git a/docs/release-notes/index.md b/docs/release-notes/index.md
@@ -3,6 +3,9 @@
 # Release notes
 
 ## Version 0.9.0
+```{include} /release-notes/0.9.4.md
+``````
+
 ```{include} /release-notes/0.9.3.md
 ``````
 

diff --git a/src/rapids_singlecell/tools/_clustering.py b/src/rapids_singlecell/tools/_clustering.py
@@ -1,4 +1,4 @@
-from typing import Optional, Sequence, Tuple
+from typing import Optional, Sequence, Tuple, Union
 
 import cudf
 import numpy as np
@@ -14,17 +14,23 @@ def leiden(
     adata: AnnData,
     resolution: float = 1.0,
     *,
+    random_state: Union[int, None] = 0,
     restrict_to: Optional[Tuple[str, Sequence[str]]] = None,
     key_added: str = "leiden",
     adjacency: Optional[sparse.spmatrix] = None,
     n_iterations: int = 100,
     use_weights: bool = True,
-    neighbors_key: Optional[int] = None,
+    neighbors_key: Optional[str] = None,
     obsp: Optional[str] = None,
     copy: bool = False,
 ) -> Optional[AnnData]:
     """
-    Performs Leiden Clustering using cuGraph
+    Performs Leiden clustering using cuGraph, which implements the method
+    described in:
+
+    Traag, V.A., Waltman, L., & van Eck, N.J. (2019). From Louvain to
+    Leiden: guaranteeing well-connected communities. Sci. Rep., 9(1), 5233.
+    DOI: 10.1038/s41598-019-41695-z
 
     Parameters
     ----------
@@ -33,38 +39,46 @@ def leiden(
 
         resolution
             A parameter value controlling the coarseness of the clustering.
-            Higher values lead to more clusters.
+            (called gamma in the modularity formula). Higher values lead to
+            more clusters.
+
+        random_state
+            Change the initialization of the optimization. Defaults to 0.
 
         restrict_to
-            Restrict the clustering to the categories within the key for sample
-            annotation, tuple needs to contain `(obs_key, list_of_categories)`.
+            Restrict the clustering to the categories within the key for
+            sample annotation, tuple needs to contain
+            `(obs_key, list_of_categories)`.
 
         key_added
             `adata.obs` key under which to add the cluster labels.
 
         adjacency
-            Sparse adjacency matrix of the graph, defaults to neighbors connectivities.
+            Sparse adjacency matrix of the graph, defaults to neighbors
+            connectivities.
 
         n_iterations
-            This controls the maximum number of levels/iterations of the Leiden algorithm.
-            When specified the algorithm will terminate after no more than the specified number of iterations.
-            No error occurs when the algorithm terminates early in this manner.
+            This controls the maximum number of levels/iterations of the
+            Leiden algorithm. When specified, the algorithm will terminate
+            after no more than the specified number of iterations. No error
+            occurs when the algorithm terminates early in this manner.
 
         use_weights
-            If `True`, edge weights from the graph are used in the computation
-            (placing more emphasis on stronger edges).
+            If `True`, edge weights from the graph are used in the
+            computation (placing more emphasis on stronger edges).
 
         neighbors_key
-            If not specified, `leiden` looks at `.obsp['connectivities']` for neighbors connectivities
-            If specified, `leiden` looks at `.obsp['neighbors_key_ connectivities']` for neighbors connectivities
+            If not specified, `leiden` looks at `.obsp['connectivities']`
+            for neighbors connectivities. If specified, `leiden` looks at
+            `.obsp[.uns[neighbors_key]['connectivities_key']]` for neighbors
+            connectivities.
 
         obsp
             Use .obsp[obsp] as adjacency. You can't specify both
             `obsp` and `neighbors_key` at the same time.
 
         copy
-            Whether to copy `adata` or modify it inplace.
-
+            Whether to copy `adata` or modify it in place.
     """
     # Adjacency graph
     from cugraph import Graph
@@ -94,7 +108,12 @@ def leiden(
     g.from_cudf_adjlist(offsets, indices, weights)
 
     # Cluster
-    leiden_parts, _ = culeiden(g, resolution=resolution, max_iter=n_iterations)
+    leiden_parts, _ = culeiden(
+        g,
+        resolution=resolution,
+        random_state=random_state,
+        max_iter=n_iterations,
+    )
 
     # Format output
     groups = (
@@ -119,6 +138,7 @@ def leiden(
     adata.uns["leiden"] = {}
     adata.uns["leiden"]["params"] = {
         "resolution": resolution,
+        "random_state": random_state,
         "n_iterations": n_iterations,
     }
     return adata if copy else None