Skip to content

Commit

Permalink
Expose random_state in leiden (#102)
Browse files Browse the repository at this point in the history
  • Loading branch information
jpintar committed Dec 13, 2023
1 parent fe8156d commit 32c940c
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 17 deletions.
5 changes: 5 additions & 0 deletions docs/release-notes/0.9.4.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
### 0.9.4

```{rubric} Features
```
* {func}`~rapids_singlecell.tl.leiden` now provides a `random_state` parameter for initializing the optimization {pr}`102` {smaller}`J Pintar & S Dicks`
3 changes: 3 additions & 0 deletions docs/release-notes/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
# Release notes

## Version 0.9.0
```{include} /release-notes/0.9.4.md
``````

```{include} /release-notes/0.9.3.md
``````

Expand Down
54 changes: 37 additions & 17 deletions src/rapids_singlecell/tools/_clustering.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional, Sequence, Tuple
from typing import Optional, Sequence, Tuple, Union

import cudf
import numpy as np
Expand All @@ -14,17 +14,23 @@ def leiden(
adata: AnnData,
resolution: float = 1.0,
*,
random_state: Union[int, None] = 0,
restrict_to: Optional[Tuple[str, Sequence[str]]] = None,
key_added: str = "leiden",
adjacency: Optional[sparse.spmatrix] = None,
n_iterations: int = 100,
use_weights: bool = True,
neighbors_key: Optional[int] = None,
neighbors_key: Optional[str] = None,
obsp: Optional[str] = None,
copy: bool = False,
) -> Optional[AnnData]:
"""
Performs Leiden Clustering using cuGraph
Performs Leiden clustering using cuGraph, which implements the method
described in:
Traag, V.A., Waltman, L., & van Eck, N.J. (2019). From Louvain to
Leiden: guaranteeing well-connected communities. Sci. Rep., 9(1), 5233.
DOI: 10.1038/s41598-019-41695-z
Parameters
----------
Expand All @@ -33,38 +39,46 @@ def leiden(
resolution
A parameter value controlling the coarseness of the clustering.
Higher values lead to more clusters.
(called gamma in the modularity formula). Higher values lead to
more clusters.
random_state
Seed used to initialize the optimization; change it to obtain a different initialization. Defaults to 0.
restrict_to
Restrict the clustering to the categories within the key for sample
annotation, tuple needs to contain `(obs_key, list_of_categories)`.
Restrict the clustering to the categories within the key for
sample annotation, tuple needs to contain
`(obs_key, list_of_categories)`.
key_added
`adata.obs` key under which to add the cluster labels.
adjacency
Sparse adjacency matrix of the graph, defaults to neighbors connectivities.
Sparse adjacency matrix of the graph, defaults to neighbors
connectivities.
n_iterations
This controls the maximum number of levels/iterations of the Leiden algorithm.
When specified the algorithm will terminate after no more than the specified number of iterations.
No error occurs when the algorithm terminates early in this manner.
This controls the maximum number of levels/iterations of the
Leiden algorithm. When specified, the algorithm will terminate
after no more than the specified number of iterations. No error
occurs when the algorithm terminates early in this manner.
use_weights
If `True`, edge weights from the graph are used in the computation
(placing more emphasis on stronger edges).
If `True`, edge weights from the graph are used in the
computation (placing more emphasis on stronger edges).
neighbors_key
If not specified, `leiden` looks at `.obsp['connectivities']` for neighbors connectivities
If specified, `leiden` looks at `.obsp['neighbors_key_ connectivities']` for neighbors connectivities
If not specified, `leiden` looks at `.obsp['connectivities']`
for neighbors connectivities. If specified, `leiden` looks at
`.obsp[.uns[neighbors_key]['connectivities_key']]` for neighbors
connectivities.
obsp
Use `.obsp[obsp]` as adjacency. You can't specify both
`obsp` and `neighbors_key` at the same time.
copy
Whether to copy `adata` or modify it inplace.
Whether to copy `adata` or modify it in place.
"""
# Adjacency graph
from cugraph import Graph
Expand Down Expand Up @@ -94,7 +108,12 @@ def leiden(
g.from_cudf_adjlist(offsets, indices, weights)

# Cluster
leiden_parts, _ = culeiden(g, resolution=resolution, max_iter=n_iterations)
leiden_parts, _ = culeiden(
g,
resolution=resolution,
random_state=random_state,
max_iter=n_iterations,
)

# Format output
groups = (
Expand All @@ -119,6 +138,7 @@ def leiden(
adata.uns["leiden"] = {}
adata.uns["leiden"]["params"] = {
"resolution": resolution,
"random_state": random_state,
"n_iterations": n_iterations,
}
return adata if copy else None
Expand Down

0 comments on commit 32c940c

Please sign in to comment.