/
_louvain.py
234 lines (216 loc) · 8.31 KB
/
_louvain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
from types import MappingProxyType
from typing import Optional, Tuple, Sequence, Type, Mapping, Any
import numpy as np
import pandas as pd
from anndata import AnnData
from natsort import natsorted
from scipy.sparse import spmatrix
from packaging import version
from ._utils_clustering import rename_groups, restrict_adjacency
from .. import _utils, logging as logg
from .._compat import Literal
from .._utils import _choose_graph
try:
    from louvain.VertexPartition import MutableVertexPartition
except ImportError:
    # `louvain` is an optional dependency. When it cannot be imported, define
    # an empty stand-in so the name used in the `louvain()` signature still
    # exists; the stand-in reports the module path of the real class.
    class MutableVertexPartition:
        __module__ = 'louvain.VertexPartition'
def louvain(
    adata: AnnData,
    resolution: Optional[float] = None,
    random_state: _utils.AnyRandom = 0,
    restrict_to: Optional[Tuple[str, Sequence[str]]] = None,
    key_added: str = 'louvain',
    adjacency: Optional[spmatrix] = None,
    flavor: Literal['vtraag', 'igraph', 'rapids'] = 'vtraag',
    directed: bool = True,
    use_weights: bool = False,
    partition_type: Optional[Type[MutableVertexPartition]] = None,
    partition_kwargs: Mapping[str, Any] = MappingProxyType({}),
    neighbors_key: Optional[str] = None,
    obsp: Optional[str] = None,
    copy: bool = False,
) -> Optional[AnnData]:
    """\
    Cluster cells into subgroups [Blondel08]_ [Levine15]_ [Traag17]_.

    Cluster cells using the Louvain algorithm [Blondel08]_ in the implementation
    of [Traag17]_. The Louvain algorithm has been proposed for single-cell
    analysis by [Levine15]_.

    This requires having run :func:`~scanpy.pp.neighbors` or
    :func:`~scanpy.external.pp.bbknn` first,
    or explicitly passing an ``adjacency`` matrix.

    Parameters
    ----------
    adata
        The annotated data matrix.
    resolution
        For the default flavor (``'vtraag'``) or for ``'rapids'``, you can
        provide a resolution (higher resolution means finding more and smaller
        clusters), which defaults to 1.0.
        See “Time as a resolution parameter” in [Lambiotte09]_.
        Has no effect for flavor ``'igraph'`` (a warning is logged).
    random_state
        Change the initialization of the optimization.
        Only used by flavor ``'vtraag'``.
    restrict_to
        Restrict the clustering to the categories within the key for sample
        annotation, tuple needs to contain ``(obs_key, list_of_categories)``.
    key_added
        Key under which to add the cluster labels. (default: ``'louvain'``)
        If ``restrict_to`` is given and ``key_added`` is left at its default,
        the labels are stored under ``'louvain_R'`` instead.
    adjacency
        Sparse adjacency matrix of the graph, defaults to neighbors connectivities.
    flavor
        Choose between packages for computing the clustering:
        ``'vtraag'`` (the default, and much more powerful), ``'igraph'``,
        or ``'rapids'``. The deprecated value ``'taynaud'`` is still accepted.
    directed
        Interpret the ``adjacency`` matrix as directed graph?
        Flavor ``'igraph'`` always uses the undirected graph.
    use_weights
        Use weights from knn graph.
    partition_type
        Type of partition to use.
        Only a valid argument if ``flavor`` is ``'vtraag'``.
    partition_kwargs
        Keyword arguments to pass to partitioning,
        if ``vtraag`` method is being used. The mapping itself is not modified.
    neighbors_key
        Use neighbors connectivities as adjacency.
        If not specified, louvain looks at .obsp['connectivities'] for
        connectivities (default storage place for pp.neighbors).
        If specified, louvain looks at
        .obsp[.uns[neighbors_key]['connectivities_key']] for connectivities.
    obsp
        Use .obsp[obsp] as adjacency. You can't specify both
        `obsp` and `neighbors_key` at the same time.
    copy
        Copy adata or modify it inplace.

    Returns
    -------
    :obj:`None`
        By default (``copy=False``), updates ``adata`` with the following fields:

        ``adata.obs[key_added]`` (:class:`pandas.Series`, dtype ``category``)
            Array of dim (number of samples) that stores the subgroup id
            (``'0'``, ``'1'``, ...) for each cell.
        ``adata.uns['louvain']['params']`` (:class:`dict`)
            The ``resolution`` and ``random_state`` that were passed in.

    :class:`~anndata.AnnData`
        When ``copy=True`` is set, a copy of ``adata`` with those fields is returned.

    Raises
    ------
    ValueError
        If ``partition_type`` is given while ``flavor`` is not ``'vtraag'``,
        or if ``flavor`` is not one of the supported values.
    """
    # Work on a private copy so the caller's mapping (and the shared default)
    # is never mutated when resolution/weights/seed are filled in below.
    partition_kwargs = dict(partition_kwargs)
    start = logg.info('running Louvain clustering')
    if (flavor != 'vtraag') and (partition_type is not None):
        raise ValueError(
            '`partition_type` is only a valid argument when `flavor` is "vtraag"'
        )
    adata = adata.copy() if copy else adata
    if adjacency is None:
        adjacency = _choose_graph(adata, obsp, neighbors_key)
    if restrict_to is not None:
        restrict_key, restrict_categories = restrict_to
        adjacency, restrict_indices = restrict_adjacency(
            adata,
            restrict_key,
            restrict_categories,
            adjacency,
        )

    if flavor in {'vtraag', 'igraph'}:
        if flavor == 'igraph' and resolution is not None:
            logg.warning('`resolution` parameter has no effect for flavor "igraph"')
        # The igraph flavor is always run on the undirected graph.
        if directed and flavor == 'igraph':
            directed = False
        if not directed:
            logg.debug(' using the undirected graph')
        g = _utils.get_igraph_from_adjacency(adjacency, directed=directed)
        if use_weights:
            weights = np.array(g.es["weight"]).astype(np.float64)
        else:
            weights = None
        if flavor == 'vtraag':
            import louvain

            if partition_type is None:
                partition_type = louvain.RBConfigurationVertexPartition
            if resolution is not None:
                partition_kwargs["resolution_parameter"] = resolution
            if use_weights:
                partition_kwargs["weights"] = weights
            # Seeding depends on the installed version: before 0.7.0 the seed
            # is set through `set_rng_seed`, afterwards it is passed as the
            # `seed` keyword of `find_partition`.
            if version.parse(louvain.__version__) < version.parse("0.7.0"):
                louvain.set_rng_seed(random_state)
            else:
                partition_kwargs["seed"] = random_state
            logg.info(' using the "louvain" package of Traag (2017)')
            part = louvain.find_partition(
                g,
                partition_type,
                **partition_kwargs,
            )
        else:
            part = g.community_multilevel(weights=weights)
        groups = np.array(part.membership)
    elif flavor == 'rapids':
        # nvLouvain only works with undirected graphs,
        # and `adjacency` must have a directed edge in both directions
        import cudf
        import cugraph

        offsets = cudf.Series(adjacency.indptr)
        indices = cudf.Series(adjacency.indices)
        if use_weights:
            sources, targets = adjacency.nonzero()
            weights = adjacency[sources, targets]
            # Fancy-indexing a sparse matrix may yield an `np.matrix`;
            # flatten it to a 1-d array before handing it to cudf.
            if isinstance(weights, np.matrix):
                weights = weights.A1
            weights = cudf.Series(weights)
        else:
            weights = None
        g = cugraph.Graph()

        # Use whichever adjacency-list constructor this Graph object exposes.
        if hasattr(g, 'add_adj_list'):
            g.add_adj_list(offsets, indices, weights)
        else:
            g.from_cudf_adjlist(offsets, indices, weights)

        logg.info(' using the "louvain" package of rapids')
        if resolution is not None:
            louvain_parts, _ = cugraph.louvain(g, resolution=resolution)
        else:
            louvain_parts, _ = cugraph.louvain(g)
        # Order the labels by vertex id so they line up with the rows of
        # `adjacency`.
        groups = (
            louvain_parts.to_pandas()
            .sort_values('vertex')[['partition']]
            .to_numpy()
            .ravel()
        )
    elif flavor == 'taynaud':
        # this is deprecated
        import networkx as nx
        import community

        g = nx.Graph(adjacency)
        partition = community.best_partition(g)
        groups = np.zeros(len(partition), dtype=int)
        for k, v in partition.items():
            groups[k] = v
    else:
        raise ValueError(
            '`flavor` needs to be "vtraag", "igraph", "rapids" or "taynaud", '
            f'got {flavor!r}.'
        )

    if restrict_to is not None:
        # Do not overwrite an unrestricted clustering stored under the
        # default key.
        if key_added == 'louvain':
            key_added += '_R'
        groups = rename_groups(
            adata,
            key_added,
            restrict_key,
            restrict_categories,
            restrict_indices,
            groups,
        )

    unique_groups = np.unique(groups)
    adata.obs[key_added] = pd.Categorical(
        values=groups.astype('U'),
        categories=natsorted(map(str, unique_groups)),
    )
    adata.uns['louvain'] = {}
    adata.uns['louvain']['params'] = dict(
        resolution=resolution,
        random_state=random_state,
    )
    logg.info(
        ' finished',
        time=start,
        deep=(
            f'found {len(unique_groups)} clusters and added\n'
            f' {key_added!r}, the cluster labels (adata.obs, categorical)'
        ),
    )
    return adata if copy else None