/
_traversal.pyx
696 lines (579 loc) · 24.3 KB
/
_traversal.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
"""
Routines for traversing graphs in compressed sparse format
"""
# Author: Jake Vanderplas -- <vanderplas@astro.washington.edu>
# License: BSD, (C) 2012
import numpy as np
cimport numpy as np
from scipy.sparse import csr_matrix, isspmatrix, isspmatrix_csr, isspmatrix_csc
from scipy.sparse.csgraph._validation import validate_graph
from scipy.sparse.csgraph._tools import reconstruct_path
cimport cython
from libc cimport stdlib
include 'parameters.pxi'
def connected_components(csgraph, directed=True, connection='weak',
return_labels=True):
"""
connected_components(csgraph, directed=True, connection='weak', return_labels=True)
Analyze the connected components of a sparse graph
Parameters
----------
csgraph: array_like or sparse matrix
The N x N matrix representing the compressed sparse graph. The input
csgraph will be converted to csr format for the calculation.
directed: bool, optional
if True (default), then operate on a directed graph: only
move from point i to point j along paths csgraph[i, j].
if False, then find the shortest path on an undirected graph: the
algorithm can progress from point i to j along csgraph[i, j] or
csgraph[j, i].
connection: string, optional
['weak'|'strong']. For directed graphs, the type of connection to
use. Nodes i and j are strongly connected if a path exists both
from i to j and from j to i. Nodes i and j are weakly connected if
only one of these paths exists. If directed == False, this keyword
is not referenced.
return_labels: string, optional
if True (default), then return the labels for each of the connected
components.
Returns
-------
n_components: integer
The number of connected components.
labels: ndarray
The length-N array of labels of the connected components.
"""
if connection.lower() not in ['weak', 'strong']:
raise ValueError("connection must be 'weak' or 'strong'")
# weak connections <=> components of undirected graph
if connection.lower() == 'weak':
directed = False
csgraph = validate_graph(csgraph, directed,
dense_output=False)
labels = np.empty(csgraph.shape[0], dtype=ITYPE)
labels.fill(NULL_IDX)
if directed:
n_components = _connected_components_directed(csgraph.indices,
csgraph.indptr,
labels)
else:
csgraph_T = csgraph.T.tocsr()
n_components = _connected_components_undirected(csgraph.indices,
csgraph.indptr,
csgraph_T.indices,
csgraph_T.indptr,
labels)
if return_labels:
return n_components, labels
else:
return n_components
def breadth_first_tree(csgraph, i_start, directed=True):
r"""
breadth_first_tree(csgraph, i_start, directed=True)
Return the tree generated by a breadth-first search
Note that a breadth-first tree from a specified node is unique.
Parameters
----------
csgraph: array_like or sparse matrix
The N x N matrix representing the compressed sparse graph. The input
csgraph will be converted to csr format for the calculation.
i_start: int
The index of starting node.
directed: bool, optional
if True (default), then operate on a directed graph: only
move from point i to point j along paths csgraph[i, j].
if False, then find the shortest path on an undirected graph: the
algorithm can progress from point i to j along csgraph[i, j] or
csgraph[j, i].
Returns
-------
cstree : csr matrix
The N x N directed compressed-sparse representation of the breadth-
first tree drawn from csgraph, starting at the specified node.
Examples
--------
The following example shows the computation of a depth-first tree
over a simple four-component graph, starting at node 0::
input graph breadth first tree from (0)
(0) (0)
/ \ / \
3 8 3 8
/ \ / \
(3)---5---(1) (3) (1)
\ / /
6 2 2
\ / /
(2) (2)
In compressed sparse representation, the solution looks like this:
>>> from scipy.sparse import csr_matrix
>>> from scipy.sparse.csgraph import breadth_first_tree
>>> X = csr_matrix([[0, 8, 0, 3],
... [0, 0, 2, 5],
... [0, 0, 0, 6],
... [0, 0, 0, 0]])
>>> Tcsr = breadth_first_tree(X, 0, directed=False)
>>> Tcsr.toarray().astype(int)
array([[0, 8, 0, 3],
[0, 0, 2, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]])
Note that the resulting graph is a Directed Acyclic Graph which spans
the graph. A breadth-first tree from a given node is unique.
"""
node_list, predecessors = breadth_first_order(csgraph, i_start,
directed, True)
return reconstruct_path(csgraph, predecessors, directed)
def depth_first_tree(csgraph, i_start, directed=True):
r"""
depth_first_tree(csgraph, i_start, directed=True)
Return a tree generated by a depth-first search.
Note that a tree generated by a depth-first search is not unique:
it depends on the order that the children of each node are searched.
Parameters
----------
csgraph: array_like or sparse matrix
The N x N matrix representing the compressed sparse graph. The input
csgraph will be converted to csr format for the calculation.
i_start: int
The index of starting node.
directed: bool, optional
if True (default), then operate on a directed graph: only
move from point i to point j along paths csgraph[i, j].
if False, then find the shortest path on an undirected graph: the
algorithm can progress from point i to j along csgraph[i, j] or
csgraph[j, i].
Returns
-------
cstree : csr matrix
The N x N directed compressed-sparse representation of the depth-
first tree drawn from csgraph, starting at the specified node.
Examples
--------
The following example shows the computation of a depth-first tree
over a simple four-component graph, starting at node 0::
input graph depth first tree from (0)
(0) (0)
/ \ \
3 8 8
/ \ \
(3)---5---(1) (3) (1)
\ / \ /
6 2 6 2
\ / \ /
(2) (2)
In compressed sparse representation, the solution looks like this:
>>> from scipy.sparse import csr_matrix
>>> from scipy.sparse.csgraph import depth_first_tree
>>> X = csr_matrix([[0, 8, 0, 3],
... [0, 0, 2, 5],
... [0, 0, 0, 6],
... [0, 0, 0, 0]])
>>> Tcsr = depth_first_tree(X, 0, directed=False)
>>> Tcsr.toarray().astype(int)
array([[0, 8, 0, 0],
[0, 0, 2, 0],
[0, 0, 0, 6],
[0, 0, 0, 0]])
Note that the resulting graph is a Directed Acyclic Graph which spans
the graph. Unlike a breadth-first tree, a depth-first tree of a given
graph is not unique if the graph contains cycles. If the above solution
had begun with the edge connecting nodes 0 and 3, the result would have
been different.
"""
node_list, predecessors = depth_first_order(csgraph, i_start,
directed, True)
return reconstruct_path(csgraph, predecessors, directed)
def breadth_first_order(csgraph, i_start,
directed=True, return_predecessors=True):
"""
breadth_first_order(csgraph, i_start, directed=True, return_predecessors=True)
Return a breadth-first ordering starting with specified node.
Note that a breadth-first order is not unique, but the tree which it
generates is unique.
Parameters
----------
csgraph: array_like or sparse matrix
The N x N compressed sparse graph. The input csgraph will be
converted to csr format for the calculation.
i_start: int
The index of starting node.
directed: bool, optional
If True (default), then operate on a directed graph: only
move from point i to point j along paths csgraph[i, j].
If False, then find the shortest path on an undirected graph: the
algorithm can progress from point i to j along csgraph[i, j] or
csgraph[j, i].
return_predecessors: bool, optional
If True (default), then return the predecesor array (see below).
Returns
-------
node_array: ndarray, one dimension
The breadth-first list of nodes, starting with specified node. The
length of node_array is the number of nodes reachable from the
specified node.
predecessors: ndarray, one dimension
Returned only if return_predecessors is True.
The length-N list of predecessors of each node in a breadth-first
tree. If node i is in the tree, then its parent is given by
predecessors[i]. If node i is not in the tree (and for the parent
node) then predecessors[i] = -9999.
"""
global NULL_IDX
csgraph = validate_graph(csgraph, directed, dense_output=False)
cdef int N = csgraph.shape[0]
cdef np.ndarray node_list = np.empty(N, dtype=ITYPE)
cdef np.ndarray predecessors = np.empty(N, dtype=ITYPE)
node_list.fill(NULL_IDX)
predecessors.fill(NULL_IDX)
if directed:
length = _breadth_first_directed(i_start,
csgraph.indices, csgraph.indptr,
node_list, predecessors)
else:
csgraph_T = csgraph.T.tocsr()
length = _breadth_first_undirected(i_start,
csgraph.indices, csgraph.indptr,
csgraph_T.indices, csgraph_T.indptr,
node_list, predecessors)
if return_predecessors:
return node_list[:length], predecessors
else:
return node_list[:length]
cdef unsigned int _breadth_first_directed(
unsigned int head_node,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indices,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr,
np.ndarray[ITYPE_t, ndim=1, mode='c'] node_list,
np.ndarray[ITYPE_t, ndim=1, mode='c'] predecessors):
# Inputs:
# head_node: (input) index of the node from which traversal starts
# indices: (input) CSR indices of graph
# indptr: (input) CSR indptr of graph
# node_list: (output) breadth-first list of nodes
# predecessors: (output) list of predecessors of nodes in breadth-first
# tree. Should be initialized to NULL_IDX
# Returns:
# n_nodes: the number of nodes in the breadth-first tree
global NULL_IDX
cdef unsigned int i, pnode, cnode
cdef unsigned int i_nl, i_nl_end
cdef unsigned int N = node_list.shape[0]
node_list[0] = head_node
i_nl = 0
i_nl_end = 1
while i_nl < i_nl_end:
pnode = node_list[i_nl]
for i from indptr[pnode] <= i < indptr[pnode + 1]:
cnode = indices[i]
if (cnode == head_node):
continue
elif (predecessors[cnode] == NULL_IDX):
node_list[i_nl_end] = cnode
predecessors[cnode] = pnode
i_nl_end += 1
i_nl += 1
return i_nl
cdef unsigned int _breadth_first_undirected(
unsigned int head_node,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indices1,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr1,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indices2,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr2,
np.ndarray[ITYPE_t, ndim=1, mode='c'] node_list,
np.ndarray[ITYPE_t, ndim=1, mode='c'] predecessors):
# Inputs:
# head_node: (input) index of the node from which traversal starts
# indices1: (input) CSR indices of graph
# indptr1: (input) CSR indptr of graph
# indices2: (input) CSR indices of transposed graph
# indptr2: (input) CSR indptr of transposed graph
# node_list: (output) breadth-first list of nodes
# predecessors: (output) list of predecessors of nodes in breadth-first
# tree. Should be initialized to NULL_IDX
# Returns:
# n_nodes: the number of nodes in the breadth-first tree
global NULL_IDX
cdef unsigned int i, pnode, cnode
cdef unsigned int i_nl, i_nl_end
cdef unsigned int N = node_list.shape[0]
node_list[0] = head_node
i_nl = 0
i_nl_end = 1
while i_nl < i_nl_end:
pnode = node_list[i_nl]
for i from indptr1[pnode] <= i < indptr1[pnode + 1]:
cnode = indices1[i]
if (cnode == head_node):
continue
elif (predecessors[cnode] == NULL_IDX):
node_list[i_nl_end] = cnode
predecessors[cnode] = pnode
i_nl_end += 1
for i from indptr2[pnode] <= i < indptr2[pnode + 1]:
cnode = indices2[i]
if (cnode == head_node):
continue
elif (predecessors[cnode] == NULL_IDX):
node_list[i_nl_end] = cnode
predecessors[cnode] = pnode
i_nl_end += 1
i_nl += 1
return i_nl
def depth_first_order(csgraph, i_start,
directed=True, return_predecessors=True):
"""
depth_first_order(csgraph, i_start, directed=True, return_predecessors=True)
Return a depth-first ordering starting with specified node.
Note that a depth-first order is not unique. Furthermore, for graphs
with cycles, the tree generated by a depth-first search is not
unique either.
Parameters
----------
csgraph: array_like or sparse matrix
The N x N compressed sparse graph. The input csgraph will be
converted to csr format for the calculation.
i_start: int
The index of starting node.
directed: bool, optional
If True (default), then operate on a directed graph: only
move from point i to point j along paths csgraph[i, j].
If False, then find the shortest path on an undirected graph: the
algorithm can progress from point i to j along csgraph[i, j] or
csgraph[j, i].
return_predecessors: bool, optional
If True (default), then return the predecesor array (see below).
Returns
-------
node_array: ndarray, one dimension
The breadth-first list of nodes, starting with specified node. The
length of node_array is the number of nodes reachable from the
specified node.
predecessors: ndarray, one dimension
Returned only if return_predecessors is True.
The length-N list of predecessors of each node in a breadth-first
tree. If node i is in the tree, then its parent is given by
predecessors[i]. If node i is not in the tree (and for the parent
node) then predecessors[i] = -9999.
"""
global NULL_IDX
csgraph = validate_graph(csgraph, directed, dense_output=False)
cdef int N = csgraph.shape[0]
node_list = np.empty(N, dtype=ITYPE)
predecessors = np.empty(N, dtype=ITYPE)
root_list = np.empty(N, dtype=ITYPE)
flag = np.zeros(N, dtype=ITYPE)
node_list.fill(NULL_IDX)
predecessors.fill(NULL_IDX)
root_list.fill(NULL_IDX)
if directed:
length = _depth_first_directed(i_start,
csgraph.indices, csgraph.indptr,
node_list, predecessors,
root_list, flag)
else:
csgraph_T = csgraph.T.tocsr()
length = _depth_first_undirected(i_start,
csgraph.indices, csgraph.indptr,
csgraph_T.indices, csgraph_T.indptr,
node_list, predecessors,
root_list, flag)
if return_predecessors:
return node_list[:length], predecessors
else:
return node_list[:length]
cdef unsigned int _depth_first_directed(
unsigned int head_node,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indices,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr,
np.ndarray[ITYPE_t, ndim=1, mode='c'] node_list,
np.ndarray[ITYPE_t, ndim=1, mode='c'] predecessors,
np.ndarray[ITYPE_t, ndim=1, mode='c'] root_list,
np.ndarray[ITYPE_t, ndim=1, mode='c'] flag):
cdef unsigned int i, j, i_nl_end, cnode, pnode
cdef unsigned int N = node_list.shape[0]
cdef int no_children, i_root
node_list[0] = head_node
root_list[0] = head_node
i_root = 0
i_nl_end = 1
flag[head_node] = 1
while i_root >= 0:
pnode = root_list[i_root]
no_children = True
for i from indptr[pnode] <= i < indptr[pnode + 1]:
cnode = indices[i]
if flag[cnode]:
continue
else:
i_root += 1
root_list[i_root] = cnode
node_list[i_nl_end] = cnode
predecessors[cnode] = pnode
flag[cnode] = 1
i_nl_end += 1
no_children = False
break
if i_nl_end == N:
break
if no_children:
i_root -= 1
return i_nl_end
cdef unsigned int _depth_first_undirected(
unsigned int head_node,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indices1,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr1,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indices2,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr2,
np.ndarray[ITYPE_t, ndim=1, mode='c'] node_list,
np.ndarray[ITYPE_t, ndim=1, mode='c'] predecessors,
np.ndarray[ITYPE_t, ndim=1, mode='c'] root_list,
np.ndarray[ITYPE_t, ndim=1, mode='c'] flag):
cdef unsigned int i, j, i_nl_end, cnode, pnode
cdef unsigned int N = node_list.shape[0]
cdef int no_children, i_root
node_list[0] = head_node
root_list[0] = head_node
i_root = 0
i_nl_end = 1
flag[head_node] = 1
while i_root >= 0:
pnode = root_list[i_root]
no_children = True
for i from indptr1[pnode] <= i < indptr1[pnode + 1]:
cnode = indices1[i]
if flag[cnode]:
continue
else:
i_root += 1
root_list[i_root] = cnode
node_list[i_nl_end] = cnode
predecessors[cnode] = pnode
flag[cnode] = 1
i_nl_end += 1
no_children = False
break
if no_children:
for i from indptr2[pnode] <= i < indptr2[pnode + 1]:
cnode = indices2[i]
if flag[cnode]:
continue
else:
i_root += 1
root_list[i_root] = cnode
node_list[i_nl_end] = cnode
predecessors[cnode] = pnode
flag[cnode] = 1
i_nl_end += 1
no_children = False
break
if i_nl_end == N:
break
if no_children:
i_root -= 1
return i_nl_end
cdef int _connected_components_undirected(
np.ndarray[ITYPE_t, ndim=1, mode='c'] indices1,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr1,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indices2,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr2,
np.ndarray[ITYPE_t, ndim=1, mode='c'] labels):
cdef unsigned int N = labels.shape[0]
cdef unsigned int i, label=0
cdef np.ndarray node_list = np.empty(N, dtype=ITYPE)
cdef np.ndarray predecessors = np.empty(N, dtype=ITYPE)
cdef np.ndarray root_list = np.empty(N, dtype=ITYPE)
cdef np.ndarray flag = np.zeros(N, dtype=ITYPE)
root_list.fill(NULL_IDX)
predecessors.fill(NULL_IDX)
node_list.fill(NULL_IDX)
for i from 0 <= i < N:
if labels[i] < 0:
_depth_first_undirected(i, indices1, indptr1,
indices2, indptr2,
node_list, predecessors, root_list, flag)
labels[flag > 0] = label
flag.fill(0)
label += 1
return label
cdef int _connected_components_directed(
np.ndarray[ITYPE_t, ndim=1, mode='c'] indices,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr,
np.ndarray[ITYPE_t, ndim=1, mode='c'] labels):
# uses http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.102.1707
global NULL_IDX
cdef int N = labels.shape[0]
cdef int i_node
cdef NodeStack *stack = <NodeStack*>stdlib.malloc(sizeof(NodeStack)
+ N * sizeof(int))
stack.label = 0
stack.index = 0
stack.i = 0
stack.N = N
stack.c = N - 1
labels.fill(-1)
for i_node from 0 <= i_node < N:
if labels[i_node] == -1:
_cc_visit(i_node, stack,
labels, indices, indptr)
# labels count down from N-1 to zero. Modify them so they
# count upward from 0
labels *= -1
labels += (N - 1)
N -= stack.c + 1
stdlib.free(stack)
return N
cdef int _cc_visit(int i_node, NodeStack *stack,
np.ndarray[ITYPE_t, ndim=1, mode='c'] rindex,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indices,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr):
global NULL_IDX
cdef int root = True
cdef int i, j, i_node2
rindex[i_node] = stack.index
stack.index += 1
# look at all edges coming out of node i_node. We'll label these i_node2
for j from indptr[i_node] <= j < indptr[i_node + 1]:
i_node2 = indices[j]
# i_node2 not yet seen. Search paths from this
if rindex[i_node2] == -1:
_cc_visit(i_node2, stack, rindex,
indices, indptr)
if rindex[i_node2] < rindex[i_node]:
rindex[i_node] = rindex[i_node2]
root = False
if root:
stack.index -= 1
while (stack.i > 0):
if rindex[i_node] > rindex[stack.arr[stack.i - 1]]:
break
i_node2 = stack_pop(stack) # i_node & i_node2 strongly connected
rindex[i_node2] = stack.c
stack.index -= 1
rindex[i_node] = stack.c
stack.c -= 1
else:
stack_push(stack, i_node)
cdef struct NodeStack:
int index
int label
int i
int N
int c
int arr[0]
cdef int stack_pop(NodeStack *s):
s.i -= 1
if s.i < 0:
raise ValueError('s.i < 0')
return s.arr[s.i]
cdef int stack_push(NodeStack *s, int item):
if s.i >= s.N:
raise ValueError('s.i >= N')
s.arr[s.i] = item
s.i += 1
cdef int in_stack(NodeStack *s, int item):
cdef int i = s.i - 1
while i >= 0:
if s.arr[i] == item:
return 1
i -= 1
return 0