Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

739 lines (621 sloc) 27.414 kB
"""
Routines for traversing graphs in compressed sparse format
"""
# Author: Jake Vanderplas -- <vanderplas@astro.washington.edu>
# License: BSD, (C) 2012
import numpy as np
cimport numpy as np
from scipy.sparse import csr_matrix, isspmatrix, isspmatrix_csr, isspmatrix_csc
from scipy.sparse.csgraph._validation import validate_graph
from scipy.sparse.csgraph._tools import reconstruct_path
cimport cython
from libc cimport stdlib
include 'parameters.pxi'
def connected_components(csgraph, directed=True, connection='weak',
return_labels=True):
"""
connected_components(csgraph, directed=True, connection='weak',
return_labels=True)
Analyze the connected components of a sparse graph
.. versionadded:: 0.11.0
Parameters
----------
csgraph : array_like or sparse matrix
The N x N matrix representing the compressed sparse graph. The input
csgraph will be converted to csr format for the calculation.
directed : bool, optional
If True (default), then operate on a directed graph: only
move from point i to point j along paths csgraph[i, j].
If False, then find the shortest path on an undirected graph: the
algorithm can progress from point i to j along csgraph[i, j] or
csgraph[j, i].
connection : str, optional
['weak'|'strong']. For directed graphs, the type of connection to
use. Nodes i and j are strongly connected if a path exists both
from i to j and from j to i. Nodes i and j are weakly connected if
only one of these paths exists. If directed == False, this keyword
is not referenced.
return_labels : str, optional
If True (default), then return the labels for each of the connected
components.
Returns
-------
n_components: int
The number of connected components.
labels: ndarray
The length-N array of labels of the connected components.
"""
if connection.lower() not in ['weak', 'strong']:
raise ValueError("connection must be 'weak' or 'strong'")
# weak connections <=> components of undirected graph
if connection.lower() == 'weak':
directed = False
csgraph = validate_graph(csgraph, directed,
dense_output=False)
labels = np.empty(csgraph.shape[0], dtype=ITYPE)
labels.fill(NULL_IDX)
if directed:
n_components = _connected_components_directed(csgraph.indices,
csgraph.indptr,
labels)
else:
csgraph_T = csgraph.T.tocsr()
n_components = _connected_components_undirected(csgraph.indices,
csgraph.indptr,
csgraph_T.indices,
csgraph_T.indptr,
labels)
if return_labels:
return n_components, labels
else:
return n_components
def breadth_first_tree(csgraph, i_start, directed=True):
r"""
breadth_first_tree(csgraph, i_start, directed=True)
Return the tree generated by a breadth-first search
Note that a breadth-first tree from a specified node is unique.
.. versionadded:: 0.11.0
Parameters
----------
csgraph : array_like or sparse matrix
The N x N matrix representing the compressed sparse graph. The input
csgraph will be converted to csr format for the calculation.
i_start : int
The index of starting node.
directed : bool, optional
If True (default), then operate on a directed graph: only
move from point i to point j along paths csgraph[i, j].
If False, then find the shortest path on an undirected graph: the
algorithm can progress from point i to j along csgraph[i, j] or
csgraph[j, i].
Returns
-------
cstree : csr matrix
The N x N directed compressed-sparse representation of the breadth-
first tree drawn from csgraph, starting at the specified node.
Examples
--------
The following example shows the computation of a depth-first tree
over a simple four-component graph, starting at node 0::
input graph breadth first tree from (0)
(0) (0)
/ \ / \
3 8 3 8
/ \ / \
(3)---5---(1) (3) (1)
\ / /
6 2 2
\ / /
(2) (2)
In compressed sparse representation, the solution looks like this:
>>> from scipy.sparse import csr_matrix
>>> from scipy.sparse.csgraph import breadth_first_tree
>>> X = csr_matrix([[0, 8, 0, 3],
... [0, 0, 2, 5],
... [0, 0, 0, 6],
... [0, 0, 0, 0]])
>>> Tcsr = breadth_first_tree(X, 0, directed=False)
>>> Tcsr.toarray().astype(int)
array([[0, 8, 0, 3],
[0, 0, 2, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]])
Note that the resulting graph is a Directed Acyclic Graph which spans
the graph. A breadth-first tree from a given node is unique.
"""
node_list, predecessors = breadth_first_order(csgraph, i_start,
directed, True)
return reconstruct_path(csgraph, predecessors, directed)
def depth_first_tree(csgraph, i_start, directed=True):
r"""
depth_first_tree(csgraph, i_start, directed=True)
Return a tree generated by a depth-first search.
Note that a tree generated by a depth-first search is not unique:
it depends on the order that the children of each node are searched.
.. versionadded:: 0.11.0
Parameters
----------
csgraph : array_like or sparse matrix
The N x N matrix representing the compressed sparse graph. The input
csgraph will be converted to csr format for the calculation.
i_start : int
The index of starting node.
directed : bool, optional
If True (default), then operate on a directed graph: only
move from point i to point j along paths csgraph[i, j].
If False, then find the shortest path on an undirected graph: the
algorithm can progress from point i to j along csgraph[i, j] or
csgraph[j, i].
Returns
-------
cstree : csr matrix
The N x N directed compressed-sparse representation of the depth-
first tree drawn from csgraph, starting at the specified node.
Examples
--------
The following example shows the computation of a depth-first tree
over a simple four-component graph, starting at node 0::
input graph depth first tree from (0)
(0) (0)
/ \ \
3 8 8
/ \ \
(3)---5---(1) (3) (1)
\ / \ /
6 2 6 2
\ / \ /
(2) (2)
In compressed sparse representation, the solution looks like this:
>>> from scipy.sparse import csr_matrix
>>> from scipy.sparse.csgraph import depth_first_tree
>>> X = csr_matrix([[0, 8, 0, 3],
... [0, 0, 2, 5],
... [0, 0, 0, 6],
... [0, 0, 0, 0]])
>>> Tcsr = depth_first_tree(X, 0, directed=False)
>>> Tcsr.toarray().astype(int)
array([[0, 8, 0, 0],
[0, 0, 2, 0],
[0, 0, 0, 6],
[0, 0, 0, 0]])
Note that the resulting graph is a Directed Acyclic Graph which spans
the graph. Unlike a breadth-first tree, a depth-first tree of a given
graph is not unique if the graph contains cycles. If the above solution
had begun with the edge connecting nodes 0 and 3, the result would have
been different.
"""
node_list, predecessors = depth_first_order(csgraph, i_start,
directed, True)
return reconstruct_path(csgraph, predecessors, directed)
def breadth_first_order(csgraph, i_start,
directed=True, return_predecessors=True):
"""
breadth_first_order(csgraph, i_start, directed=True, return_predecessors=True)
Return a breadth-first ordering starting with specified node.
Note that a breadth-first order is not unique, but the tree which it
generates is unique.
.. versionadded:: 0.11.0
Parameters
----------
csgraph : array_like or sparse matrix
The N x N compressed sparse graph. The input csgraph will be
converted to csr format for the calculation.
i_start : int
The index of starting node.
directed : bool, optional
If True (default), then operate on a directed graph: only
move from point i to point j along paths csgraph[i, j].
If False, then find the shortest path on an undirected graph: the
algorithm can progress from point i to j along csgraph[i, j] or
csgraph[j, i].
return_predecessors : bool, optional
If True (default), then return the predecesor array (see below).
Returns
-------
node_array : ndarray, one dimension
The breadth-first list of nodes, starting with specified node. The
length of node_array is the number of nodes reachable from the
specified node.
predecessors : ndarray, one dimension
Returned only if return_predecessors is True.
The length-N list of predecessors of each node in a breadth-first
tree. If node i is in the tree, then its parent is given by
predecessors[i]. If node i is not in the tree (and for the parent
node) then predecessors[i] = -9999.
"""
global NULL_IDX
csgraph = validate_graph(csgraph, directed, dense_output=False)
cdef int N = csgraph.shape[0]
cdef np.ndarray node_list = np.empty(N, dtype=ITYPE)
cdef np.ndarray predecessors = np.empty(N, dtype=ITYPE)
node_list.fill(NULL_IDX)
predecessors.fill(NULL_IDX)
if directed:
length = _breadth_first_directed(i_start,
csgraph.indices, csgraph.indptr,
node_list, predecessors)
else:
csgraph_T = csgraph.T.tocsr()
length = _breadth_first_undirected(i_start,
csgraph.indices, csgraph.indptr,
csgraph_T.indices, csgraph_T.indptr,
node_list, predecessors)
if return_predecessors:
return node_list[:length], predecessors
else:
return node_list[:length]
cdef unsigned int _breadth_first_directed(
unsigned int head_node,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indices,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr,
np.ndarray[ITYPE_t, ndim=1, mode='c'] node_list,
np.ndarray[ITYPE_t, ndim=1, mode='c'] predecessors):
# Inputs:
# head_node: (input) index of the node from which traversal starts
# indices: (input) CSR indices of graph
# indptr: (input) CSR indptr of graph
# node_list: (output) breadth-first list of nodes
# predecessors: (output) list of predecessors of nodes in breadth-first
# tree. Should be initialized to NULL_IDX
# Returns:
# n_nodes: the number of nodes in the breadth-first tree
global NULL_IDX
cdef unsigned int i, pnode, cnode
cdef unsigned int i_nl, i_nl_end
cdef unsigned int N = node_list.shape[0]
node_list[0] = head_node
i_nl = 0
i_nl_end = 1
while i_nl < i_nl_end:
pnode = node_list[i_nl]
for i from indptr[pnode] <= i < indptr[pnode + 1]:
cnode = indices[i]
if (cnode == head_node):
continue
elif (predecessors[cnode] == NULL_IDX):
node_list[i_nl_end] = cnode
predecessors[cnode] = pnode
i_nl_end += 1
i_nl += 1
return i_nl
cdef unsigned int _breadth_first_undirected(
unsigned int head_node,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indices1,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr1,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indices2,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr2,
np.ndarray[ITYPE_t, ndim=1, mode='c'] node_list,
np.ndarray[ITYPE_t, ndim=1, mode='c'] predecessors):
# Inputs:
# head_node: (input) index of the node from which traversal starts
# indices1: (input) CSR indices of graph
# indptr1: (input) CSR indptr of graph
# indices2: (input) CSR indices of transposed graph
# indptr2: (input) CSR indptr of transposed graph
# node_list: (output) breadth-first list of nodes
# predecessors: (output) list of predecessors of nodes in breadth-first
# tree. Should be initialized to NULL_IDX
# Returns:
# n_nodes: the number of nodes in the breadth-first tree
global NULL_IDX
cdef unsigned int i, pnode, cnode
cdef unsigned int i_nl, i_nl_end
cdef unsigned int N = node_list.shape[0]
node_list[0] = head_node
i_nl = 0
i_nl_end = 1
while i_nl < i_nl_end:
pnode = node_list[i_nl]
for i from indptr1[pnode] <= i < indptr1[pnode + 1]:
cnode = indices1[i]
if (cnode == head_node):
continue
elif (predecessors[cnode] == NULL_IDX):
node_list[i_nl_end] = cnode
predecessors[cnode] = pnode
i_nl_end += 1
for i from indptr2[pnode] <= i < indptr2[pnode + 1]:
cnode = indices2[i]
if (cnode == head_node):
continue
elif (predecessors[cnode] == NULL_IDX):
node_list[i_nl_end] = cnode
predecessors[cnode] = pnode
i_nl_end += 1
i_nl += 1
return i_nl
def depth_first_order(csgraph, i_start,
directed=True, return_predecessors=True):
"""
depth_first_order(csgraph, i_start, directed=True, return_predecessors=True)
Return a depth-first ordering starting with specified node.
Note that a depth-first order is not unique. Furthermore, for graphs
with cycles, the tree generated by a depth-first search is not
unique either.
.. versionadded:: 0.11.0
Parameters
----------
csgraph : array_like or sparse matrix
The N x N compressed sparse graph. The input csgraph will be
converted to csr format for the calculation.
i_start : int
The index of starting node.
directed : bool, optional
If True (default), then operate on a directed graph: only
move from point i to point j along paths csgraph[i, j].
If False, then find the shortest path on an undirected graph: the
algorithm can progress from point i to j along csgraph[i, j] or
csgraph[j, i].
return_predecessors : bool, optional
If True (default), then return the predecesor array (see below).
Returns
-------
node_array : ndarray, one dimension
The breadth-first list of nodes, starting with specified node. The
length of node_array is the number of nodes reachable from the
specified node.
predecessors : ndarray, one dimension
Returned only if return_predecessors is True.
The length-N list of predecessors of each node in a breadth-first
tree. If node i is in the tree, then its parent is given by
predecessors[i]. If node i is not in the tree (and for the parent
node) then predecessors[i] = -9999.
"""
global NULL_IDX
csgraph = validate_graph(csgraph, directed, dense_output=False)
cdef int N = csgraph.shape[0]
node_list = np.empty(N, dtype=ITYPE)
predecessors = np.empty(N, dtype=ITYPE)
root_list = np.empty(N, dtype=ITYPE)
flag = np.zeros(N, dtype=ITYPE)
node_list.fill(NULL_IDX)
predecessors.fill(NULL_IDX)
root_list.fill(NULL_IDX)
if directed:
length = _depth_first_directed(i_start,
csgraph.indices, csgraph.indptr,
node_list, predecessors,
root_list, flag)
else:
csgraph_T = csgraph.T.tocsr()
length = _depth_first_undirected(i_start,
csgraph.indices, csgraph.indptr,
csgraph_T.indices, csgraph_T.indptr,
node_list, predecessors,
root_list, flag)
if return_predecessors:
return node_list[:length], predecessors
else:
return node_list[:length]
cdef unsigned int _depth_first_directed(
unsigned int head_node,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indices,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr,
np.ndarray[ITYPE_t, ndim=1, mode='c'] node_list,
np.ndarray[ITYPE_t, ndim=1, mode='c'] predecessors,
np.ndarray[ITYPE_t, ndim=1, mode='c'] root_list,
np.ndarray[ITYPE_t, ndim=1, mode='c'] flag):
cdef unsigned int i, j, i_nl_end, cnode, pnode
cdef unsigned int N = node_list.shape[0]
cdef int no_children, i_root
node_list[0] = head_node
root_list[0] = head_node
i_root = 0
i_nl_end = 1
flag[head_node] = 1
while i_root >= 0:
pnode = root_list[i_root]
no_children = True
for i from indptr[pnode] <= i < indptr[pnode + 1]:
cnode = indices[i]
if flag[cnode]:
continue
else:
i_root += 1
root_list[i_root] = cnode
node_list[i_nl_end] = cnode
predecessors[cnode] = pnode
flag[cnode] = 1
i_nl_end += 1
no_children = False
break
if i_nl_end == N:
break
if no_children:
i_root -= 1
return i_nl_end
cdef unsigned int _depth_first_undirected(
unsigned int head_node,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indices1,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr1,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indices2,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr2,
np.ndarray[ITYPE_t, ndim=1, mode='c'] node_list,
np.ndarray[ITYPE_t, ndim=1, mode='c'] predecessors,
np.ndarray[ITYPE_t, ndim=1, mode='c'] root_list,
np.ndarray[ITYPE_t, ndim=1, mode='c'] flag):
cdef unsigned int i, j, i_nl_end, cnode, pnode
cdef unsigned int N = node_list.shape[0]
cdef int no_children, i_root
node_list[0] = head_node
root_list[0] = head_node
i_root = 0
i_nl_end = 1
flag[head_node] = 1
while i_root >= 0:
pnode = root_list[i_root]
no_children = True
for i from indptr1[pnode] <= i < indptr1[pnode + 1]:
cnode = indices1[i]
if flag[cnode]:
continue
else:
i_root += 1
root_list[i_root] = cnode
node_list[i_nl_end] = cnode
predecessors[cnode] = pnode
flag[cnode] = 1
i_nl_end += 1
no_children = False
break
if no_children:
for i from indptr2[pnode] <= i < indptr2[pnode + 1]:
cnode = indices2[i]
if flag[cnode]:
continue
else:
i_root += 1
root_list[i_root] = cnode
node_list[i_nl_end] = cnode
predecessors[cnode] = pnode
flag[cnode] = 1
i_nl_end += 1
no_children = False
break
if i_nl_end == N:
break
if no_children:
i_root -= 1
return i_nl_end
cdef int _connected_components_directed(
np.ndarray[ITYPE_t, ndim=1, mode='c'] indices,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr,
np.ndarray[ITYPE_t, ndim=1, mode='c'] labels):
"""
Uses an iterative version of Tarjan's algorithm to find the
strongly connected components of a directed graph represented as a
sparse matrix (scipy.sparse.csc_matrix or scipy.sparse.csr_matrix).
The algorithmic complexity is for a graph with E edges and V
vertices is O(E + V).
The storage requirement is 2*V integer arrays.
Uses an iterative version of the algorithm described here:
http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.102.1707
"""
cdef int v, w, index, low_v, low_w, label, j
cdef int SS_head, root, stack_head, f, b
cdef int VOID = -1
cdef int END = -2
cdef int N = labels.shape[0]
cdef np.ndarray[ITYPE_t, ndim=1, mode="c"] SS, lowlinks, stack_f, stack_b
lowlinks = labels
SS = np.ndarray((N,), dtype=ITYPE)
stack_b = np.ndarray((N,), dtype=ITYPE)
stack_f = SS
# The stack of nodes which have been backtracked and are in the current SCC
SS.fill(VOID)
SS_head = END
# The array containing the lowlinks of nodes not yet assigned an SCC. Shares
# memory with the labels array, since they are not used at the same time.
lowlinks.fill(VOID)
# The DFS stack. Stored with both forwards and backwards pointers to allow
# us to move a node up to the top of the stack, as we only need to visit
# each node once. stack_f shares memory with SS, as nodes aren't put on the
# SS stack until after they've been popped from the DFS stack.
stack_head = END
stack_f.fill(VOID)
stack_b.fill(VOID)
index = 0
# Count SCC labels backwards so as not to class with lowlinks values.
label = N - 1
for v in range(N):
if lowlinks[v] == VOID:
# DFS-stack push
stack_head = v
stack_f[v] = END
stack_b[v] = END
while stack_head != END:
v = stack_head
if lowlinks[v] == VOID:
lowlinks[v] = index
index += 1
# Add successor nodes
for j from indptr[v] <= j < indptr[v+1]:
w = indices[j]
if lowlinks[w] == VOID:
# DFS-stack push
if stack_f[w] != VOID:
# w is already inside the stack, so excise it.
f = stack_f[w]
b = stack_b[w]
if b != END:
stack_f[b] = f
if f != END:
stack_b[f] = b
stack_f[w] = stack_head
stack_b[w] = END
stack_b[stack_head] = w
stack_head = w
else:
# DFS-stack pop
stack_head = stack_f[v]
if stack_head >= 0:
stack_b[stack_head] = END
stack_f[v] = VOID
stack_b[v] = VOID
root = 1 # True
low_v = lowlinks[v]
for j from indptr[v] <= j < indptr[v+1]:
low_w = lowlinks[indices[j]]
if low_w < low_v:
low_v = low_w
root = 0 # False
lowlinks[v] = low_v
if root: # Found a root node
index -= 1
# while S not empty and rindex[v] <= rindex[top[S]
while SS_head != END and lowlinks[v] <= lowlinks[SS_head]:
w = SS_head # w = pop(S)
SS_head = SS[w]
SS[w] = VOID
labels[w] = label # rindex[w] = c
index -= 1 # index = index - 1
labels[v] = label # rindex[v] = c
label -= 1 # c = c - 1
else:
SS[v] = SS_head # push(S, v)
SS_head = v
# labels count down from N-1 to zero. Modify them so they
# count upward from 0
labels *= -1
labels += (N - 1)
return (N - 1) - label
cdef int _connected_components_undirected(
np.ndarray[ITYPE_t, ndim=1, mode='c'] indices1,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr1,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indices2,
np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr2,
np.ndarray[ITYPE_t, ndim=1, mode='c'] labels):
cdef int v, w, j, label, SS_head
cdef int N = labels.shape[0]
cdef int VOID = -1
cdef int END = -2
labels.fill(VOID)
label = 0
# Share memory for the stack and labels, since labels are only
# applied once a node has been popped from the stack.
cdef np.ndarray[ITYPE_t, ndim=1, mode="c"] SS = labels
SS_head = END
for v in range(N):
if labels[v] == VOID:
# SS.push(v)
SS_head = v
SS[v] = END
while SS_head != END:
# v = SS.pop()
v = SS_head
SS_head = SS[v]
labels[v] = label
# Push children onto the stack if they havn't been
# seen at all yet.
for j from indptr1[v] <= j < indptr1[v+1]:
w = indices1[j]
if SS[w] == VOID:
SS[w] = SS_head
SS_head = w
for j from indptr2[v] <= j < indptr2[v+1]:
w = indices2[j]
if SS[w] == VOID:
SS[w] = SS_head
SS_head = w
label += 1
return label
Jump to Line
Something went wrong with that request. Please try again.