Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

697 lines (579 sloc) 24.869 kB
"""
Routines for traversing graphs in compressed sparse format
"""
# Author: Jake Vanderplas -- <vanderplas@astro.washington.edu>
# License: BSD, (C) 2012
import numpy as np
cimport numpy as np
from scipy.sparse import csr_matrix, isspmatrix, isspmatrix_csr, isspmatrix_csc
from scipy.sparse.csgraph._validation import validate_graph
from scipy.sparse.csgraph._tools import reconstruct_path
cimport cython
from libc cimport stdlib
include 'parameters.pxi'
def connected_components(csgraph, directed=True, connection='weak',
                         return_labels=True):
    """
    connected_components(csgraph, directed=True, connection='weak', return_labels=True)

    Analyze the connected components of a sparse graph

    Parameters
    ----------
    csgraph: array_like or sparse matrix
        The N x N matrix representing the compressed sparse graph.  The input
        csgraph will be converted to csr format for the calculation.
    directed: bool, optional
        if True (default), then operate on a directed graph: only
        move from point i to point j along paths csgraph[i, j].
        if False, then operate on an undirected graph: nodes i and j are
        linked if either csgraph[i, j] or csgraph[j, i] is present.
    connection: string, optional
        ['weak'|'strong'] (case-insensitive).  For directed graphs, the type
        of connection to use.  Nodes i and j are strongly connected if a
        path exists both from i to j and from j to i.  Nodes i and j are
        weakly connected if they are connected once edge directions are
        ignored, i.e. 'weak' yields the components of the undirected graph.
        If directed == False, the value is still validated but does not
        affect the result.
    return_labels: bool, optional
        if True (default), then return the labels for each of the connected
        components.

    Returns
    -------
    n_components: integer
        The number of connected components.
    labels: ndarray
        The length-N array of labels of the connected components.
        Returned only if return_labels is True.

    Raises
    ------
    ValueError
        If `connection` is neither 'weak' nor 'strong'.
    """
    # Normalise once instead of lower-casing at every comparison.
    connection = connection.lower()
    if connection not in ('weak', 'strong'):
        raise ValueError("connection must be 'weak' or 'strong'")

    # weak connections <=> components of undirected graph
    if connection == 'weak':
        directed = False

    csgraph = validate_graph(csgraph, directed,
                             dense_output=False)

    # Every node starts unlabeled (NULL_IDX); the helpers fill this in place.
    labels = np.empty(csgraph.shape[0], dtype=ITYPE)
    labels.fill(NULL_IDX)

    if directed:
        n_components = _connected_components_directed(csgraph.indices,
                                                      csgraph.indptr,
                                                      labels)
    else:
        # The undirected search follows edges in both directions, so it
        # needs the CSR structure of the transpose as well.
        csgraph_T = csgraph.T.tocsr()
        n_components = _connected_components_undirected(csgraph.indices,
                                                        csgraph.indptr,
                                                        csgraph_T.indices,
                                                        csgraph_T.indptr,
                                                        labels)

    if return_labels:
        return n_components, labels
    else:
        return n_components
def breadth_first_tree(csgraph, i_start, directed=True):
    r"""
    breadth_first_tree(csgraph, i_start, directed=True)

    Return the tree generated by a breadth-first search

    Note that a breadth-first tree from a specified node is unique.

    Parameters
    ----------
    csgraph: array_like or sparse matrix
        The N x N matrix representing the compressed sparse graph.  The input
        csgraph will be converted to csr format for the calculation.
    i_start: int
        The index of starting node.
    directed: bool, optional
        if True (default), then operate on a directed graph: only
        move from point i to point j along paths csgraph[i, j].
        if False, then operate on an undirected graph: the
        algorithm can progress from point i to j along csgraph[i, j] or
        csgraph[j, i].

    Returns
    -------
    cstree : csr matrix
        The N x N directed compressed-sparse representation of the breadth-
        first tree drawn from csgraph, starting at the specified node.

    Examples
    --------
    The following example shows the computation of a breadth-first tree
    over a simple four-component graph, starting at node 0::

         input graph          breadth first tree from (0)

             (0)                         (0)
            /   \                       /   \
           3     8                     3     8
          /       \                   /       \
        (3)---5---(1)               (3)       (1)
          \       /                           /
           6     2                           2
            \   /                           /
             (2)                         (2)

    In compressed sparse representation, the solution looks like this:

    >>> from scipy.sparse import csr_matrix
    >>> from scipy.sparse.csgraph import breadth_first_tree
    >>> X = csr_matrix([[0, 8, 0, 3],
    ...                 [0, 0, 2, 5],
    ...                 [0, 0, 0, 6],
    ...                 [0, 0, 0, 0]])
    >>> Tcsr = breadth_first_tree(X, 0, directed=False)
    >>> Tcsr.toarray().astype(int)
    array([[0, 8, 0, 3],
           [0, 0, 2, 0],
           [0, 0, 0, 0],
           [0, 0, 0, 0]])

    Note that the resulting graph is a Directed Acyclic Graph which spans
    the graph.  A breadth-first tree from a given node is unique.
    """
    # Only the predecessor array is needed to rebuild the tree; the visit
    # order itself is discarded.
    _, predecessors = breadth_first_order(csgraph, i_start,
                                          directed=directed,
                                          return_predecessors=True)
    return reconstruct_path(csgraph, predecessors, directed)
def depth_first_tree(csgraph, i_start, directed=True):
    r"""
    depth_first_tree(csgraph, i_start, directed=True)

    Return a tree generated by a depth-first search.

    Note that a tree generated by a depth-first search is not unique:
    it depends on the order that the children of each node are searched.

    Parameters
    ----------
    csgraph: array_like or sparse matrix
        The N x N matrix representing the compressed sparse graph.  The input
        csgraph will be converted to csr format for the calculation.
    i_start: int
        The index of starting node.
    directed: bool, optional
        if True (default), then operate on a directed graph: only
        move from point i to point j along paths csgraph[i, j].
        if False, then operate on an undirected graph: the
        algorithm can progress from point i to j along csgraph[i, j] or
        csgraph[j, i].

    Returns
    -------
    cstree : csr matrix
        The N x N directed compressed-sparse representation of the depth-
        first tree drawn from csgraph, starting at the specified node.

    Examples
    --------
    The following example shows the computation of a depth-first tree
    over a simple four-component graph, starting at node 0::

         input graph           depth first tree from (0)

             (0)                         (0)
            /   \                           \
           3     8                           8
          /       \                           \
        (3)---5---(1)               (3)       (1)
          \       /                   \       /
           6     2                     6     2
            \   /                       \   /
             (2)                         (2)

    In compressed sparse representation, the solution looks like this:

    >>> from scipy.sparse import csr_matrix
    >>> from scipy.sparse.csgraph import depth_first_tree
    >>> X = csr_matrix([[0, 8, 0, 3],
    ...                 [0, 0, 2, 5],
    ...                 [0, 0, 0, 6],
    ...                 [0, 0, 0, 0]])
    >>> Tcsr = depth_first_tree(X, 0, directed=False)
    >>> Tcsr.toarray().astype(int)
    array([[0, 8, 0, 0],
           [0, 0, 2, 0],
           [0, 0, 0, 6],
           [0, 0, 0, 0]])

    Note that the resulting graph is a Directed Acyclic Graph which spans
    the graph.  Unlike a breadth-first tree, a depth-first tree of a given
    graph is not unique if the graph contains cycles.  If the above solution
    had begun with the edge connecting nodes 0 and 3, the result would have
    been different.
    """
    # Only the predecessor array is needed to rebuild the tree; the visit
    # order itself is discarded.
    _, predecessors = depth_first_order(csgraph, i_start,
                                        directed=directed,
                                        return_predecessors=True)
    return reconstruct_path(csgraph, predecessors, directed)
def breadth_first_order(csgraph, i_start,
                        directed=True, return_predecessors=True):
    """
    breadth_first_order(csgraph, i_start, directed=True, return_predecessors=True)

    Return a breadth-first ordering starting with specified node.

    Note that a breadth-first order is not unique, but the tree which it
    generates is unique.

    Parameters
    ----------
    csgraph: array_like or sparse matrix
        The N x N compressed sparse graph.  The input csgraph will be
        converted to csr format for the calculation.
    i_start: int
        The index of starting node.  Must be smaller than N.
    directed: bool, optional
        If True (default), then operate on a directed graph: only
        move from point i to point j along paths csgraph[i, j].
        If False, then operate on an undirected graph: the
        algorithm can progress from point i to j along csgraph[i, j] or
        csgraph[j, i].
    return_predecessors: bool, optional
        If True (default), then return the predecessor array (see below).

    Returns
    -------
    node_array: ndarray, one dimension
        The breadth-first list of nodes, starting with specified node.  The
        length of node_array is the number of nodes reachable from the
        specified node.
    predecessors: ndarray, one dimension
        Returned only if return_predecessors is True.
        The length-N list of predecessors of each node in a breadth-first
        tree.  If node i is in the tree, then its parent is given by
        predecessors[i]. If node i is not in the tree (and for the start
        node itself) then predecessors[i] = -9999.

    Raises
    ------
    ValueError
        If `i_start` is not smaller than the number of nodes (this includes
        any start index on an empty graph).
    """
    global NULL_IDX

    csgraph = validate_graph(csgraph, directed, dense_output=False)
    cdef int N = csgraph.shape[0]

    cdef np.ndarray node_list = np.empty(N, dtype=ITYPE)
    cdef np.ndarray predecessors = np.empty(N, dtype=ITYPE)
    node_list.fill(NULL_IDX)
    predecessors.fill(NULL_IDX)

    # The typed helpers index indptr/node_list with the start node without
    # any range check of their own, so reject an out-of-range start here.
    # Negative values are left to the conversion to ``unsigned int`` that
    # happens when the helper is called (presumably OverflowError, exactly
    # as before this check existed -- TODO confirm).
    if i_start >= N:
        raise ValueError("i_start=%r is out of range for a graph with "
                         "%d nodes" % (i_start, N))

    if directed:
        length = _breadth_first_directed(i_start,
                                         csgraph.indices, csgraph.indptr,
                                         node_list, predecessors)
    else:
        # Undirected traversal also follows incoming edges, which are the
        # rows of the transposed graph.
        csgraph_T = csgraph.T.tocsr()
        length = _breadth_first_undirected(i_start,
                                           csgraph.indices, csgraph.indptr,
                                           csgraph_T.indices, csgraph_T.indptr,
                                           node_list, predecessors)

    # Only the first `length` entries of node_list were filled in.
    if return_predecessors:
        return node_list[:length], predecessors
    else:
        return node_list[:length]
cdef unsigned int _breadth_first_directed(
                           unsigned int head_node,
                           np.ndarray[ITYPE_t, ndim=1, mode='c'] indices,
                           np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr,
                           np.ndarray[ITYPE_t, ndim=1, mode='c'] node_list,
                           np.ndarray[ITYPE_t, ndim=1, mode='c'] predecessors):
    # Breadth-first traversal of a directed graph given in CSR form.
    #
    # Arguments:
    #  head_node: (input) node at which the traversal starts
    #  indices: (input) CSR indices of graph
    #  indptr:  (input) CSR indptr of graph
    #  node_list: (output) nodes in the order they were discovered
    #  predecessors: (output) parent of each discovered node in the
    #                breadth-first tree.  Must arrive filled with NULL_IDX;
    #                the entry for head_node is never written.
    # Returns:
    #  the number of nodes written to node_list
    global NULL_IDX

    cdef unsigned int edge, parent, child
    # node_list doubles as the FIFO queue: entries in [q_head, q_tail) have
    # been discovered but not yet expanded.
    cdef unsigned int q_head = 0, q_tail = 1

    node_list[0] = head_node

    while q_head < q_tail:
        parent = node_list[q_head]
        q_head += 1

        for edge from indptr[parent] <= edge < indptr[parent + 1]:
            child = indices[edge]
            # head_node keeps a NULL_IDX predecessor, so it has to be
            # excluded explicitly; any other node is new exactly when its
            # predecessor is still NULL_IDX.
            if child != head_node and predecessors[child] == NULL_IDX:
                node_list[q_tail] = child
                predecessors[child] = parent
                q_tail += 1

    return q_head
cdef unsigned int _breadth_first_undirected(
                           unsigned int head_node,
                           np.ndarray[ITYPE_t, ndim=1, mode='c'] indices1,
                           np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr1,
                           np.ndarray[ITYPE_t, ndim=1, mode='c'] indices2,
                           np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr2,
                           np.ndarray[ITYPE_t, ndim=1, mode='c'] node_list,
                           np.ndarray[ITYPE_t, ndim=1, mode='c'] predecessors):
    # Breadth-first traversal that follows edges in both directions.
    #
    # Arguments:
    #  head_node: (input) node at which the traversal starts
    #  indices1: (input) CSR indices of graph
    #  indptr1:  (input) CSR indptr of graph
    #  indices2: (input) CSR indices of transposed graph
    #  indptr2:  (input) CSR indptr of transposed graph
    #  node_list: (output) nodes in the order they were discovered
    #  predecessors: (output) parent of each discovered node in the
    #                breadth-first tree.  Must arrive filled with NULL_IDX;
    #                the entry for head_node is never written.
    # Returns:
    #  the number of nodes written to node_list
    global NULL_IDX

    cdef unsigned int edge, parent, child
    # node_list doubles as the FIFO queue: entries in [q_head, q_tail) have
    # been discovered but not yet expanded.
    cdef unsigned int q_head = 0, q_tail = 1

    node_list[0] = head_node

    while q_head < q_tail:
        parent = node_list[q_head]
        q_head += 1

        # Outgoing edges: row `parent` of the graph.
        for edge from indptr1[parent] <= edge < indptr1[parent + 1]:
            child = indices1[edge]
            if child != head_node and predecessors[child] == NULL_IDX:
                node_list[q_tail] = child
                predecessors[child] = parent
                q_tail += 1

        # Incoming edges: row `parent` of the transposed graph.
        for edge from indptr2[parent] <= edge < indptr2[parent + 1]:
            child = indices2[edge]
            if child != head_node and predecessors[child] == NULL_IDX:
                node_list[q_tail] = child
                predecessors[child] = parent
                q_tail += 1

    return q_head
def depth_first_order(csgraph, i_start,
                      directed=True, return_predecessors=True):
    """
    depth_first_order(csgraph, i_start, directed=True, return_predecessors=True)

    Return a depth-first ordering starting with specified node.

    Note that a depth-first order is not unique.  Furthermore, for graphs
    with cycles, the tree generated by a depth-first search is not
    unique either.

    Parameters
    ----------
    csgraph: array_like or sparse matrix
        The N x N compressed sparse graph.  The input csgraph will be
        converted to csr format for the calculation.
    i_start: int
        The index of starting node.  Must be smaller than N.
    directed: bool, optional
        If True (default), then operate on a directed graph: only
        move from point i to point j along paths csgraph[i, j].
        If False, then operate on an undirected graph: the
        algorithm can progress from point i to j along csgraph[i, j] or
        csgraph[j, i].
    return_predecessors: bool, optional
        If True (default), then return the predecessor array (see below).

    Returns
    -------
    node_array: ndarray, one dimension
        The depth-first list of nodes, starting with specified node.  The
        length of node_array is the number of nodes reachable from the
        specified node.
    predecessors: ndarray, one dimension
        Returned only if return_predecessors is True.
        The length-N list of predecessors of each node in a depth-first
        tree.  If node i is in the tree, then its parent is given by
        predecessors[i]. If node i is not in the tree (and for the start
        node itself) then predecessors[i] = -9999.

    Raises
    ------
    ValueError
        If `i_start` is not smaller than the number of nodes (this includes
        any start index on an empty graph).
    """
    global NULL_IDX

    csgraph = validate_graph(csgraph, directed, dense_output=False)
    cdef int N = csgraph.shape[0]

    # The typed helpers index flag/indptr with the start node without any
    # range check of their own, so reject an out-of-range start here.
    # Negative values are left to the conversion to ``unsigned int`` that
    # happens when the helper is called (presumably OverflowError, exactly
    # as before this check existed -- TODO confirm).
    if i_start >= N:
        raise ValueError("i_start=%r is out of range for a graph with "
                         "%d nodes" % (i_start, N))

    node_list = np.empty(N, dtype=ITYPE)
    predecessors = np.empty(N, dtype=ITYPE)
    # Scratch space for the helpers: root_list is the explicit DFS stack
    # and flag marks the nodes already visited.
    root_list = np.empty(N, dtype=ITYPE)
    flag = np.zeros(N, dtype=ITYPE)
    node_list.fill(NULL_IDX)
    predecessors.fill(NULL_IDX)
    root_list.fill(NULL_IDX)

    if directed:
        length = _depth_first_directed(i_start,
                                       csgraph.indices, csgraph.indptr,
                                       node_list, predecessors,
                                       root_list, flag)
    else:
        # Undirected traversal also follows incoming edges, which are the
        # rows of the transposed graph.
        csgraph_T = csgraph.T.tocsr()
        length = _depth_first_undirected(i_start,
                                         csgraph.indices, csgraph.indptr,
                                         csgraph_T.indices, csgraph_T.indptr,
                                         node_list, predecessors,
                                         root_list, flag)

    # Only the first `length` entries of node_list were filled in.
    if return_predecessors:
        return node_list[:length], predecessors
    else:
        return node_list[:length]
cdef unsigned int _depth_first_directed(
                           unsigned int head_node,
                           np.ndarray[ITYPE_t, ndim=1, mode='c'] indices,
                           np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr,
                           np.ndarray[ITYPE_t, ndim=1, mode='c'] node_list,
                           np.ndarray[ITYPE_t, ndim=1, mode='c'] predecessors,
                           np.ndarray[ITYPE_t, ndim=1, mode='c'] root_list,
                           np.ndarray[ITYPE_t, ndim=1, mode='c'] flag):
    # Iterative depth-first traversal of a directed graph in CSR form.
    #
    # Arguments:
    #  head_node: (input) node at which the traversal starts
    #  indices, indptr: (input) CSR structure of the graph
    #  node_list: (output) nodes in the order they were first visited
    #  predecessors: (output) parent of each visited node; the entry for
    #                head_node is not written
    #  root_list: (scratch) explicit stack holding the current search path
    #  flag: (in/out) non-zero marks a visited node; entries that are
    #        already non-zero on entry are treated as visited
    # Returns:
    #  the number of nodes written to node_list
    cdef unsigned int edge, n_found, child, parent
    cdef unsigned int n_nodes = node_list.shape[0]
    cdef int descended, depth

    node_list[0] = head_node
    root_list[0] = head_node
    flag[head_node] = 1
    depth = 0        # index of the top of the stack in root_list
    n_found = 1

    while depth >= 0:
        parent = root_list[depth]
        descended = False

        # Step into the first unvisited child, if any.  The scan restarts
        # from the beginning of the row each time `parent` is back on top.
        for edge from indptr[parent] <= edge < indptr[parent + 1]:
            child = indices[edge]
            if not flag[child]:
                depth += 1
                root_list[depth] = child
                node_list[n_found] = child
                predecessors[child] = parent
                flag[child] = 1
                n_found += 1
                descended = True
                break

        # Every node has been reached: nothing left to discover.
        if n_found == n_nodes:
            break

        # Dead end: backtrack to the previous node on the path.
        if not descended:
            depth -= 1

    return n_found
cdef unsigned int _depth_first_undirected(
                           unsigned int head_node,
                           np.ndarray[ITYPE_t, ndim=1, mode='c'] indices1,
                           np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr1,
                           np.ndarray[ITYPE_t, ndim=1, mode='c'] indices2,
                           np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr2,
                           np.ndarray[ITYPE_t, ndim=1, mode='c'] node_list,
                           np.ndarray[ITYPE_t, ndim=1, mode='c'] predecessors,
                           np.ndarray[ITYPE_t, ndim=1, mode='c'] root_list,
                           np.ndarray[ITYPE_t, ndim=1, mode='c'] flag):
    # Iterative depth-first traversal that follows edges in both directions.
    #
    # Arguments:
    #  head_node: (input) node at which the traversal starts
    #  indices1, indptr1: (input) CSR structure of the graph
    #  indices2, indptr2: (input) CSR structure of the transposed graph
    #  node_list: (output) nodes in the order they were first visited
    #  predecessors: (output) parent of each visited node; the entry for
    #                head_node is not written
    #  root_list: (scratch) explicit stack holding the current search path
    #  flag: (in/out) non-zero marks a visited node; entries that are
    #        already non-zero on entry are treated as visited
    # Returns:
    #  the number of nodes written to node_list
    cdef unsigned int edge, n_found, child, parent
    cdef unsigned int n_nodes = node_list.shape[0]
    cdef int descended, depth

    node_list[0] = head_node
    root_list[0] = head_node
    flag[head_node] = 1
    depth = 0        # index of the top of the stack in root_list
    n_found = 1

    while depth >= 0:
        parent = root_list[depth]
        descended = False

        # First look for an unvisited child along an outgoing edge ...
        for edge from indptr1[parent] <= edge < indptr1[parent + 1]:
            child = indices1[edge]
            if not flag[child]:
                depth += 1
                root_list[depth] = child
                node_list[n_found] = child
                predecessors[child] = parent
                flag[child] = 1
                n_found += 1
                descended = True
                break

        # ... and only if there was none, along an incoming edge.
        if not descended:
            for edge from indptr2[parent] <= edge < indptr2[parent + 1]:
                child = indices2[edge]
                if not flag[child]:
                    depth += 1
                    root_list[depth] = child
                    node_list[n_found] = child
                    predecessors[child] = parent
                    flag[child] = 1
                    n_found += 1
                    descended = True
                    break

        # Every node has been reached: nothing left to discover.
        if n_found == n_nodes:
            break

        # Dead end: backtrack to the previous node on the path.
        if not descended:
            depth -= 1

    return n_found
cdef int _connected_components_undirected(
                        np.ndarray[ITYPE_t, ndim=1, mode='c'] indices1,
                        np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr1,
                        np.ndarray[ITYPE_t, ndim=1, mode='c'] indices2,
                        np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr2,
                        np.ndarray[ITYPE_t, ndim=1, mode='c'] labels):
    # Label the connected components of an undirected graph.
    #
    # Arguments:
    #  indices1, indptr1: (input) CSR structure of the graph
    #  indices2, indptr2: (input) CSR structure of the transposed graph
    #  labels: (in/out) one entry per node.  Entries that are negative on
    #          entry are treated as unlabeled and receive a component
    #          label 0, 1, 2, ... in order of discovery.
    # Returns:
    #  the number of components that were labeled
    cdef unsigned int N = labels.shape[0]
    cdef unsigned int i, n_found, label = 0
    cdef np.ndarray node_list = np.empty(N, dtype=ITYPE)
    cdef np.ndarray predecessors = np.empty(N, dtype=ITYPE)
    cdef np.ndarray root_list = np.empty(N, dtype=ITYPE)
    cdef np.ndarray flag = np.zeros(N, dtype=ITYPE)

    root_list.fill(NULL_IDX)
    predecessors.fill(NULL_IDX)
    node_list.fill(NULL_IDX)

    for i from 0 <= i < N:
        if labels[i] < 0:
            # One depth-first search from an unlabeled node visits exactly
            # its component and records the members in node_list[:n_found].
            n_found = _depth_first_undirected(i, indices1, indptr1,
                                              indices2, indptr2,
                                              node_list, predecessors,
                                              root_list, flag)
            # The search sets flag for precisely the nodes it stores in
            # node_list, so labeling and un-flagging via that slice touches
            # the same nodes as a full-length `flag > 0` mask and
            # `flag.fill(0)` would, but costs O(component size) instead of
            # O(N) per component.
            component = node_list[:n_found]
            labels[component] = label
            flag[component] = 0
            label += 1

    return label
cdef int _connected_components_directed(
                                 np.ndarray[ITYPE_t, ndim=1, mode='c'] indices,
                                 np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr,
                                 np.ndarray[ITYPE_t, ndim=1, mode='c'] labels) except -1:
    # Label the strongly connected components of a directed graph.
    #
    # uses http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.102.1707
    #
    # Arguments:
    #  indices, indptr: (input) CSR structure of the graph
    #  labels: (output) one entry per node; overwritten with the component
    #          label of that node
    # Returns:
    #  the number of strongly connected components.  The result is never
    #  negative, so -1 is reserved (``except -1``) to let an exception
    #  raised here reach the caller.
    # Raises:
    #  MemoryError if the work stack cannot be allocated
    cdef int N = labels.shape[0]
    cdef int i_node

    # NodeStack ends in a zero-length array; room for N ints is allocated
    # directly behind the struct.
    cdef NodeStack *stack = <NodeStack*>stdlib.malloc(sizeof(NodeStack)
                                                      + N * sizeof(int))
    if stack == NULL:
        # malloc failed: dereferencing the pointer below would crash.
        raise MemoryError()

    try:
        stack.label = 0
        stack.index = 0
        stack.i = 0
        stack.N = N
        stack.c = N - 1

        # -1 marks a node that has not been visited yet.
        labels.fill(-1)

        for i_node from 0 <= i_node < N:
            if labels[i_node] == -1:
                _cc_visit(i_node, stack,
                          labels, indices, indptr)

        # labels count down from N-1 to zero.  Modify them so they
        # count upward from 0
        labels *= -1
        labels += (N - 1)

        # stack.c started at N - 1 and dropped by one per component.
        N -= stack.c + 1
    finally:
        # Release the stack on every exit path.
        stdlib.free(stack)

    return N
cdef int _cc_visit(int i_node, NodeStack *stack,
                   np.ndarray[ITYPE_t, ndim=1, mode='c'] rindex,
                   np.ndarray[ITYPE_t, ndim=1, mode='c'] indices,
                   np.ndarray[ITYPE_t, ndim=1, mode='c'] indptr):
    # Recursive visit step of the strongly-connected-components search.
    #
    # Arguments:
    #  i_node: node being visited
    #  stack: shared search state (visit counter, next component label and
    #         the stack of nodes not yet assigned to a component)
    #  rindex: (in/out) per-node value; -1 means "not visited yet".  Holds
    #          a visit index while a node is pending and the component
    #          label once the node has been assigned.
    #  indices, indptr: CSR structure of the graph
    # Returns:
    #  always 0
    #
    # NOTE(review): the function calls itself once per newly reached node
    # along a path, so very long paths presumably mean very deep C
    # recursion -- confirm this is acceptable for the expected graph sizes.
    cdef int is_root = True
    cdef int edge, neighbor

    rindex[i_node] = stack.index
    stack.index += 1

    # Examine every edge leaving i_node.
    for edge from indptr[i_node] <= edge < indptr[i_node + 1]:
        neighbor = indices[edge]

        # Unvisited neighbor: explore it before comparing indices.
        if rindex[neighbor] == -1:
            _cc_visit(neighbor, stack, rindex,
                      indices, indptr)

        # A neighbor with a smaller value means i_node is not the root of
        # its component; inherit the smaller value.
        if rindex[neighbor] < rindex[i_node]:
            rindex[i_node] = rindex[neighbor]
            is_root = False

    if not is_root:
        # Leave the node pending until the root of its component is found.
        stack_push(stack, i_node)
    else:
        stack.index -= 1
        # Pop every pending node whose value is not below this root's:
        # those nodes are strongly connected to i_node.
        while (stack.i > 0
               and rindex[i_node] <= rindex[stack.arr[stack.i - 1]]):
            neighbor = stack_pop(stack)
            rindex[neighbor] = stack.c
            stack.index -= 1

        rindex[i_node] = stack.c
        stack.c -= 1

    return 0
# Shared state for the strongly-connected-components search
# (_connected_components_directed / _cc_visit), combined with a fixed-size
# stack of node indices stored directly behind the struct.
cdef struct NodeStack:
    # running visit counter; _cc_visit stores it in rindex when it enters
    # a node
    int index
    # set to 0 by _connected_components_directed; not read anywhere in the
    # code shown here
    int label
    # number of items currently held in arr
    int i
    # capacity of arr (checked by stack_push)
    int N
    # next component label to hand out; starts at N - 1 and counts down
    int c
    # zero-length trailing array: the allocator reserves N ints after the
    # struct and they are addressed through this member
    int arr[0]
cdef int stack_pop(NodeStack *s):
    # Remove the top item of the stack and return it.  The item count is
    # decremented before the emptiness check, so it is left at -1 when the
    # ValueError for an empty stack is raised.
    cdef int top = s.i - 1
    s.i = top
    if top < 0:
        raise ValueError('s.i < 0')
    return s.arr[top]
cdef int stack_push(NodeStack *s, int item):
    # Append `item` to the top of the stack.  Raises ValueError, leaving
    # the stack untouched, when all s.N slots are already in use.
    cdef int slot = s.i
    if slot >= s.N:
        raise ValueError('s.i >= N')
    s.arr[slot] = item
    s.i = slot + 1
    return 0
cdef int in_stack(NodeStack *s, int item):
    # Linear search of the stack, from the top down.
    # Returns 1 if `item` is currently stored in the stack, 0 otherwise.
    cdef int pos
    for pos in range(s.i - 1, -1, -1):
        if s.arr[pos] == item:
            return 1
    return 0
Jump to Line
Something went wrong with that request. Please try again.