Skip to content

Commit 79f5598

Browse files
authored
Support directed graphs in clustering / triangle counting (#4)
* Support directed graphs in clustering / triangle counting
* single_clustering_directed_core
* This is better for counting single triangles. It helps to think!
* Workaround, maybe
1 parent cc2d208 commit 79f5598

File tree

2 files changed

+130
-22
lines changed

2 files changed

+130
-22
lines changed

graphblas_algorithms/cluster.py

Lines changed: 100 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import graphblas as gb
22
import networkx as nx
3-
from graphblas import Matrix, agg, select
4-
from graphblas.semiring import any_pair, plus_pair
3+
from graphblas import binary, select
4+
from graphblas.semiring import plus_pair
55
from networkx import average_clustering as _nx_average_clustering
66
from networkx import clustering as _nx_clustering
77
from networkx.utils import not_implemented_for
@@ -51,27 +51,24 @@ def get_degrees(G, mask=None, *, L=None, U=None, has_self_edges=True):
5151
if L is None or U is None:
5252
L, U = get_properties(G, "L U", L=L, U=U)
5353
degrees = (
54-
L.reduce_rowwise(agg.count).new(mask=mask) + U.reduce_rowwise(agg.count).new(mask=mask)
54+
L.reduce_rowwise(gb.agg.count).new(mask=mask)
55+
+ U.reduce_rowwise(gb.agg.count).new(mask=mask)
5556
).new(name="degrees")
5657
else:
57-
degrees = G.reduce_rowwise(agg.count).new(mask=mask, name="degrees")
58+
degrees = G.reduce_rowwise(gb.agg.count).new(mask=mask, name="degrees")
5859
return degrees
5960

6061

6162
def single_triangle_core(G, index, *, L=None, has_self_edges=True):
62-
M = Matrix(bool, G.nrows, G.ncols)
63-
M[index, index] = True
64-
C = any_pair(G.T @ M.T).new(name="C") # select.coleq(G.T, index)
63+
r = G[index, :].new()
6564
has_self_edges = get_properties(G, "has_self_edges", L=L, has_self_edges=has_self_edges)
66-
if has_self_edges:
67-
del C[index, index] # Ignore self-edges
68-
R = C.T.new(name="R")
6965
if has_self_edges:
7066
# Pretty much all the time is spent here taking TRIL, which is used to ignore self-edges
7167
L = get_properties(G, "L", L=L)
72-
return plus_pair(L @ R.T).new(mask=C.S).reduce_scalar(allow_empty=False).value
68+
del r[index] # Ignore self-edges
69+
return plus_pair(L @ r).new(mask=r.S).reduce(allow_empty=False).value
7370
else:
74-
return plus_pair(G @ R.T).new(mask=C.S).reduce_scalar(allow_empty=False).value // 2
71+
return plus_pair(G @ r).new(mask=r.S).reduce(allow_empty=False).value // 2
7572

7673

7774
def triangles_core(G, mask=None, *, L=None, U=None):
@@ -114,12 +111,28 @@ def transitivity_core(G, *, L=None, U=None, degrees=None):
114111
return 6 * numerator / denom
115112

116113

117-
@not_implemented_for("directed") # Should we implement it for directed?
114+
def transitivity_directed_core(G, *, has_self_edges=True):
115+
# XXX: is transitivity supposed to work on directed graphs like this?
116+
if has_self_edges:
117+
A = select.offdiag(G)
118+
else:
119+
A = G
120+
numerator = plus_pair(A @ A.T).new(mask=A.S).reduce_scalar(allow_empty=False).value
121+
if numerator == 0:
122+
return 0
123+
deg = A.reduce_rowwise(gb.agg.count)
124+
denom = (deg * (deg - 1)).reduce().value
125+
return numerator / denom
126+
127+
118128
def transitivity(G):
119129
if len(G) == 0:
120130
return 0
121131
A = gb.io.from_networkx(G, weight=None, dtype=bool)
122-
return transitivity_core(A)
132+
if isinstance(G, nx.DiGraph):
133+
return transitivity_directed_core(A)
134+
else:
135+
return transitivity_core(A)
123136

124137

125138
def clustering_core(G, mask=None, *, L=None, U=None, degrees=None):
@@ -130,6 +143,29 @@ def clustering_core(G, mask=None, *, L=None, U=None, degrees=None):
130143
return (2 * tri / denom).new(name="clustering")
131144

132145

146+
def clustering_directed_core(G, mask=None, *, has_self_edges=True):
147+
# TODO: Alright, this introduces us to properties of directed graphs:
148+
# has_self_edges, offdiag, row_degrees, column_degrees, total_degrees, recip_degrees
149+
# (in_degrees, out_degrees?)
150+
if has_self_edges:
151+
A = select.offdiag(G)
152+
else:
153+
A = G
154+
AT = A.T.new()
155+
temp = plus_pair(A @ A.T).new(mask=A.S)
156+
tri = (
157+
temp.reduce_rowwise().new(mask=mask)
158+
+ temp.reduce_columnwise().new(mask=mask)
159+
+ plus_pair(AT @ A.T).new(mask=A.S).reduce_rowwise().new(mask=mask)
160+
+ plus_pair(AT @ AT.T).new(mask=A.S).reduce_columnwise().new(mask=mask)
161+
)
162+
recip_degrees = binary.pair(A & AT).reduce_rowwise().new(mask=mask)
163+
total_degrees = (
164+
A.reduce_rowwise(gb.agg.count).new(mask=mask) + A.reduce_columnwise(gb.agg.count)
165+
).new(mask=mask)
166+
return (tri / (total_degrees * (total_degrees - 1) - 2 * recip_degrees)).new(name="clustering")
167+
168+
133169
def single_clustering_core(G, index, *, L=None, degrees=None, has_self_edges=True):
134170
has_self_edges = get_properties(G, "has_self_edges", L=L, has_self_edges=has_self_edges)
135171
tri = single_triangle_core(G, index, L=L, has_self_edges=has_self_edges)
@@ -139,24 +175,50 @@ def single_clustering_core(G, index, *, L=None, degrees=None, has_self_edges=Tru
139175
degrees = degrees[index].value
140176
else:
141177
row = G[index, :].new()
142-
degrees = row.reduce(agg.count).value
178+
degrees = row.nvals
143179
if has_self_edges and row[index].value is not None:
144180
degrees -= 1
145181
denom = degrees * (degrees - 1)
146182
return 2 * tri / denom
147183

148184

185+
def single_clustering_directed_core(G, index, *, has_self_edges=True):
186+
if has_self_edges:
187+
A = select.offdiag(G)
188+
else:
189+
A = G
190+
r = A[index, :].new()
191+
c = A[:, index].new()
192+
tri = (
193+
plus_pair(A @ c).new(mask=c.S).reduce(allow_empty=False).value
194+
+ plus_pair(A @ c).new(mask=r.S).reduce(allow_empty=False).value
195+
+ plus_pair(A @ r).new(mask=c.S).reduce(allow_empty=False).value
196+
+ plus_pair(A @ r).new(mask=r.S).reduce(allow_empty=False).value
197+
)
198+
if tri == 0:
199+
return 0
200+
total_degrees = c.nvals + r.nvals
201+
recip_degrees = binary.pair(c & r).nvals
202+
return tri / (total_degrees * (total_degrees - 1) - 2 * recip_degrees)
203+
204+
149205
def clustering(G, nodes=None, weight=None):
150206
if len(G) == 0:
151207
return {}
152-
if isinstance(G, nx.DiGraph) or weight is not None:
153-
# TODO: Not yet implemented. Clustering implemented only for undirected and unweighted.
208+
if weight is not None:
209+
# TODO: Not yet implemented. Clustering implemented only for unweighted.
154210
return _nx_clustering(G, nodes=nodes, weight=weight)
155211
A, key_to_id = graph_to_adjacency(G, weight=weight)
156212
if nodes in G:
157-
return single_clustering_core(A, key_to_id[nodes])
213+
if isinstance(G, nx.DiGraph):
214+
return single_clustering_directed_core(A, key_to_id[nodes])
215+
else:
216+
return single_clustering_core(A, key_to_id[nodes])
158217
mask, id_to_key = list_to_mask(nodes, key_to_id)
159-
result = clustering_core(A, mask=mask)
218+
if isinstance(G, nx.DiGraph):
219+
result = clustering_directed_core(A, mask=mask)
220+
else:
221+
result = clustering_core(A, mask=mask)
160222
return vector_to_dict(result, key_to_id, id_to_key, mask=mask, fillvalue=0.0)
161223

162224

@@ -171,10 +233,26 @@ def average_clustering_core(G, mask=None, count_zeros=True, *, L=None, U=None, d
171233
return val / c.size
172234

173235

236+
def average_clustering_directed_core(G, mask=None, count_zeros=True, *, has_self_edges=True):
237+
c = clustering_directed_core(G, mask=mask, has_self_edges=has_self_edges)
238+
val = c.reduce(allow_empty=False).value
239+
if not count_zeros:
240+
return val / c.nvals
241+
elif mask is not None:
242+
return val / mask.parent.nvals
243+
else:
244+
return val / c.size
245+
246+
174247
def average_clustering(G, nodes=None, weight=None, count_zeros=True):
175-
if len(G) == 0 or isinstance(G, nx.DiGraph) or weight is not None:
176-
# TODO: Not yet implemented. Clustering implemented only for undirected and unweighted.
248+
if len(G) == 0:
249+
raise ZeroDivisionError() # Not covered
250+
if weight is not None:
251+
# TODO: Not yet implemented. Clustering implemented only for unweighted.
177252
return _nx_average_clustering(G, nodes=nodes, weight=weight, count_zeros=count_zeros)
178253
A, key_to_id = graph_to_adjacency(G, weight=weight)
179254
mask, _ = list_to_mask(nodes, key_to_id)
180-
return average_clustering_core(A, mask=mask, count_zeros=count_zeros)
255+
if isinstance(G, nx.DiGraph):
256+
return average_clustering_directed_core(A, mask=mask, count_zeros=count_zeros)
257+
else:
258+
return average_clustering_core(A, mask=mask, count_zeros=count_zeros)

graphblas_algorithms/tests/test_cluster.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,4 +89,34 @@ def test_triangles_full():
8989
assert ga.cluster.average_clustering_core(G2, mask=mask.S) == 1
9090

9191

92+
def test_directed():
93+
# XXX: is transitivity supposed to work on directed graphs like this?
94+
G = nx.complete_graph(5, create_using=nx.DiGraph())
95+
G.remove_edge(1, 2)
96+
G.remove_edge(2, 3)
97+
G.add_node(5)
98+
expected = nx_transitivity(G)
99+
result = transitivity(G)
100+
assert expected == result
101+
# clustering
102+
expected = nx_clustering(G)
103+
result = clustering(G)
104+
assert result == expected
105+
expected = nx_clustering(G, [0, 1, 2])
106+
result = clustering(G, [0, 1, 2])
107+
assert result == expected
108+
for i in range(6):
109+
assert nx_clustering(G, i) == clustering(G, i)
110+
# average_clustering
111+
expected = nx_average_clustering(G)
112+
result = average_clustering(G)
113+
assert result == expected
114+
expected = nx_average_clustering(G, [0, 1, 2])
115+
result = average_clustering(G, [0, 1, 2])
116+
assert result == expected
117+
expected = nx_average_clustering(G, count_zeros=False)
118+
result = average_clustering(G, count_zeros=False)
119+
assert result == expected
120+
121+
92122
from networkx.algorithms.tests.test_cluster import * # noqa isort:skip

0 commit comments

Comments
 (0)