## Collection of Algorithms

1. [Scan statistics](#1.-Scan-statistics)
2. [SSSP/MSSP, GAP benchmark](#2.-SSSP-/-MSSP,-GAP-benchmark)
3. [Vertex nomination](#3.-Vertex-nomination)
4. [Graph projection](#4.-Graph-projection)
5. [Graph search](#5.-Graph-search)

## First, some setup

In [1]:
import inspect
import grblas as gb
import networkx as nx
import numpy as np
from grblas import Matrix, Vector, Scalar, unary, binary, monoid, semiring

In [2]:
a = np.array(
    [
        [0, 1, 0, 1, 1, 0, 0, 0],
        [1, 0, 0, 1, 1, 0, 0, 0],
        [0, 0, 0, 0, 1, 1, 1, 1],
        [1, 1, 0, 0, 1, 0, 0, 0],
        [1, 1, 1, 1, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 1, 0],
        [0, 0, 1, 0, 0, 1, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0],
    ]
)
A = gb.io.from_numpy(a)
A

0,1,2,3,4
grblas.Matrix,nvals,nrows,ncols,dtype
grblas.Matrix,22,8,8,INT64

Unnamed: 0,0,1,2,3,4,5,6,7
0,,1.0,,1.0,1.0,,,
1,1.0,,,1.0,1.0,,,
2,,,,,1.0,1.0,1.0,1.0
3,1.0,1.0,,,1.0,,,
4,1.0,1.0,1.0,1.0,,,,
5,,,1.0,,,,1.0,
6,,,1.0,,,1.0,,
7,,,1.0,,,,,


In [3]:
g = gb.io.to_networkx(A).to_undirected()

In [4]:
def _cname(x):
    try:
        return gb.ffi.typeof(x).cname
    except Exception:
        return None

_selectop_cname = _cname(gb.lib.GxB_TRIL)

SELECT_OPS = {
    key[4:]
    for key, val in inspect.getmembers(gb.lib)
    if _cname(val) == _selectop_cname
}
SELECT_OPS

{'DIAG',
 'EQ_THUNK',
 'EQ_ZERO',
 'GE_THUNK',
 'GE_ZERO',
 'GT_THUNK',
 'GT_ZERO',
 'LE_THUNK',
 'LE_ZERO',
 'LT_THUNK',
 'LT_ZERO',
 'NE_THUNK',
 'NONZERO',
 'OFFDIAG',
 'TRIL',
 'TRIU'}

In [5]:
class wrap:
    def __init__(self, carg, name):
        self._carg = carg
        self.name = name


# We should add GxB_Scalar to grblas
def scalar(value, dtype=None):
    s = gb.Scalar.from_value(value, dtype=dtype)
    gxb = gb.ffi.new("GxB_Scalar*")
    status = gb.lib.GxB_Scalar_new(gxb, s.dtype._carg)
    gb.exceptions.check_status_carg(status, 'Scalar', gxb[0])
    func = getattr(gb.lib, f'GxB_Scalar_setElement_{s.dtype.name}')
    status = func(gxb[0], value)
    gb.exceptions.check_status_carg(status, 'Scalar', gxb[0])
    return gxb


def select(A, opname, *, thunk=None, name=None):
    # Call GxB_Select manually until we implement select.
    # No masks or descriptors for now.
    if opname.upper() not in SELECT_OPS:
        raise ValueError(
            f'Unknown opname: {opname}.  Should be one of: '
            + ', '.join(sorted(SELECT_OPS))
        )
    opname = f'GxB_{opname.upper()}'
    if isinstance(A, Matrix):
        rv = Matrix.new(A.dtype, A.nrows, A.ncols, name=name)
    else:
        rv = Vector.new(A.dtype, A.size, name=name)
    if thunk is not None:
        if isinstance(thunk, Scalar):
            thunk = thunk.value
        thunk_ptr = scalar(thunk)
        thunk = thunk_ptr[0]
    try:
        gb.base.call(
            f'GxB_{type(A).__name__}_select',
            [rv, None, None, wrap(getattr(gb.lib, opname), opname), A, thunk, None]
        )
    finally:
        if thunk is not None:
            status = gb.lib.GxB_Scalar_free(thunk_ptr)
            gb.exceptions.check_status_carg(status, 'Scalar', thunk)
    return rv

In [6]:
select(A, 'tril')

0,1,2,3,4
grblas.Matrix,nvals,nrows,ncols,dtype
grblas.Matrix,11,8,8,INT64

Unnamed: 0,0,1,2,3,4,5,6,7
0,,,,,,,,
1,1.0,,,,,,,
2,,,,,,,,
3,1.0,1.0,,,,,,
4,1.0,1.0,1.0,1.0,,,,
5,,,1.0,,,,,
6,,,1.0,,,1.0,,
7,,,1.0,,,,,


## 1. Scan statistics
Determine which node is connected to the most triangles
- https://hiveprogram.com/wiki/display/WOR/V0+-+Scan+Statistics
- https://gunrock.github.io/docs/#/hive/hive_scan_statistics

In [7]:
def triangles(A):
    """Number of triangles per node"""
    L = select(A, 'tril')
    # REWRITE (high): matmul followed by reduce_rows
    A_triangles = semiring.plus_pair(A @ L.T).new(mask=A.S, dtype=int)
    return A_triangles.reduce_rows(monoid.plus).new()

In [8]:
triangles(A)

0,1,2,3
grblas.Vector,nvals,size,dtype
grblas.Vector,7,8,INT64

Unnamed: 0,0,1,2,3,4,5,6,7
,3,3,1,3,3,1,1,


In [9]:
d = nx.algorithms.cluster.triangles(g)
Vector.from_values(list(d.keys()), list(d.values()), size=A.nrows)

0,1,2,3
grblas.Vector,nvals,size,dtype
grblas.Vector,8,8,INT64

Unnamed: 0,0,1,2,3,4,5,6,7
,3,3,1,3,3,1,1,0


In [10]:
def argminmax(vector, min_or_max, which):
    best = vector.reduce(min_or_max).new()
    mask = binary.eq(vector, best).new()
    matching = unary.positioni(vector).new(mask=mask.V)
    return matching.reduce(which).value

In [11]:
def argminmax_select(vector, min_or_max, which):
    best = vector.reduce(min_or_max).new()
    vector = select(vector, 'eq_thunk', thunk=best)
    matching = unary.positioni(vector).new()
    return matching.reduce(which).value

In [12]:
def argmax(vector, *, which=monoid.any):
    return argminmax(vector, monoid.max, which)

def argmax_select(vector, *, which=monoid.any):
    return argminmax_select(vector, monoid.max, which)

def argmin(vector, *, which=monoid.any):
    return argminmax(vector, monoid.min, which)

def argmin_select(vector, *, which=monoid.any):
    return argminmax_select(vector, monoid.min, which)

In [13]:
t = triangles(A)
t

0,1,2,3
grblas.Vector,nvals,size,dtype
grblas.Vector,7,8,INT64

Unnamed: 0,0,1,2,3,4,5,6,7
,3,3,1,3,3,1,1,


In [14]:
argmax(t), argmax_select(t), argmin(t), argmin_select(t)

(0, 0, 2, 2)

In [15]:
# This is the workload!
def most_triangles(A):
    tri = triangles(A)
    # REWRITE: compute max_triangles when we compute triangles above
    return argmax(tri)

In [16]:
most_triangles(A)

0

In [17]:
d = nx.algorithms.cluster.triangles(g)
max(d, key=d.__getitem__)

0

## 2. SSSP / MSSP, GAP benchmark

In [18]:
def mssp(A, sources):
    v = sources.dup()  # don't modify the inputs
    v_dup = Vector.new(v.dtype, size=v.size)
    while True:
        v_dup << v
        v(binary.min) << semiring.min_plus(A.T @ v)
        if v.isequal(v_dup):
            return v

def sssp(A, source):
    v = Vector.from_values([source], [0], size=A.nrows)
    return mssp(A, v)

In [19]:
source = np.random.randint(A.nrows)
source

3

In [20]:
sssp(A, source)

0,1,2,3
grblas.Vector,nvals,size,dtype
grblas.Vector,8,8,INT64

Unnamed: 0,0,1,2,3,4,5,6,7
,1,1,2,0,1,3,3,3


## 3. Vertex nomination

In [21]:
# `seeds` is initial set of "interesting" nodes
num_seeds = 2
seeds = set()
while len(seeds) < num_seeds:
    seeds.add(np.random.randint(A.nrows))
seeds = Vector.from_values(sorted(seeds), [0] * len(seeds), size=A.nrows)
seeds

0,1,2,3
grblas.Vector,nvals,size,dtype
grblas.Vector,2,8,INT64

Unnamed: 0,0,1,2,3,4,5,6,7
,0,,0,,,,,


In [22]:
def vertex_nomination(A, seeds):
    v = semiring.min_first(A.T @ seeds).new(mask=~seeds.S)
    return argmin(v)

In [23]:
vertex_nomination(A, seeds)

1

## 4. Graph projection
- For bipartite graphs
- Often followed by filter

### Unnormalized 2-hop

In [24]:
# Note that A here represents the upper-right block of the full
# graph adjancency matrix shown here:
#
# B = [ 0   A ]
#     [ A.T 0 ]
#
# B is symmetric, but A probably isn't!

W1 = semiring.plus_times(A @ A.T).new()  # Left projection graph
W2 = semiring.plus_times(A.T @ A).new()  # Right projection graph
W1

0,1,2,3,4
grblas.Matrix,nvals,nrows,ncols,dtype
grblas.Matrix,42,8,8,INT64

Unnamed: 0,0,1,2,3,4,5,6,7
0,3.0,2.0,1.0,2.0,2.0,,,
1,2.0,3.0,1.0,2.0,2.0,,,
2,1.0,1.0,4.0,1.0,,1.0,1.0,
3,2.0,2.0,1.0,3.0,2.0,,,
4,2.0,2.0,,2.0,4.0,1.0,1.0,1.0
5,,,1.0,,1.0,2.0,1.0,1.0
6,,,1.0,,1.0,1.0,2.0,1.0
7,,,,,1.0,1.0,1.0,1.0


In [25]:
select(W1, 'gt_thunk', thunk=1)  # Filter!

0,1,2,3,4
grblas.Matrix,nvals,nrows,ncols,dtype
grblas.Matrix,19,8,8,INT64

Unnamed: 0,0,1,2,3,4,5,6,7
0,3.0,2.0,,2.0,2.0,,,
1,2.0,3.0,,2.0,2.0,,,
2,,,4.0,,,,,
3,2.0,2.0,,3.0,2.0,,,
4,2.0,2.0,,2.0,4.0,,,
5,,,,,,2.0,,
6,,,,,,,2.0,
7,,,,,,,,


### Normalized 2-hop projection

In [26]:
def row_degrees(A, *, mask=None):
    B = A.apply(unary.one).new(dtype=int)
    return B.reduce_rows(monoid.plus).new(mask=mask)

def col_degrees(A, *, mask=None):
    return row_degrees(A.T, mask=mask)

In [27]:
Dn = gb.ss.diag(row_degrees(A), dtype=float)
invDn = unary.minv(Dn).new()
invDn

0,1,2,3,4
grblas.Matrix,nvals,nrows,ncols,dtype
grblas.Matrix,8,8,8,FP64

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.333333,,,,,,,
1,,0.333333,,,,,,
2,,,0.25,,,,,
3,,,,0.333333,,,,
4,,,,,0.25,,,
5,,,,,,0.5,,
6,,,,,,,0.5,
7,,,,,,,,1.0


In [28]:
Dm = gb.ss.diag(col_degrees(A), dtype=float)
invDm = unary.minv(Dm).new()

In [29]:
W1 = semiring.plus_times(
    semiring.any_times(invDn @ A).new()
    @
    semiring.any_times(A.T @ invDm).new()
).new()
W1

0,1,2,3,4
grblas.Matrix,nvals,nrows,ncols,dtype
grblas.Matrix,42,8,8,FP64

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.333333,0.222222,0.083333,0.222222,0.166667,,,
1,0.222222,0.333333,0.083333,0.222222,0.166667,,,
2,0.083333,0.083333,0.25,0.083333,,0.125,0.125,
3,0.222222,0.222222,0.083333,0.333333,0.166667,,,
4,0.166667,0.166667,,0.166667,0.25,0.125,0.125,0.25
5,,,0.125,,0.125,0.5,0.25,0.5
6,,,0.125,,0.125,0.25,0.5,0.5
7,,,,,0.25,0.5,0.5,1.0


In [30]:
select(W1, 'gt_thunk', thunk=0.2)

0,1,2,3,4
grblas.Matrix,nvals,nrows,ncols,dtype
grblas.Matrix,22,8,8,FP64

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.333333,0.222222,,0.222222,,,,
1,0.222222,0.333333,,0.222222,,,,
2,,,0.25,,,,,
3,0.222222,0.222222,,0.333333,,,,
4,,,,,0.25,,,0.25
5,,,,,,0.5,0.25,0.5
6,,,,,,0.25,0.5,0.5
7,,,,,0.25,0.5,0.5,1.0


In [31]:
# Alt, if symmetric
Ar = semiring.any_times(invDn @ A).new()
W1 = semiring.plus_times(Ar @ Ar.T).new()
select(W1, 'gt_thunk', thunk=0.2)

0,1,2,3,4
grblas.Matrix,nvals,nrows,ncols,dtype
grblas.Matrix,22,8,8,FP64

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.333333,0.222222,,0.222222,,,,
1,0.222222,0.333333,,0.222222,,,,
2,,,0.25,,,,,
3,0.222222,0.222222,,0.333333,,,,
4,,,,,0.25,,,0.25
5,,,,,,0.5,0.25,0.5
6,,,,,,0.25,0.5,0.5
7,,,,,0.25,0.5,0.5,1.0


## 5. Graph search

### Greedy (argmin), single node

In [32]:
cur = np.random.randint(A.nrows)
counts = Vector.new(int, size=A.nrows)
for i in range(5):
    print(i, cur)
    v = Vector.from_values([cur], [1], size=A.nrows)
    counts(binary.plus) << v
    neighbors = semiring.any_first(A.T @ v).new()
    cur = argmin(neighbors)  # Can we choose randomly among candidates?
counts

0 5
1 2
2 4
3 0
4 1


0,1,2,3
grblas.Vector,nvals,size,dtype
grblas.Vector,5,8,INT64

Unnamed: 0,0,1,2,3,4,5,6,7
,1,1,1,,1,1,,


### Random neighbor, single node

In [33]:
cur = np.random.randint(A.nrows)
for i in range(5):
    print(i, cur)
    neighbors = A[cur, :].new().to_values()[0]
    cur = np.random.choice(neighbors)
cur

0 2
1 7
2 2
3 5
4 2


6

### Random neighbor with weights, single node

In [34]:
cur = np.random.randint(A.nrows)
for i in range(5):
    print(i, cur)
    indices, weights = semiring.plus_pair(A.T @ v).new().to_values()
    cur = np.random.choice(indices, p=weights / weights.sum())
cur

0 1
1 0
2 0
3 0
4 3


0

### Greedy (argmax), many nodes

In [35]:
def argmaxi(matrix):
    best = matrix.reduce_columns(monoid.max).new()
    D = gb.ss.diag(best)
    masked = semiring.any_eq(matrix @ D).new()

    # masked(mask=masked.V, replace=True) << masked  # Option 1
    masked = select(masked, 'nonzero')  # Option 2

    # Create and use iso-valued vector!
    ones = Vector.new(bool, size=matrix.nrows)
    ones[:] = 1
    return semiring.any_secondi(ones @ masked).new()

In [36]:
# `seeds` is initial set of "interesting" nodes
num_seeds = 2
seeds = set()
while len(seeds) < num_seeds:
    seeds.add(np.random.randint(A.nrows))
cur = Matrix.from_values(sorted(seeds), np.arange(num_seeds), [1] * num_seeds, nrows=A.nrows, ncols=num_seeds)
cur

0,1,2,3,4
grblas.Matrix,nvals,nrows,ncols,dtype
grblas.Matrix,2,8,2,INT64

Unnamed: 0,0,1
0,1.0,
1,,
2,,1.0
3,,
4,,
5,,
6,,
7,,


In [37]:
rows = np.array(sorted(seeds))
cols = np.arange(num_seeds)
values = np.ones(num_seeds)
counts = Vector.new(int, size=A.nrows)
for i in range(5):
    print(i, rows)
    B = Matrix.from_values(rows, cols, values, nrows=A.nrows, ncols=num_seeds)
    counts(binary.plus) << B.reduce_rows(monoid.plus)
    neighbors = semiring.any_first(A.T @ B).new()
    cur = argmaxi(neighbors)  # Can we choose randomly among candidates?
    rows = cur.to_values()[1]
counts

0 [0 2]
1 [4 7]
2 [3 2]
3 [4 7]
4 [3 2]


0,1,2,3
grblas.Vector,nvals,size,dtype
grblas.Vector,5,8,INT64

Unnamed: 0,0,1,2,3,4,5,6,7
,1,,3,2,2,,,2
