# GraphWave

In [None]:
import networkx as nx
import numpy as np
import pandas as pd
import scipy.sparse.linalg
from scipy import sparse
from grblas import *

In [None]:
def isclose(gb_x, np_y, rel_tol=1e-07, abs_tol=0.0):
    """Compare a grblas object with its NumPy/SciPy counterpart.

    `np_y` is first converted to something `gb_x.isclose` accepts:
    a Vector is compared against `np_y` imported as a full Vector, a Matrix
    against `np_y` imported either from a SciPy sparse matrix or from a dense
    row-oriented array, and anything else (a Scalar) against `np_y` itself.

    Returns whatever `gb_x.isclose` returns for the given tolerances.
    """
    if isinstance(gb_x, Vector):
        expected = Vector.ss.import_full(np_y)
    elif isinstance(gb_x, Matrix):
        # Sparse and dense inputs need different importers.
        if sparse.issparse(np_y):
            expected = io.from_scipy_sparse_matrix(np_y)
        else:
            expected = Matrix.ss.import_fullr(np_y)
    else:  # Scalar
        expected = np_y
    return gb_x.isclose(expected, rel_tol=rel_tol, abs_tol=abs_tol)

In [None]:
# Scale parameters: each entry is used below as `tau` in the kernel
# exp(-tau * x / lmax); there is one output feature slice per entry.
taus = [0.5, 0.6]
# Initial value only; it is replaced further down by an eigsh-based estimate.
lmax = 1.  # given or computed

In [None]:
# Build the symmetric 0/1 adjacency matrix with SciPy.
edgelist = pd.read_csv('3200.edgelist', sep='\t', header=None, dtype=int).values
# Give every listed edge weight 1.  Using a running index (np.arange) as the
# data would give the first listed edge weight 0, and the `> 0` test below
# would then lose it unless its reverse edge is also listed.
W = sparse.csr_matrix(
    (np.ones(edgelist.shape[0], dtype=int), (edgelist[:, 0], edgelist[:, 1]))
)
# Symmetrize, then reduce every positive entry to 1.
W = ((W + W.T) > 0).astype(int)

In [None]:
# Same graph as a grblas Matrix: one entry of value 1 per (row, col) pair in the file.
edgelist = pd.read_csv('3200.edgelist', sep='\t', header=None, dtype=int).values
W_gb = Matrix.from_values(edgelist[:, 0], edgelist[:, 1], 1)
# NOTE(review): W_gb is not symmetrized while W is; this check presumably relies
# on the edge list already containing both directions of every edge -- confirm.
assert isclose(W_gb, W)

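#### Note: isolated nodes (nodes without any edge) must be dropped beforehand, because the Laplacian below raises each node's degree to the power -0.5!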

In [None]:
# W only holds 0/1 entries, so a zero column sum means a node without edges.
assert not (W.sum(axis=0) == 0).any()

In [None]:
# Node count, taken from the number of rows of the adjacency matrix.
num_nodes = W.shape[0]

### Choose signal nodes (or use all nodes)

In [None]:
# Build the signal matrix twice -- as a NumPy array (`signal`) and as a grblas
# Matrix (`signal_gb`) -- and check that both hold the same values.
use_all_signals = True
if not use_all_signals:
    n_signals = num_nodes // 2  # pick a number, any number!
    # Distinct node indices in ascending order; column j gets a single 1 in
    # row signal_idx[j].
    signal_idx = np.sort(np.random.choice(num_nodes, n_signals, replace=False))
    signal = np.zeros((num_nodes, n_signals), dtype=int)
    signal[signal_idx, np.arange(n_signals)] = 1

    signal_gb = Matrix.from_values(
        signal_idx, np.arange(n_signals), 1, nrows=num_nodes, ncols=n_signals
    )
else:
    # Every node is a signal: the identity matrix.
    signal = np.eye(num_nodes)
    signal_gb = ss.diag(Vector.from_values(np.arange(num_nodes), 1))
assert signal_gb.isclose(io.from_numpy(signal))

### Laplacian

In [None]:
# Normalized Laplacian with SciPy: L = I - D W D, where D = diag(dw ** -0.5).
dw = np.asarray(W.sum(axis=1)).squeeze()  # row sums of W as a 1-D array
d = np.power(dw, -0.5)
D = sparse.diags(np.ravel(d), 0).tocsc()  # d on the main diagonal
L = sparse.identity(W.shape[0]) - D @ W @ D

In [None]:
# The same Laplacian in grblas; every intermediate is checked against SciPy.
dw_gb = W_gb.reduce_rowwise(op.plus).new()  # row sums of W_gb
d_gb = op.pow(dw_gb, -0.5).new()
D_gb = ss.diag(d_gb)
# First form -(D^T W D) ...
L_gb = (-D_gb.T @ W_gb @ D_gb).new()
# ... then add a diagonal of ones, accumulating into L_gb with plus.
L_gb(op.plus) << ss.diag(Vector.from_values(np.arange(num_nodes), 1))
assert isclose(dw_gb, dw)
assert isclose(d_gb, d)
assert isclose(D_gb, D)
assert isclose(L_gb, L)

In [None]:
# or use given lmax
# Ask eigsh for a single eigenvalue of L with a loose tolerance, then inflate
# it by 1% (presumably as a safety margin).
# NOTE(review): this relies on eigsh's default `which` picking the
# largest-magnitude eigenvalue -- confirm against the SciPy documentation.
lmax = sparse.linalg.eigsh(
    L, k=1, tol=5e-3, ncv=min(L.shape[0], 10), return_eigenvectors=False
)[0]*1.01
lmax

### Filter

In [None]:
order = 30  # polynomial order; order + 1 coefficients are computed below
n_signals = signal.shape[1]  # one column of `signal` per signal
n_features_out = len(taus)  # one output slice per tau

#### Chebyshev coefficients

In [None]:
# Shared ingredients for the coefficient computations below (first tau only for now).
tau = taus[0]
N = order + 1  # number of coefficients
a = lmax / 2.  # half of lmax
rangeN = np.arange(N)
tmpN = np.pi / N * (rangeN + 0.5)  # angles pi * (k + 0.5) / N for k = 0 .. N-1
num = np.cos(tmpN)  # cosines of those angles

In [None]:
# Reference computation: coefficient o is 2 / N times the dot product of the
# kernel samples with cos(o * tmpN).
def kernel(x):
    return np.exp(-tau * x / lmax)


c = np.array([2. / N * (kernel(a * num + a) @ np.cos(o * tmpN)) for o in range(N)])
c

In [None]:
# Second variant: the factor 2 / N and the kernel samples are folded into `y`
# once, leaving a single dot product per coefficient.
y = 2 / N * np.exp(-tau / lmax * (a * num + a))
c2 = np.array([y @ np.cos(o * tmpN) for o in range(N)])
np.testing.assert_array_almost_equal(c, c2)

In [None]:
# Third variant: column o of z is cos(o * tmpN), so one vector-matrix product
# yields every coefficient at once.
z = np.cos(np.outer(tmpN, rangeN))
c3 = y @ z
np.testing.assert_array_almost_equal(c, c3)

In [None]:
# grblas version of the third variant, one elementwise operation per line.
rangeN_gb = Vector.from_values(rangeN, rangeN)  # indices 0 .. N-1 holding values 0 .. N-1
tmpN_gb = op.plus(rangeN_gb, 0.5).new()
tmpN_gb = op.times(tmpN_gb, np.pi / N).new()
num_gb = op.cos(tmpN_gb).new()

# y = 2 / N * exp(-tau / lmax * (a * num + a)), built up step by step
y_gb = op.times(num_gb, a).new()
y_gb = op.plus(y_gb, a).new()
y_gb = op.times(y_gb, -tau / lmax).new()
y_gb = op.exp(y_gb).new()
y_gb = op.times(y_gb, 2 / N).new()
# z = cos(outer(tmpN, rangeN))
z_gb = tmpN_gb.outer(rangeN_gb).new()
z_gb = op.cos(z_gb).new()
c_gb = (y_gb @ z_gb).new()
# Every intermediate must match its NumPy counterpart.
assert isclose(rangeN_gb, rangeN)
assert isclose(tmpN_gb, tmpN)
assert isclose(num_gb, num)
assert isclose(y_gb, y)
assert isclose(z_gb, z)
assert isclose(c_gb, c, abs_tol=1e-15)

In [None]:
# Here we go
# Coefficients for every tau (NumPy); the cosine table z is shared by all of them.
cs = []
z = np.cos(np.outer(tmpN, rangeN))
for tau in taus:
    y = 2 / N * np.exp(-tau / lmax * (a * num + a))
    cs.append(y @ z)
# The first tau must reproduce the single-tau coefficients computed earlier.
np.testing.assert_array_almost_equal(c, cs[0])

In [None]:
# Coefficients for every tau (grblas).
# The cosine table does not depend on tau, so it is built once up front
# rather than on every pass through the loop.
z_gb = tmpN_gb.outer(rangeN_gb).new()
z_gb = op.cos(z_gb).new()
cs_gb = []
for tau in taus:
    # y = 2 / N * exp(-tau / lmax * (a * num + a)), one elementwise step per line
    y_gb = op.times(num_gb, a).new()
    y_gb = op.plus(y_gb, a).new()
    y_gb = op.times(y_gb, -tau / lmax).new()
    y_gb = op.exp(y_gb).new()
    y_gb = op.times(y_gb, 2 / N).new()
    cs_gb.append((y_gb @ z_gb).new())

In [None]:
# Per-tau check of the grblas coefficients against NumPy; the failing index
# is attached as the assertion message.
for i in range(n_features_out):
    assert isclose(cs_gb[i], cs[i], abs_tol=1e-15), i

In [None]:
# Rebind `c` to the per-tau coefficient vectors as a 2-D array (one row per tau).
c = np.atleast_2d(cs)

In [None]:
# Output buffer: one (num_nodes x n_signals) slice per tau.
heat_print = np.zeros((n_features_out, num_nodes, n_signals))
heat_print.shape  # do one feature at a time so we can do it in GraphBLAS

In [None]:
# First two terms of the expansion (NumPy/SciPy).
twf_old = signal  # order-0 term
twf_cur = 1. / a * (L.dot(signal) - a * signal)  # order-1 term: (L @ signal - a * signal) / a
for i in range(n_features_out):
    # The order-0 coefficient enters with weight one half.
    tmp = 0.5 * c[i, 0] * twf_old + c[i, 1] * twf_cur
    if sparse.issparse(tmp):
        tmp = tmp.todense()
    heat_print[i] = tmp

In [None]:
# grblas counterpart of heat_print: one Matrix per tau is appended below.
heat_print_gbs = []

In [None]:
# First two terms of the expansion (grblas).
twf_old_gb = signal_gb.dup()  # order-0 term
twf_cur_gb = (L_gb @ signal_gb).new()
twf_cur_gb = op.truediv(twf_cur_gb, a).new()

# Subtract the signal: (L @ signal) / a - signal, written as plus over the
# combination of the first term with the negated signal.
# twf_cur_gb = op.minus(twf_cur_gb | signal_gb, require_monoid=False).new()  # also works
twf_cur_gb = op.plus(twf_cur_gb | -signal_gb).new()

assert twf_old_gb.isclose(io.from_numpy(twf_old))
assert twf_cur_gb.isclose(io.from_numpy(twf_cur))

In [None]:
# Start each per-tau result with 0.5 * c0 * twf_old + c1 * twf_cur.
for i in range(n_features_out):
    tmp_gb = op.plus(
        op.times(twf_old_gb, 0.5 * cs_gb[i][0].value)
        | op.times(twf_cur_gb, cs_gb[i][1].value)
    ).new()
    heat_print_gbs.append(tmp_gb)
# `tmp` and `tmp_gb` both hold the value computed for the last tau.
assert tmp_gb.isclose(io.from_numpy(tmp))
for i in range(n_features_out):
    assert heat_print_gbs[i].isclose(io.from_numpy(heat_print[i])), i

In [None]:
# Remaining terms (NumPy/SciPy) via the three-term recurrence
# twf_new = factor @ twf_cur - twf_old, with factor = 2 / a * (L - a * I).
factor = 2 / a * (L - a * sparse.eye(num_nodes))
for k in range(2, c.shape[1]):
    twf_new = factor.dot(twf_cur) - twf_old

    # Add this term, weighted by each tau's k-th coefficient.
    for i in range(n_features_out):
        tmp = c[i, k] * twf_new
        if sparse.issparse(tmp):
            tmp = tmp.todense()

        heat_print[i] += tmp

    # Shift the window for the next order.
    twf_old = twf_cur
    twf_cur = twf_new

In [None]:
# grblas version of factor = 2 / a * (L - a * I): subtract a diagonal of `a`
# from L_gb, then scale by 2 / a.
# factor_gb = op.minus(  # also works
#     L_gb | ss.diag(Vector.from_values(np.arange(num_nodes), a)),
#     require_monoid=False
# ).new()
factor_gb = op.plus(
    L_gb | -ss.diag(Vector.from_values(np.arange(num_nodes), a))
).new()

factor_gb = op.times(factor_gb, 2 / a).new()
assert isclose(factor_gb, factor)

In [None]:
# Remaining terms (grblas): same recurrence as the NumPy loop,
# twf_new = factor @ twf_cur - twf_old.
for k in range(2, c.shape[1]):
    # twf_new_gb = op.minus(  # also works
    #     (factor_gb @ twf_cur_gb) | twf_old_gb,
    #     require_monoid=False
    # ).new()
    twf_new_gb = op.plus(
        (factor_gb @ twf_cur_gb) | -twf_old_gb
    ).new()

    # Accumulate this term, scaled by each tau's k-th coefficient.
    for i in range(n_features_out):
        heat_print_gbs[i](op.plus) << op.times(twf_new_gb, cs_gb[i][k].value)
    # Shift the window for the next order.
    twf_old_gb = twf_cur_gb
    twf_cur_gb = twf_new_gb

In [None]:
# After the loops, the final recurrence state and every per-tau result must
# agree between grblas and NumPy.
assert twf_new_gb.isclose(io.from_numpy(twf_new))
assert twf_cur_gb.isclose(io.from_numpy(twf_cur))
assert twf_old_gb.isclose(io.from_numpy(twf_old))
for i in range(n_features_out):
    assert heat_print_gbs[i].isclose(io.from_numpy(heat_print[i]), abs_tol=1e-15), i

### Featurize

First, let's verify the characteristic function and implement it without complex numbers.

In [None]:
# `t` here should probably be an input to the GraphWave algorithm
def characteristic_function(s, t=np.arange(0, 100, 2)):
    """Return, for each entry t_k of `t`, the mean over `s` of exp(1j * s) ** t_k.

    `t` must be a NumPy array; the result has one complex value per entry of `t`.
    """
    unit_phasors = np.exp(1j * s)
    exponents = t.reshape(-1, 1)  # column vector, so each row of the power is one t_k
    powers = unit_phasors ** exponents
    return powers.mean(axis=1)

In [None]:
# When every node is a signal, each slice is expected to be symmetric, so the
# transpose is only verified.  Otherwise swap the last two axes so that
# iterating over a slice walks over signals.
if use_all_signals:
    np.testing.assert_array_almost_equal(heat_print.transpose((0, 2, 1)), heat_print)
else:
    heat_print = heat_print.transpose((0, 2, 1))

In [None]:
# Grab a single sample to experiment with: after the two breaks, i == 0,
# sig is heat_print[0] and node_sig is its first row.
for i, sig in enumerate(heat_print):
    for node_sig in sig:
        break
    break

In [None]:
# Display the reference result for the sample row.
characteristic_function(node_sig)

In [None]:
# Inputs for the step-by-step versions below.
s = node_sig
t = np.arange(0, 100, 2)  # same values as characteristic_function's default `t`
tt = t.reshape(-1, 1)  # column vector, for broadcasting against s

In [None]:
# Using complex numbers
# Same computation as characteristic_function, with the real and imaginary
# parts of the result pulled apart for the comparisons that follow.
tmp = np.exp(1j * s) ** tt
rv = tmp.mean(axis=1)
rv_real, rv_imag = rv.real, rv.imag

In [None]:
# Using real numbers
# exp(1j * s) has real part cos(s) and imaginary part sin(s); its angle is
# recovered with arctan2, and raising to the power t multiplies that angle by t.
A = np.cos(s)
B = np.sin(s)
theta = np.arctan2(B, A)
theta2 = theta * tt  # row k holds t[k] * theta
rv_real2 = np.cos(theta2).mean(axis=1)
rv_imag2 = np.sin(theta2).mean(axis=1)

In [None]:
# The step-by-step complex version must match the function, and the
# real-arithmetic version must match the complex one.
np.testing.assert_array_almost_equal(characteristic_function(node_sig), rv)
np.testing.assert_array_almost_equal(rv_real, rv_real2)
np.testing.assert_array_almost_equal(rv_imag, rv_imag2)

In [None]:
# Real-arithmetic characteristic function in grblas for the first column of
# the first per-tau result, checked against the NumPy values.
s_gb = heat_print_gbs[0][:, 0].new()
t_gb = Vector.from_values(np.arange(t.size), t)
A_gb = op.cos(s_gb).new()
B_gb = op.sin(s_gb).new()
theta_gb = op.atan2(B_gb & A_gb).new()
theta2_gb = t_gb.outer(theta_gb).new()  # counterpart of theta * tt

rv_real_gb = op.cos(theta2_gb).new()
# Entries missing from the sparse result stand for an angle of 0, whose
# cosine is 1; fill them in so that they count towards the mean.
rv_real_gb(~rv_real_gb.S) << 1
# reduce_rowwise is the spelling already used for the degree computation above.
rv_real_gb = rv_real_gb.reduce_rowwise(op.plus).new()
# Mean over the full length of the signal (stored or not).
rv_real_gb = op.truediv(rv_real_gb, s_gb.size).new()

# Missing entries contribute sin(0) == 0 to the sum, so no fill is needed here.
rv_imag_gb = op.sin(theta2_gb).new()
rv_imag_gb = rv_imag_gb.reduce_rowwise(op.plus).new()
rv_imag_gb = op.truediv(rv_imag_gb, s_gb.size).new()

assert s_gb.isclose(io.from_numpy(s), abs_tol=1e-15)
assert isclose(t_gb, t)
assert theta_gb.isclose(io.from_numpy(theta), abs_tol=1e-15)
assert isclose(rv_real_gb, rv_real, abs_tol=1e-15)
assert isclose(rv_imag_gb, rv_imag, abs_tol=1e-15)

In [None]:
def characteristic_function_gb(s_gb, t=np.arange(0, 100, 2)):
    """Evaluate the characteristic function of a grblas Vector with real arithmetic.

    Real-valued counterpart of ``mean(exp(1j * s) ** t_k)``: the angle of
    ``exp(1j * s)`` is recovered with atan2(sin s, cos s), scaled by each
    ``t_k``, and the cosine / sine of the result are averaged over the signal.

    Parameters
    ----------
    s_gb : Vector
        Signal values.  Entries that are not stored are treated as zeros.
    t : numpy.ndarray
        Points at which the function is evaluated.

    Returns
    -------
    tuple of Vector
        ``(rv_real_gb, rv_imag_gb)``: real and imaginary parts, one entry per
        element of `t`.
    """
    t_gb = Vector.from_values(np.arange(t.size), t)
    A_gb = op.cos(s_gb).new()
    B_gb = op.sin(s_gb).new()
    theta_gb = op.atan2(B_gb & A_gb).new()
    theta2_gb = t_gb.outer(theta_gb).new()  # one row per entry of t

    # Average over the full length of the signal instead of relying on a
    # module-level node count.
    n_entries = s_gb.size

    rv_real_gb = op.cos(theta2_gb).new()
    # Entries missing from the sparse result stand for an angle of 0, whose
    # cosine is 1; fill them in so that they count towards the mean.
    rv_real_gb(~rv_real_gb.S) << 1
    rv_real_gb = rv_real_gb.reduce_rowwise(op.plus).new()
    rv_real_gb = op.truediv(rv_real_gb, n_entries).new()

    # Missing entries contribute sin(0) == 0 to the sum, so no fill is needed.
    rv_imag_gb = op.sin(theta2_gb).new()
    rv_imag_gb = rv_imag_gb.reduce_rowwise(op.plus).new()
    rv_imag_gb = op.truediv(rv_imag_gb, n_entries).new()
    return rv_real_gb, rv_imag_gb

# The function must reproduce the NumPy reference for the same sample column.
rv_real_gb, rv_imag_gb = characteristic_function_gb(heat_print_gbs[0][:, 0].new())
assert isclose(rv_real_gb, rv_real, abs_tol=1e-15)
assert isclose(rv_imag_gb, rv_imag, abs_tol=1e-15)

#### Featurize, for real this time

In [None]:
# NumPy featurization: for every tau slice, each row is turned into its
# characteristic function values with real and imaginary parts interleaved
# (real_0, imag_0, real_1, imag_1, ...).  Rows are stacked per slice and the
# slices are then placed side by side.
feats = [
    np.vstack([
        np.column_stack([values.real, values.imag]).reshape(-1)
        for values in (characteristic_function(node_sig) for node_sig in sig)
    ])
    for sig in heat_print
]
result = np.hstack(feats)

In [None]:
# grblas featurization: one list of per-row feature Vectors for every tau.
feats_gb = []
for sig_gb in heat_print_gbs:
    # Mirror the axis swap applied to the NumPy heat_print when only a subset
    # of nodes is used as signals.
    if not use_all_signals:
        sig_gb = sig_gb.T.new()
    sig_feats_gb = []
    for i in range(sig_gb.nrows):
        node_sig_gb = sig_gb[i, :].new()
        real_gb, imag_gb = characteristic_function_gb(node_sig_gb)
        # Vector concat or zip may be nice
        # We don't need to zip here.  We could pack into `result_gb` however we wish.
        # I do it this way to match the numpy code.
        # Interleave: real parts at even positions, imaginary parts at odd ones.
        node_feats_gb = Vector.new(float, size=2*real_gb.size)
        node_feats_gb[::2] = real_gb
        node_feats_gb[1::2] = imag_gb
        sig_feats_gb.append(node_feats_gb)
    feats_gb.append(sig_feats_gb)

In [None]:
# Vector concat to Matrix may also be nice (see vstack and hstack)
# Assemble the final Matrix: row i belongs to the i-th feature Vector of each
# tau, and tau j fills the j-th block of columns.
# `node_feats_gb` on the next line is the last Vector left over from the
# previous cell; only its size is used.
result_gb = Matrix.new(float, nrows=n_signals, ncols=n_features_out * node_feats_gb.size)
for j, sig_feats_gb in enumerate(feats_gb):
    for i, node_feats_gb in enumerate(sig_feats_gb):
        result_gb[i, j*node_feats_gb.size:(j+1)*node_feats_gb.size] = node_feats_gb
assert isclose(result_gb, result)