In [7]:
import builtins

import networkx as nx
import numba
import numpy as np
import pandas as pd
import scipy
from scipy.sparse import csr_matrix, coo_matrix

In [8]:
def vrepr(v):
    """Render a 1 x n sparse matrix as a single-column DataFrame.

    Each stored entry is placed at the row matching its column index in ``v``;
    positions without a stored entry are displayed as empty strings.
    """
    n_rows, n_cols = v.shape
    assert n_rows == 1
    frame = pd.DataFrame(index=range(n_cols), columns=[""])
    entries = v.tocoo()
    for position, value in zip(entries.col, entries.data):
        frame.iloc[position] = value
    present = frame.notnull()
    return frame.where(present, "")


def hrepr(v):
    """Render a 1 x n sparse matrix as a single-row DataFrame (transpose of ``vrepr``)."""
    column_view = vrepr(v)
    return column_view.T


def mrepr(m):
    """Render a sparse matrix as a DataFrame.

    Stored entries are shown at their (row, column) position; positions
    without a stored entry are displayed as empty strings.
    """
    n_rows, n_cols = m.shape
    frame = pd.DataFrame(index=range(n_rows), columns=range(n_cols))
    entries = m.tocoo()
    for r, c, value in zip(entries.row, entries.col, entries.data):
        frame.iloc[r, c] = value
    present = frame.notnull()
    return frame.where(present, "")


def draw(m):
    """Draw a sparse matrix as a directed graph with labelled edge weights.

    Every stored entry ``m[i, j]`` becomes an edge ``i -> j`` whose ``weight``
    attribute is the stored value.
    """
    graph = nx.DiGraph()
    entries = m.tocoo()
    for src, dst, weight in zip(entries.row, entries.col, entries.data):
        graph.add_edge(src, dst, weight=weight)
    layout = nx.spring_layout(graph)
    nx.draw_networkx(graph, layout, node_color="red", node_size=500)
    labels = {(src, dst): attrs["weight"] for src, dst, attrs in graph.edges(data=True)}
    nx.draw_networkx_edge_labels(graph, layout, edge_labels=labels)

In [None]:
data = [
    [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1],
    [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6],
    [3, 2, 3, 1, 5, 3, 7, 8, 3, 1, 7, 4],
]

In [None]:
rows, cols, weights = data
m = coo_matrix((weights, (rows, cols)), shape=(7, 7))
m

In [None]:
m = m.tocsr()

In [None]:
m.data

In [None]:
m.nnz

In [None]:
m.shape

In [None]:
m.toarray()

In [None]:
mrepr(m)

In [None]:
draw(m)

## How to update values without changing nnz

In [None]:
m2 = m.copy()

In [None]:
m2.data = np.array([1] * len(m2.data))

In [None]:
m2.toarray()

In [None]:
(m * m2).toarray()  # Appears to do matrix multiplication

In [None]:
m.multiply(m2).toarray()

In [None]:
m.toarray()

In [None]:
m.data

In [None]:
m.tocoo().col

In [None]:
m.shape

## Sparse vector?

Simulate by using a (1 x n) sparse matrix

In [None]:
v = coo_matrix(([1], ([0], [1])), shape=(1, 7))
v = v.tocsr()

In [None]:
v.toarray()

In [None]:
hrepr(v)

## Attributes of csr_matrix object

Because scipy.sparse has no concept of semirings, we will need to implement lots of things in numba.

To work efficiently, we need access to the underlying array objects.

In [None]:
m.data

In [None]:
m.dtype

In [None]:
m.format

In [None]:
m.indices

In [None]:
m.indptr

In [None]:
mrepr(m)

In [None]:
m.nnz

In [None]:
m.nonzero()

In [None]:
m.shape

In [None]:
type(m.indptr)

### Can we update sparse matrices without changing the shape?

In [None]:
m.data

In [None]:
m.data = np.array([2, 3, 8, 4, 1, 3, 3, 7, 1, 5, 7, 3, 2], dtype=np.int64)

In [None]:
m.data

In [None]:
m.indices

In [None]:
m.indices = np.array([1, 3, 4, 6, 5, 0, 2, 5, 2, 2, 3, 4, 5], dtype=np.int32)

In [None]:
m.indices

In [None]:
m.indptr

In [None]:
m.indptr = np.array([0, 2, 4, 5, 7, 8, 9, 13], dtype=np.int32)

In [None]:
m.indptr

In [None]:
m.toarray()

In [None]:
mrepr(m)

In [None]:
m.nnz

In [None]:
m.shape

### Use Numba to perform matrix multiplication with a semiring

In [None]:
@numba.njit
def numba_plus(x, y):
    """Jitted addition: return ``x + y``."""
    return x + y


@numba.njit
def numba_times(x, y):
    """Jitted multiplication: return ``x * y``."""
    return x * y


@numba.njit
def numba_min(x, y):
    """Jitted two-argument minimum: return ``min(x, y)``."""
    # NOTE(review): a later cell in this notebook defines its own global ``min``;
    # which ``min`` is used here depends on what is bound when this is first compiled.
    return min(x, y)


@numba.njit
def numba_max(x, y):
    """Jitted two-argument maximum: return ``max(x, y)``."""
    # NOTE(review): a later cell in this notebook defines its own global ``max``;
    # which ``max`` is used here depends on what is bound when this is first compiled.
    return max(x, y)


@numba.njit
def numba_bnot(x):
    """Jitted bitwise complement: return ``~x``."""
    return ~x

In [None]:
@numba.njit
def mxm(C, A, B, semiring):
    """Dense matrix multiply over a semiring, written into ``C`` in place.

    ``semiring`` is a ``(plus, times, identity)`` triple: each output cell
    starts at ``identity`` and accumulates ``plus(acc, times(A[i, k], B[k, j]))``
    over ``k``.

    Returns 0 on success, or -1 (leaving ``C`` untouched) when the shapes of
    ``C``, ``A`` and ``B`` are not compatible.
    """
    out_rows, out_cols = C.shape
    a_rows, inner = A.shape
    b_rows, b_cols = B.shape
    if out_rows != a_rows or out_cols != b_cols or inner != b_rows:
        return -1
    plus, times, identity = semiring
    for row in range(out_rows):
        for col in range(out_cols):
            acc = identity
            for k in range(inner):
                acc = plus(acc, times(A[row, k], B[k, col]))
            C[row, col] = acc
    return 0

In [None]:
A = np.array([[1, 2, 3], [4, 5, 6]])

In [None]:
B = np.array([[1], [5], [9]])

In [None]:
A

In [None]:
B

In [None]:
A.dot(B)

In [None]:
C = np.array([[1], [1]])

In [None]:
mxm(C, A, B, (numba_plus, numba_times, 0))

In [None]:
C

In [None]:
mxm(C, A, B, (numba_max, numba_plus, 0))
C

In [None]:
mxm(C, A, B, (numba_min, numba_plus, np.iinfo(A.dtype).max))
C

## Can we use sparse matrices?

In [None]:
v = coo_matrix(([0], ([0], [1])), shape=(1, 7)).tocsr()
hrepr(v)

In [None]:
rows, cols, weights = data
m = coo_matrix((weights, (rows, cols)), shape=(7, 7)).tocsr()
mrepr(m)

In [None]:
C = coo_matrix(([], ([], [])), shape=(1, 7), dtype=np.int64).tocsr()

In [None]:
hrepr(C)

In [None]:
semiring = (numba_min, numba_plus, np.iinfo(A.dtype).max)
mxm(C, v, m, semiring)

In [17]:
def mxm(C, A, B, semiring):
    """Sparse matrix multiply over a semiring, storing the result in ``C``.

    Parameters
    ----------
    C : sparse matrix whose ``data``, ``indices`` and ``indptr`` are overwritten
        with the result (assumed to be a ``csr_matrix`` -- TODO confirm callers).
    A, B : scipy sparse matrices to multiply.
    semiring : ``(plus, times, identity)`` triple passed through to ``_mxm``.

    Returns
    -------
    0 on success, -1 (leaving ``C`` untouched) if the shapes are incompatible.
    """
    cr, cc = C.shape
    ar, ac = A.shape
    br, bc = B.shape
    if cr != ar or cc != bc or ac != br:
        return -1
    plus, times, identity = semiring
    # _mxm walks a row of A and a column of B in lock-step over their index arrays.
    # That is only correct when A is in CSR form, B is in CSC form, and both have
    # their indices sorted, so enforce all three here instead of assuming them.
    a = A.tocsr()
    if not a.has_sorted_indices:
        a = a.sorted_indices()
    b = B.tocsc()
    if not b.has_sorted_indices:
        b = b.sorted_indices()
    d, i, ip = _mxm(
        a.data, a.indices, a.indptr, b.data, b.indices, b.indptr, plus, times, identity, C.dtype
    )
    C.data = d
    C.indices = i
    C.indptr = ip
    return 0


@numba.njit
def _mxm(a_data, a_indices, a_indptr, b_data, b_indices, b_indptr, plus, times, identity, dtype):
    """Semiring product of a CSR matrix (``a_*``) and a CSC matrix (``b_*``).

    For every (row of A, column of B) pair the two index arrays are merged in
    lock-step; whenever the inner indices match, ``times`` of the two values is
    folded into the running value with ``plus``, starting from ``identity``.
    An output entry is stored only if at least one index matched.

    The merge requires the indices inside each row of A and each column of B
    to be sorted in ascending order.

    Returns ``(data, indices, indptr)`` describing the result in CSR layout.
    """
    n_rows = a_indptr.size - 1
    n_cols = b_indptr.size - 1
    # Final array size is unknown, so we give ourselves room and then adjust on the fly
    tmp_output_size = a_data.size * 2
    data = np.empty((tmp_output_size,), dtype=dtype)
    indices = np.empty((tmp_output_size,), dtype=a_indices.dtype)
    indptr = np.empty((a_indptr.size,), dtype=a_indptr.dtype)
    output_counter = 0
    for iptr in range(n_rows):
        indptr[iptr] = output_counter
        # The row's extent does not depend on the column, so read it once per row
        a_start = a_indptr[iptr]
        a_stop = a_indptr[iptr + 1]
        for jptr in range(n_cols):
            a_counter = a_start
            b_counter = b_indptr[jptr]
            b_stop = b_indptr[jptr + 1]
            val = identity
            nonempty = False
            while a_counter < a_stop and b_counter < b_stop:
                a_k = a_indices[a_counter]
                b_k = b_indices[b_counter]
                if a_k == b_k:
                    val = plus(val, times(a_data[a_counter], b_data[b_counter]))
                    nonempty = True
                    a_counter += 1
                    b_counter += 1
                elif a_k < b_k:
                    a_counter += 1
                else:
                    b_counter += 1
            if nonempty:
                if output_counter >= tmp_output_size:
                    # We filled up the allocated space; copy existing data to a larger array
                    tmp_output_size *= 2
                    new_data = np.empty((tmp_output_size,), dtype=data.dtype)
                    new_indices = np.empty((tmp_output_size,), dtype=indices.dtype)
                    new_data[:output_counter] = data[:output_counter]
                    new_indices[:output_counter] = indices[:output_counter]
                    data = new_data
                    indices = new_indices
                data[output_counter] = val
                indices[output_counter] = jptr
                output_counter += 1
    # Add final entry to indptr (should indicate nnz in the output).
    # Index by the row count rather than the loop variable: with zero rows the loop
    # never runs, the loop variable is undefined, and indptr[0] must still be written.
    nnz = output_counter
    indptr[n_rows] = nnz
    # Trim output arrays
    data = data[:nnz]
    indices = indices[:nnz]

    return (data, indices, indptr)

In [None]:
m.data

In [None]:
m.indices

In [None]:
m.indptr

In [None]:
mrepr(m)

In [None]:
v

In [None]:
m

In [None]:
# Hmm, this is problematic. Apparently scipy.sparse considers empty to be zero when doing dot product.
# It should have two non-empty elements, both of which have a value of 0.
v.dot(m)

In [10]:
A = coo_matrix(([1, 3, 5, 6], ([0, 0, 1, 1], [0, 2, 1, 2])), shape=(2, 3)).tocsr()
mrepr(A)

Unnamed: 0,0,1,2
0,1.0,,3
1,,5.0,6


In [11]:
B = coo_matrix(([1, 5, 7], ([0, 1, 2], [0, 0, 1])), shape=(3, 2)).tocsr()
b = B.tocsc()
mrepr(B)

Unnamed: 0,0,1
0,1.0,
1,5.0,
2,,7.0


In [12]:
C = coo_matrix(([], ([], [])), shape=(2, 2), dtype=np.int64).tocsr()
mrepr(C)

Unnamed: 0,0,1
0,,
1,,


In [18]:
# Use the jitted operators defined earlier in this notebook; `plus` and `times`
# are only defined in a later cell, so referencing them here breaks a top-to-bottom run.
mxm(C, A, B, (numba_plus, numba_times, 0))

0

In [19]:
A.data

array([1, 3, 5, 6], dtype=int64)

In [20]:
A.indices

array([0, 2, 1, 2], dtype=int32)

In [21]:
A.indptr

array([0, 2, 4], dtype=int32)

In [22]:
b.data

array([1, 5, 7], dtype=int64)

In [23]:
b.indices

array([0, 1, 2], dtype=int32)

In [24]:
b.indptr

array([0, 2, 3], dtype=int32)

In [25]:
C.data

array([ 1, 21, 25, 42])

In [26]:
C.indices

array([0, 1, 0, 1], dtype=int32)

In [27]:
C.indptr

array([0, 2, 4], dtype=int32)

In [None]:
mrepr(C)

In [None]:
np.empty((1,), np.uint)

In [None]:
_318[0] = 1

In [None]:
_318[0]

In [None]:
type(np.uint) is type

In [None]:
type(float) is type

In [None]:
class Matrix:
    pass

In [None]:
type(Matrix) is type

In [None]:
np.dtype(Matrix)

In [None]:
# Recompute the dtype rather than reading it from IPython's Out[N] history
np.dtype(Matrix) == object

In [None]:
# `np.bool` was a deprecated alias removed in NumPy 1.24; `np.bool_` is the stable name
np.dtype(np.bool_)

In [None]:
# Recompute the dtype rather than reading it from IPython's Out[N] history
np.dtype(np.bool_) == object

In [None]:
x = scipy.sparse.csr_matrix((4, 3), dtype=np.int32)
y = csr_matrix(([7], ([0], [0])), shape=(4, 3), dtype=np.int32)
z = csr_matrix(y)

In [None]:
z is y

In [None]:
z.data is y.data

In [None]:
z[0, 1] = 12

In [None]:
z.toarray()

In [None]:
y.toarray()

In [None]:
z.resize((12, 1))

In [None]:
z.toarray()

In [None]:
not True

In [None]:
not (True ^ True)

In [None]:
~(5 ^ 2)

In [None]:
5 // 2

In [14]:
from numba import types as nt
from numba import njit

# Most operations with semirings will require numba
# Even if an equivalent function exists in numpy or scipy, numba
# doesn't seem to like it as much as a jit'd function doing the same thing

# Scalar types covered by each operator family (order matters: it fixes the
# order of the signature lists below).
_bool_types = [nt.boolean]
_int_types = [
    nt.uint8,
    nt.int8,
    nt.uint16,
    nt.int16,
    nt.uint32,
    nt.int32,
    nt.uint64,
    nt.int64,
]
_float_types = [nt.float32, nt.float64]

# Unary signatures: T -> T
_unary_bool = [t(t) for t in _bool_types]
_unary_int = [t(t) for t in _int_types]
_unary_float = [t(t) for t in _float_types]
_unary_all = _unary_bool + _unary_int + _unary_float

# Binary signatures: (T, T) -> T
_binary_bool = [t(t, t) for t in _bool_types]
_binary_int = [t(t, t) for t in _int_types]
_binary_float = [t(t, t) for t in _float_types]
_binary_all = _binary_bool + _binary_int + _binary_float

# Comparison signatures: (T, T) -> boolean
_binary_int_to_bool = [nt.boolean(t, t) for t in _int_types]
_binary_float_to_bool = [nt.boolean(t, t) for t in _float_types]
_binary_all_to_bool = _binary_bool + _binary_int_to_bool + _binary_float_to_bool


#################
# Unary Operators
#################


@njit(_unary_all)
def identity(x):
    """Identity: return ``x`` unchanged (bool, integer and float signatures)."""
    return x


@njit(_unary_all)
def abs(x):
    """Absolute value (bool, integer and float signatures).

    This operator intentionally shares its name with the builtin, so the builtin
    is referenced explicitly through the ``builtins`` module. A bare ``abs(x)``
    here reads as self-recursion and only reaches the builtin because the
    signatures are compiled before this ``def`` rebinds the global name.
    """
    return builtins.abs(x)


@njit(_unary_int + _unary_float)
def ainv(x):
    """Additive inverse: return ``-x`` (integer and float signatures)."""
    return -x


@njit(_unary_float)
def minv(x):
    """Multiplicative inverse: return ``1 / x`` (float signatures only)."""
    return 1 / x


@njit(_unary_bool)
def lnot(x):
    """Logical inverse: return ``not x`` (boolean signature only)."""
    return not x


@njit(_unary_int)
def bnot(x):
    """Bitwise complement: return ``~x`` (integer signatures only)."""
    return ~x


##################
# Binary Operators
##################


@njit(_binary_bool)
def lor(x, y):
    """Logical OR of two booleans, computed as ``x | y``."""
    return x | y


@njit(_binary_bool)
def land(x, y):
    """Logical AND of two booleans, computed as ``x & y``."""
    return x & y


@njit(_binary_bool)
def lxor(x, y):
    """Logical XOR of two booleans, computed as ``x ^ y``."""
    return x ^ y


@njit(_binary_bool)
def lxnor(x, y):
    """Logical XNOR of two booleans: the negation of ``x ^ y``."""
    return not (x ^ y)


@njit(_binary_int)
def bor(x, y):
    """Bitwise OR: return ``x | y`` (integer signatures only)."""
    return x | y


@njit(_binary_int)
def band(x, y):
    """Bitwise AND: return ``x & y`` (integer signatures only)."""
    return x & y


@njit(_binary_int)
def bxor(x, y):
    """Bitwise XOR: return ``x ^ y`` (integer signatures only)."""
    return x ^ y


@njit(_binary_int)
def bxnor(x, y):
    """Bitwise XNOR: return ``~(x ^ y)`` (integer signatures only)."""
    return ~(x ^ y)


@njit(_binary_all_to_bool)
def eq(x, y):
    """Equal: return ``x == y`` as a boolean (bool, integer and float inputs)."""
    return x == y


@njit(_binary_all_to_bool)
def ne(x, y):
    """Not equal: return ``x != y`` as a boolean (bool, integer and float inputs)."""
    return x != y


@njit(_binary_all_to_bool)
def gt(x, y):
    """Greater than: return ``x > y`` as a boolean (bool, integer and float inputs)."""
    return x > y


@njit(_binary_all_to_bool)
def lt(x, y):
    """Less than: return ``x < y`` as a boolean (bool, integer and float inputs)."""
    return x < y


@njit(_binary_all_to_bool)
def ge(x, y):
    """Greater than or equal: return ``x >= y`` as a boolean (bool, integer and float inputs)."""
    return x >= y


@njit(_binary_all_to_bool)
def le(x, y):
    """Less than or equal: return ``x <= y`` as a boolean (bool, integer and float inputs)."""
    return x <= y


@njit(_binary_all)
def first(x, y):
    """First argument: return ``x`` and ignore ``y``."""
    return x


@njit(_binary_all)
def second(x, y):
    """Second argument: return ``y`` and ignore ``x``."""
    return y


@njit(_binary_int + _binary_float)
def min(x, y):
    """Minimum of two values (integer and float signatures).

    This operator intentionally shares its name with the builtin, so the builtin
    is referenced explicitly through the ``builtins`` module. A bare ``min(x, y)``
    here reads as self-recursion and only reaches the builtin because the
    signatures are compiled before this ``def`` rebinds the global name.
    """
    return builtins.min(x, y)


@njit(_binary_int + _binary_float)
def max(x, y):
    """Maximum of two values (integer and float signatures).

    This operator intentionally shares its name with the builtin, so the builtin
    is referenced explicitly through the ``builtins`` module. A bare ``max(x, y)``
    here reads as self-recursion and only reaches the builtin because the
    signatures are compiled before this ``def`` rebinds the global name.
    """
    return builtins.max(x, y)


@njit(_binary_int + _binary_float)
def plus(x, y):
    """Addition: return ``x + y`` (integer and float signatures)."""
    return x + y


@njit(_binary_int + _binary_float)
def minus(x, y):
    """Subtraction: return ``x - y`` (integer and float signatures)."""
    return x - y


@njit(_binary_int + _binary_float)
def times(x, y):
    """Multiplication: return ``x * y`` (integer and float signatures)."""
    return x * y


@njit(_binary_int)
def floordiv(x, y):
    """Integer floor division: return ``x // y`` (e.g. ``5 // 4 == 1``); integer signatures only."""
    return x // y


@njit(_binary_float)
def truediv(x, y):
    """Float division: return ``x / y`` (e.g. ``5 / 4 == 1.25``); float signatures only."""
    return x / y


@njit(_binary_int + _binary_float)
def div(x, y):
    """Division: return ``x / y`` (integer and float signatures).

    NOTE(review): the integer signatures declare an integer return type although
    ``/`` is true division, so the quotient is presumably converted back to the
    integer type -- confirm the intended rounding for negative operands.
    """
    return x / y

In [1]:
cd ..

/Users/jkitchen/Projects/HIVE/grblas/grblas/backends


In [2]:
import python as py

In [5]:
py.GrB_BinaryOp.GrB_LXOR(False, False)

False