In [None]:
import ctypes
import numbers
import math
import mlir
import numpy as np
from mlir import ir
from mlir import passmanager
from mlir import execution_engine
from mlir import runtime
from mlir import dialects

from mlir.dialects import arith
from mlir.dialects import bufferization
from mlir.dialects import func
from mlir.dialects import linalg
from mlir.dialects import sparse_tensor
from mlir.dialects import tensor
from mlir.dialects import scf
from mlir.dialects import memref

from mlir.dialects.sparse_tensor import DimLevelType

In [None]:
from ctypes.util import find_library

# Locate the MLIR C runner utilities shared library; the same path is later handed
# to every ExecutionEngine via `shared_libs=[SHARED_LIB]`.
SHARED_LIB = find_library("mlir_c_runner_utils")
if SHARED_LIB is None:
    # Fail early with an actionable message instead of an obscure error from
    # ctypes.CDLL(None) / a missing-symbol AttributeError further down.
    raise FileNotFoundError(
        "Could not locate the 'mlir_c_runner_utils' shared library; "
        "make sure it is installed and visible on the library search path."
    )
c_lib = ctypes.CDLL(SHARED_LIB)

# (numpy scalar type, to-MLIR converter, from-MLIR converter). The C symbol names
# only differ in their element-type suffix, so they are looked up by suffix.
_support_types = [
    (
        np_type,
        getattr(c_lib, f"convertToMLIRSparseTensor{suffix}"),
        getattr(c_lib, f"convertFromMLIRSparseTensor{suffix}"),
    )
    for np_type, suffix in (
        (np.int8, "I8"),
        (np.int16, "I16"),
        (np.int32, "I32"),
        (np.int64, "I64"),
        (np.float16, "F16"),
        (np.float32, "F32"),
        (np.float64, "F64"),
        (np.complex64, "C32"),
        (np.complex128, "C64"),
    )
]

# numpy scalar type -> (convert_to, convert_from)
types_to_funcs = {}
for typ, to_, from_ in _support_types:
    # Without an explicit restype ctypes truncates return values to C int,
    # which would corrupt the returned tensor pointer.
    to_.restype = ctypes.c_void_p
    from_.restype = ctypes.c_void_p
    types_to_funcs[typ] = (to_, from_)

In [None]:
# ctypes type the raw tensor address returned by the runtime library is cast to
# (pointer to void*).
LLVMPTR = ctypes.POINTER(ctypes.c_void_p)

def to_sparse_tensor(indices, values, shape, sparsity=None, perm=None):
    """Build an MLIR sparse tensor from COO-style data via the C runner library.

    Parameters
    ----------
    indices : array-like of non-negative ints
        Coordinates of the stored entries; must hold ``len(values) * len(shape)``
        numbers (one coordinate tuple per value).
    values : np.ndarray
        Stored values. The dtype selects the conversion routine and must be a
        key of ``types_to_funcs``.
    shape : sequence of int
        Tensor shape; its length is the rank.
    sparsity : sequence of DimLevelType, optional
        One level type per dimension. Defaults to ``[dense, compressed]`` for
        rank 2 and ``[compressed]`` for rank 1; any other non-zero rank needs an
        explicit value.
    perm : sequence of int, optional
        Dimension ordering; defaults to the identity ``0..rank-1``.

    Returns
    -------
    ctypes pointer
        ``ctypes.pointer`` wrapping the tensor handle (cast to ``LLVMPTR``), i.e.
        the form passed as an argument to ``ExecutionEngine.invoke`` elsewhere
        in this notebook. ``result[0]`` is the handle itself.

    Raises
    ------
    ValueError
        If ``sparsity``, ``perm`` or ``indices`` do not match the rank / number
        of values (the C routine would otherwise read past the buffers).
    KeyError
        If ``values.dtype`` has no registered conversion routine.
    """
    assert isinstance(values, np.ndarray)
    rank = len(shape)
    num_values = len(values)

    if sparsity is None:
        if rank == 2:
            sparsity = [DimLevelType.dense, DimLevelType.compressed]
        elif rank == 1:
            sparsity = [DimLevelType.compressed]
        else:
            sparsity = []
    if len(sparsity) != rank:
        raise ValueError(
            f"sparsity must provide one level type per dimension: "
            f"got {len(sparsity)} for rank {rank}"
        )

    # All buffers are made C-contiguous because only their base address is
    # handed to the C function.
    np_indices = np.ascontiguousarray(np.array(indices, dtype=np.uint64))
    if np_indices.size != num_values * rank:
        raise ValueError(
            f"indices must hold {num_values} coordinate tuples of length {rank} "
            f"({num_values * rank} numbers), got {np_indices.size}"
        )
    np_shape = np.array(shape, dtype=np.uint64)
    np_sparse = np.array(sparsity, dtype=np.uint8)
    if perm is None:
        np_perm = np.arange(rank, dtype=np.uint64)
    else:
        np_perm = np.array(perm, dtype=np.uint64)
    if np_perm.size != rank:
        raise ValueError(f"perm must have {rank} entries, got {np_perm.size}")
    np_values = np.ascontiguousarray(values)

    el_type = np_values.dtype
    convert_to = types_to_funcs[el_type.type][0]

    u64_ptr = ctypes.POINTER(ctypes.c_ulonglong)
    ptr = convert_to(
        ctypes.c_ulonglong(rank),
        ctypes.c_ulonglong(num_values),
        np_shape.ctypes.data_as(u64_ptr),
        np_values.ctypes.data_as(ctypes.POINTER(runtime.as_ctype(el_type))),
        np_indices.ctypes.data_as(u64_ptr),
        np_perm.ctypes.data_as(u64_ptr),
        np_sparse.ctypes.data_as(ctypes.POINTER(ctypes.c_uint8)),
    )
    return ctypes.pointer(ctypes.cast(ptr, LLVMPTR))

In [None]:
def from_sparse_tensor(tensor_ptr, dtype):
    """Convert an MLIR sparse tensor handle back into COO-style numpy arrays.

    Parameters
    ----------
    tensor_ptr
        Sparse tensor handle, e.g. ``x[0]`` for a value returned by
        ``to_sparse_tensor`` or ``mem_out[0]`` after an engine invocation.
    dtype
        Element type of the tensor, given as a numpy scalar type or anything
        ``np.dtype`` accepts; it must be registered in ``types_to_funcs``.

    Returns
    -------
    tuple
        ``(indices, values, shape, rank, nse)`` where ``indices`` has shape
        ``(nse, rank)``, ``values`` has shape ``(nse,)`` and ``shape`` has shape
        ``(rank,)``.

    Notes
    -----
    The arrays are views created with ``np.ctypeslib.as_array`` over the buffers
    the C routine hands back; nothing is copied here.
    NOTE(review): the lifetime/ownership of those buffers is not visible in this
    notebook — copy the arrays if they must outlive the runtime's storage.
    """
    np_dtype = np.dtype(dtype)
    convert_from = types_to_funcs[np_dtype.type][1]

    # Out-parameters filled in by the C routine.
    rank = ctypes.c_ulonglong(0)
    nse = ctypes.c_ulonglong(0)
    shape = ctypes.POINTER(ctypes.c_ulonglong)()
    # runtime.as_ctype (also used by to_sparse_tensor) covers float16 and complex
    # element types, which np.ctypeslib.as_ctypes_type cannot represent.
    values = ctypes.POINTER(runtime.as_ctype(np_dtype))()
    indices = ctypes.POINTER(ctypes.c_ulonglong)()
    convert_from(tensor_ptr, ctypes.byref(rank), ctypes.byref(nse),
                 ctypes.byref(shape), ctypes.byref(values), ctypes.byref(indices))

    shape = np.ctypeslib.as_array(shape, shape=[rank.value])
    # runtime.to_numpy re-views the ctypes helper structs as real numpy dtypes.
    values = runtime.to_numpy(np.ctypeslib.as_array(values, shape=[nse.value]))
    indices = np.ctypeslib.as_array(indices, shape=[nse.value, rank.value])
    return indices, values, shape, rank.value, nse.value

In [None]:
# 4x5 example with seven stored values, identity dimension order (perm=[0, 1]).
# `sparsity` is omitted, so the rank-2 default of to_sparse_tensor
# ([dense, compressed]) applies.
x = to_sparse_tensor([[0, 0], [0, 2], [0, 4], [1, 1], [2, 3], [3, 0], [3, 1]], np.array([1., 3., 6., 2., 4., 3., 4.]),
                     shape=[4, 5], perm=[0, 1])

In [None]:
# Round-trip check: decode the handle back to (indices, values, shape, rank, nse);
# the tuple is displayed as the cell output.
from_sparse_tensor(x[0], np.float64)

In [None]:
# Same data as `x`, but with the dimension order swapped (perm=[1, 0]).
y = to_sparse_tensor([[0, 0], [0, 2], [0, 4], [1, 1], [2, 3], [3, 0], [3, 1]], np.array([1., 3., 6., 2., 4., 3., 4.]),
                     shape=[4, 5], perm=[1, 0])

In [None]:
# Decode the permuted tensor to compare its COO output with the one for `x`.
from_sparse_tensor(y[0], np.float64)

### Add hardcoded ints

In [None]:
# Build a module with a zero-argument `main` that returns the i32 sum 1 + 2,
# lower it to the LLVM dialect and run it through the JIT execution engine.
with ir.Context(), ir.Location.unknown():
    module = ir.Module.create()
    with ir.InsertionPoint(module.body):
        i32 = ir.IntegerType.get_signless(32)
        @func.FuncOp.from_py_func()
        def main():
            one = arith.ConstantOp(i32, 1)
            two = arith.ConstantOp(i32, 2)
            total = arith.AddIOp(one, two)
            return total
        # Needed so that `engine.invoke` can call `main` with packed ctypes pointers.
        main.func_op.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get()
    # It is okay to manually specify passes
    pm = passmanager.PassManager.parse("builtin.module(convert-arith-to-llvm,convert-func-to-llvm)")
print(module)  # IR before lowering
# The return value of run() is not used; keep `pm` bound to the pass manager.
pm.run(module)
print(module)  # IR after lowering
engine = execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[SHARED_LIB])
arg_pointers = [
    # Result slot. The kernel returns an i32, so the slot must be exactly 32 bits
    # wide; c_long is 64 bits on LP64 platforms and would misread negative results.
    ctypes.pointer(ctypes.c_int32(0)),
]
engine.invoke("main", *arg_pointers)
print('-'*30)
print(f"result = {arg_pointers[0].contents.value}")

**Notes:**

Operation objects (like `total`, the result of calling `arith.AddIOp(...)`) have:

- total.attributes
- total.regions
- total.operands
- total.results

### Multiply input floats

In [None]:
# Build `main(x: f64, y: f64) -> f64` computing x * y, lower it with the
# sparse-compiler pipeline and create a JIT engine for it.
with ir.Context(), ir.Location.unknown():
    module = ir.Module.create()
    with ir.InsertionPoint(module.body):
        f64 = ir.F64Type.get()
        @func.FuncOp.from_py_func(f64, f64)
        def main(x, y):
            product = arith.MulFOp(x, y)
            return product
        # Needed so that `engine.invoke` can call `main` with packed ctypes pointers.
        main.func_op.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get()
    pm = passmanager.PassManager.parse("builtin.module(sparse-compiler)")  # pre-built pass pipeline
# Printed before the passes run, so this shows the un-lowered IR.
print(module)
# NOTE(review): `pm` is rebound to whatever run() returns; it is not used afterwards.
pm = pm.run(module)
engine = execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[SHARED_LIB])

def mul2(x, y):
    """Multiply two Python floats using the JIT-compiled `main` kernel.

    The module-level `engine` is looked up when the function is called, so it
    must still refer to the float-multiply engine at that point. Arguments are
    passed by reference as C doubles; the result slot is passed last.
    """
    lhs = ctypes.c_double(x)
    rhs = ctypes.c_double(y)
    product = ctypes.c_double(0)  # result usually goes at the end
    engine.invoke("main", ctypes.byref(lhs), ctypes.byref(rhs), ctypes.pointer(product))
    return product.value

# Smoke test of the compiled kernel: -1.5 * 21.25.
result = mul2(-1.5, 21.25)
print('-'*30)
print(f"{result=}")

### Multiply sparse tensor type by 3

In [None]:
# Build and JIT-compile `main(x)`, which multiplies the elements of a dynamically
# sized 2-D sparse f64 tensor by the constant 3.0 via linalg.generic.
# The resulting engine is stored in `engine3f`.
with ir.Context(), ir.Location.unknown():
    module = ir.Module.create()
    with ir.InsertionPoint(module.body):
        f64 = ir.F64Type.get()
        index = ir.IndexType.get()
        # Both dimensions compressed, identity dimension ordering.
        # NOTE(review): the trailing `None, 0, 0` are presumably the higher-order
        # map and the pointer/index bit widths — confirm against EncodingAttr.get.
        sp_encoding = sparse_tensor.EncodingAttr.get(
            [sparse_tensor.DimLevelType.compressed, sparse_tensor.DimLevelType.compressed],
            ir.AffineMap.get_permutation([0, 1]),
            None,
            0,
            0
        )
        dynamic = ir.ShapedType.get_dynamic_size()
        rtt = ir.RankedTensorType.get([dynamic, dynamic], f64, sp_encoding)
        @func.FuncOp.from_py_func(rtt)
        def main(x):
            ci3 = arith.ConstantOp(f64, 3.0)
            c0 = arith.ConstantOp(index, 0)
            c1 = arith.ConstantOp(index, 1)
            # The output tensor gets the same (dynamic) dimensions as the input.
            d0 = tensor.DimOp(x, c0)
            d1 = tensor.DimOp(x, c1)
            vv = bufferization.AllocTensorOp(rtt, [d0.result, d1.result], None, None, False)
            # Positional arguments as used here: result types, inputs, outputs,
            # indexing maps (identity for input and output), iterator types.
            generic_op = linalg.GenericOp(
                [rtt],
                [x],
                [vv],
                ir.ArrayAttr.get([ir.AffineMapAttr.get(ir.AffineMap.get_permutation([0, 1])),
                                  ir.AffineMapAttr.get(ir.AffineMap.get_permutation([0, 1]))]),
                #ir.ArrayAttr.get([ir.StringAttr.get("parallel")]*2),
                ir.ArrayAttr.get([ir.Attribute.parse('#linalg.iterator_type<parallel>')]*2),
            )
            # Construct the linalg.generic body
            block = generic_op.regions[0].blocks.append(f64, f64)
            with ir.InsertionPoint(block):
                # a: input element; b: output element (unused).
                a, b = block.arguments
                res = arith.MulFOp(a, ci3)
                linalg.YieldOp([res])
            return generic_op.result
        # Needed so that `engine3f.invoke` can call `main` with packed ctypes pointers.
        main.func_op.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get()
    #pm = passmanager.PassManager.parse("sparse-compiler")
    pm = passmanager.PassManager.parse("builtin.module(sparse-compiler{reassociate-fp-reductions=1 enable-index-optimizations=1})")
#print(module)
# NOTE(review): `pm` is rebound to whatever run() returns; it is not used afterwards.
pm = pm.run(module)
#print('='*50)
#print(module)
engine3f = execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[SHARED_LIB])

In [None]:
# Input: 10x20 f64 tensor with three stored entries, both levels compressed
# (matching the encoding the kernel was built with).
mem_in = to_sparse_tensor([[0, 0], [1, 1], [7, 5]], np.array([11., 22., -4.], dtype=np.float64),
                          [10, 20], sparsity=[DimLevelType.compressed]*2)

# Result slot: a pointer-to-pointer. After the call, `mem_out[0]` is what gets
# handed to from_sparse_tensor as the output tensor handle.
out = ctypes.c_char(0)
mem_out = ctypes.pointer(ctypes.pointer(out))

# Inputs first, result slot last.
arg_pointers = [
    mem_in,
    mem_out,
]
engine3f.invoke("main", *arg_pointers)

In [None]:
# Decode the kernel output back into COO-style numpy arrays.
indices, values, shape, rank, nse = from_sparse_tensor(mem_out[0], np.float64)

In [None]:
# Show every component of the decoded result, one `name=value` entry per line.
print(f"{indices=}", f"{values=}", f"{shape=}", f"{rank=}", f"{nse=}", sep="\n")

#### Int32 Version

In [None]:
# Int32 variant of the "multiply by 3" kernel: same structure, but with i32
# elements and arith.MulIOp. Unlike the f64 cell, printing, running the passes and
# creating the engine all happen inside the `with` block, and the engine is stored
# in the generic name `engine`.
with ir.Context(), ir.Location.unknown():
    module = ir.Module.create()
    with ir.InsertionPoint(module.body):
        i32 = ir.IntegerType.get_signless(32)
        index = ir.IndexType.get()
        # Both dimensions compressed, identity dimension ordering.
        # NOTE(review): the trailing `None, 0, 0` are presumably the higher-order
        # map and the pointer/index bit widths — confirm against EncodingAttr.get.
        sp_encoding = sparse_tensor.EncodingAttr.get(
            [sparse_tensor.DimLevelType.compressed, sparse_tensor.DimLevelType.compressed],
            ir.AffineMap.get_permutation([0, 1]),
            None,
            0,
            0
        )
        dynamic = ir.ShapedType.get_dynamic_size()
        rtt = ir.RankedTensorType.get((dynamic, dynamic), i32, sp_encoding)
        @func.FuncOp.from_py_func(rtt)
        def main(x):
            ci3 = arith.ConstantOp(i32, 3)
            c0 = arith.ConstantOp(index, 0)
            c1 = arith.ConstantOp(index, 1)
            # The output tensor gets the same (dynamic) dimensions as the input.
            d0 = tensor.DimOp(x, c0)
            d1 = tensor.DimOp(x, c1)
            vv = bufferization.AllocTensorOp(rtt, [d0.result, d1.result], None, None, False)
            # The input map is spelled out as (d0, d1) -> (d0, d1) with explicit
            # dim expressions; the output map uses the permutation helper.
            generic_op = linalg.GenericOp(
                [rtt],
                [x],
                [vv],
                ir.ArrayAttr.get([ir.AffineMapAttr.get(ir.AffineMap.get(2, 0, [ir.AffineDimExpr.get(0), ir.AffineDimExpr.get(1)])),
                                  ir.AffineMapAttr.get(ir.AffineMap.get_permutation([0, 1]))]),
                ir.ArrayAttr.get([ir.Attribute.parse('#linalg.iterator_type<parallel>')]*2),
            )
            # Construct the linalg.generic body
            block = generic_op.regions[0].blocks.append(i32, i32)
            with ir.InsertionPoint(block):
                # a: input element; b: output element (unused).
                a, b = block.arguments
                res = arith.MulIOp(a, ci3)
                linalg.YieldOp([res])
            return generic_op.result
        # Needed so that `engine.invoke` can call `main` with packed ctypes pointers.
        main.func_op.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get()
    #pm = passmanager.PassManager.parse("sparse-compiler")
    pm = passmanager.PassManager.parse("builtin.module(sparse-compiler)")
    print(module)
    # NOTE(review): `pm` is rebound to whatever run() returns; it is not used afterwards.
    pm = pm.run(module)
    #print(module)
    engine = execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[SHARED_LIB])

In [None]:
# Input: 10x20 int32 tensor with three stored entries, both levels compressed.
mem_in = to_sparse_tensor([[0, 0], [1, 1], [7, 5]], np.array([11, 22, -4], dtype=np.int32),
                          [10, 20], sparsity=[DimLevelType.compressed]*2)

# Result slot: a pointer-to-pointer. After the call, `mem_out[0]` is what gets
# handed to from_sparse_tensor as the output tensor handle.
out = ctypes.c_char(0)
mem_out = ctypes.pointer(ctypes.pointer(out))

# Inputs first, result slot last.
arg_pointers = [
    mem_in,
    mem_out,
]
engine.invoke("main", *arg_pointers)

In [None]:
# Decode the int32 kernel output back into COO-style numpy arrays.
indices, values, shape, rank, nse = from_sparse_tensor(mem_out[0], np.int32)

In [None]:
# Show every component of the decoded int32 result, one `name=value` entry per line.
print(f"{indices=}", f"{values=}", f"{shape=}", f"{rank=}", f"{nse=}", sep="\n")

#### eWiseMult

In [None]:
# Build and JIT-compile an element-wise binary kernel over two 2-D sparse f64
# tensors that produces a sparse i8 tensor. sparse_tensor.BinaryOp gets three
# regions: one for positions where both inputs have a value ("overlap"), one for
# left-only and one for right-only values.
with ir.Context(), ir.Location.unknown():
    module = ir.Module.create()
    with ir.InsertionPoint(module.body):
        f64 = ir.F64Type.get()
        i1 = ir.IntegerType.get_signless(1)
        i8 = ir.IntegerType.get_signless(8)
        i64 = ir.IntegerType.get_signless(64)
        index = ir.IndexType.get()
        # Both dimensions compressed, identity dimension ordering.
        # NOTE(review): the trailing `None, 0, 0` are presumably the higher-order
        # map and the pointer/index bit widths — confirm against EncodingAttr.get.
        sp_encoding = sparse_tensor.EncodingAttr.get(
            [sparse_tensor.DimLevelType.compressed, sparse_tensor.DimLevelType.compressed],
            ir.AffineMap.get_permutation([0, 1]),
            None,
            0,
            0
        )
        dynamic = ir.ShapedType.get_dynamic_size()
        # Inputs are f64; the output shares the encoding but holds i8 elements.
        rtt = ir.RankedTensorType.get([dynamic, dynamic], f64, sp_encoding)
        rtt_out = ir.RankedTensorType.get([dynamic, dynamic], i8, sp_encoding)
        @func.FuncOp.from_py_func(rtt, rtt)
        def main(x, y):
            zero = arith.ConstantOp(f64, 0.0)
            c0 = arith.ConstantOp(index, 0)
            c1 = arith.ConstantOp(index, 1)
            # The output is sized from the dimensions of `x`.
            d0 = tensor.DimOp(x, c0)
            d1 = tensor.DimOp(x, c1)
            vv = bufferization.AllocTensorOp(rtt_out, [d0.result, d1.result], None, None, False)
            # Identity indexing maps for both inputs and the output; both loops parallel.
            generic_op = linalg.GenericOp(
                [rtt_out],
                [x, y],
                [vv],
                ir.ArrayAttr.get([ir.AffineMapAttr.get(ir.AffineMap.get_permutation([0, 1])),
                                  ir.AffineMapAttr.get(ir.AffineMap.get_permutation([0, 1])),
                                  ir.AffineMapAttr.get(ir.AffineMap.get_permutation([0, 1]))]),
                ir.ArrayAttr.get([ir.Attribute.parse('#linalg.iterator_type<parallel>')]*2),
            )
            # Construct the linalg.generic body
            block = generic_op.regions[0].blocks.append(f64, f64, i8)
            with ir.InsertionPoint(block):
                # a, b: input elements; o: output element (unused).
                a, b, o = block.arguments
                #res = arith.MulFOp(a, b)
                res = sparse_tensor.BinaryOp(i8, a, b)
                # Region 0: both inputs present.
                overlap = res.regions[0].blocks.append(f64, f64)
                with ir.InsertionPoint(overlap):
                    arg0, arg1 = overlap.arguments
                    #ret = arith.AddFOp(arg0, arg1)
                    # Predicate 2 is presumably "ogt" (arg0 > arg1) in the arith
                    # CmpF enum — verify against your MLIR version. The i1 result
                    # is zero-extended to i8.
                    cmp = arith.CmpFOp(ir.IntegerAttr.get(i64, 2), arg0, arg1)
                    ret = arith.ExtUIOp(i8, cmp)
                    sparse_tensor.YieldOp(result=ret)
                # Region 1: only the left input present.
                left = res.regions[1].blocks.append(f64)
                with ir.InsertionPoint(left):
                    arg0, = left.arguments
                    # Predicate 6 is presumably "one" (ordered and not equal), i.e.
                    # value != 0.0 — verify against your MLIR version.
                    cmp = arith.CmpFOp(ir.IntegerAttr.get(i64, 6), arg0, zero)
                    ret = arith.ExtUIOp(i8, cmp)
                    sparse_tensor.YieldOp(result=ret)
                # Region 2: only the right input present (same test as the left region).
                right = res.regions[2].blocks.append(f64)
                with ir.InsertionPoint(right):
                    arg0, = right.arguments
                    cmp = arith.CmpFOp(ir.IntegerAttr.get(i64, 6), arg0, zero)
                    ret = arith.ExtUIOp(i8, cmp)
                    sparse_tensor.YieldOp(result=ret)
                linalg.YieldOp([res])
            return generic_op.result
        # Needed so that `engine.invoke` can call `main` with packed ctypes pointers.
        main.func_op.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get()
    #pm = passmanager.PassManager.parse("sparse-compiler")
    pm = passmanager.PassManager.parse("builtin.module(sparse-compiler{reassociate-fp-reductions=1 enable-index-optimizations=1})")
# Printed before the passes run, so this shows the un-lowered IR.
print(module)
# NOTE(review): `pm` is rebound to whatever run() returns; it is not used afterwards.
pm = pm.run(module)
#print('='*50)
#print(module)
engine = execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[SHARED_LIB])

In [None]:
# Two 10x20 inputs chosen to hit every BinaryOp region: (0, 0) and (7, 5) are
# stored in both, (1, 1) only in `mem_a` (with an explicit 0.0 value) and
# (1, 2) only in `mem_b`.
mem_a = to_sparse_tensor([[0, 0], [1, 1], [7, 5]], np.array([11., 0., -4.], dtype=np.float64),
                         [10, 20], sparsity=[DimLevelType.compressed, DimLevelType.compressed])
mem_b = to_sparse_tensor([[0, 0], [1, 2], [7, 5]], np.array([4.1, 2.3, 10.0], dtype=np.float64),
                         [10, 20], sparsity=[DimLevelType.compressed, DimLevelType.compressed])

# Result slot: a pointer-to-pointer. After the call, `mem_out[0]` is what gets
# handed to from_sparse_tensor as the output tensor handle.
out = ctypes.c_char(0)
mem_out = ctypes.pointer(ctypes.pointer(out))

# Inputs first, result slot last.
arg_pointers = [
    mem_a,
    mem_b,
    mem_out,
]
engine.invoke("main", *arg_pointers)

In [None]:
# Decode the i8 output tensor and show each component on its own line.
indices, values, shape, rank, nse = from_sparse_tensor(mem_out[0], np.int8)
print(f"{indices=}", f"{values=}", f"{shape=}", f"{rank=}", f"{nse=}", sep="\n")

#### mxm

In [None]:
# Build and JIT-compile a sparse matrix-matrix multiply. The indexing maps
# (d0, d2), (d2, d1) -> (d0, d1) with d2 marked as a reduction express the
# contraction; the scalar work is a sparse_tensor.BinaryOp (multiply where both
# operands are present) followed by a sparse_tensor.ReduceOp (add).
with ir.Context(), ir.Location.unknown():
    module = ir.Module.create()
    with ir.InsertionPoint(module.body):
        f64 = ir.F64Type.get()
        index = ir.IndexType.get()
        # Both dimensions compressed, identity dimension ordering.
        # NOTE(review): the trailing `None, 0, 0` are presumably the higher-order
        # map and the pointer/index bit widths — confirm against EncodingAttr.get.
        sp_encoding = sparse_tensor.EncodingAttr.get(
            [sparse_tensor.DimLevelType.compressed, sparse_tensor.DimLevelType.compressed],
            ir.AffineMap.get_permutation([0, 1]),
            None,
            0,
            0
        )
        dynamic = ir.ShapedType.get_dynamic_size()
        rtt = ir.RankedTensorType.get([dynamic, dynamic], f64, sp_encoding)
        @func.FuncOp.from_py_func(rtt, rtt)
        def main(x, y):
            cf0 = arith.ConstantOp(f64, 0.0)
            c0 = arith.ConstantOp(index, 0)
            c1 = arith.ConstantOp(index, 1)
            # Output shape: rows of x by columns of y.
            nrows = tensor.DimOp(x, c0)
            ncols = tensor.DimOp(y, c1)
            vv = bufferization.AllocTensorOp(rtt, [nrows.result, ncols.result], None, None, False)
            generic_op = linalg.GenericOp(
                [rtt],
                [x, y],
                [vv],
                ir.ArrayAttr.get([
                    ir.AffineMapAttr.get(ir.AffineMap.get(3, 0, [ir.AffineDimExpr.get(0), ir.AffineDimExpr.get(2)])),
                    ir.AffineMapAttr.get(ir.AffineMap.get(3, 0, [ir.AffineDimExpr.get(2), ir.AffineDimExpr.get(1)])),
                    ir.AffineMapAttr.get(ir.AffineMap.get(3, 0, [ir.AffineDimExpr.get(0), ir.AffineDimExpr.get(1)]))
                ]),
                ir.ArrayAttr.get([
                    ir.Attribute.parse('#linalg.iterator_type<parallel>'),
                    ir.Attribute.parse('#linalg.iterator_type<parallel>'),
                    ir.Attribute.parse('#linalg.iterator_type<reduction>'),
                ]),
            )
            # Construct the linalg.generic body
            block = generic_op.regions[0].blocks.append(f64, f64, f64)
            with ir.InsertionPoint(block):
                # a, b: input elements; o: current output (accumulator) element.
                a, b, o = block.arguments
                #res = arith.MulFOp(a, b)
                res = sparse_tensor.BinaryOp(f64, a, b)
                # Only the overlap region (index 0) is populated: multiply.
                overlap = res.regions[0].blocks.append(f64, f64)
                with ir.InsertionPoint(overlap):
                    arg0, arg1 = overlap.arguments
                    ret = arith.MulFOp(arg0, arg1)
                    sparse_tensor.YieldOp(result=ret)
                # Combine the product with the accumulator by addition; cf0 (0.0)
                # is passed as the third ReduceOp operand (presumably the identity).
                res2 = sparse_tensor.ReduceOp(res, o, cf0)
                reduce = res2.regions[0].blocks.append(f64, f64)
                with ir.InsertionPoint(reduce):
                    arg0, arg1 = reduce.arguments
                    ret = arith.AddFOp(arg0, arg1)
                    sparse_tensor.YieldOp(result=ret)
                linalg.YieldOp([res2])
            return generic_op.result
        # Needed so that `engine.invoke` can call `main` with packed ctypes pointers.
        main.func_op.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get()
    #pm = passmanager.PassManager.parse("sparse-compiler")
    pm = passmanager.PassManager.parse("builtin.module(sparse-compiler{reassociate-fp-reductions=1 enable-index-optimizations=1})")
# Printed before the passes run, so this shows the un-lowered IR.
print(module)
# NOTE(review): `pm` is rebound to whatever run() returns; it is not used afterwards.
pm = pm.run(module)
#print('='*50)
#print(module)
engine = execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[SHARED_LIB])

In [None]:
# Operands with compatible shapes: a is 3x4, b is 4x3.
mem_a = to_sparse_tensor([[0, 0], [0, 1], [0, 3]], np.array([11., 22., -4.], dtype=np.float64),
                         [3, 4], sparsity=[DimLevelType.compressed, DimLevelType.compressed])
mem_b = to_sparse_tensor([[0, 0], [0, 2], [1, 0]], np.array([4.1, 2.3, 10.0], dtype=np.float64),
                         [4, 3], sparsity=[DimLevelType.compressed, DimLevelType.compressed])

# Result slot: a pointer-to-pointer. After the call, `mem_out[0]` is what gets
# handed to from_sparse_tensor as the output tensor handle.
out = ctypes.c_char(0)
mem_out = ctypes.pointer(ctypes.pointer(out))

# Inputs first, result slot last.
arg_pointers = [
    mem_a,
    mem_b,
    mem_out,
]
engine.invoke("main", *arg_pointers)

In [None]:
# Decode the matrix-product tensor and show each component on its own line.
indices, values, shape, rank, nse = from_sparse_tensor(mem_out[0], np.float64)
print(f"{indices=}", f"{values=}", f"{shape=}", f"{rank=}", f"{nse=}", sep="\n")

#### select

In [None]:
# Build and JIT-compile a kernel that filters a 2-D sparse f64 tensor with
# sparse_tensor.SelectOp. The predicate here depends on the column index
# (linalg.index 1) compared against the constant 1, not on the element value.
with ir.Context(), ir.Location.unknown():
    module = ir.Module.create()
    with ir.InsertionPoint(module.body):
        f64 = ir.F64Type.get()
        i1 = ir.IntegerType.get_signless(1)
        i64 = ir.IntegerType.get_signless(64)
        index = ir.IndexType.get()
        # Both dimensions compressed, identity dimension ordering.
        # NOTE(review): the trailing `None, 0, 0` are presumably the higher-order
        # map and the pointer/index bit widths — confirm against EncodingAttr.get.
        sp_encoding = sparse_tensor.EncodingAttr.get(
            [sparse_tensor.DimLevelType.compressed, sparse_tensor.DimLevelType.compressed],
            ir.AffineMap.get_permutation([0, 1]),
            None,
            0,
            0
        )
        dynamic = ir.ShapedType.get_dynamic_size()
        rtt = ir.RankedTensorType.get([dynamic, dynamic], f64, sp_encoding)
        @func.FuncOp.from_py_func(rtt)
        def main(x):
            # cf0 is only referenced by the commented-out value-based predicate below.
            cf0 = arith.ConstantOp(f64, 0.0)
            c0 = arith.ConstantOp(index, 0)
            c1 = arith.ConstantOp(index, 1)
            # The output tensor gets the same (dynamic) dimensions as the input.
            nrows = tensor.DimOp(x, c0)
            ncols = tensor.DimOp(x, c1)
            vv = bufferization.AllocTensorOp(rtt, [nrows.result, ncols.result], None, None, False)
            # Identity indexing maps for input and output; both loops parallel.
            generic_op = linalg.GenericOp(
                [rtt],
                [x],
                [vv],
                ir.ArrayAttr.get([
                    ir.AffineMapAttr.get(ir.AffineMap.get_permutation([0, 1])),
                    ir.AffineMapAttr.get(ir.AffineMap.get_permutation([0, 1])),
                ]),
                ir.ArrayAttr.get([
                    ir.Attribute.parse('#linalg.iterator_type<parallel>'),
                    ir.Attribute.parse('#linalg.iterator_type<parallel>'),
                ]),
            )
            # Construct the linalg.generic body
            block = generic_op.regions[0].blocks.append(f64, f64)
            with ir.InsertionPoint(block):
                # a: input element; o: output element (unused).
                a, o = block.arguments
                # Loop indices of dimension 0 (row) and 1 (column); only colidx is used.
                rowidx = linalg.IndexOp(ir.IntegerAttr.get(i64, 0))
                colidx = linalg.IndexOp(ir.IntegerAttr.get(i64, 1))
                res = sparse_tensor.SelectOp(a)
                region = res.regions[0].blocks.append(f64)
                with ir.InsertionPoint(region):
                    arg0, = region.arguments
                    #cmp = arith.CmpFOp(ir.IntegerAttr.get(i64, 2), arg0, cf0)
                    # Predicate 5 is presumably "sge" (colidx >= 1) in the arith
                    # CmpI enum — verify against your MLIR version.
                    cmp = arith.CmpIOp(ir.IntegerAttr.get(i64, 5), colidx, c1)
                    sparse_tensor.YieldOp(result=cmp)
                linalg.YieldOp([res])
            return generic_op.result
        # Needed so that `engine.invoke` can call `main` with packed ctypes pointers.
        main.func_op.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get()
    #pm = passmanager.PassManager.parse("sparse-compiler")
    pm = passmanager.PassManager.parse("builtin.module(sparse-compiler{reassociate-fp-reductions=1 enable-index-optimizations=1})")
# Printed before the passes run, so this shows the un-lowered IR.
print(module)
# NOTE(review): `pm` is rebound to whatever run() returns; it is not used afterwards.
pm = pm.run(module)
#print('='*50)
#print(module)
engine = execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[SHARED_LIB])

In [None]:
# 3x4 input whose stored entries all sit in row 0, at columns 0, 1 and 3.
mem_a = to_sparse_tensor([[0, 0], [0, 1], [0, 3]], np.array([11., 22., -4.], dtype=np.float64),
                         [3, 4], sparsity=[DimLevelType.compressed, DimLevelType.compressed])

# Result slot: a pointer-to-pointer. After the call, `mem_out[0]` is what gets
# handed to from_sparse_tensor as the output tensor handle.
out = ctypes.c_char(0)
mem_out = ctypes.pointer(ctypes.pointer(out))

# Inputs first, result slot last.
arg_pointers = [
    mem_a,
    mem_out,
]
engine.invoke("main", *arg_pointers)

In [None]:
# Decode the filtered tensor and show each component on its own line.
indices, values, shape, rank, nse = from_sparse_tensor(mem_out[0], np.float64)
print(f"{indices=}", f"{values=}", f"{shape=}", f"{rank=}", f"{nse=}", sep="\n")

#### reduce_to_vector

In [None]:
# Build and JIT-compile a kernel that reduces a 2-D sparse f64 matrix to a sparse
# vector. Dimension 0 is the reduction loop and the output map is (d0, d1) -> (d1),
# so the result has one entry per column. The combining function is a
# sparse_tensor.ReduceOp whose region multiplies (i.e. a product reduction).
with ir.Context(), ir.Location.unknown():
    module = ir.Module.create()
    with ir.InsertionPoint(module.body):
        f64 = ir.F64Type.get()
        i1 = ir.IntegerType.get_signless(1)
        i64 = ir.IntegerType.get_signless(64)
        index = ir.IndexType.get()
        # Matrix encoding: both dimensions compressed, identity ordering.
        # NOTE(review): the trailing `None, 0, 0` are presumably the higher-order
        # map and the pointer/index bit widths — confirm against EncodingAttr.get.
        mat_sp_encoding = sparse_tensor.EncodingAttr.get(
            [sparse_tensor.DimLevelType.compressed, sparse_tensor.DimLevelType.compressed],
            ir.AffineMap.get_permutation([0, 1]),
            None,
            0,
            0
        )
        # Vector encoding: a single compressed dimension.
        vec_sp_encoding = sparse_tensor.EncodingAttr.get(
            [sparse_tensor.DimLevelType.compressed],
            ir.AffineMap.get_permutation([0]),
            None,
            0,
            0
        )
        dynamic = ir.ShapedType.get_dynamic_size()
        rtt_in = ir.RankedTensorType.get([dynamic, dynamic], f64, mat_sp_encoding)
        rtt_out = ir.RankedTensorType.get([dynamic], f64, vec_sp_encoding)
        @func.FuncOp.from_py_func(rtt_in)
        def main(x):
            # cf0 and nrows are created but not used by the kernel below.
            cf0 = arith.ConstantOp(f64, 0.0)
            cf1 = arith.ConstantOp(f64, 1.0)
            c0 = arith.ConstantOp(index, 0)
            c1 = arith.ConstantOp(index, 1)
            nrows = tensor.DimOp(x, c0)
            ncols = tensor.DimOp(x, c1)
            # Output vector length equals the number of columns of x.
            vv = bufferization.AllocTensorOp(rtt_out, [ncols.result], None, None, False)
            generic_op = linalg.GenericOp(
                [rtt_out],
                [x],
                [vv],
                ir.ArrayAttr.get([
                    ir.AffineMapAttr.get(ir.AffineMap.get_permutation([0, 1])),
                    ir.AffineMapAttr.get(ir.AffineMap.get(2, 0, [ir.AffineDimExpr.get(1)]))
                ]),
                ir.ArrayAttr.get([
                    ir.Attribute.parse('#linalg.iterator_type<reduction>'),
                    ir.Attribute.parse('#linalg.iterator_type<parallel>'),
                ]),
            )
            # Construct the linalg.generic body
            block = generic_op.regions[0].blocks.append(f64, f64)
            with ir.InsertionPoint(block):
                # a: input element; o: current output (accumulator) element.
                a, o = block.arguments
                # cf1 (1.0) is passed as the third ReduceOp operand (presumably
                # the identity of the multiplication).
                res = sparse_tensor.ReduceOp(o, a, cf1)
                region = res.regions[0].blocks.append(f64, f64)
                with ir.InsertionPoint(region):
                    arg0, arg1 = region.arguments
                    red_res = arith.MulFOp(arg0, arg1)
                    sparse_tensor.YieldOp(result=red_res)
                linalg.YieldOp([res])
            return generic_op.result
        # Needed so that `engine.invoke` can call `main` with packed ctypes pointers.
        main.func_op.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get()
    #pm = passmanager.PassManager.parse("sparse-compiler")
    pm = passmanager.PassManager.parse("builtin.module(sparse-compiler{reassociate-fp-reductions=1 enable-index-optimizations=1})")
# Printed before the passes run, so this shows the un-lowered IR.
print(module)
# NOTE(review): `pm` is rebound to whatever run() returns; it is not used afterwards.
pm = pm.run(module)
#print('='*50)
#print(module)
engine = execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[SHARED_LIB])

In [None]:
# 3x4 input; column 3 holds two stored values (-4. in row 0 and 14.5 in row 2),
# so at least one output entry combines more than one element.
mem_a = to_sparse_tensor([[0, 0], [0, 1], [0, 3], [2, 3]], np.array([11., 22., -4., 14.5], dtype=np.float64),
                         [3, 4], sparsity=[DimLevelType.compressed, DimLevelType.compressed])

# Result slot: a pointer-to-pointer. After the call, `mem_out[0]` is what gets
# handed to from_sparse_tensor as the output tensor handle.
out = ctypes.c_char(0)
mem_out = ctypes.pointer(ctypes.pointer(out))

# Inputs first, result slot last.
arg_pointers = [
    mem_a,
    mem_out,
]
engine.invoke("main", *arg_pointers)

In [None]:
# Decode the reduced vector and show each component on its own line.
indices, values, shape, rank, nse = from_sparse_tensor(mem_out[0], np.float64)
print(f"{indices=}", f"{values=}", f"{shape=}", f"{rank=}", f"{nse=}", sep="\n")

#### reduce_to_scalar

In [None]:
# Build and JIT-compile a kernel that reduces a 2-D sparse f64 matrix to a single
# f64 scalar. Both loops are reductions and the output is a rank-0 dense tensor
# from which the scalar is extracted and returned. The combining function is a
# sparse_tensor.ReduceOp whose region multiplies (i.e. a product reduction).
with ir.Context(), ir.Location.unknown():
    module = ir.Module.create()
    with ir.InsertionPoint(module.body):
        f64 = ir.F64Type.get()
        i1 = ir.IntegerType.get_signless(1)
        i64 = ir.IntegerType.get_signless(64)
        index = ir.IndexType.get()
        # Both dimensions compressed, identity dimension ordering.
        # NOTE(review): the trailing `None, 0, 0` are presumably the higher-order
        # map and the pointer/index bit widths — confirm against EncodingAttr.get.
        sp_encoding = sparse_tensor.EncodingAttr.get(
            [sparse_tensor.DimLevelType.compressed, sparse_tensor.DimLevelType.compressed],
            ir.AffineMap.get_permutation([0, 1]),
            None,
            0,
            0
        )
        dynamic = ir.ShapedType.get_dynamic_size()
        rtt_in = ir.RankedTensorType.get([dynamic, dynamic], f64, sp_encoding)
        # Rank-0 tensor without a sparse encoding.
        rtt_out = ir.RankedTensorType.get([], f64)
        @func.FuncOp.from_py_func(rtt_in)
        def main(x):
            cf1 = arith.ConstantOp(f64, 1.0)
            ss = bufferization.AllocTensorOp(rtt_out, [], None, None, False)
            # Output map (d0, d1) -> (): every iteration addresses the single scalar.
            generic_op = linalg.GenericOp(
                [rtt_out],
                [x],
                [ss],
                ir.ArrayAttr.get([
                    ir.AffineMapAttr.get(ir.AffineMap.get_permutation([0, 1])),
                    ir.AffineMapAttr.get(ir.AffineMap.get(2, 0, []))
                ]),
                ir.ArrayAttr.get([
                    ir.Attribute.parse('#linalg.iterator_type<reduction>'),
                    ir.Attribute.parse('#linalg.iterator_type<reduction>'),
                ]),
            )
            # Construct the linalg.generic body
            block = generic_op.regions[0].blocks.append(f64, f64)
            with ir.InsertionPoint(block):
                # a: input element; o: current output (accumulator) element.
                a, o = block.arguments
                # cf1 (1.0) is passed as the third ReduceOp operand (presumably
                # the identity of the multiplication).
                res = sparse_tensor.ReduceOp(o, a, cf1)
                region = res.regions[0].blocks.append(f64, f64)
                with ir.InsertionPoint(region):
                    arg0, arg1 = region.arguments
                    red_res = arith.MulFOp(arg0, arg1)
                    sparse_tensor.YieldOp(result=red_res)
                linalg.YieldOp([res])
            # Pull the scalar out of the rank-0 result tensor (no indices needed).
            s = tensor.ExtractOp(generic_op, [])
            return s.result
        # Needed so that `engine.invoke` can call `main` with packed ctypes pointers.
        main.func_op.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get()
    #pm = passmanager.PassManager.parse("sparse-compiler")
    pm = passmanager.PassManager.parse("builtin.module(sparse-compiler{reassociate-fp-reductions=1 enable-index-optimizations=1})")
# Printed before the passes run, so this shows the un-lowered IR.
print(module)
# NOTE(review): `pm` is rebound to whatever run() returns; it is not used afterwards.
pm = pm.run(module)
#print('='*50)
#print(module)
engine = execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[SHARED_LIB])

In [None]:
# Same 3x4 input as in the reduce_to_vector example.
mem_a = to_sparse_tensor([[0, 0], [0, 1], [0, 3], [2, 3]], np.array([11., 22., -4., 14.5], dtype=np.float64),
                         [3, 4], sparsity=[DimLevelType.compressed, DimLevelType.compressed])

# The kernel returns a plain f64, so the result slot is a pointer to a C double
# rather than the pointer-to-pointer used for tensor results.
mem_out = ctypes.pointer(ctypes.c_double(0))

# Inputs first, result slot last.
arg_pointers = [
    mem_a,
    mem_out,
]
engine.invoke("main", *arg_pointers)
# Displayed as the cell output.
mem_out.contents.value

# Full Example of generic dispatching

### Sum a passed in tensor using linalg.generic

In [None]:
# numpy dtype name -> zero-argument factory for the matching MLIR element type.
# Factories (rather than type objects) are stored because they are called inside
# an active ir.Context by _build_reduce_sum.
NP_TYPE_TO_MLIR = {
    **{
        np.dtype(f"int{width}").name: (lambda width=width: ir.IntegerType.get_signless(width))
        for width in (8, 16, 32, 64)
    },
    np.dtype(np.float32).name: lambda: ir.F32Type.get(),
    np.dtype(np.float64).name: lambda: ir.F64Type.get(),
}

# numpy dtype name -> ctypes scalar type used for the kernel's result slot.
NP_TYPE_TO_CTYPE = {
    np.dtype(np_type).name: c_type
    for np_type, c_type in (
        (np.int8, ctypes.c_int8),
        (np.int16, ctypes.c_int16),
        (np.int32, ctypes.c_int32),
        (np.int64, ctypes.c_int64),
        (np.float32, ctypes.c_float),
        (np.float64, ctypes.c_double),
    )
}

# Cache of compiled engines, keyed by (array length, dtype name); see reduce_sum.
memoized = {}

def reduce_sum(arr):
    """Sum a 1-D numpy array with a JIT-compiled linalg.generic kernel.

    One kernel is compiled per (length, dtype) combination and cached in
    ``memoized``; later calls with the same combination reuse the engine.

    Parameters
    ----------
    arr : array-like
        One-dimensional data whose dtype is a key of ``NP_TYPE_TO_MLIR`` /
        ``NP_TYPE_TO_CTYPE``.

    Returns
    -------
    Python scalar
        The sum, read from a ctypes slot of the matching element type.

    Raises
    ------
    ValueError
        If ``arr`` is not one-dimensional. The kernel is built for a rank-1
        tensor of ``len(arr)`` elements, so any other rank would hand it a
        mismatching memref descriptor.
    KeyError
        If the dtype is not supported.
    """
    arr = np.asarray(arr)
    if arr.ndim != 1:
        raise ValueError(f"reduce_sum expects a 1-D array, got {arr.ndim} dimension(s)")

    dtype_name = arr.dtype.name
    key = (len(arr), dtype_name)
    engine = memoized.get(key)
    if engine is None:
        engine = memoized[key] = _build_reduce_sum(arr)

    # The result slot is kept in a named variable so it can be read directly
    # after the call.
    result_slot = NP_TYPE_TO_CTYPE[dtype_name](0)
    arg_pointers = [
        # Input tensor: pointer to pointer to the ranked memref descriptor.
        ctypes.pointer(ctypes.pointer(runtime.get_ranked_memref_descriptor(arr))),
        # Scalar result goes last.
        ctypes.pointer(result_slot),
    ]
    engine.invoke("main", *arg_pointers)
    return result_slot.value

def _build_reduce_sum(arr):
    """Compile a ``main`` function that sums a rank-1 tensor shaped like ``arr``.

    The generated function takes a statically sized rank-1 tensor of
    ``len(arr)`` elements, reduces it with ``linalg.generic`` into a rank-0
    tensor and returns the extracted scalar. Only the length and dtype of
    ``arr`` are used; its values are not read.

    Parameters
    ----------
    arr : numpy.ndarray
        Array whose ``len`` and ``dtype`` define the kernel signature.

    Returns
    -------
    The ``ExecutionEngine`` holding the compiled module.

    Raises
    ------
    KeyError
        If ``arr.dtype.name`` is not a key of ``NP_TYPE_TO_MLIR``.
    """
    # Integer and floating-point element types need different arith ops and a
    # differently typed zero constant.
    is_integral = issubclass(arr.dtype.type, numbers.Integral)
    with ir.Context(), ir.Location.unknown():
        module = ir.Module.create()
        with ir.InsertionPoint(module.body):
            dtype = NP_TYPE_TO_MLIR[arr.dtype.name]()
            type_a = ir.RankedTensorType.get([len(arr)], dtype)
            type_out = ir.RankedTensorType.get([], dtype)

            @func.FuncOp.from_py_func(type_a)
            def main(x):
                # The reduction body adds each element to the current value of
                # the output operand, so the output must start at zero. A bare
                # bufferization.alloc_tensor leaves its contents unspecified,
                # which makes the sum depend on whatever is in that memory.
                zero = arith.ConstantOp(dtype, 0 if is_integral else 0.0)
                vv = tensor.SplatOp(type_out, zero)
                generic_op = linalg.GenericOp(
                    [type_out],
                    [x],
                    [vv],
                    # Input is indexed by the single loop dimension; the
                    # rank-0 output uses a map with no results.
                    ir.ArrayAttr.get([ir.AffineMapAttr.get(ir.AffineMap.get_permutation([0])),
                                      ir.AffineMapAttr.get(ir.AffineMap.get(1, 0, []))]),
                    ir.ArrayAttr.get([ir.Attribute.parse('#linalg.iterator_type<reduction>')]),
                )
                # Construct the linalg.generic body: (input element, accumulator).
                block = generic_op.regions[0].blocks.append(dtype, dtype)
                with ir.InsertionPoint(block):
                    a, b = block.arguments
                    if is_integral:
                        res = arith.AddIOp(a, b)
                    else:
                        res = arith.AddFOp(a, b)
                    linalg.YieldOp([res])
                final_result = tensor.ExtractOp(generic_op.result, [])
                return final_result

            # Needed so the execution engine can call `main` through the
            # C-compatible wrapper.
            main.func_op.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get()
        pm = passmanager.PassManager.parse("builtin.module(sparse-compiler)")
    pm.run(module)
    engine = execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[SHARED_LIB])
    return engine

In [None]:
# Smoke test on the float32 path: 1 + 2 + 3 + 4, so the expected result is 10.0.
reduce_sum(np.array([1., 2., 3., 4.], dtype=np.float32))

In [None]:
# Smoke test on the integer path: sum(range(50)) is 1225.
arr = np.arange(50)
reduce_sum(arr)

In [None]:
# Repeated-run check for a previously observed wrong-result bug: call the same
# kernel many times and report the first result that differs from the expected
# total. No output means every run agreed.
expected_total = 1225  # sum(range(50))
for _ in range(5000):
    z = reduce_sum(np.arange(50))
    if z == expected_total:
        continue
    print(z)
    break

### Add 1 to a passed-in NumPy array

In [None]:
# Build, print and JIT-compile a module whose `main` takes a length-4 float32
# tensor and returns a tensor of the same type.
with ir.Context(), ir.Location.unknown():
    module = ir.Module.create()
    with ir.InsertionPoint(module.body):
        f32 = ir.F32Type.get()
        type_a = ir.RankedTensorType.get([4], f32)

        @func.FuncOp.from_py_func(type_a)
        def main(arr):
            # The output operand starts as a tensor of ones, so the body's
            # "input element + current output element" yields input + 1.
            ones = tensor.SplatOp(type_a, arith.ConstantOp(f32, 1.0))
            # Input and output are both indexed by the single loop dimension.
            identity_map = ir.AffineMapAttr.get(ir.AffineMap.get_permutation([0]))
            add_one = linalg.GenericOp(
                [type_a],
                [arr],
                [ones],
                ir.ArrayAttr.get([identity_map, identity_map]),
                ir.ArrayAttr.get([ir.Attribute.parse('#linalg.iterator_type<parallel>')]),
            )
            # Body of the linalg.generic: (input element, output element).
            body = add_one.regions[0].blocks.append(f32, f32)
            with ir.InsertionPoint(body):
                elem_in, elem_out = body.arguments
                linalg.YieldOp([arith.AddFOp(elem_in, elem_out)])
            return add_one.result

        main.func_op.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get()
    pm = passmanager.PassManager.parse("builtin.module(sparse-compiler)")
# Show the IR as constructed, before the pass pipeline rewrites it.
print(module)
pm = pm.run(module)
engine = execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[SHARED_LIB])

In [None]:
a = np.array([1., 2., 3., 4.], dtype=np.float32)
# Empty rank-1 float descriptor that receives the returned tensor.
out = runtime.make_nd_memref_descriptor(1, ctypes.c_float)()

# The result descriptor is passed first, followed by the input.
# NOTE(review): this ordering presumably comes from the C-interface wrapper
# returning memref results through a leading pointer argument — confirm
# against the MLIR `llvm.emit_c_interface` documentation.
arg_pointers = [
    ctypes.pointer(ctypes.pointer(descriptor))
    for descriptor in (out, runtime.get_ranked_memref_descriptor(a))
]
engine.invoke("main", *arg_pointers)
result = runtime.ranked_memref_to_numpy(arg_pointers[0][0])
result

### SCF Example

In [None]:
# Build, print and JIT-compile a module whose `main` takes one f64 and uses
# scf.if to return -x when the comparison against 0.0 holds, and x otherwise.
with ir.Context(), ir.Location.unknown():
    module = ir.Module.create()
    with ir.InsertionPoint(module.body):
        i1 = ir.IntegerType.get_signless(1)
        i64 = ir.IntegerType.get_signless(64)
        f64 = ir.F64Type.get()

        @func.FuncOp.from_py_func(f64)
        def main(x):
            zero = arith.ConstantOp(f64, 0.0)
            # TODO: Figure out why the string form of the predicate isn't allowed:
            # cmp = arith.CmpFOp(ir.StringAttr.get("ogt"), x, zero)
            # The predicate is passed as an i64 attribute instead; 2 stands in
            # for `ogt` — NOTE(review): confirm against arith's CmpFPredicate enum.
            predicate = ir.IntegerAttr.get(i64, 2)
            is_greater = arith.CmpFOp(predicate, x, zero)
            branch = scf.IfOp(is_greater.result, [f64], hasElse=True)
            with ir.InsertionPoint(branch.then_block):
                scf.YieldOp([arith.NegFOp(x)])
            with ir.InsertionPoint(branch.else_block):
                scf.YieldOp([x])
            return branch.result

        main.func_op.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get()
    pm = passmanager.PassManager.parse("builtin.module(sparse-compiler)")
# Show the IR as constructed, before the pass pipeline rewrites it.
print(module)
pm = pm.run(module)
engine = execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[SHARED_LIB])

def negabs(x):
    """Call the compiled ``main`` with ``x`` and return the double it writes back.

    ``x`` is wrapped in a ``ctypes.c_double``; a second ``c_double`` serves as
    the result slot and its value is returned after the call.
    """
    arg_in = ctypes.c_double(x)
    result_slot = ctypes.c_double(0)
    engine.invoke("main", ctypes.pointer(arg_in), ctypes.pointer(result_slot))
    return result_slot.value

# Demo: print each sample next to the value returned by negabs. Non-negative
# inputs get a leading space so they line up with the '-' of negative ones.
print('Convert everything to negative')
print('-'*30)
samples = (2.3, -1.5, 0.0, -5.6, 5.9)
for n in samples:
    print(f"{' ' if n >= 0 else ''}{n} -> {negabs(n)}")

#### Get Dimensions

In [None]:
# Ask the runner-utils library for the sizes of dimensions 0 and 1 of the
# sparse tensor behind mem_a and display them as a tuple.
# NOTE(review): no argtypes/restype for sparseDimSize are visible in this part
# of the notebook, so ctypes would use its default int conversions — confirm
# that is adequate for the handle argument and the returned size.
c_lib.sparseDimSize(mem_a[0], 0), c_lib.sparseDimSize(mem_a[0], 1)