In [1]:
import numpy as np
import tvm
from tvm import te

In [2]:
# Symbolic variable standing in for a vector length that is not fixed here.
n = te.var('n')
# Display the Python type of the variable together with its dtype.
(type(n), n.dtype)

(tvm.tir.expr.Var, 'int32')

In [3]:
# Two 1-D inputs whose length is the symbolic variable `n`.
A = te.placeholder(shape=(n,), name='a')
B = te.placeholder(shape=(n,), name='b')
# Element-wise sum with the same shape as A. The lambda parameter is kept as
# `i`, matching the loop variable shown in the lowered output below.
C = te.compute(A.shape, lambda i: A[i] + B[i], name='c')
s = te.create_schedule(C.op)
# The last expression of the cell is displayed: the lowered IR of the schedule.
tvm.lower(s, args=[A, B, C], simple_mode=True)

#[version = "0.0.5"]
@main = primfn(a_1: handle, b_1: handle, c_1: handle) -> ()
  attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
  buffers = {a: Buffer(a_2: Pointer(float32), float32, [(stride: int32*n: int32)], [], type="auto"),
             b: Buffer(b_2: Pointer(float32), float32, [(stride_1: int32*n)], [], type="auto"),
             c: Buffer(c_2: Pointer(float32), float32, [(stride_2: int32*n)], [], type="auto")}
  buffer_map = {a_1: a, b_1: b, c_1: c}
  preflattened_buffer_map = {a_1: a_3: Buffer(a_2, float32, [n], [stride], type="auto"), b_1: b_3: Buffer(b_2, float32, [n], [stride_1], type="auto"), c_1: c_3: Buffer(c_2, float32, [n], [stride_2], type="auto")} {
  for (i: int32, 0, n) {
    c[(i*stride_2)] = (a[(i*stride)] + b[(i*stride_1)])
  }
}

#[metadata]
{
  "root": 1, 
  "nodes": [
    {
      "type_key": ""
    }, 
    {
      "type_key": "Map", 
      "keys": [
        "IntImm"
      ], 
      "data": [2]
    }, 
    {
      "type

In [13]:
def get_abc(shape, constructor=None):
    """Return a reproducible triple of float32 arrays ``(a, b, c)``.

    ``a`` and ``b`` are drawn (in that order) from a standard normal
    distribution after resetting NumPy's global seed to 0; ``c`` is an
    uninitialised array with the same shape and dtype as ``a``.

    Args:
        shape: Shape passed to ``np.random.normal(size=...)``.
        constructor: Optional callable applied to each of ``a``, ``b`` and
            ``c`` before they are returned. Skipped when falsy.

    Returns:
        A 3-tuple ``(a, b, c)``, converted by ``constructor`` when given.
    """
    np.random.seed(0)

    def _draw():
        # NumPy produces 64-bit floats by default; cast down to the 32-bit
        # floats typically used in deep learning.
        return np.random.normal(size=shape).astype(np.float32)

    a = _draw()
    b = _draw()
    c = np.empty_like(a)
    if not constructor:
        return a, b, c
    return tuple(constructor(arr) for arr in (a, b, c))

In [14]:
def test_mod(mod, m):
    """Run ``mod`` on freshly generated inputs and check it computes ``a + b``.

    Args:
        mod: Callable invoked as ``mod(a, b, c)``; it is expected to write
            its result into ``c``.
        m: Shape forwarded to ``get_abc`` for the three arrays.

    Raises:
        AssertionError: If ``c`` differs from ``a + b`` element-wise.
    """
    a, b, c = get_abc(m, tvm.nd.array)
    mod(a, b, c)
    print('c.shape:', c.shape)
    # `.numpy()` replaces the deprecated `.asnumpy()` accessor.
    np.testing.assert_equal(c.numpy(), a.numpy() + b.numpy())

# Build the schedule once, then run the same module on two vector lengths.
mod = tvm.build(s, [A, B, C])
for length in (5, 1000):
    test_mod(mod, length)

c.shape: (5,)
c.shape: (1000,)


In [17]:
def tvm_vector_add(ndim):
    """Build an element-wise addition module for ``ndim``-dimensional inputs.

    Args:
        ndim: Number of dimensions; one fresh ``te.var()`` is created per
            dimension and used as that dimension's extent.

    Returns:
        The result of ``tvm.build`` for the arguments ``[A, B, C]``, where
        ``C`` is computed as ``A + B`` over ``A``'s shape.
    """
    dims = [te.var() for _ in range(ndim)]
    lhs = te.placeholder(dims)
    rhs = te.placeholder(lhs.shape)
    # `idx` collects every index of the output, so one lambda serves any rank.
    out = te.compute(lhs.shape, lambda *idx: lhs[idx] + rhs[idx])
    sched = te.create_schedule(out.op)
    return tvm.build(sched, [lhs, rhs, out])

In [18]:
# Build one module per rank and run it on an input of the matching shape.
for shape in ((2, 2), (2, 3, 4, 5)):
    mod = tvm_vector_add(len(shape))
    test_mod(mod, shape)

c.shape: (2, 2)
c.shape: (2, 3, 4, 5)


In [19]:
# Same construction as tvm_vector_add, but lowered instead of built so that
# the generated IR for the 2-D case can be inspected.
ndim = 2
A = te.placeholder(shape=[te.var() for _ in range(ndim)])
B = te.placeholder(shape=A.shape)
C = te.compute(A.shape, lambda *i: A[i] + B[i])
s = te.create_schedule(C.op)
# The last expression of the cell is displayed: the lowered IR.
tvm.lower(s, args=[A, B, C], simple_mode=True)

#[version = "0.0.5"]
@main = primfn(placeholder_2: handle, placeholder_3: handle, compute_1: handle) -> ()
  attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
  buffers = {placeholder: Buffer(placeholder_4: Pointer(float32), float32, [(stride: int32*tindex: int32)], [], type="auto"),
             placeholder_1: Buffer(placeholder_5: Pointer(float32), float32, [(stride_1: int32*tindex)], [], type="auto"),
             compute: Buffer(compute_2: Pointer(float32), float32, [(stride_2: int32*tindex)], [], type="auto")}
  buffer_map = {placeholder_2: placeholder, placeholder_3: placeholder_1, compute_1: compute}
  preflattened_buffer_map = {placeholder_2: placeholder_6: Buffer(placeholder_4, float32, [tindex, tindex_1: int32], [stride, stride_3: int32], type="auto"), placeholder_3: placeholder_7: Buffer(placeholder_5, float32, [tindex, tindex_1], [stride_1, stride_4: int32], type="auto"), compute_1: compute_3: Buffer(compute_2, float32, [tindex, tindex_1