In [1]:
import tvm
import numpy as np
import topi
from tvm import relay
import topi.testing
from tvm.contrib import graph_runtime
from topi.util import get_const_tuple

# Convolution Test

In [2]:
# Shared configuration for both the convolution and the dense test.

# Convolution input is (batch, in_channel, in_height, in_width).
# `in_size` is bound here as well but is not referenced by any visible cell.
batch = 1
in_height = in_width = in_size = 32
# Dense layer: data is (batch, in_dim), weights are (out_dim, in_dim).
in_dim = 32
out_dim = 32
# Convolution weights are (num_filter, in_channel, kernel, kernel).
in_channel = 32
num_filter = 32
kernel = 3
stride = (1, 1)
padding = (1, 1)
# Bit widths: random test values are drawn from [0, 2**bits).
activation_bits = 1
weight_bits = 1
# When True, the NumPy references below map weight value 1 -> +1 and every other value -> -1.
unipolar = True

# dtype of the placeholders / random inputs, and dtype requested for the operator outputs.
input_dtype = 'uint32'
out_dtype = 'int32'

def generate_quantized_np(shape, bits, out_dtype):
    """Return a random array of non-negative integers that fit in `bits` bits.

    Parameters
    ----------
    shape : int or tuple of int
        Shape of the array to generate.
    bits : int
        Bit width; values are drawn uniformly from ``[0, 2**bits)``.
    out_dtype : str or numpy.dtype
        dtype the sampled integers are cast to before being returned.

    Returns
    -------
    numpy.ndarray
        Array of the requested shape and dtype, sampled from NumPy's
        global random state.
    """
    # randint treats the upper bound as exclusive, so 1 << bits is the first
    # value that is *not* produced.
    upper_exclusive = 1 << bits
    samples = np.random.randint(0, upper_exclusive, size=shape)
    return samples.astype(out_dtype)

# Build the TOPI bitserial conv2d (NCHW) compute and its schedule for the llvm target.
with tvm.target.create('llvm'):
    A = tvm.placeholder((batch, in_channel, in_height, in_width), dtype=input_dtype, name='A')
    W = tvm.placeholder((num_filter, in_channel, kernel, kernel), dtype=input_dtype, name='W')
    # Weights are bit-packed up front: packed along axis 1, with the bit axis placed at 0.
    QW = topi.nn.bitpack(W, weight_bits, pack_axis=1, bit_axis=0)
    B = topi.nn.bitserial_conv2d_nchw(A, QW, stride, padding, activation_bits, weight_bits,
                                      out_dtype=out_dtype, unipolar=unipolar)
    s = topi.generic.schedule_bitserial_conv2d_nchw([B])

a_shape = get_const_tuple(A.shape)
w_shape = get_const_tuple(W.shape)

# Fixed seed so that Restart & Run All reproduces the same random inputs.
np.random.seed(0)
a_np = generate_quantized_np(a_shape, activation_bits, input_dtype)
w_np = generate_quantized_np(w_shape, weight_bits, input_dtype)

# NumPy reference result (kept in b_np; later cells compare against it).
if unipolar:
    # Unipolar reference weights: value 1 becomes +1, everything else becomes -1.
    # Vectorized equivalent of the element-by-element rewrite.
    w_ = np.where(w_np == 1, 1, -1).astype(out_dtype)
    b_np = topi.testing.conv2d_nchw_python(a_np.astype(out_dtype), w_, stride, padding)
else:
    b_np = topi.testing.conv2d_nchw_python(a_np, w_np, stride, padding)

# Compile and run the TOPI kernel on the CPU, then compare with the reference.
ctx = tvm.cpu(0)
a = tvm.nd.array(a_np, ctx)
w = tvm.nd.array(w_np, ctx)
b = tvm.nd.empty(get_const_tuple(B.shape), dtype=B.dtype, ctx=ctx)
func = tvm.build(s, [A, W, B], 'llvm')
func(a, w, b)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

Cannot find config for target=llvm, workload=('bitserial_conv2d_nchw', (1, 32, 32, 32, 'uint32'), (1, 32, 1, 3, 3, 'uint32'), (1, 1), (1, 1), 1, 1, 'uint32', 'int32', True). A fallback configuration is used, which may bring great performance regression.


In [3]:
# Relay graph for the same convolution. Every attribute is taken from the shared
# configuration so this graph cannot drift from the TOPI graph and the NumPy
# reference when a constant is changed.
input_var = relay.var('input', shape=A.shape, dtype=A.dtype)
kernel_var = relay.var('kernel', shape=W.shape, dtype=W.dtype)
# Same packing layout as the TOPI test: pack along axis 1, bit axis at 0.
q_kernel = relay.nn.bitpack(kernel_var, bits=weight_bits, pack_axis=1, bit_axis=0)
q_out = relay.nn.bitserial_conv2d(
    input_var,
    q_kernel,
    strides=stride,
    padding=padding,
    channels=num_filter,
    kernel_size=(kernel, kernel),
    activation_bits=activation_bits,
    weight_bits=weight_bits,
    data_layout='NCHW',
    pack_dtype='uint32',
    out_dtype=out_dtype,
    unipolar=unipolar,
)

In [4]:
# Wrap the convolution expression in a Relay function taking the input and the
# (unpacked) kernel as its two parameters.
q_func = relay.Function([input_var, kernel_var], q_out)

# Compile for the 'llvm' target at opt_level=3, supplying w_np as the value of
# the 'kernel' parameter. relay.build returns the graph, the compiled library
# and the parameter dict that the runtime cell loads.
with relay.build_config(opt_level=3):
    graph, lib, params = relay.build(q_func, 'llvm', params={'kernel': w_np})

In [5]:
# Instantiate the graph runtime on the CPU for the compiled convolution.
module = graph_runtime.create(graph, lib, tvm.cpu())
# Feed the same random activations that the TOPI test used.
module.set_input('input', a_np)
# Load the parameters returned by relay.build.
module.set_input(**params)
module.run()

In [6]:
# The Relay output must match the NumPy convolution reference.
# NOTE: b_np is rebound to the dense weights by the dense test cell, so this
# check is only valid when the notebook is executed top to bottom.
tvm.testing.assert_allclose(module.get_output(0).asnumpy(), b_np, rtol=1e-5)

# Dense Test

In [7]:
# Build the TOPI bitserial dense compute and its schedule for the llvm target.
# NOTE: this cell rebinds A, B, s, a_np, b_np, a, b and func from the convolution
# test (b_np now holds the dense weights) and relies on `ctx` created there.
with tvm.target.create('llvm'):
    A = tvm.placeholder((batch, in_dim), dtype=input_dtype, name='A')
    B = tvm.placeholder((out_dim, in_dim), dtype=input_dtype, name='B')
    # Pack the weights with the configured bit width rather than a literal 1.
    QB = topi.nn.bitpack(B, bits=weight_bits, bit_axis=1, pack_axis=1)
    C = topi.nn.bitserial_dense(A, QB, activation_bits, weight_bits, out_dtype=out_dtype,
                                unipolar=unipolar)
    s = topi.generic.schedule_bitserial_dense([C])

a_shape = get_const_tuple(A.shape)
b_shape = get_const_tuple(B.shape)

# Fixed seed so that Restart & Run All reproduces the same random inputs.
np.random.seed(0)
# a_shape / b_shape are already constant tuples; no second conversion is needed.
a_np = generate_quantized_np(a_shape, activation_bits, input_dtype)
b_np = generate_quantized_np(b_shape, weight_bits, input_dtype)

# NumPy reference result (kept in c_np; later cells compare against it).
if unipolar:
    # Unipolar reference weights: value 1 becomes +1, everything else becomes -1.
    # Vectorized equivalent of the element-by-element rewrite.
    b_ = np.where(b_np == 1, 1, -1).astype(out_dtype)
    c_np = np.dot(a_np, b_.T)
else:
    c_np = np.dot(a_np, b_np.T)

# Compile and run the TOPI kernel on the CPU, then compare with the reference.
a = tvm.nd.array(a_np, ctx)
b = tvm.nd.array(b_np, ctx)
c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
func = tvm.build(s, [A, B, C], "llvm")
func(a, b, c)
tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)



In [8]:
# Relay graph for the same dense layer. Bit widths, unit count, output dtype and
# polarity are passed explicitly from the shared configuration so the graph
# matches the TOPI graph and the NumPy reference instead of relying on Relay's
# defaults.
input_var = relay.var('input', shape=A.shape, dtype=A.dtype)
kernel_var = relay.var('kernel', shape=B.shape, dtype=B.dtype)
# Same packing layout as the TOPI test: pack axis and bit axis are both 1.
q_kernel = relay.nn.bitpack(kernel_var, bits=weight_bits, pack_axis=1, bit_axis=1)
q_out = relay.nn.bitserial_dense(
    input_var,
    q_kernel,
    units=out_dim,
    data_bits=activation_bits,
    weight_bits=weight_bits,
    out_dtype=out_dtype,
    unipolar=unipolar,
)

In [9]:
# Wrap the dense expression in a Relay function taking the input and the
# (unpacked) weight matrix as its two parameters. This rebinds q_func from the
# convolution test.
q_func = relay.Function([input_var, kernel_var], q_out)

# Compile for the 'llvm' target at opt_level=3, supplying b_np (the dense
# weights at this point in the notebook) as the value of the 'kernel' parameter.
with relay.build_config(opt_level=3):
    graph, lib, params = relay.build(q_func, 'llvm', params={'kernel': b_np})



In [10]:
# Instantiate the graph runtime on the CPU for the compiled dense layer.
module = graph_runtime.create(graph, lib, tvm.cpu())
# Feed the same random activations that the TOPI dense test used.
module.set_input('input', a_np)
# Load the parameters returned by relay.build.
module.set_input(**params)
module.run()

In [11]:
# The Relay output must match the NumPy dense reference computed in the TOPI dense test.
tvm.testing.assert_allclose(module.get_output(0).asnumpy(), c_np, rtol=1e-5)