In [10]:
import numpy as np
import tensorflow as tf

## Generate input data and load weights for conv2 in AlexNet

In [57]:
## Generate random data for input
# The buffer is named `input_data` (not `input`) so the builtin input() is not
# shadowed for the rest of the kernel session.
# NOTE(review): the RNG is left unseeded, so every run of this cell writes new
# data; any FPGA output produced from an earlier file becomes stale.
input_data = np.random.rand(96 * 27 * 27).astype(np.float32)
tf_input = input_data.reshape([1, 27, 27, 96])

## Write input data as binary
# Channels are moved in front of the spatial axes before flattening, so the
# file is laid out as [1, 96, 27, 27] (this is what the loader reshapes to).
fpga_input = tf_input.transpose([0, 3, 1, 2]).flatten()
byte_data = fpga_input.tobytes()
with open('conv2_fpga_in.bin', 'wb') as f:
    f.write(byte_data)
# Human-readable dump of the same values, one per line.
with open('conv2_fpga_in.txt', 'w') as f:
    f.writelines("{}\n".format(v) for v in fpga_input)

In [62]:
## Reload the saved input and load weights/bias for convolution2 in AlexNet (from ONNX)
load_data = np.fromfile('conv2_fpga_in.bin', dtype=np.float32)
# The file is stored channels-first ([1, 96, 27, 27]); move channels last again.
tf_load_input = np.transpose(load_data.reshape(1, 96, 27, 27), (0, 2, 3, 1))
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python objects;
# only use it on trusted .npy files (plain float arrays should not need it).
onnx_weight = np.load('conv2_w_0.npy', allow_pickle=True, encoding='bytes').astype(np.float32)
onnx_bias = np.load('conv2_b_0.npy', allow_pickle=True, encoding='bytes').astype(np.float32)
# Filter layout change, not an activation layout change: axes (0, 1, 2, 3) ->
# (2, 3, 1, 0). Presumably ONNX (out, in/group, kH, kW) -> TF (kH, kW, in/group, out)
# -- confirm against the exported model.
tf_weight = np.transpose(onnx_weight, (2, 3, 1, 0))
# Round-trip check: the data read back from disk must equal the generated input.
print(np.array_equal(tf_load_input, tf_input))

True


## Define group convolution and run

In [63]:
## Define group convolution
def conv(x, weights, biases, stride_y, stride_x, name,
         padding='SAME', groups=1):
    """Apply an (optionally grouped) 2-D convolution followed by a bias add.

    Args:
        x: Input tensor. Its last axis (axis 3) is treated as the channel axis
            and is split into `groups` equal parts when `groups > 1`.
        weights: Filter tensor. Its last axis (axis 3) is split into `groups`
            equal parts when `groups > 1`; presumably shaped
            (kH, kW, in_channels / groups, out_channels) -- TODO confirm.
        biases: Bias passed to `tf.nn.bias_add` together with the convolution
            output.
        stride_y: Stride placed at position 1 of the `strides` list.
        stride_x: Stride placed at position 2 of the `strides` list.
        name: Currently unused; kept so existing callers keep working.
        padding: Forwarded unchanged to `tf.nn.conv2d` (a padding mode string
            or an explicit per-dimension padding list).
        groups: Number of groups. `1` runs a single ordinary convolution.

    Returns:
        The convolution output with the bias added.
    """

    def convolve(inputs, kernel):
        # Single convolution shared by the grouped and non-grouped paths.
        return tf.nn.conv2d(inputs, kernel,
                            strides=[1, stride_y, stride_x, 1],
                            padding=padding)

    if groups == 1:
        conv_out = convolve(x, weights)
    else:
        # With multiple groups, split inputs and weights along their last axis
        # and convolve each input slice with its own filter slice.
        input_groups = tf.split(value=x, num_or_size_splits=groups, axis=3)
        weight_groups = tf.split(value=weights, num_or_size_splits=groups,
                                 axis=3)
        output_groups = [convolve(inputs, kernel)
                         for inputs, kernel in zip(input_groups, weight_groups)]

        # Stitch the per-group outputs back together along the channel axis.
        conv_out = tf.concat(values=output_groups, axis=3)

    # bias_add already returns a tensor shaped like its input, so no reshape
    # back to the convolution's shape is needed.
    return tf.nn.bias_add(conv_out, biases)

In [64]:
# Run convolution

stride = 1
pad = 2
# Explicit per-dimension padding: none on the first and last axes, `pad` on
# both sides of the two middle (spatial) axes.
padding = [[0, 0], [pad, pad], [pad, pad], [0, 0]]
# `stride` is passed for both directions instead of repeating the literal 1.
output = conv(tf_input, tf_weight, onnx_bias, stride, stride,
              padding=padding, name='group_test_conv', groups=2)
with tf.compat.v1.Session() as sess:
    tf_out = sess.run(output)
    print(type(tf_out))
    print(tf_out.shape)
    result = tf_out.transpose([0, 3, 1, 2])  # NHWC to NCHW
    print(result.shape)

<class 'numpy.ndarray'>
(1, 27, 27, 256)
(1, 256, 27, 27)


In [65]:
## Write the result as binary data (the binary data format is NCHW)
flat_result = result.flatten()
# Show the first element so it can be compared with the FPGA output by eye.
print(flat_result[0])
byte_data = flat_result.tobytes()
with open('tf_conv2_out.bin', 'wb') as f:
    f.write(byte_data)

0.6047369


## Compare FPGA result with TF result

In [91]:
# Read the FPGA output and the TensorFlow reference back as float32 arrays.
fpga_result, tf_result = (
    np.fromfile(path, dtype=np.float32)
    for path in ('fpga_conv2_out.bin', 'tf_conv2_out.bin')
)

In [92]:
# Sanity-check the FPGA output against the TensorFlow reference.
if fpga_result.shape != tf_result.shape:
    print("The size of the FPGA result is wrong")
    print("FPGA:: ", fpga_result.shape)
    print("TF:: ", tf_result.shape)
if fpga_result.dtype != tf_result.dtype:
    print("The data type of the FPGA result is wrong")
# Bit-exact comparison: any rounding difference between the two
# implementations is reported here; the size of the differences is examined
# separately.
if not np.array_equal(fpga_result, tf_result):
    print("error is somewhere...")
print(fpga_result.shape)
print(fpga_result[0])
print(tf_result[0])

error is somewhere...
(186624,)
0.6047369
0.6047369


In [95]:
# Absolute error of every FPGA output element against the TensorFlow reference.
# Both arrays must have the same length; a mismatch raises here instead of
# being silently truncated.
errors = np.abs(tf_result - fpga_result)

# Error relative to the TensorFlow value. Reference values at or near zero
# inflate this ratio (inf/nan for an exact zero), so read the maximum together
# with the absolute error printed next to it.
error_ratio = np.abs(errors / tf_result).astype(np.float32)

mi = error_ratio.argmax()
print("Max err index: ", mi, "\nError value: ", errors[mi], "\nError rate: ", error_ratio[mi], "\nTF result: ",tf_result[mi], "\nFPGA result: ", fpga_result[mi])
# Number of elements whose relative error exceeds 1e-3.
cnt = int(np.count_nonzero(error_ratio > 1.0e-03))
print(cnt)

Max err index:  46902 
Error value:  4.7270441e-07 
Error rate:  0.07217897 
TF result:  -6.5490603e-06 
FPGA result:  -6.076356e-06
37


In [96]:
# Spot-check a single element: FPGA value first, then the TensorFlow value.
for values in (fpga_result, tf_result):
    print(values[134110])

-1.1077417
-1.1077412


In [100]:
# Count the elements whose absolute error exceeds 1e-6.
cnt = sum(1 for e in errors if e > 1.0e-6)
print(cnt)

11679
