In [1]:
import numpy as np
from time import time
from pynq import Overlay
from pynq import allocate

In [2]:
# Location of the bitstream on the board.
# NOTE(review): absolute, user-specific path -- adjust when running elsewhere.
BITSTREAM_PATH = "/home/xilinx/Jeff/fp_mmult/fp_mmult.bit"

# Load the overlay, then grab handles to the two IP blocks used by the cells
# below: the HLS accelerator (presumably the matrix-multiply core, going by the
# bitstream name -- confirm against the block design) and the AXI DMA engine.
ol = Overlay(BITSTREAM_PATH)
fp_mmult_ip, dma0 = ol.HLS_accel_0, ol.axi_dma_0

In [3]:
# Side length of the square matrices.
# NOTE(review): presumably has to match the size the accelerator was built
# for -- confirm before changing.
DIM = 32

# The input buffer has room for two DIM x DIM matrices stacked row-wise
# (the fill cell puts A in the top half and B in the bottom half); the output
# buffer holds a single DIM x DIM result.
stackedInputShape = (2 * DIM, DIM)
resultShape = (DIM, DIM)

inBuffer0 = allocate(shape=stackedInputShape, dtype=np.float32)
outBuffer0 = allocate(shape=resultShape, dtype=np.float32)

In [4]:
# Build the two DIM x DIM operands with vectorised outer operations instead of
# element-by-element Python loops. The index vector is float64 so that `a` and
# `b` keep the dtype they previously inherited from np.zeros().
idx = np.arange(DIM, dtype=np.float64)
a = np.add.outer(idx, idx)        # a[i][j] = i + j
b = np.multiply.outer(idx, idx)   # b[i][j] = i * j

# Stage both operands in the single input buffer: A occupies rows [0, DIM),
# B occupies rows [DIM, 2*DIM). The slice assignment converts the float64
# values to the buffer's float32 dtype, exactly as per-element stores did.
inBuffer0[:DIM, :] = a
inBuffer0[DIM:2 * DIM, :] = b

In [5]:
# Timestamp taken before any hardware interaction, so the measured interval
# covers the register write, both DMA transfers and the waits.
timeKernelStart = time()

# Write 0x01 to the accelerator register at offset 0x00.
# NOTE(review): presumably the ap_start bit of the HLS control register --
# confirm against the IP's register map.
fp_mmult_ip.write(0x00, 0x01)

# Start both DMA channels (send first, then receive).
for channel in (dma0.sendchannel, dma0.recvchannel):
    channel.start()

# Queue the input stream to the accelerator and the result stream back from it.
dma0.sendchannel.transfer(inBuffer0)
dma0.recvchannel.transfer(outBuffer0)

# Block until each channel reports completion (send first, then receive).
for channel in (dma0.sendchannel, dma0.recvchannel):
    channel.wait()

timeKernelEnd = time()
elapsedSeconds = timeKernelEnd - timeKernelStart
print("Kernel execution time: " + str(elapsedSeconds) + " s")

Kernel execution time: 0.0035893917083740234 s


In [6]:
# Golden reference computed on the CPU in float64. np.dot replaces the
# pure-Python triple loop (DIM**3 interpreted iterations) with one library call.
c = np.dot(a, b)

# Count the elements where the accelerator output differs from the reference.
# The comparison is deliberately exact: the operands built above are small
# integers, and every product and partial sum stays well below 2**24, so
# float32 arithmetic represents them without rounding.
# NOTE(review): switch to a tolerance-based check if the operands ever become
# non-integer.
err = int(np.count_nonzero(np.asarray(outBuffer0) != c))

print("A =")
print(a)
print("B =")
print(b)
print("C_Golden =")
print(c)
print("C_accel =")
print(outBuffer0)
print("====================================================================")

if err == 0:
    print("err = 0")
    print("Test passed!")
else:
    print("err = " + str(err))
    print("Test failed!")

A =
[[  0.   1.   2. ...,  29.  30.  31.]
 [  1.   2.   3. ...,  30.  31.  32.]
 [  2.   3.   4. ...,  31.  32.  33.]
 ..., 
 [ 29.  30.  31. ...,  58.  59.  60.]
 [ 30.  31.  32. ...,  59.  60.  61.]
 [ 31.  32.  33. ...,  60.  61.  62.]]
B =
[[   0.    0.    0. ...,    0.    0.    0.]
 [   0.    1.    2. ...,   29.   30.   31.]
 [   0.    2.    4. ...,   58.   60.   62.]
 ..., 
 [   0.   29.   58. ...,  841.  870.  899.]
 [   0.   30.   60. ...,  870.  900.  930.]
 [   0.   31.   62. ...,  899.  930.  961.]]
C_Golden =
[[      0.   10416.   20832. ...,  302064.  312480.  322896.]
 [      0.   10912.   21824. ...,  316448.  327360.  338272.]
 [      0.   11408.   22816. ...,  330832.  342240.  353648.]
 ..., 
 [      0.   24800.   49600. ...,  719200.  744000.  768800.]
 [      0.   25296.   50592. ...,  733584.  758880.  784176.]
 [      0.   25792.   51584. ...,  747968.  773760.  799552.]]
C_accel =
[[      0.   10416.   20832. ...,  302064.  312480.  322896.]
 [      0.   10912.  