In [1]:
from pynq import Overlay
# Load the 'matrix.bit' overlay; the DMA engine used below is reached through
# this object (ol.axi_dma_0).
ol = Overlay('matrix.bit')

In [2]:
# Handle to the overlay's `axi_dma_0` block; its send/receive channels move
# the matrices to and from the hardware.
dma = ol.axi_dma_0

In [3]:
# Keep both DMA channel handles at hand: one sends the input buffer to the
# hardware, the other receives the result buffer.
data_send, data_recv = dma.sendchannel, dma.recvchannel

In [4]:
import numpy as np
import random 

In [5]:
# Seed BOTH generators. The matrices are drawn from NumPy's global generator,
# which `random.seed` does not touch, so seeding only the stdlib `random`
# module left data_A / data_B different on every kernel restart.
random.seed(2)
np.random.seed(2)

# Two 32x32 operands with entries uniformly drawn from [0, 10).
# np.random.uniform already returns float64, so no dtype conversion is needed.
data_A = np.random.uniform(low=0, high=10, size=(32, 32))
data_B = np.random.uniform(low=0, high=10, size=(32, 32))

In [6]:
# Show both operand matrices (NumPy abbreviates the large arrays in the repr).
data_A, data_B

(array([[5.58618347, 4.07037211, 2.16574242, ..., 4.65942326, 1.10041576,
         1.05938077],
        [6.23236979, 2.07108377, 8.91692661, ..., 0.52885557, 6.71617649,
         5.60162533],
        [7.05050639, 0.63802057, 9.45349588, ..., 3.32068736, 7.0172619 ,
         6.65232565],
        ...,
        [2.73897622, 1.70977656, 0.71568514, ..., 4.24903936, 9.69498441,
         1.00512484],
        [1.28765722, 3.71547114, 4.53217602, ..., 7.06784307, 4.12045165,
         2.76758379],
        [8.63692121, 9.16497785, 7.24947851, ..., 0.82535657, 8.72454581,
         2.14640396]]),
 array([[5.47781577, 2.31474974, 0.88850018, ..., 9.6088866 , 6.77647463,
         0.91229625],
        [6.41750766, 0.95691697, 8.05301308, ..., 5.87940989, 5.36987056,
         0.12587154],
        [1.58282006, 2.10993426, 4.75495593, ..., 0.24684051, 9.94258454,
         1.85665357],
        ...,
        [6.21785036, 9.23839576, 3.06572681, ..., 9.83515088, 5.74537465,
         1.78476514],
        [0.0

In [8]:
def multiply(matrix_a, matrix_b):
    """Multiply two 2-D matrices with a plain triple loop.

    This is deliberately written as explicit Python loops: it is the software
    baseline that the notebook times against the hardware result.

    Parameters
    ----------
    matrix_a : 2-D array-like of shape (n, k)
        Left operand, indexable as ``matrix_a[i][k]``.
    matrix_b : 2-D array-like of shape (k, m)
        Right operand, indexable as ``matrix_b[k][j]``.

    Returns
    -------
    numpy.ndarray
        The (n, m) product as a float64 array.

    Raises
    ------
    ValueError
        If either operand is not 2-D or the inner dimensions do not match.
    """
    # Take the sizes from the operands instead of assuming 32x32, so that
    # other shapes give the full product rather than a partial one or an
    # IndexError.
    n_rows, n_inner = np.shape(matrix_a)
    n_inner_b, n_cols = np.shape(matrix_b)
    if n_inner != n_inner_b:
        raise ValueError(
            "inner dimensions do not match: "
            f"{n_rows}x{n_inner} @ {n_inner_b}x{n_cols}"
        )

    result_matrix = np.zeros((n_rows, n_cols))
    for i in range(n_rows):
        for j in range(n_cols):
            for k in range(n_inner):
                # Accumulate directly into the float64 result so the
                # summation order and precision match the original.
                result_matrix[i][j] += matrix_a[i][k] * matrix_b[k][j]

    return result_matrix

In [9]:
import time

# Time the pure-Python software baseline.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() is wall-clock time and can jump if the system clock
# is adjusted during the measurement.
start_time = time.perf_counter()
multiply(data_A, data_B) #software
end_time = time.perf_counter()
print(end_time - start_time)

0.6404907703399658


In [10]:
from pynq import allocate

In [11]:
# Flatten each matrix into a 1-D copy and lay them out back to back:
# every element of A first, followed by every element of B.
data_a_f, data_b_f = (matrix.flatten() for matrix in (data_A, data_B))
data_input = np.concatenate((data_a_f, data_b_f))

In [12]:
# Inspect the combined 1-D input (A's elements followed by B's).
data_input

array([5.58618347, 4.07037211, 2.16574242, ..., 6.65410536, 2.33227236,
       6.1152555 ])

In [13]:
# Buffers handed to the DMA channels below: a flat float32 input holding both
# operands (2048 = 2 * 32 * 32 values) and a 32x32 float32 output for the
# product.
input_buffer = allocate(shape=(2048,), dtype=np.float32)
output_buffer = allocate(shape=(32, 32), dtype=np.float32)

In [14]:
# Copy both flattened operands into the input buffer; the float64 values are
# converted to the buffer's float32 dtype during the copy.
np.copyto(input_buffer, data_input)

In [15]:
# Time the hardware path: start the send (operands in) and receive (result
# out) transfers, then block until both channels have finished.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() is wall-clock time and can jump if the system clock
# is adjusted during the measurement.
start_time = time.perf_counter()
data_send.transfer(input_buffer)
data_recv.transfer(output_buffer)
data_send.wait()
data_recv.wait()
end_time = time.perf_counter()
print(end_time - start_time)

0.007139921188354492


In [16]:
# Inspect the 32x32 float32 result received from the hardware.
output_buffer

PynqBuffer([[ 765.5427 ,  673.472  ,  941.42505, ...,  759.3936 ,
              888.3853 ,  751.73584],
            [ 926.2644 ,  683.18396, 1002.74615, ...,  825.4156 ,
             1051.6072 ,  754.4533 ],
            [ 786.41797,  739.4544 ,  995.3097 , ...,  814.84595,
              983.7216 ,  774.1037 ],
            ...,
            [ 589.7141 ,  563.06805,  738.77734, ...,  642.47754,
              659.3649 ,  603.89734],
            [ 720.4192 ,  704.4468 ,  764.1818 , ...,  662.04407,
              816.14404,  653.45294],
            [ 737.1899 ,  633.12427,  909.44165, ...,  691.673  ,
              936.0558 ,  727.24005]], dtype=float32)

In [17]:
# Element-wise error: NumPy's own product of the float64 operands serves as
# the reference, minus the float32 result read back from the hardware.
diff = (data_A @ data_B) - output_buffer

In [18]:
# Root-mean-square of the element-wise error, as a single summary figure.
rms_error = np.sqrt(np.mean(np.square(diff)))

In [19]:
# Show the element-wise error matrix together with its RMS value.
diff, rms_error

(PynqBuffer([[ 3.22306626e-05, -7.09567951e-06, -1.25157352e-04, ...,
              -5.28500681e-05,  1.87174629e-05,  4.31549502e-05],
             [ 2.79106247e-05, -8.81895420e-05, -9.83259625e-05, ...,
               1.36049274e-04,  2.13860858e-08, -3.37322657e-05],
             [ 1.48117088e-04,  5.61753136e-05,  3.26576902e-05, ...,
              -2.69555735e-05, -5.41617646e-05,  8.84166676e-05],
             ...,
             [-3.60363637e-05, -1.07490983e-04,  7.40756889e-05, ...,
              -2.17863342e-05, -5.79756022e-05, -2.85113272e-05],
             [-1.08515069e-04,  3.76699304e-05, -9.17593449e-05, ...,
              -8.63710966e-07, -5.08612710e-05, -4.16035823e-05],
             [-1.03256434e-04, -8.29566280e-05, -6.83953847e-05, ...,
               4.23906929e-05, -3.33300544e-05,  1.72376316e-05]]),
 PynqBuffer(6.82287336e-05))