In [22]:
from pynq import Overlay
# Build the PYNQ overlay object from the 'matrix.bit' bitstream file
# (relative path, so it is resolved against the notebook's working directory).
ol = Overlay('matrix.bit')

In [23]:
# IPython introspection: display the help/docstring of the loaded overlay object.
ol?

In [24]:
# Handle to the overlay's `axi_dma_0` attribute.
# NOTE(review): presumably the AXI DMA IP instance of the block design - confirm against the design.
dma = ol.axi_dma_0

In [25]:
# Keep short aliases for the two DMA channels used below:
# `data_send` carries the input buffer, `data_recv` fills the output buffer.
data_send, data_recv = dma.sendchannel, dma.recvchannel

In [26]:
import numpy as np
import random 

In [27]:
# Fixed seed so both operands are reproducible across kernel restarts.
np.random.seed(2)
# Two 100x100 float32 matrices drawn from U[0, 1). The generator is consumed
# left to right, so data_A is drawn first and data_B second.
data_A, data_B = (
    np.random.uniform(0, 1, (100, 100)).astype(np.float32)
    for _ in range(2)
)

In [28]:
# Display both operand matrices (NumPy abbreviates the printed arrays).
data_A, data_B

(array([[0.4359949 , 0.02592623, 0.5496625 , ..., 0.6614813 , 0.17047714,
         0.88165224],
        [0.77800816, 0.13395421, 0.86891663, ..., 0.57027423, 0.9167146 ,
         0.70226425],
        [0.5012165 , 0.5060868 , 0.21882078, ..., 0.4366699 , 0.01505613,
         0.8929059 ],
        ...,
        [0.8475011 , 0.13135862, 0.7483047 , ..., 0.6138644 , 0.30138135,
         0.8357257 ],
        [0.5905587 , 0.28155923, 0.47100276, ..., 0.65550596, 0.21874154,
         0.8921831 ],
        [0.72069114, 0.88509136, 0.78875065, ..., 0.73417616, 0.7731632 ,
         0.17327332]], dtype=float32),
 array([[0.04520936, 0.26083475, 0.8567043 , ..., 0.23711477, 0.9927218 ,
         0.8107768 ],
        [0.19436759, 0.83472526, 0.15114179, ..., 0.9865399 , 0.27422923,
         0.37147334],
        [0.3303086 , 0.36142477, 0.11134013, ..., 0.9762257 , 0.8752609 ,
         0.8830626 ],
        ...,
        [0.4594083 , 0.37802202, 0.21500729, ..., 0.5419246 , 0.8748582 ,
         0.9212987 

In [29]:
def multiply(matrix_a, matrix_b):
    """Multiply two 2-D matrices with the naive triple loop.

    The explicit Python loop is kept on purpose: the notebook times this
    function as the software baseline, so it must not be vectorised.
    Dimensions are taken from the operands instead of being fixed at
    100x100, so any conformable pair of matrices is accepted.

    Args:
        matrix_a: 2-D array-like of shape (n, m).
        matrix_b: 2-D array-like of shape (m, p).

    Returns:
        numpy.ndarray of shape (n, p) with dtype float64 (the ``np.zeros``
        default), holding the product ``matrix_a @ matrix_b``.

    Raises:
        ValueError: if either operand is not 2-D, or if the number of
            columns of ``matrix_a`` differs from the number of rows of
            ``matrix_b``.
    """
    matrix_a = np.asarray(matrix_a)
    matrix_b = np.asarray(matrix_b)
    if matrix_a.ndim != 2 or matrix_b.ndim != 2:
        raise ValueError("multiply expects two 2-D matrices")

    rows, inner = matrix_a.shape
    inner_b, cols = matrix_b.shape
    if inner != inner_b:
        raise ValueError(
            "shape mismatch: %s cannot be multiplied by %s"
            % (matrix_a.shape, matrix_b.shape)
        )

    result_matrix = np.zeros((rows, cols))
    for i in range(rows):
        for j in range(cols):
            for k in range(inner):
                # Accumulate directly into the float64 result element so the
                # rounding behaviour matches the original implementation.
                result_matrix[i, j] += matrix_a[i, k] * matrix_b[k, j]

    return result_matrix

In [30]:
import time
# Time the pure-Python reference implementation.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() is a wall clock that can jump if the system time is adjusted.
start_time = time.perf_counter()
ans = multiply(data_A, data_B) #software
end_time = time.perf_counter()
print(end_time - start_time)

6.890790700912476


In [31]:
from pynq import allocate

In [32]:
# Flatten each operand to 1-D (flatten() returns a copy), then lay A's
# elements followed by B's elements in a single contiguous input vector.
data_a_f, data_b_f = (operand.flatten() for operand in (data_A, data_B))

data_input = np.concatenate((data_a_f, data_b_f))

In [33]:
# Display the concatenated 1-D input vector (A's elements followed by B's).
data_input

array([0.4359949 , 0.02592623, 0.5496625 , ..., 0.00816872, 0.87086016,
       0.03899087], dtype=float32)

In [34]:
# Buffers obtained from pynq's allocate(); they are handed to the DMA channels below.
# 20000 = 2 * 100 * 100 float32 elements, i.e. the length of the concatenated data_input.
input_buffer = allocate(shape=20000, dtype=np.float32)
# The result buffer has the shape of one 100x100 float32 matrix.
output_buffer = allocate(shape=(100, 100), dtype=np.float32)

In [35]:
# Copy the host-side input vector element-for-element into the existing buffer
# (in-place write; input_buffer is not rebound).
input_buffer[:] = data_input

In [36]:
# Time the hardware path. perf_counter() is used because the measured interval
# is only a few milliseconds and needs a monotonic, high-resolution clock.
start_time = time.perf_counter()
# Both transfers are issued before either wait(), send first and then receive.
data_send.transfer(input_buffer)
data_recv.transfer(output_buffer)
data_send.wait()
data_recv.wait()
end_time = time.perf_counter()
print(end_time - start_time)

0.002061128616333008


In [37]:
# Show the DMA result next to the pure-Python reference `ans` for a visual comparison.
output_buffer, ans

(PynqBuffer([[22.602877, 24.11262 , 23.864393, ..., 23.211348, 25.834352,
              22.717093],
             [24.063475, 24.371958, 24.040434, ..., 24.048021, 24.349699,
              24.534414],
             [26.561407, 28.221485, 28.110092, ..., 27.545736, 28.532488,
              25.616997],
             ...,
             [23.29963 , 24.187273, 24.604368, ..., 24.545145, 27.46152 ,
              21.26934 ],
             [23.98164 , 25.262651, 25.172256, ..., 23.723015, 26.628115,
              24.254265],
             [25.794188, 29.286129, 27.576231, ..., 25.389294, 27.434633,
              26.889084]], dtype=float32),
 array([[22.60287335, 24.11261823, 23.86439793, ..., 23.21134907,
         25.83435382, 22.7170966 ],
        [24.0634748 , 24.3719555 , 24.04043433, ..., 24.04802274,
         24.34969362, 24.53441502],
        [26.56141167, 28.22148338, 28.11009454, ..., 27.54573791,
         28.53248852, 25.61699982],
        ...,
        [23.29963047, 24.1872752 , 24.60437588

In [38]:
# Element-wise error of the hardware result against NumPy's own product of the same inputs.
diff = data_A.dot(data_B) - output_buffer

In [39]:
# Root-mean-square of the element-wise error: sqrt(mean(diff^2)).
rms_error = np.sqrt(np.square(diff).mean())

In [40]:
# Display the element-wise difference between the NumPy product and the DMA result.
diff

PynqBuffer([[ 0.0000000e+00,  1.9073486e-06,  1.9073486e-06, ...,
              0.0000000e+00,  0.0000000e+00,  1.9073486e-06],
            [ 1.9073486e-06,  1.9073486e-06,  0.0000000e+00, ...,
              0.0000000e+00,  0.0000000e+00,  0.0000000e+00],
            [ 0.0000000e+00, -1.9073486e-06,  0.0000000e+00, ...,
              0.0000000e+00,  0.0000000e+00,  1.9073486e-06],
            ...,
            [-1.9073486e-06, -1.9073486e-06,  0.0000000e+00, ...,
              0.0000000e+00,  0.0000000e+00,  0.0000000e+00],
            [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00, ...,
              0.0000000e+00,  1.9073486e-06,  0.0000000e+00],
            [-1.9073486e-06,  0.0000000e+00,  0.0000000e+00, ...,
              0.0000000e+00,  0.0000000e+00,  0.0000000e+00]],
           dtype=float32)

In [41]:
# Display the scalar RMS error computed above.
rms_error

PynqBuffer(1.2574043e-06, dtype=float32)

In [42]:
%%time

# NumPy's built-in product on the same operands, timed by the %%time cell magic
# so it can be compared with the two timings printed earlier.
np.dot(data_A, data_B)

CPU times: user 1.89 ms, sys: 104 µs, total: 1.99 ms
Wall time: 1.18 ms


array([[22.602877, 24.112621, 23.864395, ..., 23.211348, 25.834352,
        22.717094],
       [24.063477, 24.37196 , 24.040434, ..., 24.048021, 24.349699,
        24.534414],
       [26.561407, 28.221483, 28.110092, ..., 27.545736, 28.532488,
        25.616999],
       ...,
       [23.299627, 24.187271, 24.604368, ..., 24.545145, 27.46152 ,
        21.26934 ],
       [23.98164 , 25.262651, 25.172256, ..., 23.723015, 26.628117,
        24.254265],
       [25.794186, 29.286129, 27.576231, ..., 25.389294, 27.434633,
        26.889084]], dtype=float32)