In [41]:
from pynq import Overlay
# Instantiate the PYNQ overlay described by 'matrix.bit'; the IP blocks it
# contains are reached as attributes of `ol` in the cells below.
ol = Overlay('matrix.bit')

In [42]:
ol?

In [43]:
# Handle to the overlay member named `axi_dma_0`.
# NOTE(review): presumably the AXI DMA engine feeding the matrix-multiply IP —
# confirm against the block design.
dma = ol.axi_dma_0

In [44]:
# Keep short aliases for the two DMA directions: `data_send` is the DMA's
# send channel and `data_recv` its receive channel.
data_send, data_recv = dma.sendchannel, dma.recvchannel

In [45]:
import numpy as np
import random 

In [46]:
# Seed NumPy's legacy global generator so both operands are reproducible.
np.random.seed(2)
# Draw the two 100x100 operands in order (A first, then B), uniformly between
# 1 and 2, and store them as float32.
data_A, data_B = (
    np.random.uniform(low=1, high=2, size=(100, 100)).astype(np.float32)
    for _ in range(2)
)

In [47]:
data_A, data_B

(array([[1.4359949, 1.0259262, 1.5496625, ..., 1.6614814, 1.1704772,
         1.8816522],
        [1.7780081, 1.1339542, 1.8689166, ..., 1.5702742, 1.9167147,
         1.7022642],
        [1.5012164, 1.5060868, 1.2188208, ..., 1.4366698, 1.0150561,
         1.8929058],
        ...,
        [1.847501 , 1.1313586, 1.7483047, ..., 1.6138644, 1.3013813,
         1.8357257],
        [1.5905588, 1.2815592, 1.4710027, ..., 1.655506 , 1.2187415,
         1.8921831],
        [1.7206911, 1.8850913, 1.7887506, ..., 1.7341762, 1.7731632,
         1.1732733]], dtype=float32),
 array([[1.0452094, 1.2608347, 1.8567044, ..., 1.2371148, 1.9927218,
         1.8107768],
        [1.1943675, 1.8347253, 1.1511418, ..., 1.9865398, 1.2742292,
         1.3714733],
        [1.3303086, 1.3614248, 1.1113402, ..., 1.9762256, 1.875261 ,
         1.8830626],
        ...,
        [1.4594083, 1.378022 , 1.2150073, ..., 1.5419246, 1.8748583,
         1.9212986],
        [1.1159571, 1.1270329, 1.3810983, ..., 1.49693  ,

In [48]:
def multiply(matrix_a, matrix_b):
    """Multiply two 2-D matrices with a naive triple loop (software baseline).

    The loop bounds come from the operands' own shapes rather than a fixed
    100, so any pair of conformable matrices is handled. For 100x100 inputs
    the arithmetic is performed in exactly the same order as before.

    Parameters
    ----------
    matrix_a : 2-D array-like, shape (n, m)
        Left operand, indexed as ``matrix_a[i][k]``.
    matrix_b : 2-D array-like, shape (m, p)
        Right operand, indexed as ``matrix_b[k][j]``.

    Returns
    -------
    numpy.ndarray
        Array of shape (n, p) created with ``np.zeros`` (float64) holding the
        matrix product.

    Raises
    ------
    ValueError
        If either operand is not 2-D, or the inner dimensions differ.
    """
    shape_a = np.shape(matrix_a)
    shape_b = np.shape(matrix_b)
    if len(shape_a) != 2 or len(shape_b) != 2:
        raise ValueError(
            "multiply expects two 2-D matrices, got shapes %s and %s"
            % (shape_a, shape_b)
        )

    n_rows, n_inner = shape_a
    n_inner_b, n_cols = shape_b
    if n_inner != n_inner_b:
        raise ValueError(
            "inner dimensions do not match: %s and %s" % (shape_a, shape_b)
        )

    result_matrix = np.zeros((n_rows, n_cols))
    for i in range(n_rows):
        for j in range(n_cols):
            for k in range(n_inner):
                # Accumulate straight into the result element so each product
                # is summed in the result array's precision, as before.
                result_matrix[i][j] += matrix_a[i][k] * matrix_b[k][j]

    return result_matrix

In [49]:
import time

# Time the pure-Python software baseline.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() tracks the wall clock and can jump if the system
# time is adjusted while the benchmark is running.
start_time = time.perf_counter()
ans = multiply(data_A, data_B) #software
end_time = time.perf_counter()
print(end_time - start_time)  # elapsed seconds

7.063893556594849


In [50]:
from pynq import allocate

In [51]:
# Flatten each operand to a 1-D copy, then lay them out back to back
# (all of A followed by all of B) as a single input stream.
data_a_f, data_b_f = (operand.flatten() for operand in (data_A, data_B))
data_input = np.concatenate((data_a_f, data_b_f))

In [52]:
data_input

array([1.4359949, 1.0259262, 1.5496625, ..., 1.0081687, 1.8708602,
       1.0389909], dtype=float32)

In [53]:
# 20000 float32 elements: room for both flattened 100x100 operands.
input_buffer = allocate(shape=20000, dtype=np.float32)
# The result buffer keeps the 2-D (100, 100) layout of the product.
output_buffer = allocate(shape=(100, 100), dtype=np.float32)

In [54]:
# Fill the allocated buffer in place with the concatenated operands; the
# buffer object itself must be kept, so assign into it rather than rebinding.
input_buffer[:] = data_input

In [55]:
# Time the hardware path: start both DMA transfers, then wait for each.
# perf_counter() is used because this interval is very short and the clock
# must be monotonic; time.time() follows the wall clock and may be adjusted
# mid-measurement.
start_time = time.perf_counter()
data_send.transfer(input_buffer)
data_recv.transfer(output_buffer)
data_send.wait()
data_recv.wait()
end_time = time.perf_counter()
print(end_time - start_time)  # elapsed seconds, covering transfers and waits

0.0014595985412597656


In [56]:
output_buffer, ans

(PynqBuffer([[217.48277, 222.71255, 220.83287, ..., 219.4906 , 224.9741 ,
              217.24992],
             [220.3781 , 224.40662, 222.44363, ..., 221.762  , 224.92418,
              220.50197],
             [229.01155, 234.39165, 232.6488 , ..., 231.39522, 235.24248,
              227.72006],
             ...,
             [220.08456, 224.69226, 223.47789, ..., 222.72943, 228.50632,
              217.70721],
             [221.11069, 226.11174, 224.38988, ..., 222.25142, 228.01703,
              221.03624],
             [227.85168, 235.06366, 231.7223 , ..., 228.84613, 233.75198,
              228.59952]], dtype=float32),
 array([[217.48294699, 222.71273971, 220.83305275, ..., 219.49077332,
         224.97428167, 217.25010383],
        [220.37826884, 224.40679729, 222.44380903, ..., 221.76216829,
         224.92434335, 220.50214183],
        [229.01171958, 234.39184022, 232.64898372, ..., 231.39539874,
         235.24265206, 227.72024119],
        ...,
        [220.08473825, 224.6

In [57]:
# Element-wise difference between NumPy's product and the hardware result.
diff = data_A.dot(data_B) - output_buffer

In [58]:
# Root-mean-square error: square every difference, average, take the root.
rms_error = np.sqrt(np.mean(np.square(diff)))

In [59]:
diff

PynqBuffer([[0.00013733, 0.00019836, 0.00021362, ..., 0.00021362,
             0.00016785, 0.00013733],
            [0.00019836, 0.00012207, 0.00018311, ..., 0.00022888,
             0.00021362, 0.00019836],
            [0.00018311, 0.00019836, 0.00021362, ..., 0.00018311,
             0.00016785, 0.00012207],
            ...,
            [0.00018311, 0.00015259, 0.00013733, ..., 0.00018311,
             0.00016785, 0.00019836],
            [0.00019836, 0.00019836, 0.00019836, ..., 0.00013733,
             0.00015259, 0.00015259],
            [0.00012207, 0.0002594 , 0.00019836, ..., 0.00016785,
             0.00013733, 0.00013733]], dtype=float32)

In [60]:
rms_error

PynqBuffer(0.00017698, dtype=float32)

In [68]:
%%time

np.dot(data_A, data_B)

CPU times: user 17.5 ms, sys: 0 ns, total: 17.5 ms
Wall time: 7.58 ms


array([[217.48291, 222.71275, 220.83308, ..., 219.49081, 224.97427,
        217.25006],
       [220.3783 , 224.40674, 222.44382, ..., 221.76222, 224.9244 ,
        220.50217],
       [229.01173, 234.39185, 232.64902, ..., 231.3954 , 235.24265,
        227.72018],
       ...,
       [220.08475, 224.69241, 223.47803, ..., 222.72961, 228.50648,
        217.70741],
       [221.11089, 226.11194, 224.39008, ..., 222.25156, 228.01718,
        221.03639],
       [227.8518 , 235.06392, 231.7225 , ..., 228.8463 , 233.75212,
        228.59966]], dtype=float32)