In [18]:
from pynq import Overlay
ol = Overlay('matrix.bit')

In [19]:
dma = ol.axi_dma_0

In [20]:
data_send = dma.sendchannel
data_recv = dma.recvchannel

In [21]:
import numpy as np
import random 

In [22]:
np.random.seed(2)
data_A = np.random.uniform(low=0, high=10, size=(100, 100)).astype(float)
data_B = np.random.uniform(low=0, high=10, size=(100, 100)).astype(float)

In [23]:
data_A, data_B

(array([[4.35994902, 0.25926232, 5.49662478, ..., 6.61481327, 1.70477133,
         8.81652236],
        [7.7800816 , 1.33954208, 8.68916626, ..., 5.7027421 , 9.16714612,
         7.02264241],
        [5.01216452, 5.06086834, 2.18820786, ..., 4.3666988 , 0.15056134,
         8.92905883],
        ...,
        [8.47501085, 1.31358627, 7.48304695, ..., 6.13864418, 3.01381357,
         8.35725715],
        [5.90558724, 2.81559236, 4.71002749, ..., 6.55505965, 2.18741538,
         8.92183124],
        [7.20691133, 8.85091351, 7.88750645, ..., 7.34176132, 7.73163186,
         1.73273328]]),
 array([[0.45209358, 2.60834748, 8.56704315, ..., 2.37114778, 9.92721777,
         8.10776828],
        [1.94367587, 8.3472529 , 1.51141799, ..., 9.86539894, 2.7422922 ,
         3.71473337],
        [3.30308581, 3.61424772, 1.11340129, ..., 9.76225661, 8.75260893,
         8.83062626],
        ...,
        [4.59408327, 3.78022009, 2.15007284, ..., 5.41924595, 8.7485823 ,
         9.21298658],
        [1.1

In [24]:
def multiply(matrix_a, matrix_b):
    result_matrix = np.zeros((100,100))
    for i in range(100):
        for j in range(100):
            for k in range(100):
                result_matrix[i][j] += matrix_a[i][k] * matrix_b[k][j]
                
    return result_matrix

In [25]:
import time
start_time = time.time()
multiply(data_A, data_B) #software
end_time = time.time()
print(end_time - start_time)

19.89862895011902


In [26]:
from pynq import allocate

In [27]:
data_a_f = data_A.flatten()
data_b_f = data_B.flatten()

data_input = np.concatenate([data_a_f, data_b_f])

In [28]:
data_input

array([4.35994902, 0.25926232, 5.49662478, ..., 0.08168716, 8.70860167,
       0.38990867])

In [29]:
input_buffer = allocate(20000, np.float32)
output_buffer = allocate((100,100), np.float32)

In [30]:
np.copyto(input_buffer, data_input)

In [31]:
start_time = time.time()
data_send.transfer(input_buffer)
data_recv.transfer(output_buffer)
data_send.wait()
data_recv.wait()
end_time = time.time()
print(end_time - start_time)

0.003995180130004883


In [32]:
output_buffer

PynqBuffer([[2259.3535, 2410.1992, 2385.4102, ..., 2320.123 , 2582.3887,
             2270.7012],
            [2405.375 , 2436.127 , 2403.0078, ..., 2403.7441, 2433.9434,
             2452.3926],
            [2655.0586, 2820.9414, 2809.914 , ..., 2753.498 , 2852.0996,
             2560.5645],
            ...,
            [2328.9238, 2417.623 , 2459.414 , ..., 2453.4453, 2745.0508,
             2125.873 ],
            [2397.162 , 2525.2227, 2516.1816, ..., 2371.248 , 2661.7344,
             2424.3887],
            [2578.416 , 2927.5   , 2756.5645, ..., 2537.8926, 2742.3809,
             2687.8613]], dtype=float32)

In [33]:
diff = np.dot(data_A, data_B) - output_buffer

In [34]:
#rms error 
rms_error = np.sqrt(np.mean(diff**2))

In [35]:
diff

PynqBuffer([[0.93383598, 1.06261225, 1.02964051, ..., 1.01186692,
             1.04670748, 1.00848469],
            [0.97246493, 1.06856966, 1.03561987, ..., 1.05811233,
             1.02598922, 1.04891241],
            [1.08255719, 1.20690335, 1.0953939 , ..., 1.07573574,
             1.1492385 , 1.13551354],
            ...,
            [1.03919995, 1.10446107, 1.02353405, ..., 1.07018318,
             1.1016384 , 1.06103724],
            [1.00137359, 1.04311261, 1.04364932, ..., 1.05399364,
             1.07767315, 1.03793857],
            [1.00260251, 1.1132308 , 1.05929982, ..., 1.03667905,
             1.08202525, 1.04680624]])

In [36]:
rms_error

PynqBuffer(1.07059775)

In [38]:
%%time

np.dot(data_A, data_B)

CPU times: user 0 ns, sys: 8.76 ms, total: 8.76 ms
Wall time: 6.19 ms


array([[2260.2873516 , 2411.261831  , 2386.43979676, ..., 2321.13491379,
        2583.43537935, 2271.70965656],
       [2406.34746493, 2437.19552278, 2404.04343237, ..., 2404.80225296,
        2434.96934859, 2453.44149054],
       [2656.14115094, 2822.1483096 , 2811.0094564 , ..., 2754.57378261,
        2853.24884787, 2561.69996666],
       ...,
       [2329.96302807, 2418.72750794, 2460.43759655, ..., 2454.51549568,
        2746.15241965, 2126.93408412],
       [2398.16348297, 2526.26576886, 2517.22528995, ..., 2372.30204052,
        2662.81204815, 2425.42661045],
       [2579.41861813, 2928.6132308 , 2757.62375294, ..., 2538.92925718,
        2743.46288463, 2688.90813436]])