In [3]:
import numpy as np
import time
from pynq import Overlay, allocate

In [4]:
# Load the overlay (the bitstream must have been built from the HLS kernel).
BITSTREAM = "sistema.bit"
ol = Overlay(BITSTREAM)

# List what the overlay exposes in order to find the kernel's instance name;
# for this bitstream the listing reports it as cmpx_mat_product_0.
help(ol)

Help on Overlay in module pynq.overlay:

<pynq.overlay.Overlay object>
    Default documentation for overlay sistema.bit. The following
    attributes are available on this overlay:
    
    IP Blocks
    ----------
    cmpx_mat_product_0   : pynq.overlay.DefaultIP
    axi_intc_0           : pynq.overlay.DefaultIP
    zynq_ultra_ps_e_0    : pynq.overlay.DefaultIP
    
    Hierarchies
    -----------
    None
    
    Interrupts
    ----------
    None
    
    GPIO Outputs
    ------------
    None
    
    Memories
    ------------
    PSDDR                : Memory



In [5]:
# Handle to the matrix-product kernel, looked up by its instance name
# (adjust KERNEL_NAME if the block design names the IP differently).
KERNEL_NAME = "cmpx_mat_product_0"
kr = getattr(ol, KERNEL_NAME)

# Show the registers the kernel exposes (control/status plus the Ma, Mb and Mc
# argument registers).
print(kr.register_map)

RegisterMap {
  CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0, INTERRUPT=0, RESERVED_3=0),
  GIER = Register(Enable=0, RESERVED=0),
  IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED_0=0),
  IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED_0=0),
  Ma_1 = Register(Ma=write-only),
  Ma_2 = Register(Ma=write-only),
  Mb_1 = Register(Mb=write-only),
  Mb_2 = Register(Mb=write-only),
  Mc_1 = Register(Mc=write-only),
  Mc_2 = Register(Mc=write-only)
}


In [5]:
# Define matrix dimensions
DIM = 48

# Fixed seed so that the random test matrices (and therefore every result
# printed further down) are identical after a kernel restart and re-run.
SEED = 0
np.random.seed(SEED)


def random_complex_matrix(dim):
    """Return a (dim, dim) matrix whose real and imaginary parts are uniform in [0, 1).

    Both parts are drawn as float32 so the values are exactly representable in
    the complex64 buffers they are copied into.
    """
    real_part = np.random.rand(dim, dim).astype(np.float32)
    imag_part = np.random.rand(dim, dim).astype(np.float32)
    return real_part + 1j * imag_part


# Allocate contiguous memory for the matrices
# np.complex64 gives 32-bit float for both real and imaginary parts.
Ma = allocate(shape=(DIM, DIM), dtype=np.complex64)
Mb = allocate(shape=(DIM, DIM), dtype=np.complex64)
Mc = allocate(shape=(DIM, DIM), dtype=np.complex64)

# Initialize Ma and Mb with random complex numbers (Mc is the output buffer
# and is left as allocated).
Ma[:] = random_complex_matrix(DIM)
Mb[:] = random_complex_matrix(DIM)

In [6]:
# Write the physical addresses of the matrices to the kernel's argument registers.
# Each pointer argument is exposed as a register pair: "<name>_1" takes the low
# 32 bits of the address and "<name>_2" the high 32 bits. Both halves are written
# explicitly so the kernel never depends on the reset value (or a stale value
# from an earlier run) of the high word, and so an address above 4 GiB is not
# silently truncated.
for low_reg, high_reg, buf in (
    (kr.register_map.Ma_1, kr.register_map.Ma_2, Ma),
    (kr.register_map.Mb_1, kr.register_map.Mb_2, Mb),
    (kr.register_map.Mc_1, kr.register_map.Mc_2, Mc),
):
    phys_addr = int(buf.physical_address)
    kr.write(low_reg.address, phys_addr & 0xFFFFFFFF)
    kr.write(high_reg.address, phys_addr >> 32)

In [7]:
# Upper bound on how long the kernel may take before the run is considered hung.
KERNEL_TIMEOUT_S = 5.0

# Make sure the input data written by the CPU has reached memory before the
# kernel reads it (harmless if the buffers are not cached).
Ma.flush()
Mb.flush()

# Start the kernel by setting the AP_START bit in the control register
kr.register_map.CTRL.AP_START = 1

# Wait for completion by polling AP_DONE instead of sleeping for a fixed time:
# this returns as soon as the kernel finishes and fails loudly if it never does.
deadline = time.monotonic() + KERNEL_TIMEOUT_S
while not kr.register_map.CTRL.AP_DONE:
    if time.monotonic() > deadline:
        raise TimeoutError(
            f"Kernel did not assert AP_DONE within {KERNEL_TIMEOUT_S} s"
        )
    time.sleep(0.001)  # brief pause so the poll loop does not hammer the bus

# Discard any cached copy of the output so the CPU reads what the kernel wrote.
Mc.invalidate()

In [8]:
# Show the leading rows of the matrix produced by the kernel.
print("Hardware result (first 3 rows):", Mc[:3], sep="\n")

Hardware result (first 3 rows):
[[ 1.9975162 +23.91357j  -0.06169912+23.021677j  0.5170438 +24.86861j
  -1.4208806 +23.116474j  0.43042678+25.184177j  1.4733404 +25.134941j
   0.60335016+23.2114j   -3.4570482 +23.531248j  1.7270083 +22.387402j
   0.89515996+22.63974j  -1.2681922 +24.41797j   0.71289855+25.6054j
  -2.1101565 +24.686975j  0.10804204+22.85642j  -3.345058  +22.976242j
  -1.0471436 +24.973436j  1.620372  +24.368078j -3.9838064 +21.357918j
  -1.3961053 +26.749235j -1.785009  +22.6533j   -3.5124109 +26.73857j
  -1.4901825 +20.145832j  0.7135203 +22.95981j  -0.755782  +23.399311j
  -2.5616448 +23.356674j  0.8516019 +23.405968j  0.23703276+23.180199j
  -2.9315648 +26.620232j -0.7656517 +27.414686j  1.7605522 +26.242971j
   0.6924521 +24.766558j -0.60665154+25.886286j -0.44404155+23.36217j
  -1.1087257 +24.37495j  -3.6230104 +24.626722j -1.4417003 +24.423834j
  -5.2687964 +25.3817j   -1.800384  +22.697025j -4.1906896 +24.00359j
   1.585137  +25.825829j  0.11902107+26.512812j -1.

In [9]:
# Reference product computed on the CPU with NumPy.
Mc_ref = Ma @ Mb

# Show the same leading rows as for the hardware result, for a visual comparison.
print("Software reference result (first 3 rows):", Mc_ref[:3], sep="\n")

Software reference result (first 3 rows):
[[ 1.9975162 +23.913565j -0.06169902+23.021673j  0.5170439 +24.868612j
  -1.4208815 +23.116468j  0.43042707+25.18418j   1.4733405 +25.134943j
   0.6033504 +23.211397j -3.457048  +23.531248j  1.7270098 +22.387398j
   0.8951645 +22.639742j -1.2681913 +24.417976j  0.7128973 +25.605404j
  -2.1101573 +24.686968j  0.10804202+22.856422j -3.3450582 +22.976252j
  -1.0471436 +24.973434j  1.6203725 +24.36808j  -3.9838078 +21.357918j
  -1.3961048 +26.749237j -1.7850095 +22.6533j   -3.512412  +26.738567j
  -1.4901838 +20.145836j  0.7135172 +22.959814j -0.7557802 +23.399307j
  -2.5616446 +23.356674j  0.8516013 +23.40597j   0.23703265+23.180199j
  -2.9315643 +26.620235j -0.7656517 +27.41468j   1.7605518 +26.24297j
   0.69245213+24.766544j -0.6066514 +25.886286j -0.4440403 +23.362171j
  -1.1087265 +24.374947j -3.6230097 +24.62672j  -1.4417019 +24.423828j
  -5.268797  +25.381697j -1.8003831 +22.697023j -4.1906905 +24.003592j
   1.5851365 +25.82583j   0.119021  

In [10]:
# Quantify the mismatch between the hardware output and the software reference
# as a single number: the norm of their element-wise difference.
residual = Mc - Mc_ref
error = np.linalg.norm(residual)
print("Frobenius norm of difference:", error)

Frobenius norm of difference: 0.00019124227


In [11]:
# Release the contiguous buffers once they are no longer needed.
for buf in (Ma, Mb, Mc):
    buf.freebuffer()