In [1]:
import numpy as np
import time
from pynq import Overlay, allocate

In [3]:
# Program the FPGA with the bitstream built from the HLS kernel.
ol = Overlay("sistema.bit")

# Look up the kernel IP by its instance name in the block design
# (cmpx_conv_0 here; adjust if your design names it differently).
kr = getattr(ol, "cmpx_conv_0")

In [7]:
# Display the kernel's register map: the control/interrupt registers plus the
# A/B/C argument registers (each split into a *_1 / *_2 pair) used below.
kr.register_map

RegisterMap {
  CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0, INTERRUPT=0, RESERVED_3=0),
  GIER = Register(Enable=0, RESERVED=0),
  IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED_0=0),
  IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED_0=0),
  A_1 = Register(A=write-only),
  A_2 = Register(A=write-only),
  B_1 = Register(B=write-only),
  B_2 = Register(B=write-only),
  C_1 = Register(C=write-only),
  C_2 = Register(C=write-only)
}

In [8]:
# Define the array size and the seed used to generate the test vectors.
N = 8192
SEED = 0

# Allocate physically contiguous memory for the complex arrays.
# np.complex64 uses a 32-bit float for both the real and the imaginary part.
A = allocate(shape=(N,), dtype=np.complex64)
B = allocate(shape=(N,), dtype=np.complex64)
C = allocate(shape=(N,), dtype=np.complex64)

# Initialize A and B with random complex numbers (real and imaginary parts
# uniform in [0, 1)). A seeded, local generator is used so that re-running the
# notebook reproduces exactly the same inputs and therefore the same outputs.
rng = np.random.RandomState(SEED)
A[:] = rng.rand(N).astype(np.float32) + 1j * rng.rand(N).astype(np.float32)
B[:] = rng.rand(N).astype(np.float32) + 1j * rng.rand(N).astype(np.float32)


In [9]:
# Write the physical address of each buffer into the kernel's pointer registers.
# The register map exposes every pointer as a pair of 32-bit registers; the code
# below treats <name>_1 as the low word and <name>_2 as the high word of a
# 64-bit address (the usual Vitis HLS layout -- TODO confirm against the
# generated driver header). Both words are written, each masked to 32 bits, so
# the kernel never sees a stale upper word or an out-of-range register value.
for arg_name, buf in (("A", A), ("B", B), ("C", C)):
    phys_addr = buf.physical_address
    low_reg = getattr(kr.register_map, f"{arg_name}_1")
    high_reg = getattr(kr.register_map, f"{arg_name}_2")
    kr.write(low_reg.address, phys_addr & 0xFFFFFFFF)
    kr.write(high_reg.address, (phys_addr >> 32) & 0xFFFFFFFF)

In [10]:
# Write the CPU-side contents of the input buffers back to memory so the kernel
# reads the data initialised above rather than whatever is still in the cache.
A.flush()
B.flush()

# Start the kernel by setting the AP_START bit in the control register
kr.register_map.CTRL.AP_START = 1

# Poll AP_DONE instead of sleeping for a fixed time: the cell returns as soon as
# the kernel finishes, and a kernel that never finishes is reported as an error
# rather than silently leaving C unwritten.
KERNEL_TIMEOUT_S = 10.0
deadline = time.monotonic() + KERNEL_TIMEOUT_S
while not kr.register_map.CTRL.AP_DONE:
    if time.monotonic() > deadline:
        raise TimeoutError(
            f"kernel did not assert AP_DONE within {KERNEL_TIMEOUT_S} s"
        )
    time.sleep(0.001)  # short back-off so the poll loop does not spin the CPU

# Discard any cached copy of the output buffer so the CPU reads what the
# kernel actually wrote.
C.invalidate()

In [11]:
# Show the leading 10 samples produced by the hardware convolution
print("Hardware result (first 10 elements):", C[:10], sep="\n")

Hardware result (first 10 elements):
[-35.097935 +4088.7727j -27.203289 +4099.961j   -7.48085  +4084.0557j
 -17.369724 +4089.3137j -11.051613 +4081.9019j -36.34986  +4092.7888j
 -16.589727 +4073.1777j  -1.4390953+4076.2239j -10.643052 +4098.7056j
  -7.0256085+4087.7441j]


In [12]:
# Compute the reference circular convolution in Python using FFT:
# circular convolution can be computed via IFFT( FFT(A) * FFT(B) )
C_ref = np.fft.ifft(np.fft.fft(A) * np.fft.fft(B))
C_ref = C_ref.astype(np.complex64)  # Cast to complex64 for fair comparison

print("Software reference result (first 10 elements):")
print(C_ref[:10])

# Check the hardware output against the reference numerically instead of by eye.
# The tolerance is deliberately loose: the two results are single-precision
# values computed by different algorithms, so small rounding differences are
# expected (NOTE(review): tighten rtol once the kernel's error budget is known).
# Silent on success; raises AssertionError with mismatch statistics on failure.
np.testing.assert_allclose(C, C_ref, rtol=1e-3, atol=1e-3)

Software reference result (first 10 elements):
[-35.09794  +4088.7773j -27.203196 +4099.9644j  -7.4808664+4084.0562j
 -17.36969  +4089.3123j -11.051599 +4081.9077j -36.349987 +4092.7927j
 -16.589695 +4073.183j   -1.4391016+4076.2258j -10.643078 +4098.703j
  -7.025629 +4087.7415j]


In [13]:
# Release the contiguous buffers once they are no longer needed (optional)
for buf in (A, B, C):
    buf.freebuffer()