following: https://discuss.pynq.io/t/tutorial-using-a-hls-stream-ip-with-dma-part-3-using-the-hls-ip-from-pynq/3346

In [1]:
from pynq import Overlay

In [2]:
!ls /home/xilinx/pynq/overlays/

adder	 dma_prequel   fir_accel   __init__.py	move_ab        __pycache__
add_vec  dma_sequel    fir_accel2  linreg_vadd	new_adder
base	 dma_sequel_2  fir_accel3  logictools	new_fir_accel


In [3]:
!ls /home/xilinx/pynq/overlays/dma_sequel_2

dma_sequel.bit	dma_sequel.hwh	dma_sequel.tcl


In [4]:
ol = Overlay("/home/xilinx/pynq/overlays/dma_sequel_2/dma_sequel.bit")

In [5]:
help(ol)

Help on Overlay in module pynq.overlay:

<pynq.overlay.Overlay object>
    Default documentation for overlay /home/xilinx/pynq/overlays/dma_sequel_2/dma_sequel.bit. The following
    attributes are available on this overlay:
    
    IP Blocks
    ----------
    example_0            : pynq.overlay.DefaultIP
    axi_dma_0            : pynq.lib.dma.DMA
    processing_system7_0 : pynq.overlay.DefaultIP
    
    Hierarchies
    -----------
    None
    
    Interrupts
    ----------
    None
    
    GPIO Outputs
    ------------
    None
    
    Memories
    ------------
    PSDDR                : Memory



In [7]:
dma = ol.axi_dma_0

In [8]:
help(dma)

Help on DMA in module pynq.lib.dma object:

class DMA(pynq.overlay.DefaultIP)
 |  DMA(description, *args, **kwargs)
 |  
 |  Class for Interacting with the AXI Simple DMA Engine
 |  
 |  This class provides two attributes for the read and write channels.
 |  The read channel copies data from the stream into memory and
 |  the write channel copies data from memory to the output stream.
 |  Both channels have an identical API consisting of `transfer` and
 |  `wait` functions. If interrupts have been enabled and connected
 |  for the DMA engine then `wait_async` is also present.
 |  
 |  Buffers to be transferred must be a `PynqBuffer` object allocated
 |  through `pynq.allocate()` function either directly or indirectly. This
 |  means that Frames from the video subsystem can be transferred using
 |  this class.
 |  
 |  Attributes
 |  ----------
 |  recvchannel : _SDMAChannel / _SGDMAChannel
 |      The stream to memory channel  (if enabled in hardware)
 |  sendchannel : _SDMAChannel / _

In [9]:
dma.buffer_max_size

67108863

In [10]:
e = ol.example_0

In [11]:
help(e)

Help on DefaultIP in module pynq.overlay object:

class DefaultIP(builtins.object)
 |  DefaultIP(description)
 |  
 |  Driver for an IP without a more specific driver
 |  
 |  This driver wraps an MMIO device and provides a base class
 |  for more specific drivers written later. It also provides
 |  access to GPIO outputs and interrupts inputs via attributes. More specific
 |  drivers should inherit from `DefaultIP` and include a
 |  `bindto` entry containing all of the IP that the driver
 |  should bind to. Subclasses meeting these requirements will
 |  automatically be registered.
 |  
 |  Attributes
 |  ----------
 |  mmio : pynq.MMIO
 |      Underlying MMIO driver for the device
 |  _interrupts : dict
 |      Subset of the PL.interrupt_pins related to this IP
 |  _gpio : dict
 |      Subset of the PL.gpio_dict related to this IP
 |  
 |  Methods defined here:
 |  
 |  __init__(self, description)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  read(se

In [12]:
e.register_map

RegisterMap {
  CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0, INTERRUPT=0, RESERVED_3=0),
  GIER = Register(Enable=0, RESERVED=0),
  IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED_0=0),
  IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED_0=0)
}

## Create aliases

In [13]:
# Short handles for the DMA engine, its two channels, and the HLS IP.
dma = ol.axi_dma_0
dma_send, dma_recv = dma.sendchannel, dma.recvchannel

hls_ip = ol.example_0

In [14]:
hls_ip.register_map

RegisterMap {
  CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0, INTERRUPT=0, RESERVED_3=0),
  GIER = Register(Enable=0, RESERVED=0),
  IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED_0=0),
  IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED_0=0)
}

Note that the HLS IP is not started yet (AP_START=0). You can also see the IP is idle (AP_IDLE=1).

We will start the HLS IP and then start some transfers from the DMA.

We could initiate the DMA transfers first if we preferred. The DMA transfers would stall until the IP is started.


## Start the HLS IP

We can start the HLS IP by writing 0x81 to the control register. This will set bit 0 (AP_START) to “1” and bit 7 (AUTO_RESTART) to “1”. AUTO_RESTART means the IP will run continuously. If we don’t set this then after the IP completes one full operation or iteration, it will stop and wait until AP_START is set again. We would have to set this every time we want the IP to process some data.

In [15]:
CONTROL_REGISTER = 0x0  # offset of the HLS IP's CTRL register
AP_START = 0x01  # CTRL bit 0: start the IP
AUTO_RESTART = 0x80  # CTRL bit 7: restart automatically after each iteration
# Writes 0x81, i.e. sets both bit 0 (AP_START) and bit 7 (AUTO_RESTART).
hls_ip.write(CONTROL_REGISTER, AP_START | AUTO_RESTART)

Check the correct bits have been set.



In [16]:
hls_ip.register_map

RegisterMap {
  CTRL = Register(AP_START=1, AP_DONE=0, AP_IDLE=0, AP_READY=0, RESERVED_1=0, AUTO_RESTART=1, RESERVED_2=0, INTERRUPT=0, RESERVED_3=0),
  GIER = Register(Enable=0, RESERVED=0),
  IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED_0=0),
  IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED_0=0)
}

## DMA send

In [17]:
from pynq import allocate
import numpy as np

data_size = 100
input_buffer = allocate(shape=(data_size,), dtype=np.uint32)

In [18]:
# Fill the buffer with 0, 1, 2, ... in a single vectorized assignment.
input_buffer[:] = np.arange(data_size, dtype=np.uint32)

In [19]:
dma_send.transfer(input_buffer)

In [21]:
# Show the first ten input words as zero-padded hex.
for i in range(10):
    print(f"0x{input_buffer[i]:02x}")

0x00
0x01
0x02
0x03
0x04
0x05
0x06
0x07
0x08
0x09


## DMA receive

Read back data from the HLS IP and store it in DRAM. Start by creating the output buffer.



In [22]:
output_buffer = allocate(shape=(data_size,), dtype=np.uint32)

In [23]:
dma_recv.transfer(output_buffer)
# transfer() only starts the DMA (the channel reports idle=False right after
# the call), so block until both channels have finished before any later cell
# reads output_buffer.
dma_send.wait()
dma_recv.wait()

In [24]:
# Show the first ten received words as zero-padded hex.
for i in range(10):
    print(f"0x{output_buffer[i]:02x}")

0x05
0x06
0x07
0x08
0x09
0x0a
0x0b
0x0c
0x0d
0x0e


In [25]:
# Each received element is the matching input element plus 5, so undo the
# offset before comparing.
buffers_match = np.array_equal(input_buffer, output_buffer - 5)
print(f"Arrays are equal: {buffers_match}")

Arrays are equal: True


In [26]:
input_buffer

PynqBuffer([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
            30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
            45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
            60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
            75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
            90, 91, 92, 93, 94, 95, 96, 97, 98, 99], dtype=uint32)

In [27]:
output_buffer

PynqBuffer([  5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,
             17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,
             29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
             41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
             53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
             65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,
             77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,
             89,  90,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100,
            101, 102, 103, 104], dtype=uint32)

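**There was a problem (now fixed):** we were transferring 64 bits at a time, so the results were wrong: 0 went to 5 as expected, but the second number was 7 (2 + 5) instead of 6 (1 + 5). The outputs shown above are from after the fix.

It was fixed (the exact reason is unclear) with these data-width settings:
- memory map data width is 64
- stream data width is 32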

In [28]:
hls_ip.register_map

RegisterMap {
  CTRL = Register(AP_START=1, AP_DONE=0, AP_IDLE=0, AP_READY=0, RESERVED_1=0, AUTO_RESTART=1, RESERVED_2=0, INTERRUPT=0, RESERVED_3=0),
  GIER = Register(Enable=0, RESERVED=0),
  IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED_0=0),
  IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED_0=0)
}

In [40]:
del input_buffer, output_buffer

In [41]:
ol.free()

In [42]:
import gc

gc.collect()

925

# OLD

In [23]:
dma_send = ol.axi_dma_0.sendchannel
dma_recv = ol.axi_dma_0.recvchannel

In [24]:
from pynq import allocate
import numpy as np

data_size = 1000
input_buffer = allocate(shape=(data_size,), dtype=np.uint32)

In [25]:
# Fill the buffer with 0xCAFE0000, 0xCAFE0001, ... in one vectorized assignment.
input_buffer[:] = np.uint32(0xCAFE0000) + np.arange(data_size, dtype=np.uint32)

Let’s check the contents of the array. The data in the following cell will be sent from PS (DDR memory) to PL (streaming FIFO).

In [26]:
for i in range(10):
    print(hex(input_buffer[i]))

0xcafe0000
0xcafe0001
0xcafe0002
0xcafe0003
0xcafe0004
0xcafe0005
0xcafe0006
0xcafe0007
0xcafe0008
0xcafe0009


Now we are ready to carry out DMA transfer from a memory block in DDR to FIFO.



In [27]:
dma_send.transfer(input_buffer)

Let’s read the data back from FIFO stream, and write to MM memory. The steps are similar.

We will prepare an empty array before reading data back from FIFO.



In [28]:
# Allocate the receive buffer and show its first ten words before any transfer.
output_buffer = allocate(shape=(data_size,), dtype=np.uint32)

for i in range(10):
    print(f"0x{output_buffer[i]:02x}")

0x00
0x00
0x00
0x00
0x00
0x00
0x00
0x00
0x00
0x00


In [29]:
dma_recv.transfer(output_buffer)
# transfer() returns as soon as the DMA is started; wait for both channels to
# finish so output_buffer is completely written before it is read below.
dma_send.wait()
dma_recv.wait()

In [30]:
# Show the first ten received words as hex.
for i in range(10):
    print(f"0x{output_buffer[i]:02x}")

0xcafe0000
0xcafe0001
0xcafe0002
0xcafe0003
0xcafe0004
0xcafe0005
0xcafe0006
0xcafe0007
0xcafe0008
0xcafe0009


In [31]:
# Compare the sent and received buffers element by element.
buffers_match = np.array_equal(input_buffer, output_buffer)
print(f"Arrays are equal: {buffers_match}")

Arrays are equal: True


In [32]:
input_buffer

PynqBuffer([3405643776, 3405643777, 3405643778, 3405643779, 3405643780,
            3405643781, 3405643782, 3405643783, 3405643784, 3405643785,
            3405643786, 3405643787, 3405643788, 3405643789, 3405643790,
            3405643791, 3405643792, 3405643793, 3405643794, 3405643795,
            3405643796, 3405643797, 3405643798, 3405643799, 3405643800,
            3405643801, 3405643802, 3405643803, 3405643804, 3405643805,
            3405643806, 3405643807, 3405643808, 3405643809, 3405643810,
            3405643811, 3405643812, 3405643813, 3405643814, 3405643815,
            3405643816, 3405643817, 3405643818, 3405643819, 3405643820,
            3405643821, 3405643822, 3405643823, 3405643824, 3405643825,
            3405643826, 3405643827, 3405643828, 3405643829, 3405643830,
            3405643831, 3405643832, 3405643833, 3405643834, 3405643835,
            3405643836, 3405643837, 3405643838, 3405643839, 3405643840,
            3405643841, 3405643842, 3405643843, 3405643844, 3405

In [33]:
output_buffer

PynqBuffer([3405643776, 3405643777, 3405643778, 3405643779, 3405643780,
            3405643781, 3405643782, 3405643783, 3405643784, 3405643785,
            3405643786, 3405643787, 3405643788, 3405643789, 3405643790,
            3405643791, 3405643792, 3405643793, 3405643794, 3405643795,
            3405643796, 3405643797, 3405643798, 3405643799, 3405643800,
            3405643801, 3405643802, 3405643803, 3405643804, 3405643805,
            3405643806, 3405643807, 3405643808, 3405643809, 3405643810,
            3405643811, 3405643812, 3405643813, 3405643814, 3405643815,
            3405643816, 3405643817, 3405643818, 3405643819, 3405643820,
            3405643821, 3405643822, 3405643823, 3405643824, 3405643825,
            3405643826, 3405643827, 3405643828, 3405643829, 3405643830,
            3405643831, 3405643832, 3405643833, 3405643834, 3405643835,
            3405643836, 3405643837, 3405643838, 3405643839, 3405643840,
            3405643841, 3405643842, 3405643843, 3405643844, 3405

## Check DMA status, and trigger an error


In [36]:
dma_recv.error

False

In [38]:
dma_recv.idle

True

In [39]:
dma_recv.transfer(output_buffer)

In [40]:
dma_recv.idle

False

Start another receive transfer while the DMA is not idle



In [41]:
dma_recv.transfer(output_buffer)

RuntimeError: DMA channel not idle

We can check the running state of the DMA



In [42]:
dma_recv.running

True

In [43]:
dma_recv.idle

False

In [44]:
dma_send.idle

True

## Check the DMA register map


We can read back individual status bits as shown above. It can be useful to read back the full register map which will give details on all control and status bits. The meaning of each register and each bit will not be covered. For more details you can refer to the product guide for the DMA.



In [45]:
dma.register_map

RegisterMap {
  MM2S_DMACR = Register(RS=1, Reset=0, Keyhole=0, Cyclic_BD_Enable=0, IOC_IrqEn=0, Dly_IrqEn=0, Err_IrqEn=0, IRQThreshold=1, IRQDelay=0),
  MM2S_DMASR = Register(Halted=0, Idle=1, SGIncld=0, DMAIntErr=0, DMASlvErr=0, DMADecErr=0, SGIntErr=0, SGSlvErr=0, SGDecErr=0, IOC_Irq=1, Dly_Irq=0, Err_Irq=0, IRQThresholdSts=0, IRQDelaySts=0),
  MM2S_CURDESC = Register(Current_Descriptor_Pointer=0),
  MM2S_CURDESC_MSB = Register(Current_Descriptor_Pointer=0),
  MM2S_TAILDESC = Register(Tail_Descriptor_Pointer=0),
  MM2S_TAILDESC_MSB = Register(Tail_Descriptor_Pointer=0),
  MM2S_SA = Register(Source_Address=377786368),
  MM2S_SA_MSB = Register(Source_Address=0),
  MM2S_LENGTH = Register(Length=4000),
  SG_CTL = Register(SG_CACHE=0, SG_USER=0),
  S2MM_DMACR = Register(RS=1, Reset=0, Keyhole=0, Cyclic_BD_Enable=0, IOC_IrqEn=0, Dly_IrqEn=0, Err_IrqEn=0, IRQThreshold=1, IRQDelay=0),
  S2MM_DMASR = Register(Halted=0, Idle=0, SGIncld=0, DMAIntErr=0, DMASlvErr=0, DMADecErr=0, SGIntErr=0, SGS

In [46]:
# Compare the buffers' physical addresses with the source/destination
# addresses currently held in the DMA registers.
print(f"Input buffer address   : {hex(input_buffer.physical_address)}")
print(f"Output buffer address  : {hex(output_buffer.physical_address)}")
print("---")
print(f"DMA Source address     : {hex(dma.register_map.MM2S_SA.Source_Address)}")
print(f"DMA Destination address: {hex(dma.register_map.S2MM_DA.Destination_Address)}")

Input buffer address   : 0x16849000
Output buffer address  : 0x1684a000
---
DMA Source address     : 0x16849000
DMA Destination address: 0x1684a000


## Free all memory

In [47]:
del input_buffer, output_buffer

In [49]:
ol.free()

In [50]:
help(ol.free)

Help on method free in module pynq.overlay:

free() method of pynq.overlay.Overlay instance



In [52]:
help(ol)

Help on Overlay in module pynq.overlay:

<pynq.overlay.Overlay object>
    Default documentation for overlay /home/xilinx/pynq/overlays/dma_prequel/dma_prequel.bit. The following
    attributes are available on this overlay:
    
    IP Blocks
    ----------
    axi_dma_0            : pynq.lib.dma.DMA
    processing_system7_0 : pynq.overlay.DefaultIP
    
    Hierarchies
    -----------
    None
    
    Interrupts
    ----------
    None
    
    GPIO Outputs
    ------------
    None
    
    Memories
    ------------
    PSDDR                : Memory



# OLD

In [16]:
tl = overlay.toplevel_0
help(tl)

Help on DefaultIP in module pynq.overlay object:

class DefaultIP(builtins.object)
 |  DefaultIP(description)
 |  
 |  Driver for an IP without a more specific driver
 |  
 |  This driver wraps an MMIO device and provides a base class
 |  for more specific drivers written later. It also provides
 |  access to GPIO outputs and interrupts inputs via attributes. More specific
 |  drivers should inherit from `DefaultIP` and include a
 |  `bindto` entry containing all of the IP that the driver
 |  should bind to. Subclasses meeting these requirements will
 |  automatically be registered.
 |  
 |  Attributes
 |  ----------
 |  mmio : pynq.MMIO
 |      Underlying MMIO driver for the device
 |  _interrupts : dict
 |      Subset of the PL.interrupt_pins related to this IP
 |  _gpio : dict
 |      Subset of the PL.gpio_dict related to this IP
 |  
 |  Methods defined here:
 |  
 |  __init__(self, description)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  read(se

In [17]:
tl.register_map

RegisterMap {
  CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0, INTERRUPT=0, RESERVED_3=0),
  GIER = Register(Enable=0, RESERVED=0),
  IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED_0=0),
  IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED_0=0),
  a_1 = Register(a=write-only),
  a_2 = Register(a=write-only),
  length_r = Register(length_r=write-only),
  value_r = Register(value_r=write-only)
}

In [18]:
from pynq import allocate
import numpy as np

py_buffer = allocate(shape=(100,), dtype=np.int32)

In [19]:
print("py_buffer physical address {}".format(hex(py_buffer.physical_address)))

py_buffer physical address 0x1684a000


## Configure the IP

Write the values of the memory address, the length, and the value to be added to each location. You must make sure that the length doesn't exceed the length of the memory that we allocated earlier AND the maximum value that can be set in the HLS IP. The buffe

In [21]:
# Point the IP at the allocated buffer. The element count is taken from the
# buffer itself so it can never exceed the memory that was allocated.
tl.register_map.a_1 = py_buffer.physical_address
tl.register_map.length_r = len(py_buffer)
tl.register_map.value_r = 7  # value the IP adds to each location

## Start the IP

In [22]:
tl.register_map.CTRL.AP_START = 1

If we start the IP once, when the IP completes an iteration DONE will go high once. While the IP is working, AP_IDLE will be 0. In this example, the time for an iteration of the HLS IP is very low so you will probably not see AP_IDLE = 0.

Done is cleared when it is read. There is a limitation with the register_map. Multiple reads to the same register may be made to collect the information required for the register_map. This may mean DONE is cleared while the values of some of the other bits and registers are being read and consequently will not show as high in the register m

In [23]:
tl.register_map.CTRL.AP_DONE

1

In [24]:
tl.register_map.CTRL.AP_IDLE

1

If you have a long running HLS IP, you can poll on the DONE bit before continuing with your application.

## Check the contents of the memory

In [25]:
py_buffer

PynqBuffer([7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7])

In [26]:
len(py_buffer)

100

In [None]:
# `my_ip` is not defined anywhere in this notebook; the IP handle used in this
# section is `tl`.
tl.register_map.CTRL.AUTO_RESTART = 1

In [None]:
# %xdel in1_vadd
# %xdel in2_vadd
# %xdel out
overlay.free()

# OLD

In [21]:
overlay.scalar_add.add(15, 20)

35

In [8]:
add_ip.write(0x10, 4)
add_ip.write(0x18, 5)
add_ip.read(0x20)

9

In [18]:
from pynq import DefaultIP


class AddDriver(DefaultIP):
    """Driver for the HLS scalar adder IP, exposing it as an ``add`` method."""

    # IP identifier(s) this driver binds to; overlays containing a matching IP
    # expose it through this class instead of the generic DefaultIP.
    bindto = ["xilinx.com:hls:add:1.0"]

    # Register offsets used by add().
    _OPERAND_A_OFFSET = 0x10
    _OPERAND_B_OFFSET = 0x18
    _RESULT_OFFSET = 0x20

    def __init__(self, description):
        """Forward the IP description to the DefaultIP constructor."""
        super().__init__(description=description)

    def add(self, a, b):
        """Write both operands to the IP and return the value it reports.

        Parameters
        ----------
        a, b
            Operands written to the two input registers.

        Returns
        -------
        The value read back from the result register.
        """
        self.write(self._OPERAND_A_OFFSET, a)
        self.write(self._OPERAND_B_OFFSET, b)
        return self.read(self._RESULT_OFFSET)

In [19]:
overlay = Overlay("/home/xilinx/pynq/overlays/adder/adder.bit")

In [20]:
help(overlay)

Help on Overlay in module pynq.overlay:

<pynq.overlay.Overlay object>
    Default documentation for overlay /home/xilinx/pynq/overlays/adder/adder.bit. The following
    attributes are available on this overlay:
    
    IP Blocks
    ----------
    scalar_add           : __main__.AddDriver
    processing_system7_0 : pynq.overlay.DefaultIP
    
    Hierarchies
    -----------
    None
    
    Interrupts
    ----------
    None
    
    GPIO Outputs
    ------------
    None
    
    Memories
    ------------
    PSDDR                : Memory

