Beta Support for XDNA2 Platform #179

Xeratec · 2026-05-06T09:37:44Z

Why use the local file and not "ghcr.io/pulp-platform/deeploy-xdna:devel"?

Also, if we align this file with the other _runner* files, the docker-image input should have no default tag and should instead be declared with required: true.

Xeratec · 2026-05-06T09:37:26Z

Why use the local file and not "ghcr.io/pulp-platform/deeploy-xdna:devel"?

Xeratec · 2026-05-07T07:59:53Z

Are we sure we want this in the .gitignore? It could be useful if well-maintained.

Xeratec · 2026-05-06T09:33:43Z

Nit, but can we align this to the other Dockerfile?

Start with

WORKDIR /app/build

and at the end do

# Remove unused files and clean up to reduce image size
WORKDIR /app
RUN rm -rf /app/build

You don't have to remove individual files during the build steps, which could lead to forgetting some and polluting the work directory.

Done in 7e8736e

Note, however, that the XDNA Dockerfile does not build anything locally for now, so there is nothing to clean up. It may be useful in the future, though.

Xeratec · 2026-05-07T08:20:02Z

In general, I like separating MLIR features from the others, but some of these generic abstractions seem very specific to XDNA.

Xeratec · 2026-05-07T08:18:27Z

Isn't this specific to XDNA?

Xeratec · 2026-05-07T08:29:14Z

The notion of device and runtime in this context seems specific to XDNA. Would it make sense to abstract it into a struct that encodes a list of passes for different domains?

And something like this:

def registerPasses(self, domain: str, passes: List[MLIRCodeTransformationPass])

def transform(self, ctxt: NetworkContext, domain: str, name: str, verbose: CodeGenVerbosity = _NoVerbosity):

XDNA2Transformer.registerPasses('device', devicePasses)
XDNA2Transformer.registerPasses('runtime', runtimeSequencePasses)

Also note that the "apply" function is called transform in the equivalent CodeTransformationPass class.

-Original file line number
+Diff line change
@@ -0,0 +1,54 @@
# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna
#
# SPDX-License-Identifier: Apache-2.0

# Reusable workflow: runs the XDNA2 pytest selection inside a Docker image
# on the self-hosted `xdna2-npu` runner.
---
name: _runner-xdna2

"on":
  workflow_call:
    inputs:
      # Extra pytest marker expression; combined as `xdna2 and <marker>`.
      pytest-marker:
        required: true
        type: string
      # Docker image used for both the permission fix and the test run.
      docker-image:
        required: false
        type: string
        default: "deeploy-xdna:local"

jobs:
  test-runner-xdna2:
    runs-on: xdna2-npu
    # NOTE: We cannot use the `container:` directive here because
    # GitHub Actions does not support `--device` flags required for
    # NPU access (/dev/accel/accel0). Instead we use explicit
    # `docker run` commands.
    steps:
      - name: Fix workspace permissions
        shell: bash
        # Inputs are passed through the environment instead of being
        # expanded into the script text, so a crafted value cannot inject
        # shell syntax into the command line.
        env:
          DOCKER_IMAGE: ${{ inputs.docker-image }}
        run: |
          # `id -u` / `id -g` are evaluated on the runner host, so the
          # workspace is handed back to the runner user. Best effort only.
          docker run --rm \
            -v "${{ github.workspace }}":/workspace \
            "${DOCKER_IMAGE}" \
            chown -R "$(id -u):$(id -g)" /workspace || true

      - name: Checkout Repo
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Run Tests in Docker
        shell: bash
        env:
          DOCKER_IMAGE: ${{ inputs.docker-image }}
          PYTEST_MARKER: ${{ inputs.pytest-marker }}
        run: |
          # `-e PYTEST_MARKER` forwards the variable into the container; the
          # single-quoted script is expanded by the container's bash only.
          docker run --rm \
            --device /dev/accel/accel0 \
            --ulimit memlock=-1 \
            -v /opt/xilinx:/opt/xilinx \
            -v "${{ github.workspace }}":/app/Deeploy \
            -w /app/Deeploy \
            -e PYTEST_MARKER \
            "${DOCKER_IMAGE}" \
            bash -c '
              pip install -e . &&
              cd DeeployTest &&
              pytest test_platforms.py -v -m "xdna2 and ${PYTEST_MARKER}"
            '

-Original file line number
+Diff line change
@@ -0,0 +1,31 @@
# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna
#
# SPDX-License-Identifier: Apache-2.0

# Top-level CI entry point for the XDNA2 platform. It delegates the actual
# test execution to the reusable `_runner-xdna2.yml` workflow.
---
name: CI • XDNA2

"on":
  # Every branch push and every version tag of the form v*.*.*
  push:
    branches: ["**"]
    tags: ["v*.*.*"]
  pull_request:
  # Manual runs may select a different image.
  workflow_dispatch:
    inputs:
      docker_image:
        description: "XDNA2 Docker image (must be pre-built on the runner)"
        required: false
        default: "deeploy-xdna:local"

# One active run per workflow and ref; a newer run cancels the older one.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  xdna2-kernels:
    uses: ./.github/workflows/_runner-xdna2.yml
    with:
      pytest-marker: "kernels"
      # `inputs.docker_image` is empty for push / pull_request events, so
      # fall back to the local image explicitly.
      docker-image: ${{ inputs.docker_image || 'deeploy-xdna:local' }}

-Original file line number
+Diff line change
@@ Expand Up / @@ -57,3 +57,7 @@ CHANGELOG_GEN.md @@
     # Container Artifacts
     .pyusbip/
     .cache/
+    # Claude context file
+    CLAUDE.md
+    Container/xrt-debs/

-Original file line number
+Diff line change
@@ Expand Up / @@ -46,6 +46,8 @@ elseif(platform STREQUAL SoftHier) @@
       message(STATUS "Building for platform 'SoftHier'")
     elseif(platform STREQUAL Chimera)
       message(STATUS "Building for platform 'Chimera'")
+    elseif(platform STREQUAL XDNA2)
+      message(STATUS "Building for platform 'XDNA2'")
     else()
       message(FATAL_ERROR "Invalid platform '${platform}' specified!")
     endif()
@@ Expand Down Expand Up / @@ -299,5 +301,20 @@ if(platform STREQUAL Chimera) @@
     endif()
+    if(platform STREQUAL XDNA2)
+      project(${TESTNAME} LANGUAGES CXX)
+      message(STATUS "============================= XDNA2 Configuration ============================")
+      message(STATUS "[cMake  ]   GENERATED_SOURCE         = " ${GENERATED_SOURCE})
+      message(STATUS "[cMake  ]   TESTNAME                 = " ${TESTNAME})
+      message(STATUS "==============================================================================")
+      message(STATUS "")
+      add_subdirectory(TargetLibraries/XDNA2)
+      add_subdirectory(DeeployTest/Platforms/XDNA2)
+    endif()
     print_simulation_config()

-Original file line number
+Diff line change
@@ -0,0 +1,56 @@
# SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
#
# SPDX-License-Identifier: Apache-2.0

# Image for building and running Deeploy tests on the XDNA2 platform:
# Ubuntu 24.04 + XRT packages from the AMD PPA + the Python requirements.
FROM ubuntu:24.04

ARG DEBIAN_FRONTEND=noninteractive
ENV TZ=Etc/UTC
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
# Allow pip to install into the system interpreter of Ubuntu 24.04.
ENV PIP_BREAK_SYSTEM_PACKAGES=1
# NOTE(review): "nope" looks like a placeholder for a variable that some
# tooling expects to be set — confirm which consumer requires it.
ENV LLVM_INSTALL_DIR="nope"

# Scratch directory for build steps; removed again further down.
WORKDIR /app/build

# Toolchain, Python and the XRT runtime / NPU packages from the AMD PPA.
RUN apt-get update && apt-get install -y \
    software-properties-common \
    && add-apt-repository -y ppa:amd-team/xrt \
    && apt-get update && apt-get install -y \
    cmake \
    ninja-build \
    g++ \
    git \
    git-lfs \
    python3 \
    python3-pip \
    python-is-python3 \
    uuid-dev \
    wget \
    curl \
    ccache \
    libxrt2 \
    libxrt-npu2 \
    libxrt-dev \
    libxrt-utils \
    libxrt-utils-npu \
    && rm -rf /var/lib/apt/lists/*

ENV XILINX_XRT=/opt/xilinx/xrt
ENV PATH=${XILINX_XRT}/bin:${PATH}
ENV LD_LIBRARY_PATH=${XILINX_XRT}/lib

# Remove unused files and clean up to reduce image size
WORKDIR /app
RUN rm -rf /app/build

# Install the Python dependencies. `--no-cache-dir` keeps pip's download and
# wheel cache out of the image layer; the requirement files are deleted in
# the same layer because the sources are mounted at run time.
COPY pyproject.toml requirements-xdna.txt ./
RUN pip install --no-cache-dir toml-to-requirements && \
    toml-to-req --toml-file pyproject.toml && \
    pip install --no-cache-dir -r requirements.txt && \
    pip install --no-cache-dir -r requirements-xdna.txt && \
    rm -f requirements.txt pyproject.toml requirements-xdna.txt

ENV MLIR_AIE_PYTHON=/usr/bin/python3

WORKDIR /app/Deeploy

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Beta Support for XDNA2 Platform #179

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Xeratec May 6, 2026

Uh oh!

Xeratec May 6, 2026

Uh oh!

Xeratec May 7, 2026

Uh oh!

Xeratec May 6, 2026

Uh oh!

Victor-Jung May 7, 2026

Uh oh!

Xeratec May 7, 2026

Uh oh!

Xeratec May 7, 2026

Uh oh!

Xeratec May 7, 2026

Uh oh!

Uh oh!

Uh oh!

-Original file line number
+Diff line change
@@ -0,0 +1,201 @@
+    # SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
+    #
+    # SPDX-License-Identifier: Apache-2.0
+    """Base classes for MLIR-emitting node templates and code transformations.
+    This module provides:
+    * :class:`MLIRNodeTemplate` — a :class:`NodeTemplate` subclass whose
+      ``emit()`` method populates an ``mlir.ir.Module`` instead of rendering C.
+    * :class:`MLIRExecutionBlock` — MLIR-specific execution state replacing the
+      C-oriented :class:`ExecutionBlock` (code-snippet deque) with MLIR builder
+      state (tile references, ObjectFifo handles, tiling parameters).
+    * :class:`MLIRCodeTransformationPass` — base class for MLIR code
+      transformation passes that operate on an :class:`MLIRExecutionBlock`.
+    * :class:`MLIRCodeTransformation` — two-phase pass container
+      (``devicePasses`` + ``runtimeSequencePasses``) that the deployer
+      orchestrates inside ``@aie_d.device`` and ``@aiex_d.runtime_sequence``
+      regions respectively.
+    All classes are intentionally dialect-agnostic so that future MLIR-based
+    backends (NVGPU, Linalg, …) can reuse them.
+    """
+    from __future__ import annotations
+    from abc import abstractmethod
+    from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
+    from Deeploy.DeeployTypes import NodeTemplate
+    if TYPE_CHECKING:
+        from Deeploy.DeeployTypes import NetworkContext, OperatorRepresentation
+    # ======================================================================
+    # MLIRExecutionBlock
+    # ======================================================================
class MLIRExecutionBlock:
    """Per-operator MLIR builder state carried through the pass pipeline.

    This is the MLIR counterpart of the C-oriented :class:`ExecutionBlock`:
    rather than a deque of :class:`CodeSnippet` objects it holds the fields
    that code-transformation passes read and fill while emitting MLIR.

    The fields are populated progressively:

    1. The deployer sets ``computeTile``, ``shimTile``,
       ``operatorRepresentation``, and ``patternMemoryConstraint``.
    2. A device-phase pass (e.g. ``MLIRObjectFifoPass``) fills ``fifoMap``,
       ``fifoTypes``, ``tileSize``, ``numTiles``, ``kernelFuncName``, and
       ``kernelObjFile``.
    3. The deployer sets ``runtimeSequenceArgs`` before the
       runtime-sequence phase.
    4. A runtime-sequence pass (e.g. ``MLIRRuntimeSequencePass``) reads all
       of the above to emit DMA configuration.

    Parameters
    ----------
    computeTile : Any, optional
        MLIR compute-tile reference; defaults to ``None``.
    shimTile : Any, optional
        MLIR shim-tile reference; defaults to ``None``.
    """

    def __init__(self, computeTile: Any = None, shimTile: Any = None) -> None:
        # --- Provided by the deployer -----------------------------------
        # Tile references handed in at construction time.
        self.shimTile: Any = shimTile
        self.computeTile: Any = computeTile

        # Parser-derived operator metadata and tensor name lists.
        self.operatorRepresentation: OperatorRepresentation = dict()
        self.inputNames: List[str] = list()
        self.outputNames: List[str] = list()

        # Tiling constraint from the midend solver (may stay None).
        self.patternMemoryConstraint: Any = None

        # The MLIRNodeTemplate of this node; MLIRComputeCorePass calls it to
        # emit the kernel call inside the core block.
        self.template: Optional[Any] = None

        # Arguments supplied before the runtime-sequence phase starts.
        self.runtimeSequenceArgs: List[Any] = list()

        # --- Filled by device-phase passes (e.g. MLIRObjectFifoPass) ----
        self.fifoMap: Dict[str, str] = dict()  # tensor name → FIFO name
        self.fifoTypes: Dict[str, Any] = dict()  # tensor name → MemRefType
        self.numElements: int = 0
        self.numTiles: int = 0
        self.tileSize: int = 0
        self.kernelObjFile: Optional[str] = None
        self.kernelFuncName: Optional[str] = None
+    # ======================================================================
+    # MLIRCodeTransformationPass / MLIRCodeTransformation
+    # ======================================================================
class MLIRCodeTransformationPass:
    """Base class for passes operating on an :class:`MLIRExecutionBlock`.

    Concrete passes override :meth:`apply` to read or mutate the block's
    fields and, optionally, to emit MLIR operations at the current
    insertion point.  The base implementation is the identity pass.
    """

    def apply(self, ctxt: NetworkContext, mlirBlock: MLIRExecutionBlock,
              name: str) -> Tuple[NetworkContext, MLIRExecutionBlock]:
        """Return ``ctxt`` and ``mlirBlock`` untouched.

        Parameters
        ----------
        ctxt : NetworkContext
            Network context handed through the pipeline.
        mlirBlock : MLIRExecutionBlock
            Execution block handed through the pipeline.
        name : str
            Name passed along by the caller; unused by the base class.

        Returns
        -------
        Tuple[NetworkContext, MLIRExecutionBlock]
            The very same ``ctxt`` and ``mlirBlock`` objects.
        """
        passthrough = (ctxt, mlirBlock)
        return passthrough
class MLIRCodeTransformation:
    """Container holding two ordered pass lists for MLIR code generation.

    *devicePasses* run inside an ``@aie_d.device(...)`` region (ObjectFifo
    creation, external-kernel declarations, …).

    *runtimeSequencePasses* run inside an ``@aiex_d.runtime_sequence``
    block (DMA configuration, token await, …).

    The deployer calls :meth:`applyDevicePasses` and
    :meth:`applyRuntimeSequencePasses` at the appropriate points.
    """

    def __init__(self,
                 devicePasses: Optional[List[MLIRCodeTransformationPass]] = None,
                 runtimeSequencePasses: Optional[List[MLIRCodeTransformationPass]] = None) -> None:
        # A missing (or empty) argument yields a fresh, empty pass list.
        self.devicePasses: List[MLIRCodeTransformationPass] = devicePasses if devicePasses else []
        self.runtimeSequencePasses: List[MLIRCodeTransformationPass] = (runtimeSequencePasses
                                                                        if runtimeSequencePasses else [])

    @staticmethod
    def _runPasses(passes, ctxt, mlirBlock, name):
        """Apply ``passes`` in list order, feeding each result into the next pass."""
        for currentPass in passes:
            ctxt, mlirBlock = currentPass.apply(ctxt, mlirBlock, name)
        return ctxt, mlirBlock

    def applyDevicePasses(self, ctxt: NetworkContext, mlirBlock: MLIRExecutionBlock,
                          name: str) -> Tuple[NetworkContext, MLIRExecutionBlock]:
        """Run every pass in ``devicePasses`` in order and return the final state."""
        return self._runPasses(self.devicePasses, ctxt, mlirBlock, name)

    def applyRuntimeSequencePasses(self, ctxt: NetworkContext, mlirBlock: MLIRExecutionBlock,
                                   name: str) -> Tuple[NetworkContext, MLIRExecutionBlock]:
        """Run every pass in ``runtimeSequencePasses`` in order and return the final state."""
        return self._runPasses(self.runtimeSequencePasses, ctxt, mlirBlock, name)
+    # ======================================================================
+    # MLIRNodeTemplate
+    # ======================================================================
class MLIRNodeTemplate(NodeTemplate):
    """NodeTemplate subclass that emits MLIR instead of C code.

    Subclasses must override :meth:`emit` to add dialect operations to an
    ``mlir.ir.Module`` (or region / insertion point provided via *kwargs*).

    :meth:`generate` is overridden as a thin wrapper that forwards to
    :meth:`emit` and returns an empty string; it does not build or
    stringify a module itself.

    The template is initialised with an empty Mako template string, so no C
    code is rendered by the base class.
    """

    def __init__(self):
        # Empty Mako template — no C code is generated.
        super().__init__("")

    # ------------------------------------------------------------------
    # Subclass API
    # ------------------------------------------------------------------

    # NOTE(review): ``@abstractmethod`` only blocks instantiation when the
    # class hierarchy uses ``ABCMeta`` — confirm that ``NodeTemplate`` does.
    @abstractmethod
    def emit(self, operatorRepresentation: OperatorRepresentation, **kwargs) -> None:
        """Populate an MLIR module with the operations for this node.

        The caller (typically the deployer) sets up an ``mlir.ir.Module``
        with the appropriate device wrapper and passes dialect-specific
        context through *kwargs* (e.g. insertion point, tile references,
        ObjectFifo handles).

        Parameters
        ----------
        operatorRepresentation : OperatorRepresentation
            The parser's node representation (buffer names, sizes, types …).
        **kwargs
            Dialect-specific context provided by the deployer.
        """
        ...

    # ------------------------------------------------------------------
    # NodeTemplate overrides
    # ------------------------------------------------------------------

    def generate(self, operatorRepresentation: Optional[OperatorRepresentation] = None, **kwargs) -> str:
        """Forward to :meth:`emit` and return an empty string.

        Deployers that need to build a single module from multiple nodes
        should call :meth:`emit` directly with the shared module context and
        then stringify the complete module themselves.

        Parameters
        ----------
        operatorRepresentation : OperatorRepresentation, optional
            The parser's node representation.  When omitted, a fresh empty
            dict is used for this call.
        **kwargs
            Forwarded unchanged to :meth:`emit`.

        Returns
        -------
        str
            Always the empty string; the MLIR is emitted as a side effect of
            :meth:`emit`, not returned as text.
        """
        # A literal ``{}`` default would be one dict shared by every call, so
        # a mutating ``emit`` could leak state between nodes.
        if operatorRepresentation is None:
            operatorRepresentation = {}
        self.emit(operatorRepresentation, **kwargs)
        return ""

Beta Support for XDNA2 Platform #179

Are you sure you want to change the base?

Uh oh!

Beta Support for XDNA2 Platform #179

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!