In [1]:
import os
import subprocess

def source_and_load(script_path):
    """
    Sources a shell script and loads the resulting environment variables
    into the current Python process. This version is robust against scripts
    that print status messages.
    """
    # Command to source the script and then run 'env' to dump all variables.
    command = f'bash -c "source {script_path} &> /dev/null && env"'
    
    try:
        result = subprocess.run(command, shell=True, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        
        # Parse the KEY=VALUE output from 'env'
        for line in result.stdout.strip().split('\n'):
            if '=' in line:
                key, value = line.split('=', 1)
                os.environ[key] = value
                
        print(f"Successfully sourced and loaded variables from {script_path}")

    except subprocess.CalledProcessError as e:
        # If the script fails, print its error messages for debugging
        print(f"--- ERROR sourcing script: {script_path} ---")
        print(e.stderr)
        print("-------------------------------------------------")

# --- USAGE ---
# This should now work correctly even if the scripts are verbose
source_and_load("/work/shared/common/allo/setup-llvm19.sh")
source_and_load("/work/shared/common/allo/vitis_2023.2_u280.sh")

# You can now verify that a variable is set
print(f"LLVM_BUILD_DIR is set to: {os.environ.get('LLVM_BUILD_DIR', 'Not Set')}")

Successfully sourced and loaded variables from /work/shared/common/allo/setup-llvm19.sh
Successfully sourced and loaded variables from /work/shared/common/allo/vitis_2023.2_u280.sh
LLVM_BUILD_DIR is set to: /work/shared/common/llvm-project-19.x/build


In [2]:
import allo
from allo.ir.types import float32, int32

M,N,K = 128,128,128


def prgemm(t: int32, A: float32[M,K], B: float32[K,N]) -> float32[M,N]:
    C: float32[M,N] = 0.0
    for l in range(t):
        for i,j in allo.grid(M,N):
            for k in allo.reduction(K):
                C[i,j] += A[i,k] * B[k,j]
    return C


def gemm(A: float32[M,K], B: float32[K,N]) -> float32[M,N]:
    C: float32[M,N] = 0.0
    for i,j in allo.grid(M,N):
        for k in allo.reduction(K):
            C[i,j] += A[i,k] * B[k,j]
    return C

In [3]:
s = allo.customize(gemm)
prs = allo.customize(prgemm)
#print(s.module)

In [5]:
# Functional Testing; LLVM Backend
executable = prs.build()

# testing
import numpy as np
np_A = np.random.rand(M,K).astype(np.float32)
np_B = np.random.rand(K,N).astype(np.float32)
np_C = executable(4, np_A, np_B)
golden_C = np.matmul(np_A, np_B)
np.testing.assert_allclose(np_C, 4 * golden_C, rtol=1e-3, atol=1e-3)
print("testing completed, np_C == golden_C")

testing completed, np_C == golden_C


So far, we have functionally tested the feature of calling the kernel with an extra argument (the loop trip count).

Now, let's generalize this feature to generate arbitrary-sized square matrix tiledGEMM kernel.

Next, we'll achieve:
- Functional TiledGemm
- Do HLS workflow with TiledGEMM
    - Functional check -> HLS codegen -> Inspect HLS -> Iterate
- Programmable Feather
    - different tiling, layout, and routing patterns in FEATHER

In [10]:
# Copyright Allo authors. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
# %%
# We first reorder the inner reduction loop with the middle loop.
# This is used to change the computation order of matrix multiplication.

s.reorder("k", "j")
print(s.module)

# %%
# .. note::
#
#    This reordering seems to be easy, but it is impossible in the old Allo,
#    since the previous Allo directly generate reduction variables which make
#    the ``j`` loop becomes imperfect, while MLIR only supports reordering perfect
#    loops.

# %%
# Next, we create a new buffer for the output tensor ``C``.
# We provide a ``.buffer_at()`` primitive for users to quickly create a new buffer
# along a specific axis. Since Allo has attached all the tensors to the function,
# we can directly use ``<schedule>.<tensor>`` to access a specific tensor in the schedule.

s.buffer_at(s.C, axis="i")
print(s.module)

# %%
# From the above generated code, we can see that Allo automatically
# creates an intermediate buffer ``%1`` for ``C`` and attach it inside the ``i`` loop.
# Also two additional loop nested named ``j_init`` and ``j_back`` are created to
# initialize and write the intermediate buffer back to output tensor.

# %%
# Lastly, we pipeline the ``j`` loop in order to achieve the best performance.

s.pipeline("j")
print(s.module)

module {
  func.func @gemm(%arg0: memref<128x128xf32>, %arg1: memref<128x128xf32>) -> memref<128x128xf32> attributes {itypes = "__", otypes = "_"} {
    %alloc = memref.alloc() {name = "C"} : memref<128x128xf32>
    %cst = arith.constant 0.000000e+00 : f32
    linalg.fill ins(%cst : f32) outs(%alloc : memref<128x128xf32>)
    affine.for %arg2 = 0 to 128 {
      affine.for %arg3 = 0 to 128 {
        affine.for %arg4 = 0 to 128 {
          %0 = affine.load %arg0[%arg2, %arg3] {from = "A"} : memref<128x128xf32>
          %1 = affine.load %arg1[%arg3, %arg4] {from = "B"} : memref<128x128xf32>
          %2 = arith.mulf %0, %1 : f32
          %3 = affine.load %alloc[%arg2, %arg4] {from = "C"} : memref<128x128xf32>
          %4 = arith.addf %3, %2 : f32
          affine.store %4, %alloc[%arg2, %arg4] {to = "C"} : memref<128x128xf32>
        } {loop_name = "j"}
      } {loop_name = "k", op_name = "S_k_0", reduction}
    } {loop_name = "i", op_name = "S_i_j_0"}
    return %alloc : memref<128x12

In [9]:

##############################################################################
# Codegen for Vivado/Vitis HLS
# ----------------------------
# Similar to the CPU execution, we only need to change the target of the ``.build()`` function
# in order to target different backends. Here, we use ``vhls`` as the target to generate
# Vivado/Vitis HLS code, which will directly returns the generated code as a string.

code = s.build(target="vhls")
print(code)


//===------------------------------------------------------------*- C++ -*-===//
//
// Automatically generated file for High-level Synthesis (HLS).
//
//===----------------------------------------------------------------------===//
#include <algorithm>
#include <ap_axi_sdata.h>
#include <ap_fixed.h>
#include <ap_int.h>
#include <hls_math.h>
#include <hls_stream.h>
#include <math.h>
#include <stdint.h>
using namespace std;
void gemm(
  float v0[128][128],
  float v1[128][128],
  float v2[128][128]
) {	// L2
  for (int v3 = 0; v3 < 128; v3++) {	// L5
    for (int v4 = 0; v4 < 128; v4++) {	// L5
      v2[v3][v4] = (float)0.000000;	// L5
    }
  }
  l_S_i_j_0_i: for (int i = 0; i < 128; i++) {	// L6
    l_j: for (int j = 0; j < 128; j++) {	// L7
      l_S_k_0_k: for (int k = 0; k < 128; k++) {	// L8
        float v8 = v0[i][k];	// L9
        float v9 = v1[k][j];	// L10
        float v10 = v8 * v9;	// L11
        float v11 = v2[i][j];	// L12
        float v12 = v11 + v10;	// L13
        v2

In [7]:

# %%
# We can see that the generated code preserves the same structure as the IR, and inserts
# necessary headers and pragmas for Vivado/Vitis HLS. The generated code can be directly passed
# to Vivado/Vitis HLS to generate RTL designs.
#
# .. note::
#
#    Vivado HLS was the previous name of Vitis HLS (before 2020.1). The previous HLS code
#    can still run on the latest Vitis HLS, but the performance of the generated RTL design
#    and the estimated reports may be different, as the newer version of Vitis HLS provides better
#    automatic optimizations.

# %%
# We also provide an easy way to invoke Vitis HLS from Allo. Users can simply provide
# the synthesis mode that are supported by Vitis HLS (e.g., ``sw_emu``, ``hw_emu``, and ``hw``),
# and the target project folder name. Allo will automatically generate
# the HLS project and invoke the compiler to generate the RTL design.
#
# .. note::
#
#    - ``sw_emu``: Software emulation mode, which is similar to C simulation that compiles the program using C compiler and runs it on the CPU. Depending on the size of your input data, this mode may take within one minute.
#    - ``hw_emu``: Hardware emulation mode, which is similar to co-simulation that compiles the program into RTL design using HLS compiler and runs the RTL with the test bench on the FPGA emulator. Since it needs to go through the HLS synthesis flow, it may take several minutes to finish.
#    - ``hw``: Hardware mode, which compiles the program into RTL design using HLS, goes through placement and routing, generates the bitstream, and finally executes on FPGA. This mode may take several hours to finish.

mod = s.build(target="vitis_hls", mode="hw_emu", project="gemm.prj")


sh: ml: line 1: syntax error: unexpected end of file
sh: error importing function definition for `ml'
sh: which: line 1: syntax error: unexpected end of file
sh: error importing function definition for `which'
sh: module: line 1: syntax error: unexpected end of file
sh: error importing function definition for `module'


In [8]:
# %%
# After running the above instruction, we can see a ``gemm.prj`` folder is generated in the current directory:
#
# - ``host.cpp``: The host (CPU) OpenCL code that invokes the generated accelerator.
# - ``kernel.cpp``: The generated accelerator code.
# - ``Makefile``: Defined some shorthands for compiling the project.
#
# To generate the hardware design and see the performance estimation, we need to first
# prepare the input data. Allo supports NumPy inputs even for hardware programs,
# so we can just create two NumPy arrays ``np_A`` and ``np_B`` for inputs.
# Since the C++ design cannot support returning a new array, we also need to
# explicitly create an output array ``allo_C`` and pass it to the function.
#
# .. note::
#
#    You need to configure the `Vitis HLS <https://www.amd.com/en/products/software/adaptive-socs-and-fpgas/vitis/vitis-hls.html>`_ and `XRT <https://github.com/Xilinx/XRT>`_ environment before proceeding to the next step.
#    For Zhang group students, we have the Vitis environment configured on the server, so you can directly
#    ``source /work/shared/common/allo/vitis_2023.2_u280.sh`` to set up the environment, which
#    targets the AMD U280 FPGA board.
#
# .. code-block:: python
#
np_A = np.random.random((M, K)).astype(np.float32)
np_B = np.random.random((K, N)).astype(np.float32)
allo_C = np.zeros((M, N), dtype=np.float32)
mod(np_A, np_B, allo_C)
np.testing.assert_allclose(allo_C, np.matmul(np_A, np_B), rtol=1e-5, atol=1e-5)

# %%
# After executing the above command, you can check the following report under ``gemm.prj/_x.hw_emu.xilinx_u250_gen3x16_xdma_4_1_202210_1/gemm/gemm/gemm/solution/syn/report/csynth.rpt``.
#
# .. code-block:: python
#
#    +--------------------------------------------------+---------+-----------+----------+---------+------+----------+---------+---------+-------------+------------+-----+
#    |                      Modules                     | Latency |  Latency  | Iteration|         | Trip |          |         |         |             |            |     |
#    |                      & Loops                     | (cycles)|    (ns)   |  Latency | Interval| Count| Pipelined|  BRAM   |   DSP   |      FF     |     LUT    | URAM|
#    +--------------------------------------------------+---------+-----------+----------+---------+------+----------+---------+---------+-------------+------------+-----+
#    |+ gemm                                            |    39934|  1.331e+05|         -|    39935|     -|        no|  6 (~0%)|  5 (~0%)|  19074 (~0%)|  29069 (2%)|    -|
#    | + gemm_Pipeline_VITIS_LOOP_44_1_VITIS_LOOP_45_2  |     1026|  3.420e+03|         -|     1026|     -|        no|        -|        -|     36 (~0%)|   169 (~0%)|    -|
#    |  o VITIS_LOOP_44_1_VITIS_LOOP_45_2               |     1024|  3.413e+03|         2|        1|  1024|       yes|        -|        -|            -|           -|    -|
#    | o l_S_buf0_buf0_l_0_l_buf0_l_1                   |     1025|  3.416e+03|         3|        1|  1024|       yes|        -|        -|            -|           -|    -|
#    | o l_S_buf1_buf1_l_0_l_buf1_l_1                   |     1025|  3.416e+03|         3|        1|  1024|       yes|        -|        -|            -|           -|    -|
#    | o l_S_i_j_0_i                                    |    35616|  1.187e+05|      1113|        -|    32|        no|        -|        -|            -|           -|    -|
#    |  + gemm_Pipeline_l_j_init                        |       34|    113.322|         -|       34|     -|        no|        -|        -|      8 (~0%)|    50 (~0%)|    -|
#    |   o l_j_init                                     |       32|    106.656|         1|        1|    32|       yes|        -|        -|            -|           -|    -|
#    |  + gemm_Pipeline_l_S_k_0_k_l_j                   |     1039|  3.463e+03|         -|     1039|     -|        no|        -|  5 (~0%)|    759 (~0%)|   494 (~0%)|    -|
#    |   o l_S_k_0_k_l_j                                |     1037|  3.456e+03|        15|        1|  1024|       yes|        -|        -|            -|           -|    -|
#    |  + gemm_Pipeline_l_j_back                        |       34|    113.322|         -|       34|     -|        no|        -|        -|     15 (~0%)|    78 (~0%)|    -|
#    |   o l_j_back                                     |       32|    106.656|         2|        1|    32|       yes|        -|        -|            -|           -|    -|
#    | o l_S_result2_result2_l_0_l_result2_l_1          |     1026|  3.420e+03|         4|        1|  1024|       yes|        -|        -|            -|           -|    -|
#    +--------------------------------------------------+---------+-----------+----------+---------+------+----------+---------+---------+-------------+------------+-----+
#
# From the above output, we can clearly see that all the loops inside the GEMM kernel (marked as ``o``) are pipelined
# with Initiation Interval (II) equal to 1. You can also find more detailed information under the ``report`` folder.

sh: ml: line 1: syntax error: unexpected end of file
sh: error importing function definition for `ml'
sh: which: line 1: syntax error: unexpected end of file
sh: error importing function definition for `which'
sh: module: line 1: syntax error: unexpected end of file
sh: error importing function definition for `module'
/bin/sh: ml: line 1: syntax error: unexpected end of file
/bin/sh: error importing function definition for `ml'
/bin/sh: which: line 1: syntax error: unexpected end of file
/bin/sh: error importing function definition for `which'
/bin/sh: module: line 1: syntax error: unexpected end of file
/bin/sh: error importing function definition for `module'
/bin/sh: module: line 1: syntax error: unexpected end of file
/bin/sh: error importing function definition for `module'
/bin/sh: ml: line 1: syntax error: unexpected end of file
/bin/sh: error importing function definition for `ml'
/bin/sh: which: line 1: syntax error: unexpected end of file
/bin/sh: error importing function def

cd gemm.prj; make run TARGET=hw_emu PLATFORM=$XDEVICE


/bin/sh: module: line 1: syntax error: unexpected end of file
/bin/sh: error importing function definition for `module'
/bin/sh: ml: line 1: syntax error: unexpected end of file
/bin/sh: error importing function definition for `ml'
/bin/sh: which: line 1: syntax error: unexpected end of file
/bin/sh: error importing function definition for `which'
/bin/bash: module: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `module'
/bin/bash: ml: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `ml'
/bin/bash: which: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `which'
/bin/bash: module: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `module'
/bin/bash: ml: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `ml'
/bin/bash: which: line 1: syntax error: unexpect

g++ -o gemm xcl2.cpp host.cpp -I/opt/xilinx/xrt/include -I/opt/xilinx/Vivado/2023.2/include -Wall -O0 -g -std=c++1y -I. -fmessage-length=0 -L/opt/xilinx/xrt/lib -pthread -lOpenCL -lrt -lstdc++ 
mkdir -p ./_x.hw_emu.xilinx_u280_gen3x16_xdma_1_202211_1
v++ -c -g --save-temps  -t hw_emu --platform /opt/xilinx/platforms/xilinx_u280_gen3x16_xdma_1_202211_1/xilinx_u280_gen3x16_xdma_1_202211_1.xpfm -k gemm --temp_dir ./_x.hw_emu.xilinx_u280_gen3x16_xdma_1_202211_1  -I'.' -o'_x.hw_emu.xilinx_u280_gen3x16_xdma_1_202211_1/gemm.xo' kernel.cpp


/bin/bash: which: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `which'
/bin/bash: module: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `module'
/bin/bash: ml: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `ml'
/bin/bash: module: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `module'
/bin/bash: ml: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `ml'
/bin/bash: which: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `which'
/bin/bash: module: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `module'
/bin/bash: ml: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `ml'
/bin/bash: which: line 1: syntax err

Option Map File Used: '/opt/xilinx/Vitis/2023.2/data/vitis/vpp/optMap.xml'

****** v++ v2023.2 (64-bit)
  **** SW Build 4026344 on 2023-10-11-15:42:10
    ** Copyright 1986-2022 Xilinx, Inc. All Rights Reserved.
    ** Copyright 2022-2023 Advanced Micro Devices, Inc. All Rights Reserved.

INFO: [v++ 60-1306] Additional information associated with this v++ compile can be found at:
	Reports: /home/sk3463/allo/prallo/gemm.prj/_x.hw_emu.xilinx_u280_gen3x16_xdma_1_202211_1/reports/gemm
	Log files: /home/sk3463/allo/prallo/gemm.prj/_x.hw_emu.xilinx_u280_gen3x16_xdma_1_202211_1/logs/gemm
Running Dispatch Server on port: 44637
INFO: [v++ 60-1548] Creating build summary session with primary output /home/sk3463/allo/prallo/gemm.prj/_x.hw_emu.xilinx_u280_gen3x16_xdma_1_202211_1/gemm.xo.compile_summary, at Fri Oct 10 13:27:44 2025
INFO: [v++ 60-1315] Creating rulecheck session with output '/home/sk3463/allo/prallo/gemm.prj/_x.hw_emu.xilinx_u280_gen3x16_xdma_1_202211_1/reports/gemm/v++_compile_gemm

/bin/bash: module: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `module'
/bin/bash: ml: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `ml'
/bin/bash: which: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `which'
/bin/bash: module: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `module'
/bin/bash: ml: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `ml'
/bin/bash: which: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `which'
/bin/sh: module: line 1: syntax error: unexpected end of file
/bin/sh: error importing function definition for `module'
/bin/sh: ml: line 1: syntax error: unexpected end of file
/bin/sh: error importing function definition for `ml'
/bin/sh: which: line 1: syntax error: unexpe


===>The following messages were generated while  performing high-level synthesis for kernel: gemm Log file: /home/sk3463/allo/prallo/gemm.prj/_x.hw_emu.xilinx_u280_gen3x16_xdma_1_202211_1/gemm/gemm/vitis_hls.log :
INFO: [v++ 204-61] Pipelining loop 'VITIS_LOOP_71_1_VITIS_LOOP_72_2'.
INFO: [v++ 200-1470] Pipelining result : Target II = NA, Final II = 1, Depth = 2, loop 'VITIS_LOOP_71_1_VITIS_LOOP_72_2'
INFO: [v++ 204-61] Pipelining loop 'l_S_i_j_0_i_l_j_l_S_k_0_k'.
INFO: [v++ 200-1470] Pipelining result : Target II = NA, Final II = 8, Depth = 16, loop 'l_S_i_j_0_i_l_j_l_S_k_0_k'
INFO: [v++ 204-61] Pipelining loop 'l_S_load_buf0_load_buf0_l_0_l_load_buf0_l_1'.
INFO: [v++ 204-61] Pipelining loop 'l_S_load_buf1_load_buf1_l_0_l_load_buf1_l_1'.
INFO: [v++ 204-61] Pipelining loop 'l_S_store_res2_store_res2_l_0_l_store_res2_l_1'.
INFO: [v++ 200-1470] Pipelining result : Target II = 1, Final II = 1, Depth = 3, loop 'l_S_load_buf0_load_buf0_l_0_l_load_buf0_l_1'
INFO: [v++ 200-1470] Pipelining r

/bin/bash: which: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `which'
/bin/bash: module: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `module'
/bin/bash: ml: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `ml'
/bin/bash: module: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `module'
/bin/bash: ml: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `ml'
/bin/bash: which: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `which'
/bin/bash: module: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `module'
/bin/bash: ml: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `ml'
/bin/bash: which: line 1: syntax err

Option Map File Used: '/opt/xilinx/Vitis/2023.2/data/vitis/vpp/optMap.xml'

****** v++ v2023.2 (64-bit)
  **** SW Build 4026344 on 2023-10-11-15:42:10
    ** Copyright 1986-2022 Xilinx, Inc. All Rights Reserved.
    ** Copyright 2022-2023 Advanced Micro Devices, Inc. All Rights Reserved.

INFO: [v++ 60-1306] Additional information associated with this v++ link can be found at:
	Reports: /home/sk3463/allo/prallo/gemm.prj/_x.hw_emu.xilinx_u280_gen3x16_xdma_1_202211_1/reports/link
	Log files: /home/sk3463/allo/prallo/gemm.prj/_x.hw_emu.xilinx_u280_gen3x16_xdma_1_202211_1/logs/link
Running Dispatch Server on port: 35709
INFO: [v++ 60-1548] Creating build summary session with primary output /home/sk3463/allo/prallo/gemm.prj/build_dir.hw_emu.xilinx_u280_gen3x16_xdma_1_202211_1/gemm.link.xclbin.link_summary, at Fri Oct 10 13:29:06 2025
INFO: [v++ 60-1315] Creating rulecheck session with output '/home/sk3463/allo/prallo/gemm.prj/_x.hw_emu.xilinx_u280_gen3x16_xdma_1_202211_1/reports/link/v++_li

/bin/bash: which: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `which'
/bin/bash: module: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `module'
/bin/bash: ml: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `ml'
/bin/bash: module: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `module'
/bin/bash: ml: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `ml'
/bin/bash: which: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `which'
/bin/bash: module: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `module'
/bin/bash: ml: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `ml'
/bin/bash: which: line 1: syntax err

Option Map File Used: '/opt/xilinx/Vitis/2023.2/data/vitis/vpp/optMap.xml'

****** v++ v2023.2 (64-bit)
  **** SW Build 4026344 on 2023-10-11-15:42:10
    ** Copyright 1986-2022 Xilinx, Inc. All Rights Reserved.
    ** Copyright 2022-2023 Advanced Micro Devices, Inc. All Rights Reserved.

INFO: [v++ 60-1306] Additional information associated with this v++ package can be found at:
	Reports: /home/sk3463/allo/prallo/gemm.prj/_x/reports/package
	Log files: /home/sk3463/allo/prallo/gemm.prj/_x/logs/package
Running Dispatch Server on port: 33633
INFO: [v++ 60-1548] Creating build summary session with primary output /home/sk3463/allo/prallo/gemm.prj/build_dir.hw_emu.xilinx_u280_gen3x16_xdma_1_202211_1/gemm.xclbin.package_summary, at Fri Oct 10 13:38:25 2025
INFO: [v++ 60-1315] Creating rulecheck session with output '/home/sk3463/allo/prallo/gemm.prj/_x/reports/package/v++_package_gemm_guidance.html', at Fri Oct 10 13:38:25 2025
INFO: [v++ 60-895]   Target platform: /opt/xilinx/platforms/xili

/bin/bash: which: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `which'
/bin/bash: module: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `module'
/bin/bash: ml: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `ml'
/bin/bash: module: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `module'
/bin/bash: ml: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `ml'
/bin/bash: which: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `which'
/bin/bash: module: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `module'
/bin/bash: ml: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `ml'
/bin/bash: which: line 1: syntax err


****** configutil v2023.2 (64-bit)
  **** SW Build 4026344 on 2023-10-11-15:42:10
    ** Copyright 1986-2022 Xilinx, Inc. All Rights Reserved.
    ** Copyright 2022-2023 Advanced Micro Devices, Inc. All Rights Reserved.

INFO: [ConfigUtil 60-895]   Target platform: /opt/xilinx/platforms/xilinx_u280_gen3x16_xdma_1_202211_1/xilinx_u280_gen3x16_xdma_1_202211_1.xpfm
INFO: [ConfigUtil 60-1578]   This platform contains Xilinx Shell Archive '/opt/xilinx/platforms/xilinx_u280_gen3x16_xdma_1_202211_1/hw/hw.xsa'
INFO: [ConfigUtil 60-1032] Extracting hardware platform to ./_x.hw_emu.xilinx_u280_gen3x16_xdma_1_202211_1
emulation configuration file `emconfig.json` is created in ./_x.hw_emu.xilinx_u280_gen3x16_xdma_1_202211_1 directory 
cp -rf ./_x.hw_emu.xilinx_u280_gen3x16_xdma_1_202211_1/emconfig.json .
XCL_EMULATION_MODE=hw_emu ./gemm ./build_dir.hw_emu.xilinx_u280_gen3x16_xdma_1_202211_1/gemm.xclbin


/bin/sh: which: line 1: syntax error: unexpected end of file
/bin/sh: error importing function definition for `which'
/bin/sh: module: line 1: syntax error: unexpected end of file
/bin/sh: error importing function definition for `module'
/bin/sh: ml: line 1: syntax error: unexpected end of file
/bin/sh: error importing function definition for `ml'
sh: module: line 1: syntax error: unexpected end of file
sh: error importing function definition for `module'
sh: ml: line 1: syntax error: unexpected end of file
sh: error importing function definition for `ml'
sh: which: line 1: syntax error: unexpected end of file
sh: error importing function definition for `which'


Found Platform
Platform Name: Xilinx
INFO: Reading ./build_dir.hw_emu.xilinx_u280_gen3x16_xdma_1_202211_1/gemm.xclbin
Loading: './build_dir.hw_emu.xilinx_u280_gen3x16_xdma_1_202211_1/gemm.xclbin'
Trying to program device[0]: xilinx_u280_gen3x16_xdma_1_202211_1


sh: module: line 1: syntax error: unexpected end of file
sh: error importing function definition for `module'
sh: ml: line 1: syntax error: unexpected end of file
sh: error importing function definition for `ml'
sh: which: line 1: syntax error: unexpected end of file
sh: error importing function definition for `which'
sh: module: line 1: syntax error: unexpected end of file
sh: error importing function definition for `module'
sh: ml: line 1: syntax error: unexpected end of file
sh: error importing function definition for `ml'
sh: which: line 1: syntax error: unexpected end of file
sh: error importing function definition for `which'
sh: module: line 1: syntax error: unexpected end of file
sh: error importing function definition for `module'
sh: ml: line 1: syntax error: unexpected end of file
sh: error importing function definition for `ml'
sh: which: line 1: syntax error: unexpected end of file
sh: error importing function definition for `which'
/bin/bash: module: line 1: syntax error:

INFO: [HW-EMU 05] Path of the simulation directory : /home/sk3463/allo/prallo/gemm.prj/.run/3853122/hw_emu/device0/binary_0/behav_waveform/xsim

 server socket name is	/tmp/sk3463/device0_0_3853122
INFO: [HW-EMU 01] Hardware emulation runs simulation underneath. Using a large data set will result in long simulation times. It is recommended that a small dataset is used for faster execution. The flow uses approximate models for Global memories and interconnect and hence the performance data generated is approximate.
configuring dataflow mode with ert polling
scheduler config ert(1), dataflow(1), slots(16), cudma(0), cuisr(0), cdma(0), cus(1)
Device[0]: program successful!
|-------------------------+-------------------------|
| Kernel                  |    Wall-Clock Time (ns) |
|-------------------------+-------------------------|
INFO::[ Vitis-EM 22 ] [Time elapsed: 5 minute(s) 10 seconds, Emulation time: 2.87884 ms]
Data transfer between kernel(s) and global memory(s)
gemm_1:m_axi_gmem

sh: module: line 1: syntax error: unexpected end of file
sh: error importing function definition for `module'
sh: ml: line 1: syntax error: unexpected end of file
sh: error importing function definition for `ml'
sh: which: line 1: syntax error: unexpected end of file
sh: error importing function definition for `which'
sh: module: line 1: syntax error: unexpected end of file
sh: error importing function definition for `module'
sh: ml: line 1: syntax error: unexpected end of file
sh: error importing function definition for `ml'
sh: which: line 1: syntax error: unexpected end of file
sh: error importing function definition for `which'


In [None]:

##############################################################################
# On-board Execution
# ------------------
# After optimizing the design and make sure everything works correctly,
# we can push the generated RTL design to the backend synthesis flow to generate
# the bitstream for FPGA. In Allo, we can directly change the target to ``hw``
# to launch the backend synthesis job. It may take several hours to generate the final
# bitstream, so it would be better to run it using `tmux <https://github.com/tmux/tmux/wiki>`_.
#
# .. code-block:: python
#
mod = s.build(target="vitis_hls", mode="hw", project="gemm.prj")
mod(np_A, np_B, allo_C)
np.testing.assert_allclose(allo_C, np.matmul(np_A, np_B), rtol=1e-5, atol=1e-5)
#
# Finally, you should be able to see the generated bitstream ``.xclbin`` under the ``gemm.prj/build_dir.hw.xilinx_u280_gen3x16_xdma_1_202211_1`` folder
# (actual board name may be different), and the above test should pass.

# %%
# To get more detailed information on the resource usage and performance of the generated design,
# you can check the following files:
#
# - ``gemm.prj/build_dir.hw.xilinx_u280_gen3x16_xdma_1_202211_1/gemm.xclbin``: The generated bitstream.
# - ``gemm.prj/build_dir.hw.xilinx_u280_gen3x16_xdma_1_202211_1/gemm.link.xclbin.info``: Frequency of the actual design, which can be found in ``DATA_CLK``. By default, it is 300MHz.
# - ``gemm.prj/_x.hw.xilinx_u280_gen3x16_xdma_1_202211_1/reports/gemm/hls_reports/gemm_csynth.rpt``: The HLS synthesis report.
# - ``gemm.prj/_x.hw.xilinx_u280_gen3x16_xdma_1_202211_1/reports/link/imp/impl_1_full_util_routed.rpt``: The full utilization report after placement and routing. You can find the following resource usage:
#
#   - LUT: ``1. CLB Logic -- CLB LUTs``
#   - FF: ``1. CLB Logic -- CLB Registers -- Register as Flip Flop``
#   - BRAM: ``3. BLOCKRAM -- Block RAM Tile``
#   - DSP: ``4. ARITHMETIC -- DSPs``
#
# - ``gemm.prj/_x.hw.xilinx_u280_gen3x16_xdma_1_202211_1/reports/link/imp/impl_1_slr_util_routed.rpt``: The per SLR utilization report after placement and routing.
# - ``gemm.prj/_x.hw.xilinx_u280_gen3x16_xdma_1_202211_1/logs/gemm/gemm_vitis_hls.log``: The log file of the Vitis HLS.
# - ``gemm.prj/_x.hw.xilinx_u280_gen3x16_xdma_1_202211_1/logs/link/v++.log``: The log file of the Vivado backend synthesis.
