[MLIR][GPU] Add gpu-lower-to-rocdl-pipeline meta-pass by Hardcode84 · Pull Request #196751 · llvm/llvm-project

Hardcode84 · 2026-05-09T20:37:43Z

Add gpu-lower-to-rocdl-pipeline meta-pass which lowers common MLIR dialects (gpu/arith/scf/vector) to binary, similar to the existing XeVM/NVVM pipelines.

llvmorg-github-actions · 2026-05-09T20:46:43Z

@llvm/pr-subscribers-mlir-gpu

@llvm/pr-subscribers-mlir

Author: Ivan Butygin (Hardcode84)

Changes

Add gpu-lower-to-rocdl-pipeline meta-pass which lowers common MLIR dialects (gpu/arith/scf/vector) to binary, similar to the existing XeVM/NVVM pipelines.

Full diff: https://github.com/llvm/llvm-project/pull/196751.diff

5 Files Affected:

(modified) mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h (+59)
(modified) mlir/lib/Dialect/GPU/Pipelines/CMakeLists.txt (+5)
(added) mlir/lib/Dialect/GPU/Pipelines/GPUToROCDLPipeline.cpp (+136)
(modified) mlir/lib/RegisterAllPasses.cpp (+1)
(added) mlir/test/Integration/GPU/ROCM/gpu-lower-to-rocdl-pipeline.mlir (+69)

diff --git a/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h b/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h
index ee3632ba149e5..9a258ca30ec9e 100644
--- a/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h
+++ b/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h
@@ -64,6 +64,58 @@ struct GPUToNVVMPipelineOptions
       llvm::cl::init(true)};
 };
 
+/// Options for the gpu to rocdl pipeline.
+struct GPUToROCDLPipelineOptions
+    : public PassPipelineOptions<GPUToROCDLPipelineOptions> {
+  PassOptions::Option<int64_t> indexBitWidth{
+      *this, "index-bitwidth",
+      llvm::cl::desc("Bitwidth of the index type for the host (warning this "
+                     "should be 64 until the GPU layering is fixed)"),
+      llvm::cl::init(64)};
+  PassOptions::Option<std::string> hsacoTriple{
+      *this, "hsaco-triple",
+      llvm::cl::desc("Triple to use to serialize to hsaco."),
+      llvm::cl::init("amdgcn-amd-amdhsa")};
+  PassOptions::Option<std::string> hsacoChip{
+      *this, "hsaco-chip", llvm::cl::desc("Chip to use to serialize to hsaco."),
+      llvm::cl::init("gfx900")};
+  PassOptions::Option<std::string> hsacoFeatures{
+      *this, "hsaco-features",
+      llvm::cl::desc("Features to use to serialize to hsaco."),
+      llvm::cl::init("")};
+  PassOptions::Option<std::string> binaryFormat{
+      *this, "binary-format",
+      llvm::cl::desc("Final GPU binary emission format (e.g. fatbin, binary, "
+                     "isa, llvm, offloading)."),
+      llvm::cl::init("fatbin")};
+  PassOptions::Option<std::string> hsacoAbiVersion{
+      *this, "hsaco-abi",
+      llvm::cl::desc("AMDHSA ABI version (e.g. \"500\", \"600\")."),
+      llvm::cl::init("600")};
+  PassOptions::Option<bool> hsacoWave64{
+      *this, "hsaco-wave64",
+      llvm::cl::desc("Use Wave64 mode (default true; wave32 if false, "
+                     "appropriate for RDNA / gfx10+ where supported)."),
+      llvm::cl::init(true)};
+  PassOptions::Option<int> optLevel{
+      *this, "opt-level",
+      llvm::cl::desc("Optimization level for ROCDL/AMDGPU compilation."),
+      llvm::cl::init(2)};
+  PassOptions::Option<std::string> cmdOptions{
+      *this, "rocdl-cmd-options",
+      llvm::cl::desc(
+          "Command line options to pass to the downstream AMDGPU compiler."),
+      llvm::cl::init("")};
+  PassOptions::Option<bool> kernelUseBarePtrCallConv{
+      *this, "kernel-bare-ptr-calling-convention",
+      llvm::cl::desc("Use bareptr calling convention for device kernels."),
+      llvm::cl::init(false)};
+  PassOptions::Option<bool> hostUseBarePtrCallConv{
+      *this, "host-bare-ptr-calling-convention",
+      llvm::cl::desc("Use bareptr calling convention for the host."),
+      llvm::cl::init(false)};
+};
+
 // Options for the gpu to xevm pipeline.
 struct GPUToXeVMPipelineOptions
     : public PassPipelineOptions<GPUToXeVMPipelineOptions> {
@@ -120,6 +172,12 @@ struct GPUToXeVMPipelineOptions
 void buildLowerToNVVMPassPipeline(OpPassManager &pm,
                                   const GPUToNVVMPipelineOptions &options);
 
+/// Adds the GPU to ROCDL pipeline to the given pass manager. Transforms main
+/// dialects (arith, memref, scf, vector, gpu) into ROCDL/AMDGPU. Begins with
+/// GPU code regions, then handles host code.
+void buildLowerToROCDLPassPipeline(OpPassManager &pm,
+                                   const GPUToROCDLPipelineOptions &options);
+
 /// Adds the GPU to XeVM pipeline to the given pass manager. Transforms main
 /// dialects into XeVM targets. Begins with GPU code regions, then handles host
 /// code.
@@ -128,6 +186,7 @@ void buildLowerToXeVMPassPipeline(OpPassManager &pm,
 
 /// Register all pipelines for the `gpu` dialect.
 void registerGPUToNVVMPipeline();
+void registerGPUToROCDLPipeline();
 void registerGPUToXeVMPipeline();
 
 } // namespace gpu
diff --git a/mlir/lib/Dialect/GPU/Pipelines/CMakeLists.txt b/mlir/lib/Dialect/GPU/Pipelines/CMakeLists.txt
index 85b7b1ce90637..f523ccaee3f9f 100644
--- a/mlir/lib/Dialect/GPU/Pipelines/CMakeLists.txt
+++ b/mlir/lib/Dialect/GPU/Pipelines/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_mlir_dialect_library(MLIRGPUPipelines
   GPUToNVVMPipeline.cpp
+  GPUToROCDLPipeline.cpp
   GPUToXeVMPipeline.cpp
 
   ADDITIONAL_HEADER_DIRS
@@ -12,8 +13,12 @@ add_mlir_dialect_library(MLIRGPUPipelines
   MLIRTransforms
   MLIRLinalgTransforms
   MLIRAffineToStandard
+  MLIRAMDGPUToROCDL
+  MLIRArithToLLVM
+  MLIRFuncToLLVM
   MLIRGPUToLLVMSPV
   MLIRGPUToNVVMTransforms
+  MLIRGPUToROCDLTransforms
   MLIRIndexToLLVM
   MLIRMathToLLVM
   MLIRMathToXeVM
diff --git a/mlir/lib/Dialect/GPU/Pipelines/GPUToROCDLPipeline.cpp b/mlir/lib/Dialect/GPU/Pipelines/GPUToROCDLPipeline.cpp
new file mode 100644
index 0000000000000..398cd59ec919e
--- /dev/null
+++ b/mlir/lib/Dialect/GPU/Pipelines/GPUToROCDLPipeline.cpp
@@ -0,0 +1,136 @@
+//===- GPUToROCDLPipeline.cpp - Lowering pipeline to ROCDL/AMDGPU --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a sink pipeline that lowers a payload containing
+// `gpu.launch` / `gpu.module` ops to AMDGPU/ROCDL and emits an AMDGCN binary
+// blob via `gpu-module-to-binary`. It is the AMD counterpart of
+// `gpu-lower-to-nvvm-pipeline` and `gpu-lower-to-xevm-pipeline`.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.h"
+#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
+#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
+#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h"
+#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
+#include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h"
+#include "mlir/Conversion/GPUToROCDL/Runtimes.h"
+#include "mlir/Conversion/IndexToLLVM/IndexToLLVM.h"
+#include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
+#include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h"
+#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
+#include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/GPU/Pipelines/Passes.h"
+#include "mlir/Dialect/GPU/Transforms/Passes.h"
+#include "mlir/Dialect/MemRef/Transforms/Passes.h"
+#include "mlir/Pass/PassManager.h"
+#include "mlir/Pass/PassOptions.h"
+#include "mlir/Transforms/Passes.h"
+
+using namespace mlir;
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// Common pipeline
+//===----------------------------------------------------------------------===//
+void buildCommonPassPipeline(
+    OpPassManager &pm, const mlir::gpu::GPUToROCDLPipelineOptions &options) {
+  // Lower AMDGPU dialect ops (e.g. amdgpu.lds_barrier, amdgpu.dpp,
+  // amdgpu.mfma, amdgpu.dot, ...) to ROCDL intrinsics first, while they may
+  // still live in not-yet-outlined `gpu.launch` bodies. This mirrors how the
+  // NVVM pipeline runs `convert-nvgpu-to-nvvm` before kernel outlining.
+  ConvertAMDGPUToROCDLPassOptions amdgpuToROCDLOpt;
+  amdgpuToROCDLOpt.chipset = options.hsacoChip;
+  pm.addPass(createConvertAMDGPUToROCDLPass(amdgpuToROCDLOpt));
+
+  pm.addPass(createGpuKernelOutliningPass());
+  pm.addPass(createConvertVectorToSCFPass());
+  pm.addPass(createSCFToControlFlowPass());
+  pm.addPass(createConvertFuncToLLVMPass());
+  pm.addPass(memref::createExpandStridedMetadataPass());
+
+  GpuROCDLAttachTargetOptions rocdlTargetOptions;
+  rocdlTargetOptions.triple = options.hsacoTriple;
+  rocdlTargetOptions.chip = options.hsacoChip;
+  rocdlTargetOptions.features = options.hsacoFeatures;
+  rocdlTargetOptions.abiVersion = options.hsacoAbiVersion;
+  rocdlTargetOptions.optLevel = options.optLevel;
+  rocdlTargetOptions.wave64Flag = options.hsacoWave64;
+  pm.addPass(createGpuROCDLAttachTarget(rocdlTargetOptions));
+
+  pm.addPass(createLowerAffinePass());
+  pm.addPass(createArithToLLVMConversionPass());
+  ConvertIndexToLLVMPassOptions convertIndexToLLVMPassOpt;
+  convertIndexToLLVMPassOpt.indexBitwidth = options.indexBitWidth;
+  pm.addPass(createConvertIndexToLLVMPass(convertIndexToLLVMPassOpt));
+  pm.addPass(createCanonicalizerPass());
+  pm.addPass(createCSEPass());
+}
+
+//===----------------------------------------------------------------------===//
+// GPUModule-specific stuff.
+//===----------------------------------------------------------------------===//
+void buildGpuPassPipeline(OpPassManager &pm,
+                          const mlir::gpu::GPUToROCDLPipelineOptions &options) {
+  ConvertGpuOpsToROCDLOpsOptions opt;
+  opt.chipset = options.hsacoChip;
+  opt.useBarePtrCallConv = options.kernelUseBarePtrCallConv;
+  opt.indexBitwidth = options.indexBitWidth;
+  // Always declare HIP as the runtime so that gpu.printf etc. lower to the
+  // matching runtime entry points exposed by `libmlir_rocm_runtime.so`.
+  opt.runtime = mlir::gpu::amd::Runtime::HIP;
+  pm.addNestedPass<gpu::GPUModuleOp>(createConvertGpuOpsToROCDLOps(opt));
+  pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass());
+  pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
+  pm.addNestedPass<gpu::GPUModuleOp>(createReconcileUnrealizedCastsPass());
+}
+
+//===----------------------------------------------------------------------===//
+// Host Post-GPU pipeline
+//===----------------------------------------------------------------------===//
+void buildHostPostPipeline(
+    OpPassManager &pm, const mlir::gpu::GPUToROCDLPipelineOptions &options) {
+  GpuToLLVMConversionPassOptions opt;
+  opt.hostBarePtrCallConv = options.hostUseBarePtrCallConv;
+  opt.kernelBarePtrCallConv = options.kernelUseBarePtrCallConv;
+  pm.addPass(createGpuToLLVMConversionPass(opt));
+
+  GpuModuleToBinaryPassOptions gpuModuleToBinaryPassOptions;
+  gpuModuleToBinaryPassOptions.compilationTarget = options.binaryFormat;
+  gpuModuleToBinaryPassOptions.cmdOptions = options.cmdOptions;
+  pm.addPass(createGpuModuleToBinaryPass(gpuModuleToBinaryPassOptions));
+  pm.addPass(createConvertMathToLLVMPass());
+  pm.addPass(createCanonicalizerPass());
+  pm.addPass(createCSEPass());
+  pm.addPass(createReconcileUnrealizedCastsPass());
+}
+
+} // namespace
+
+void mlir::gpu::buildLowerToROCDLPassPipeline(
+    OpPassManager &pm, const GPUToROCDLPipelineOptions &options) {
+  // Phase 1: top-level passes (kernel outlining, ROCDL target attach, ...).
+  buildCommonPassPipeline(pm, options);
+
+  // Phase 2: passes nested on gpu.module ops (GPU-to-ROCDL conversion).
+  buildGpuPassPipeline(pm, options);
+
+  // Phase 3: host-side GPU-to-LLVM conversion and GPU binary emission.
+  buildHostPostPipeline(pm, options);
+}
+
+void mlir::gpu::registerGPUToROCDLPipeline() {
+  PassPipelineRegistration<GPUToROCDLPipelineOptions>(
+      "gpu-lower-to-rocdl-pipeline",
+      "The default pipeline lowers main dialects (arith, memref, scf, vector, "
+      "gpu) to ROCDL. It starts by lowering GPU code to the specified "
+      "compilation target (default is fatbin) then lowers the host code.",
+      buildLowerToROCDLPassPipeline);
+}
diff --git a/mlir/lib/RegisterAllPasses.cpp b/mlir/lib/RegisterAllPasses.cpp
index e1d5b1236c8a6..c645d737cb766 100644
--- a/mlir/lib/RegisterAllPasses.cpp
+++ b/mlir/lib/RegisterAllPasses.cpp
@@ -100,5 +100,6 @@ void mlir::registerAllPasses() {
   sparse_tensor::registerSparseTensorPipelines();
   tosa::registerTosaToLinalgPipelines();
   gpu::registerGPUToNVVMPipeline();
+  gpu::registerGPUToROCDLPipeline();
   gpu::registerGPUToXeVMPipeline();
 }
diff --git a/mlir/test/Integration/GPU/ROCM/gpu-lower-to-rocdl-pipeline.mlir b/mlir/test/Integration/GPU/ROCM/gpu-lower-to-rocdl-pipeline.mlir
new file mode 100644
index 0000000000000..8e1cd6371a677
--- /dev/null
+++ b/mlir/test/Integration/GPU/ROCM/gpu-lower-to-rocdl-pipeline.mlir
@@ -0,0 +1,69 @@
+// RUN: mlir-opt %s \
+// RUN:   --gpu-lower-to-rocdl-pipeline="hsaco-chip=%chip" \
+// RUN: | mlir-runner \
+// RUN:   --shared-libs=%mlir_rocm_runtime \
+// RUN:   --shared-libs=%mlir_runner_utils \
+// RUN:   --entry-point-result=void \
+// RUN: | FileCheck %s
+
+// Mirror image of `vecadd.mlir`, but lowered through the
+// `gpu-lower-to-rocdl-pipeline` meta-pass instead of the hand-rolled pass
+// pipeline. Verifies that a single `--gpu-lower-to-rocdl-pipeline` invocation
+// reproduces the same numeric output the multi-step recipe used to require.
+//
+// The kernel intentionally embeds an `amdgpu.sched_barrier` op (semantically a
+// no-op, lowers to `rocdl.sched.barrier`) so that the test also exercises the
+// `convert-amdgpu-to-rocdl` step that runs first inside the meta-pass.
+
+func.func @vecadd(%arg0 : memref<5xf32>, %arg1 : memref<5xf32>, %arg2 : memref<5xf32>) {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %block_dim = arith.constant 5 : index
+  gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1)
+             threads(%tx, %ty, %tz) in (%block_x = %block_dim, %block_y = %c1, %block_z = %c1) {
+    %a = memref.load %arg0[%tx] : memref<5xf32>
+    %b = memref.load %arg1[%tx] : memref<5xf32>
+    amdgpu.sched_barrier allow = <none>
+    %c = arith.addf %a, %b : f32
+    memref.store %c, %arg2[%tx] : memref<5xf32>
+    gpu.terminator
+  }
+  return
+}
+
+// CHECK: [2.46, 2.46, 2.46, 2.46, 2.46]
+func.func @main() {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %c5 = arith.constant 5 : index
+  %cf1dot23 = arith.constant 1.23 : f32
+  %0 = memref.alloc() : memref<5xf32>
+  %1 = memref.alloc() : memref<5xf32>
+  %2 = memref.alloc() : memref<5xf32>
+  %3 = memref.cast %0 : memref<5xf32> to memref<?xf32>
+  %4 = memref.cast %1 : memref<5xf32> to memref<?xf32>
+  %5 = memref.cast %2 : memref<5xf32> to memref<?xf32>
+  scf.for %i = %c0 to %c5 step %c1 {
+    memref.store %cf1dot23, %3[%i] : memref<?xf32>
+    memref.store %cf1dot23, %4[%i] : memref<?xf32>
+  }
+  %6 = memref.cast %3 : memref<?xf32> to memref<*xf32>
+  %7 = memref.cast %4 : memref<?xf32> to memref<*xf32>
+  %8 = memref.cast %5 : memref<?xf32> to memref<*xf32>
+  gpu.host_register %6 : memref<*xf32>
+  gpu.host_register %7 : memref<*xf32>
+  gpu.host_register %8 : memref<*xf32>
+  %9 = call @mgpuMemGetDeviceMemRef1dFloat(%3) : (memref<?xf32>) -> (memref<?xf32>)
+  %10 = call @mgpuMemGetDeviceMemRef1dFloat(%4) : (memref<?xf32>) -> (memref<?xf32>)
+  %11 = call @mgpuMemGetDeviceMemRef1dFloat(%5) : (memref<?xf32>) -> (memref<?xf32>)
+  %12 = memref.cast %9 : memref<?xf32> to memref<5xf32>
+  %13 = memref.cast %10 : memref<?xf32> to memref<5xf32>
+  %14 = memref.cast %11 : memref<?xf32> to memref<5xf32>
+
+  call @vecadd(%12, %13, %14) : (memref<5xf32>, memref<5xf32>, memref<5xf32>) -> ()
+  call @printMemrefF32(%8) : (memref<*xf32>) -> ()
+  return
+}
+
+func.func private @mgpuMemGetDeviceMemRef1dFloat(%ptr : memref<?xf32>) -> (memref<?xf32>)
+func.func private @printMemrefF32(%ptr : memref<*xf32>)

fabianmcg

LGTM!

fabianmcg · 2026-05-09T23:42:35Z

+                     "should be 64 until the GPU layering is fixed)"),
+      llvm::cl::init(64)};
+  PassOptions::Option<std::string> hsacoTriple{
+      *this, "hsaco-triple",


Why the hsaco prefix? It looks unnecessary.

dropped the prefix

fabianmcg · 2026-05-09T23:44:40Z

+      llvm::cl::init("amdgcn-amd-amdhsa")};
+  PassOptions::Option<std::string> hsacoChip{
+      *this, "hsaco-chip", llvm::cl::desc("Chip to use to serialize to hsaco."),
+      llvm::cl::init("gfx900")};


I know the target attr uses this as the default, but I'm not sure it makes sense to have it. In the NVVM pipeline it makes sense because of PTX, but I think this is a footgun for users new to AMDGPUs, so I'd propose removing it.

Removed the default value; it's now explicitly required here. FYI, the underlying passes still have their own default values if invoked separately.

Add an AMDGPU counterpart to gpu-lower-to-nvvm-pipeline and gpu-lower-to-xevm-pipeline. The new pass takes a payload that contains gpu.launch / gpu.module ops (optionally with AMDGPU dialect ops embedded in the device code) all the way to an AMDGCN binary embedded in a gpu.binary op, ready to be launched via libmlir_rocm_runtime.so. Until now ROCDL users had to hand-compose ~10 passes (convert-amdgpu-to-rocdl, gpu-kernel-outlining, convert-vector-to-scf, convert-scf-to-cf, convert-func-to-llvm, expand-strided-metadata, rocdl-attach-target, lower-affine, convert-arith-to-llvm, convert-index-to-llvm, gpu.module(convert-gpu-to-rocdl, canonicalize, cse, reconcile-unrealized-casts), gpu-to-llvm, gpu-module-to-binary, convert-math-to-llvm, reconcile-unrealized-casts) to do what NVVM and XeVM users get from a single pipeline option. This patch reuses the same three-phase structure as GPUToNVVMPipeline.cpp (pre-GPU common, gpu.module-nested, host-post) and runs convert-amdgpu-to-rocdl as the first step (mirroring NVVM's placement of convert-nvgpu-to-nvvm), so that AMDGPU dialect ops (amdgpu.dpp, amdgpu.mfma, amdgpu.dot, amdgpu.sched_barrier, amdgpu.lds_barrier, amdgpu.raw_buffer_*, amdgpu.fat_raw_buffer_cast, ...) lower correctly without any extra setup. The pipeline exposes the AMDGPU-codegen knobs needed at the boundary (triple, chip, features, abi, wave64, opt-level, rocdl-cmd-options, {kernel,host}-bare-ptr-calling-convention, index-bitwidth) and threads chip through both rocdl-attach-target and convert-amdgpu-to-rocdl as the chipset. chip has no default: AMDGCN binaries are not forward-compatible across chip families, so silently codegen'ing for some hard-coded chip would be a footgun; passing nothing produces a clear "Invalid chipset name" diagnostic from convert-amdgpu-to-rocdl.
The output container is selected via binary-format (matching the XeVM pipeline's naming, since the option is vendor-agnostic and accepts fatbin/binary/isa/llvm/offloading rather than only hsaco). Hooked into mlir::registerAllPasses(). Tested end-to-end on a Radeon Pro W7900 (gfx1100): the new lit test under test/Integration/GPU/ROCM/ runs the same vecadd payload as the existing vecadd.mlir but lowers it through a single --gpu-lower-to-rocdl-pipeline=chip=%chip invocation. The kernel embeds an amdgpu.sched_barrier op so the test also exercises the convert-amdgpu-to-rocdl step inside the meta-pass. All six tests in test/Integration/GPU/ROCM/ pass.

Add `gpu-lower-to-rocdl-pipeline` meta-pass which lowers common MLIR dialects (gpu/arith/scf/vector) to binary, similar to the existing XeVM/NVVM pipelines.

Hardcode84 force-pushed the rocdl-pipeline branch from 6a2d304 to f215e30 Compare May 9, 2026 20:45

Hardcode84 marked this pull request as ready for review May 9, 2026 20:46

Hardcode84 requested a review from fabianmcg as a code owner May 9, 2026 20:46

llvmorg-github-actions Bot added mlir:gpu mlir labels May 9, 2026

Hardcode84 requested review from krzysz00 and kuhar May 9, 2026 20:46

fabianmcg approved these changes May 9, 2026

View reviewed changes

Hardcode84 force-pushed the rocdl-pipeline branch from f215e30 to 037caaa Compare May 10, 2026 07:41

Hardcode84 merged commit 34502b0 into llvm:main May 11, 2026
10 checks passed

Hardcode84 deleted the rocdl-pipeline branch May 11, 2026 11:51

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[MLIR][GPU] Add gpu-lower-to-rocdl-pipeline meta-pass#196751

[MLIR][GPU] Add gpu-lower-to-rocdl-pipeline meta-pass#196751
Hardcode84 merged 1 commit into
llvm:mainfrom
Hardcode84:rocdl-pipeline

Hardcode84 commented May 9, 2026

Uh oh!

llvmorg-github-actions Bot commented May 9, 2026 •

edited

Loading

Uh oh!

fabianmcg left a comment

Uh oh!

fabianmcg May 9, 2026

Uh oh!

Hardcode84 May 10, 2026

Uh oh!

fabianmcg May 9, 2026

Uh oh!

Hardcode84 May 10, 2026 •

edited

Loading

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants

Conversation

Hardcode84 commented May 9, 2026

Uh oh!

llvmorg-github-actions Bot commented May 9, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

fabianmcg left a comment

Choose a reason for hiding this comment

Uh oh!

fabianmcg May 9, 2026

Choose a reason for hiding this comment

Uh oh!

Hardcode84 May 10, 2026

Choose a reason for hiding this comment

Uh oh!

fabianmcg May 9, 2026

Choose a reason for hiding this comment

Uh oh!

Hardcode84 May 10, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants

llvmorg-github-actions Bot commented May 9, 2026 •

edited

Loading

Hardcode84 May 10, 2026 •

edited

Loading