Skip to content

Conversation

@CoTinker
Copy link
Contributor

@CoTinker CoTinker commented Dec 10, 2025

This PR lowers the `gpu.global_id` op using the arith dialect instead of the index dialect. Fixes #171303.

This PR adds the missing dependent index dialect
for the ConvertGpuOpsToNVVMOps and ConvertGpuOpsToROCDLOps passes.
@llvmbot
Copy link
Member

llvmbot commented Dec 10, 2025

@llvm/pr-subscribers-mlir

Author: Longsheng Mou (CoTinker)

Changes

This PR adds the missing dependent index dialect for the ConvertGpuOpsToNVVMOps and ConvertGpuOpsToROCDLOps passes. Fixes #171303.


Full diff: https://github.com/llvm/llvm-project/pull/171614.diff

4 Files Affected:

  • (modified) mlir/include/mlir/Conversion/Passes.td (+2)
  • (modified) mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp (+1)
  • (modified) mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp (+1)
  • (added) mlir/test/Conversion/GPUCommon/lower-global-id.mlir (+33)
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index fcbaf3ccc1486..fc8c3b2e1c169 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -618,6 +618,7 @@ def ConvertGpuOpsToNVVMOps : Pass<"convert-gpu-to-nvvm", "gpu::GPUModuleOp"> {
     "cf::ControlFlowDialect",
     "memref::MemRefDialect",
     "NVVM::NVVMDialect",
+    "index::IndexDialect"
   ];
   let options = [
     Option<"indexBitwidth", "index-bitwidth", "unsigned",
@@ -647,6 +648,7 @@ def ConvertGpuOpsToROCDLOps : Pass<"convert-gpu-to-rocdl", "gpu::GPUModuleOp"> {
     "amdgpu::AMDGPUDialect",
     "cf::ControlFlowDialect",
     "memref::MemRefDialect",
+    "index::IndexDialect"
   ];
   let options = [
     Option<"chipset", "chipset", "std::string",
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index 5848489274c13..4432b06bf1560 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -23,6 +23,7 @@
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/GPU/Transforms/Passes.h"
+#include "mlir/Dialect/Index/IR/IndexDialect.h"
 #include "mlir/Dialect/LLVMIR/NVVMDialect.h"
 #include "mlir/Dialect/Math/IR/Math.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index c03f3a5d3889c..3c356ee75f79d 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -31,6 +31,7 @@
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/GPU/Transforms/Passes.h"
+#include "mlir/Dialect/Index/IR/IndexDialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
diff --git a/mlir/test/Conversion/GPUCommon/lower-global-id.mlir b/mlir/test/Conversion/GPUCommon/lower-global-id.mlir
new file mode 100644
index 0000000000000..b0274e0f9f290
--- /dev/null
+++ b/mlir/test/Conversion/GPUCommon/lower-global-id.mlir
@@ -0,0 +1,33 @@
+// RUN: mlir-opt %s -split-input-file -convert-gpu-to-rocdl | FileCheck %s --check-prefixes=ROCDL
+// RUN: mlir-opt %s -split-input-file -convert-gpu-to-nvvm | FileCheck %s --check-prefixes=NVVM
+
+gpu.module @kernel {
+  gpu.func @gpu_global_id() -> (index) {
+    %global_id_x = gpu.global_id x
+    gpu.return %global_id_x : index
+  }
+}
+
+// ROCDL-LABEL:   llvm.func @gpu_global_id() -> i64 {
+// ROCDL:           %[[WORKGROUP_0:.*]] = rocdl.workgroup.id.x : i32
+// ROCDL:           %[[SEXT_0:.*]] = llvm.sext %[[WORKGROUP_0]] : i32 to i64
+// ROCDL:           %[[WORKGROUP_1:.*]] = rocdl.workgroup.dim.x : i32
+// ROCDL:           %[[SEXT_1:.*]] = llvm.sext %[[WORKGROUP_1]] : i32 to i64
+// ROCDL:           %[[MUL_0:.*]] = llvm.mul %[[SEXT_0]], %[[SEXT_1]] : i64
+// ROCDL:           %[[WORKITEM_0:.*]] = rocdl.workitem.id.x : i32
+// ROCDL:           %[[SEXT_2:.*]] = llvm.sext %[[WORKITEM_0]] : i32 to i64
+// ROCDL:           %[[ADD_0:.*]] = llvm.add %[[SEXT_2]], %[[MUL_0]] : i64
+// ROCDL:           llvm.return %[[ADD_0]] : i64
+// ROCDL:         }
+
+// NVVM-LABEL:   llvm.func @gpu_global_id() -> i64 {
+// NVVM:           %[[READ_0:.*]] = nvvm.read.ptx.sreg.ctaid.x : i32
+// NVVM:           %[[SEXT_0:.*]] = llvm.sext %[[READ_0]] : i32 to i64
+// NVVM:           %[[READ_1:.*]] = nvvm.read.ptx.sreg.ntid.x : i32
+// NVVM:           %[[SEXT_1:.*]] = llvm.sext %[[READ_1]] : i32 to i64
+// NVVM:           %[[MUL_0:.*]] = llvm.mul %[[SEXT_0]], %[[SEXT_1]] : i64
+// NVVM:           %[[READ_2:.*]] = nvvm.read.ptx.sreg.tid.x : i32
+// NVVM:           %[[SEXT_2:.*]] = llvm.sext %[[READ_2]] : i32 to i64
+// NVVM:           %[[ADD_0:.*]] = llvm.add %[[SEXT_2]], %[[MUL_0]] : i64
+// NVVM:           llvm.return %[[ADD_0]] : i64
+// NVVM:         }

@llvmbot
Copy link
Member

llvmbot commented Dec 10, 2025

@llvm/pr-subscribers-mlir-gpu

Author: Longsheng Mou (CoTinker)

Changes

This PR adds the missing dependent index dialect for the ConvertGpuOpsToNVVMOps and ConvertGpuOpsToROCDLOps passes. Fixes #171303.


Full diff: https://github.com/llvm/llvm-project/pull/171614.diff

4 Files Affected:

  • (modified) mlir/include/mlir/Conversion/Passes.td (+2)
  • (modified) mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp (+1)
  • (modified) mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp (+1)
  • (added) mlir/test/Conversion/GPUCommon/lower-global-id.mlir (+33)
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index fcbaf3ccc1486..fc8c3b2e1c169 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -618,6 +618,7 @@ def ConvertGpuOpsToNVVMOps : Pass<"convert-gpu-to-nvvm", "gpu::GPUModuleOp"> {
     "cf::ControlFlowDialect",
     "memref::MemRefDialect",
     "NVVM::NVVMDialect",
+    "index::IndexDialect"
   ];
   let options = [
     Option<"indexBitwidth", "index-bitwidth", "unsigned",
@@ -647,6 +648,7 @@ def ConvertGpuOpsToROCDLOps : Pass<"convert-gpu-to-rocdl", "gpu::GPUModuleOp"> {
     "amdgpu::AMDGPUDialect",
     "cf::ControlFlowDialect",
     "memref::MemRefDialect",
+    "index::IndexDialect"
   ];
   let options = [
     Option<"chipset", "chipset", "std::string",
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index 5848489274c13..4432b06bf1560 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -23,6 +23,7 @@
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/GPU/Transforms/Passes.h"
+#include "mlir/Dialect/Index/IR/IndexDialect.h"
 #include "mlir/Dialect/LLVMIR/NVVMDialect.h"
 #include "mlir/Dialect/Math/IR/Math.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index c03f3a5d3889c..3c356ee75f79d 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -31,6 +31,7 @@
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/GPU/Transforms/Passes.h"
+#include "mlir/Dialect/Index/IR/IndexDialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
diff --git a/mlir/test/Conversion/GPUCommon/lower-global-id.mlir b/mlir/test/Conversion/GPUCommon/lower-global-id.mlir
new file mode 100644
index 0000000000000..b0274e0f9f290
--- /dev/null
+++ b/mlir/test/Conversion/GPUCommon/lower-global-id.mlir
@@ -0,0 +1,33 @@
+// RUN: mlir-opt %s -split-input-file -convert-gpu-to-rocdl | FileCheck %s --check-prefixes=ROCDL
+// RUN: mlir-opt %s -split-input-file -convert-gpu-to-nvvm | FileCheck %s --check-prefixes=NVVM
+
+gpu.module @kernel {
+  gpu.func @gpu_global_id() -> (index) {
+    %global_id_x = gpu.global_id x
+    gpu.return %global_id_x : index
+  }
+}
+
+// ROCDL-LABEL:   llvm.func @gpu_global_id() -> i64 {
+// ROCDL:           %[[WORKGROUP_0:.*]] = rocdl.workgroup.id.x : i32
+// ROCDL:           %[[SEXT_0:.*]] = llvm.sext %[[WORKGROUP_0]] : i32 to i64
+// ROCDL:           %[[WORKGROUP_1:.*]] = rocdl.workgroup.dim.x : i32
+// ROCDL:           %[[SEXT_1:.*]] = llvm.sext %[[WORKGROUP_1]] : i32 to i64
+// ROCDL:           %[[MUL_0:.*]] = llvm.mul %[[SEXT_0]], %[[SEXT_1]] : i64
+// ROCDL:           %[[WORKITEM_0:.*]] = rocdl.workitem.id.x : i32
+// ROCDL:           %[[SEXT_2:.*]] = llvm.sext %[[WORKITEM_0]] : i32 to i64
+// ROCDL:           %[[ADD_0:.*]] = llvm.add %[[SEXT_2]], %[[MUL_0]] : i64
+// ROCDL:           llvm.return %[[ADD_0]] : i64
+// ROCDL:         }
+
+// NVVM-LABEL:   llvm.func @gpu_global_id() -> i64 {
+// NVVM:           %[[READ_0:.*]] = nvvm.read.ptx.sreg.ctaid.x : i32
+// NVVM:           %[[SEXT_0:.*]] = llvm.sext %[[READ_0]] : i32 to i64
+// NVVM:           %[[READ_1:.*]] = nvvm.read.ptx.sreg.ntid.x : i32
+// NVVM:           %[[SEXT_1:.*]] = llvm.sext %[[READ_1]] : i32 to i64
+// NVVM:           %[[MUL_0:.*]] = llvm.mul %[[SEXT_0]], %[[SEXT_1]] : i64
+// NVVM:           %[[READ_2:.*]] = nvvm.read.ptx.sreg.tid.x : i32
+// NVVM:           %[[SEXT_2:.*]] = llvm.sext %[[READ_2]] : i32 to i64
+// NVVM:           %[[ADD_0:.*]] = llvm.add %[[SEXT_2]], %[[MUL_0]] : i64
+// NVVM:           llvm.return %[[ADD_0]] : i64
+// NVVM:         }

@CoTinker CoTinker changed the title from "[mlir][gpu] Add missing dependent dialect" to "[mlir][gpu] Use arith dialect to lower gpu.global_id" on Dec 12, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

[MLIR] convert-gpu-to-nvvm crashes with "index.mul created with unregistered dialect"

4 participants