-
Notifications
You must be signed in to change notification settings - Fork 15.5k
[mlir][gpu] Use arith dialect to lower gpu.global_id
#171614
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
This PR adds the missing dependent index dialect to the ConvertGpuOpsToNVVMOps and ConvertGpuOpsToROCDLOps passes.
|
@llvm/pr-subscribers-mlir Author: Longsheng Mou (CoTinker) Changes: This PR adds the missing dependent index dialect to the ConvertGpuOpsToNVVMOps and ConvertGpuOpsToROCDLOps passes. Full diff: https://github.com/llvm/llvm-project/pull/171614.diff 4 Files Affected:
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index fcbaf3ccc1486..fc8c3b2e1c169 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -618,6 +618,7 @@ def ConvertGpuOpsToNVVMOps : Pass<"convert-gpu-to-nvvm", "gpu::GPUModuleOp"> {
"cf::ControlFlowDialect",
"memref::MemRefDialect",
"NVVM::NVVMDialect",
+ "index::IndexDialect"
];
let options = [
Option<"indexBitwidth", "index-bitwidth", "unsigned",
@@ -647,6 +648,7 @@ def ConvertGpuOpsToROCDLOps : Pass<"convert-gpu-to-rocdl", "gpu::GPUModuleOp"> {
"amdgpu::AMDGPUDialect",
"cf::ControlFlowDialect",
"memref::MemRefDialect",
+ "index::IndexDialect"
];
let options = [
Option<"chipset", "chipset", "std::string",
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index 5848489274c13..4432b06bf1560 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -23,6 +23,7 @@
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
+#include "mlir/Dialect/Index/IR/IndexDialect.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Dialect/Math/IR/Math.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index c03f3a5d3889c..3c356ee75f79d 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -31,6 +31,7 @@
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
+#include "mlir/Dialect/Index/IR/IndexDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
diff --git a/mlir/test/Conversion/GPUCommon/lower-global-id.mlir b/mlir/test/Conversion/GPUCommon/lower-global-id.mlir
new file mode 100644
index 0000000000000..b0274e0f9f290
--- /dev/null
+++ b/mlir/test/Conversion/GPUCommon/lower-global-id.mlir
@@ -0,0 +1,33 @@
+// RUN: mlir-opt %s -split-input-file -convert-gpu-to-rocdl | FileCheck %s --check-prefixes=ROCDL
+// RUN: mlir-opt %s -split-input-file -convert-gpu-to-nvvm | FileCheck %s --check-prefixes=NVVM
+
+gpu.module @kernel {
+ gpu.func @gpu_global_id() -> (index) {
+ %global_id_x = gpu.global_id x
+ gpu.return %global_id_x : index
+ }
+}
+
+// ROCDL-LABEL: llvm.func @gpu_global_id() -> i64 {
+// ROCDL: %[[WORKGROUP_0:.*]] = rocdl.workgroup.id.x : i32
+// ROCDL: %[[SEXT_0:.*]] = llvm.sext %[[WORKGROUP_0]] : i32 to i64
+// ROCDL: %[[WORKGROUP_1:.*]] = rocdl.workgroup.dim.x : i32
+// ROCDL: %[[SEXT_1:.*]] = llvm.sext %[[WORKGROUP_1]] : i32 to i64
+// ROCDL: %[[MUL_0:.*]] = llvm.mul %[[SEXT_0]], %[[SEXT_1]] : i64
+// ROCDL: %[[WORKITEM_0:.*]] = rocdl.workitem.id.x : i32
+// ROCDL: %[[SEXT_2:.*]] = llvm.sext %[[WORKITEM_0]] : i32 to i64
+// ROCDL: %[[ADD_0:.*]] = llvm.add %[[SEXT_2]], %[[MUL_0]] : i64
+// ROCDL: llvm.return %[[ADD_0]] : i64
+// ROCDL: }
+
+// NVVM-LABEL: llvm.func @gpu_global_id() -> i64 {
+// NVVM: %[[READ_0:.*]] = nvvm.read.ptx.sreg.ctaid.x : i32
+// NVVM: %[[SEXT_0:.*]] = llvm.sext %[[READ_0]] : i32 to i64
+// NVVM: %[[READ_1:.*]] = nvvm.read.ptx.sreg.ntid.x : i32
+// NVVM: %[[SEXT_1:.*]] = llvm.sext %[[READ_1]] : i32 to i64
+// NVVM: %[[MUL_0:.*]] = llvm.mul %[[SEXT_0]], %[[SEXT_1]] : i64
+// NVVM: %[[READ_2:.*]] = nvvm.read.ptx.sreg.tid.x : i32
+// NVVM: %[[SEXT_2:.*]] = llvm.sext %[[READ_2]] : i32 to i64
+// NVVM: %[[ADD_0:.*]] = llvm.add %[[SEXT_2]], %[[MUL_0]] : i64
+// NVVM: llvm.return %[[ADD_0]] : i64
+// NVVM: }
|
|
@llvm/pr-subscribers-mlir-gpu Author: Longsheng Mou (CoTinker) Changes: This PR adds the missing dependent index dialect to the ConvertGpuOpsToNVVMOps and ConvertGpuOpsToROCDLOps passes. Full diff: https://github.com/llvm/llvm-project/pull/171614.diff 4 Files Affected:
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index fcbaf3ccc1486..fc8c3b2e1c169 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -618,6 +618,7 @@ def ConvertGpuOpsToNVVMOps : Pass<"convert-gpu-to-nvvm", "gpu::GPUModuleOp"> {
"cf::ControlFlowDialect",
"memref::MemRefDialect",
"NVVM::NVVMDialect",
+ "index::IndexDialect"
];
let options = [
Option<"indexBitwidth", "index-bitwidth", "unsigned",
@@ -647,6 +648,7 @@ def ConvertGpuOpsToROCDLOps : Pass<"convert-gpu-to-rocdl", "gpu::GPUModuleOp"> {
"amdgpu::AMDGPUDialect",
"cf::ControlFlowDialect",
"memref::MemRefDialect",
+ "index::IndexDialect"
];
let options = [
Option<"chipset", "chipset", "std::string",
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index 5848489274c13..4432b06bf1560 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -23,6 +23,7 @@
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
+#include "mlir/Dialect/Index/IR/IndexDialect.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Dialect/Math/IR/Math.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index c03f3a5d3889c..3c356ee75f79d 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -31,6 +31,7 @@
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
+#include "mlir/Dialect/Index/IR/IndexDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
diff --git a/mlir/test/Conversion/GPUCommon/lower-global-id.mlir b/mlir/test/Conversion/GPUCommon/lower-global-id.mlir
new file mode 100644
index 0000000000000..b0274e0f9f290
--- /dev/null
+++ b/mlir/test/Conversion/GPUCommon/lower-global-id.mlir
@@ -0,0 +1,33 @@
+// RUN: mlir-opt %s -split-input-file -convert-gpu-to-rocdl | FileCheck %s --check-prefixes=ROCDL
+// RUN: mlir-opt %s -split-input-file -convert-gpu-to-nvvm | FileCheck %s --check-prefixes=NVVM
+
+gpu.module @kernel {
+ gpu.func @gpu_global_id() -> (index) {
+ %global_id_x = gpu.global_id x
+ gpu.return %global_id_x : index
+ }
+}
+
+// ROCDL-LABEL: llvm.func @gpu_global_id() -> i64 {
+// ROCDL: %[[WORKGROUP_0:.*]] = rocdl.workgroup.id.x : i32
+// ROCDL: %[[SEXT_0:.*]] = llvm.sext %[[WORKGROUP_0]] : i32 to i64
+// ROCDL: %[[WORKGROUP_1:.*]] = rocdl.workgroup.dim.x : i32
+// ROCDL: %[[SEXT_1:.*]] = llvm.sext %[[WORKGROUP_1]] : i32 to i64
+// ROCDL: %[[MUL_0:.*]] = llvm.mul %[[SEXT_0]], %[[SEXT_1]] : i64
+// ROCDL: %[[WORKITEM_0:.*]] = rocdl.workitem.id.x : i32
+// ROCDL: %[[SEXT_2:.*]] = llvm.sext %[[WORKITEM_0]] : i32 to i64
+// ROCDL: %[[ADD_0:.*]] = llvm.add %[[SEXT_2]], %[[MUL_0]] : i64
+// ROCDL: llvm.return %[[ADD_0]] : i64
+// ROCDL: }
+
+// NVVM-LABEL: llvm.func @gpu_global_id() -> i64 {
+// NVVM: %[[READ_0:.*]] = nvvm.read.ptx.sreg.ctaid.x : i32
+// NVVM: %[[SEXT_0:.*]] = llvm.sext %[[READ_0]] : i32 to i64
+// NVVM: %[[READ_1:.*]] = nvvm.read.ptx.sreg.ntid.x : i32
+// NVVM: %[[SEXT_1:.*]] = llvm.sext %[[READ_1]] : i32 to i64
+// NVVM: %[[MUL_0:.*]] = llvm.mul %[[SEXT_0]], %[[SEXT_1]] : i64
+// NVVM: %[[READ_2:.*]] = nvvm.read.ptx.sreg.tid.x : i32
+// NVVM: %[[SEXT_2:.*]] = llvm.sext %[[READ_2]] : i32 to i64
+// NVVM: %[[ADD_0:.*]] = llvm.add %[[SEXT_2]], %[[MUL_0]] : i64
+// NVVM: llvm.return %[[ADD_0]] : i64
+// NVVM: }
|
Use arith dialect to lower gpu.global_id
This PR lowers the `gpu.global_id` op using the arith dialect instead of the index dialect. Fixes #171303.