Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[mlir][gpu] Support dynamic_shared_memory Op with vector dialect #74475

Merged
merged 2 commits into from
Dec 6, 2023

Conversation

grypp
Copy link
Member

@grypp grypp commented Dec 5, 2023

gpu.dynamic_shared_memory currently does not get lowered when it is used with vector dialect. The reason is that vector-to-llvm conversion is not included in gpu-to-nvvm. This PR includes that and adds a test.

`gpu.dynamic_shared_memory` currently does not get lowered when it is used with vector dialect. The reason is that vector-to-llvm conversion is not included in gpu-to-nvvm. This PR includes that and adds a test.
@llvmbot
Copy link
Collaborator

llvmbot commented Dec 5, 2023

@llvm/pr-subscribers-mlir

Author: Guray Ozen (grypp)

Changes

gpu.dynamic_shared_memory currently does not get lowered when it is used with vector dialect. The reason is that vector-to-llvm conversion is not included in gpu-to-nvvm. This PR includes that and adds a test.


Full diff: https://github.com/llvm/llvm-project/pull/74475.diff

2 Files Affected:

  • (modified) mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp (+2)
  • (modified) mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir (+20)
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index 4855fd187eb58..0e978ca0a6424 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -21,6 +21,7 @@
 #include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
 #include "mlir/Conversion/LLVMCommon/TypeConverter.h"
 #include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
+#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
 #include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
@@ -282,6 +283,7 @@ struct LowerGpuOpsToNVVMOpsPass
     populateFinalizeMemRefToLLVMConversionPatterns(converter, llvmPatterns);
     populateGpuToNVVMConversionPatterns(converter, llvmPatterns);
     populateGpuWMMAToNVVMConversionPatterns(converter, llvmPatterns);
+    populateVectorToLLVMConversionPatterns(converter, llvmPatterns);
     if (this->hasRedux)
       populateGpuSubgroupReduceOpLoweringPattern(converter, llvmPatterns);
     LLVMConversionTarget target(getContext());
diff --git a/mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir b/mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir
index 14f5302ac2002..6d50770f53543 100644
--- a/mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir
@@ -46,3 +46,23 @@ gpu.module @kernel {
 //       CHECK: [[value:%.+]] = llvm.load
 //  CHECK-SAME:   : !llvm.ptr<1> -> f32
 //       CHECK: llvm.return [[value]]
+
+// -----
+
+gpu.module @kernel {
+  gpu.func @dynamic_shmem_with_vector(%arg1: memref<1xf32>) {
+    %0 = arith.constant 0 : index
+    %1 = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>
+    %2 = memref.view %1[%0][] : memref<?xi8, #gpu.address_space<workgroup>> to memref<1xf32, #gpu.address_space<workgroup>>
+    %3 = vector.load %2[%0] : memref<1xf32, #gpu.address_space<workgroup>>, vector<1xf32>
+    vector.store %3, %arg1[%0] : memref<1xf32>, vector<1xf32>
+    gpu.return
+  }
+}
+
+// ROCDL: llvm.mlir.global internal @__dynamic_shmem__0() {addr_space = 3 : i32} : !llvm.array<0 x i8>
+// NVVM: llvm.mlir.global internal @__dynamic_shmem__0() {addr_space = 3 : i32, alignment = 16 : i64} : !llvm.array<0 x i8>
+// CHECK-LABEL:  llvm.func @dynamic_shmem_with_vector
+// CHECK: llvm.mlir.addressof @__dynamic_shmem__0 : !llvm.ptr<3>
+// CHECK: llvm.load %{{.*}} {alignment = 4 : i64} : !llvm.ptr<3> -> vector<1xf32>
+// CHECK: llvm.store

@llvmbot
Copy link
Collaborator

llvmbot commented Dec 5, 2023

@llvm/pr-subscribers-mlir-gpu

Author: Guray Ozen (grypp)

Changes

gpu.dynamic_shared_memory currently does not get lowered when it is used with vector dialect. The reason is that vector-to-llvm conversion is not included in gpu-to-nvvm. This PR includes that and adds a test.


Full diff: https://github.com/llvm/llvm-project/pull/74475.diff

2 Files Affected:

  • (modified) mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp (+2)
  • (modified) mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir (+20)
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index 4855fd187eb58..0e978ca0a6424 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -21,6 +21,7 @@
 #include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
 #include "mlir/Conversion/LLVMCommon/TypeConverter.h"
 #include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
+#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
 #include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
@@ -282,6 +283,7 @@ struct LowerGpuOpsToNVVMOpsPass
     populateFinalizeMemRefToLLVMConversionPatterns(converter, llvmPatterns);
     populateGpuToNVVMConversionPatterns(converter, llvmPatterns);
     populateGpuWMMAToNVVMConversionPatterns(converter, llvmPatterns);
+    populateVectorToLLVMConversionPatterns(converter, llvmPatterns);
     if (this->hasRedux)
       populateGpuSubgroupReduceOpLoweringPattern(converter, llvmPatterns);
     LLVMConversionTarget target(getContext());
diff --git a/mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir b/mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir
index 14f5302ac2002..6d50770f53543 100644
--- a/mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir
@@ -46,3 +46,23 @@ gpu.module @kernel {
 //       CHECK: [[value:%.+]] = llvm.load
 //  CHECK-SAME:   : !llvm.ptr<1> -> f32
 //       CHECK: llvm.return [[value]]
+
+// -----
+
+gpu.module @kernel {
+  gpu.func @dynamic_shmem_with_vector(%arg1: memref<1xf32>) {
+    %0 = arith.constant 0 : index
+    %1 = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>
+    %2 = memref.view %1[%0][] : memref<?xi8, #gpu.address_space<workgroup>> to memref<1xf32, #gpu.address_space<workgroup>>
+    %3 = vector.load %2[%0] : memref<1xf32, #gpu.address_space<workgroup>>, vector<1xf32>
+    vector.store %3, %arg1[%0] : memref<1xf32>, vector<1xf32>
+    gpu.return
+  }
+}
+
+// ROCDL: llvm.mlir.global internal @__dynamic_shmem__0() {addr_space = 3 : i32} : !llvm.array<0 x i8>
+// NVVM: llvm.mlir.global internal @__dynamic_shmem__0() {addr_space = 3 : i32, alignment = 16 : i64} : !llvm.array<0 x i8>
+// CHECK-LABEL:  llvm.func @dynamic_shmem_with_vector
+// CHECK: llvm.mlir.addressof @__dynamic_shmem__0 : !llvm.ptr<3>
+// CHECK: llvm.load %{{.*}} {alignment = 4 : i64} : !llvm.ptr<3> -> vector<1xf32>
+// CHECK: llvm.store

@grypp grypp merged commit 641e05d into llvm:main Dec 6, 2023
4 checks passed
@grypp grypp deleted the add-vector-to-gpu branch December 6, 2023 09:42
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

3 participants