
Conversation

erman-gurses
Contributor

This PR generalizes helper functions for common use in the swizzling implementations for the AMDGPU and NVGPU dialects.
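
For readers skimming the diff below, a minimal sketch — editorial, not part of the PR — of what the generalization enables: after this change the index-manipulation helpers live in mlir:: rather than mlir::nvgpu::, so an AMDGPU swizzling pass can drive them too. applySwizzle is a hypothetical helper invented for illustration.

#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/IR/Operation.h"

using namespace mlir;

// Replace the index operands of any supported load/store-like op
// (memref/vector/nvgpu) with a swizzled set computed by the caller.
static void applySwizzle(Operation *accessOp, ArrayRef<Value> newIndices) {
  assert(getIndices(accessOp).size() == newIndices.size() && "rank mismatch");
  setIndices(accessOp, newIndices);
}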

@llvmbot
Member

llvmbot commented Dec 11, 2023

@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-mlir

@llvm/pr-subscribers-mlir-amdgpu

Author: None (erman-gurses)

Changes

This PR generalizes helper functions for common use in the swizzling implementations for the AMDGPU and NVGPU dialects.


Full diff: https://github.com/llvm/llvm-project/pull/75097.diff

8 Files Affected:

  • (modified) mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td (+21)
  • (modified) mlir/include/mlir/Dialect/NVGPU/Transforms/Utils.h (-6)
  • (modified) mlir/include/mlir/Dialect/Utils/IndexingUtils.h (+13)
  • (modified) mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp (+16-1)
  • (modified) mlir/lib/Dialect/NVGPU/Transforms/CreateAsyncGroups.cpp (+3-2)
  • (modified) mlir/lib/Dialect/NVGPU/Transforms/OptimizeSharedMemory.cpp (+1-1)
  • (modified) mlir/lib/Dialect/NVGPU/Transforms/Utils.cpp (-40)
  • (modified) mlir/lib/Dialect/Utils/IndexingUtils.cpp (+42)
diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index ffb302fcedd732..110bdcb571fdf3 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -29,6 +29,27 @@ def AMDGPU_Dialect : Dialect {
     "gpu::GPUDialect"
   ];
   let useDefaultAttributePrinterParser = 1;
+  let extraClassDeclaration = [{
+    /// Return true if the given MemRefType has an integer address
+    /// space that matches the ROCDL shared memory address space or
+    /// is a gpu::AddressSpaceAttr attribute with value `workgroup`.
+    static bool hasSharedMemoryAddressSpace(MemRefType type);
+
+    /// Return true if the given Attribute has an integer address
+    /// space that matches the ROCDL shared memory address space or
+    /// is a gpu::AddressSpaceAttr attribute with value `workgroup`.
+    static bool isSharedMemoryAddressSpace(Attribute type);
+
+    /// Defines the MemRef memory space attribute numeric value that indicates
+    /// a memref is located in global memory. This should correspond to the
+    /// value used in ROCDL.
+    static constexpr unsigned kGlobalMemoryAddressSpace = 1;
+
+    /// Defines the MemRef memory space attribute numeric value that indicates
+    /// a memref is located in shared memory. This should correspond to the
+    /// value used in ROCDL.
+    static constexpr unsigned kSharedMemoryAddressSpace = 3;
+  }];
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/NVGPU/Transforms/Utils.h b/mlir/include/mlir/Dialect/NVGPU/Transforms/Utils.h
index 64bce441722af8..b845aef888ca6d 100644
--- a/mlir/include/mlir/Dialect/NVGPU/Transforms/Utils.h
+++ b/mlir/include/mlir/Dialect/NVGPU/Transforms/Utils.h
@@ -11,12 +11,6 @@
 namespace mlir {
 namespace nvgpu {
 
-/// Get the indices that the given load/store operation is operating on.
-Operation::operand_range getIndices(Operation *op);
-
-/// Set the indices that the given load/store operation is operating on.
-void setIndices(Operation *op, ArrayRef<Value> indices);
-
 /// Get the value that is stored by the given store operation.
 Value getValueStored(Operation *op);
 
diff --git a/mlir/include/mlir/Dialect/Utils/IndexingUtils.h b/mlir/include/mlir/Dialect/Utils/IndexingUtils.h
index f51a8b28b7548e..295add03127925 100644
--- a/mlir/include/mlir/Dialect/Utils/IndexingUtils.h
+++ b/mlir/include/mlir/Dialect/Utils/IndexingUtils.h
@@ -14,6 +14,9 @@
 #ifndef MLIR_DIALECT_UTILS_INDEXINGUTILS_H
 #define MLIR_DIALECT_UTILS_INDEXINGUTILS_H
 
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h"
+#include "mlir/Dialect/Vector/IR/VectorOps.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -387,6 +390,16 @@ class StaticTileOffsetRange {
   IteratorTy beginValue;
   IteratorTy pastEndValue;
 };
+
+
+//===----------------------------------------------------------------------===//
+// load/store utils.
+//===----------------------------------------------------------------------===//
+/// Get the indices that the given load/store operation is operating on.
+Operation::operand_range getIndices(Operation *op);
+
+/// Set the indices that the given load/store operation is operating on.
+void setIndices(Operation *op, ArrayRef<Value> indices);
 } // namespace mlir
 
 #endif // MLIR_DIALECT_UTILS_INDEXINGUTILS_H
diff --git a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
index 2575ad4984814b..f54ec46c7476dd 100644
--- a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
+++ b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
@@ -9,7 +9,7 @@
 // This file implements the AMDGPU dialect and its operations.
 //
 //===----------------------------------------------------------------------===//
-
+#include "mlir/Dialect/Utils/IndexingUtils.h"
 #include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h"
 
 #include "mlir/Dialect/Arith/IR/Arith.h"
@@ -43,6 +43,21 @@ void AMDGPUDialect::initialize() {
       >();
 }
 
+bool amdgpu::AMDGPUDialect::isSharedMemoryAddressSpace(Attribute memorySpace) {
+  if (!memorySpace)
+    return false;
+  if (auto intAttr = llvm::dyn_cast<IntegerAttr>(memorySpace))
+    return intAttr.getInt() == AMDGPUDialect::kSharedMemoryAddressSpace;
+  if (auto gpuAttr = llvm::dyn_cast<gpu::AddressSpaceAttr>(memorySpace))
+    return gpuAttr.getValue() == gpu::AddressSpace::Workgroup;
+  return false;
+}
+
+bool amdgpu::AMDGPUDialect::hasSharedMemoryAddressSpace(MemRefType type) {
+  Attribute memorySpace = type.getMemorySpace();
+  return isSharedMemoryAddressSpace(memorySpace);
+}
+
 //===----------------------------------------------------------------------===//
 // 8-bit float ops
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/NVGPU/Transforms/CreateAsyncGroups.cpp b/mlir/lib/Dialect/NVGPU/Transforms/CreateAsyncGroups.cpp
index f63825cdc8f617..235cc28abd8f06 100644
--- a/mlir/lib/Dialect/NVGPU/Transforms/CreateAsyncGroups.cpp
+++ b/mlir/lib/Dialect/NVGPU/Transforms/CreateAsyncGroups.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "mlir/Dialect/Utils/IndexingUtils.h"
 #include "mlir/Dialect/NVGPU/Transforms/Transforms.h"
 
 #include "mlir/Dialect/Arith/IR/Arith.h"
@@ -254,9 +255,9 @@ void nvgpu::createAsyncGroups(RewriterBase &rewriter, Operation *op,
       // bypass_l1 only possible with 16 byte transfer.
       Value token = rewriter.create<nvgpu::DeviceAsyncCopyOp>(
           writeOp->getLoc(), nvgpu::DeviceAsyncTokenType::get(op->getContext()),
-          /*dst=*/storeBase, /*dstIndices=*/nvgpu::getIndices(writeOp),
+          /*dst=*/storeBase, /*dstIndices=*/getIndices(writeOp),
           /*src=*/loadBase,
-          /*srcIndices=*/nvgpu::getIndices(readOp),
+          /*srcIndices=*/getIndices(readOp),
           /*dstElements=*/rewriter.getIndexAttr(numElements),
           /*srcElements=*/numReadElements,
           /*bypassL1=*/bypassL1 && sizeInBytes == 16 ? rewriter.getUnitAttr()
diff --git a/mlir/lib/Dialect/NVGPU/Transforms/OptimizeSharedMemory.cpp b/mlir/lib/Dialect/NVGPU/Transforms/OptimizeSharedMemory.cpp
index 693bb53cacff62..bfffeaa32fbe2c 100644
--- a/mlir/lib/Dialect/NVGPU/Transforms/OptimizeSharedMemory.cpp
+++ b/mlir/lib/Dialect/NVGPU/Transforms/OptimizeSharedMemory.cpp
@@ -9,7 +9,7 @@
 // This file implements transforms to optimize accesses to shared memory.
 //
 //===----------------------------------------------------------------------===//
-
+#include "mlir/Dialect/Utils/IndexingUtils.h"
 #include "mlir/Dialect/NVGPU/Transforms/Passes.h"
 
 #include "mlir/Dialect/Arith/IR/Arith.h"
diff --git a/mlir/lib/Dialect/NVGPU/Transforms/Utils.cpp b/mlir/lib/Dialect/NVGPU/Transforms/Utils.cpp
index a782ed5ddd85e4..213b453d4bf9ff 100644
--- a/mlir/lib/Dialect/NVGPU/Transforms/Utils.cpp
+++ b/mlir/lib/Dialect/NVGPU/Transforms/Utils.cpp
@@ -15,46 +15,6 @@
 using namespace mlir;
 using namespace mlir::nvgpu;
 
-Operation::operand_range nvgpu::getIndices(Operation *op) {
-  if (auto ldmatrixOp = dyn_cast<LdMatrixOp>(op))
-    return ldmatrixOp.getIndices();
-  if (auto copyOp = dyn_cast<DeviceAsyncCopyOp>(op))
-    return copyOp.getDstIndices();
-  if (auto loadOp = dyn_cast<memref::LoadOp>(op))
-    return loadOp.getIndices();
-  if (auto storeOp = dyn_cast<memref::StoreOp>(op))
-    return storeOp.getIndices();
-  if (auto vectorReadOp = dyn_cast<vector::LoadOp>(op))
-    return vectorReadOp.getIndices();
-  if (auto vectorStoreOp = dyn_cast<vector::StoreOp>(op))
-    return vectorStoreOp.getIndices();
-  if (auto transferReadOp = dyn_cast<vector::TransferReadOp>(op))
-    return transferReadOp.getIndices();
-  if (auto transferWriteOp = dyn_cast<vector::TransferWriteOp>(op))
-    return transferWriteOp.getIndices();
-  llvm_unreachable("unsupported op type");
-}
-
-void nvgpu::setIndices(Operation *op, ArrayRef<Value> indices) {
-  if (auto ldmatrixOp = dyn_cast<LdMatrixOp>(op))
-    return ldmatrixOp.getIndicesMutable().assign(indices);
-  if (auto copyOp = dyn_cast<DeviceAsyncCopyOp>(op))
-    return copyOp.getDstIndicesMutable().assign(indices);
-  if (auto loadOp = dyn_cast<memref::LoadOp>(op))
-    return loadOp.getIndicesMutable().assign(indices);
-  if (auto storeOp = dyn_cast<memref::StoreOp>(op))
-    return storeOp.getIndicesMutable().assign(indices);
-  if (auto vectorReadOp = dyn_cast<vector::LoadOp>(op))
-    return vectorReadOp.getIndicesMutable().assign(indices);
-  if (auto vectorStoreOp = dyn_cast<vector::StoreOp>(op))
-    return vectorStoreOp.getIndicesMutable().assign(indices);
-  if (auto transferReadOp = dyn_cast<vector::TransferReadOp>(op))
-    return transferReadOp.getIndicesMutable().assign(indices);
-  if (auto transferWriteOp = dyn_cast<vector::TransferWriteOp>(op))
-    return transferWriteOp.getIndicesMutable().assign(indices);
-  llvm_unreachable("unsupported op type");
-}
-
 Value nvgpu::getValueStored(Operation *op) {
   if (auto storeOp = dyn_cast<memref::StoreOp>(op))
     return storeOp.getValueToStore();
diff --git a/mlir/lib/Dialect/Utils/IndexingUtils.cpp b/mlir/lib/Dialect/Utils/IndexingUtils.cpp
index bb8a0d5912d7c1..8bbd84918eed9b 100644
--- a/mlir/lib/Dialect/Utils/IndexingUtils.cpp
+++ b/mlir/lib/Dialect/Utils/IndexingUtils.cpp
@@ -8,6 +8,8 @@
 
 #include "mlir/Dialect/Utils/IndexingUtils.h"
 
+#include "mlir/Dialect/NVGPU/Transforms/Utils.h"
+#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinAttributes.h"
@@ -352,3 +354,43 @@ mlir::detail::TileOffsetRangeImpl::getDynamicTileOffsets(
   return mlir::computeElementwiseMul(tileCoords,
                                      getAffineConstantExprs(tileShape, ctx));
 }
+
+Operation::operand_range mlir::getIndices(Operation *op) {
+  if (auto ldmatrixOp = dyn_cast<nvgpu::LdMatrixOp>(op))
+    return ldmatrixOp.getIndices();
+  if (auto copyOp = dyn_cast<nvgpu::DeviceAsyncCopyOp>(op))
+    return copyOp.getDstIndices();
+  if (auto loadOp = dyn_cast<memref::LoadOp>(op))
+    return loadOp.getIndices();
+  if (auto storeOp = dyn_cast<memref::StoreOp>(op))
+    return storeOp.getIndices();
+  if (auto vectorReadOp = dyn_cast<vector::LoadOp>(op))
+    return vectorReadOp.getIndices();
+  if (auto vectorStoreOp = dyn_cast<vector::StoreOp>(op))
+    return vectorStoreOp.getIndices();
+  if (auto transferReadOp = dyn_cast<vector::TransferReadOp>(op))
+    return transferReadOp.getIndices();
+  if (auto transferWriteOp = dyn_cast<vector::TransferWriteOp>(op))
+    return transferWriteOp.getIndices();
+  llvm_unreachable("unsupported op type");
+}
+
+void mlir::setIndices(Operation *op, ArrayRef<Value> indices) {
+  if (auto ldmatrixOp = dyn_cast<nvgpu::LdMatrixOp>(op))
+    return ldmatrixOp.getIndicesMutable().assign(indices);
+  if (auto copyOp = dyn_cast<nvgpu::DeviceAsyncCopyOp>(op))
+    return copyOp.getDstIndicesMutable().assign(indices);
+  if (auto loadOp = dyn_cast<memref::LoadOp>(op))
+    return loadOp.getIndicesMutable().assign(indices);
+  if (auto storeOp = dyn_cast<memref::StoreOp>(op))
+    return storeOp.getIndicesMutable().assign(indices);
+  if (auto vectorReadOp = dyn_cast<vector::LoadOp>(op))
+    return vectorReadOp.getIndicesMutable().assign(indices);
+  if (auto vectorStoreOp = dyn_cast<vector::StoreOp>(op))
+    return vectorStoreOp.getIndicesMutable().assign(indices);
+  if (auto transferReadOp = dyn_cast<vector::TransferReadOp>(op))
+    return transferReadOp.getIndicesMutable().assign(indices);
+  if (auto transferWriteOp = dyn_cast<vector::TransferWriteOp>(op))
+    return transferWriteOp.getIndicesMutable().assign(indices);
+  llvm_unreachable("unsupported op type");
+}

@llvmbot
Member

llvmbot commented Dec 11, 2023

@llvm/pr-subscribers-mlir-gpu

@llvmbot
Member

llvmbot commented Dec 11, 2023

@llvm/pr-subscribers-mlir-nvgpu


github-actions bot commented Dec 11, 2023

✅ With the latest revision this PR passed the C/C++ code formatter.

@@ -8,6 +8,8 @@

#include "mlir/Dialect/Utils/IndexingUtils.h"

#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
#include "mlir/Dialect/NVGPU/Transforms/Utils.h"
Contributor

I don't think IndexingUtils.cpp can take a dependency on NVGPU or Vector. I assume the portion of the helper that you want to reuse isn't specific to any NVGPU ops; maybe split out the vector and memref portions and make them available in the vector dialect, or just duplicate the portion of the helper that is needed in AMDGPU?
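
One possible shape of that split, as a hedged sketch: keep only the memref/vector cases in a utility that lives next to those dialects, and let NVGPU layer its two ops on top. The function name and file placement are assumptions, not part of this PR; llvm::TypeSwitch is simply an alternative to the dyn_cast chain.

#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "llvm/ADT/TypeSwitch.h"

using namespace mlir;

// Hypothetical dialect-neutral core: index operands for memref/vector
// accesses only, so the file has no NVGPU (or AMDGPU) dependency.
static Operation::operand_range getMemRefOrVectorIndices(Operation *op) {
  return llvm::TypeSwitch<Operation *, Operation::operand_range>(op)
      .Case([](memref::LoadOp loadOp) { return loadOp.getIndices(); })
      .Case([](memref::StoreOp storeOp) { return storeOp.getIndices(); })
      .Case([](vector::LoadOp loadOp) { return loadOp.getIndices(); })
      .Case([](vector::StoreOp storeOp) { return storeOp.getIndices(); })
      .Case([](vector::TransferReadOp readOp) { return readOp.getIndices(); })
      .Case([](vector::TransferWriteOp writeOp) { return writeOp.getIndices(); })
      .Default([](Operation *) -> Operation::operand_range {
        llvm_unreachable("unsupported op type");
      });
}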

Contributor Author

That is a good point. We can discuss it today, but I will probably create a separate .cpp util file for that. The main reason for this patch is to avoid code duplication as much as we can.

if (!memorySpace)
  return false;
if (auto intAttr = llvm::dyn_cast<IntegerAttr>(memorySpace))
  return intAttr.getInt() == AMDGPUDialect::kSharedMemoryAddressSpace;
Contributor

My understanding is that the convention is to keep the shared memory address space general for as long as possible, which to me suggests ROCDL would be the expected translation point. Can you elaborate on the reason to add an earlier dialect-specific value for this?

(not necessarily a direct stakeholder here, mainly concerned about how magic memory space constants can be difficult to manage).

Contributor

Yeah, but this is, I think, designed to not break in the case where the shared memory's already been converted.

That being said, I'd argue for the value of 3 to live over in ROCDL, and for this "is it shared memory" check to be a GPUDialect utility.

Contributor

+1, that's how I'm interpreting this as well. Using the GPUDialect utility for as long as possible seems consistent with other dialects and IMHO is more readable. It would be good to understand what cases we're hitting that have this early conversion of the shared memory space, and whether this is the right level to handle it.
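
To make the two encodings in this thread concrete, a hedged sketch (demoAddressSpaces is illustrative only; both forms satisfy the PR's isSharedMemoryAddressSpace as written):

#include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include <cassert>

using namespace mlir;

void demoAddressSpaces(MLIRContext &ctx) {
  ctx.loadDialect<gpu::GPUDialect, amdgpu::AMDGPUDialect>();
  // General form: the abstract GPU address space, kept until translation.
  Attribute general =
      gpu::AddressSpaceAttr::get(&ctx, gpu::AddressSpace::Workgroup);
  // Early-converted form: the raw numeric value this PR hard-codes (3).
  Attribute numeric = IntegerAttr::get(IntegerType::get(&ctx, 64), 3);
  assert(amdgpu::AMDGPUDialect::isSharedMemoryAddressSpace(general));
  assert(amdgpu::AMDGPUDialect::isSharedMemoryAddressSpace(numeric));
}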

erman-gurses
Contributor Author
Dec 12, 2023

Thanks for the comments; based on those and some discussions, it will be better to bring a new PR that includes the swizzling implementation as well.

/// Defines the MemRef memory space attribute numeric value that indicates
/// a memref is located in shared memory. This should correspond to the
/// value used in ROCDL.
static constexpr unsigned kSharedMemoryAddressSpace = 3;
Contributor

I'd add the incantations for private address space as well if we're doing this

Contributor Author

Sure, will do that.
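
Presumably something along these lines, mirroring the constants above; the value 5 (the AMDGPU/ROCDL private, i.e. scratch, address space) is stated here as an assumption, not as part of this PR:

/// Defines the MemRef memory space attribute numeric value that indicates
/// a memref is located in private (scratch) memory. This should correspond
/// to the value used in ROCDL.
static constexpr unsigned kPrivateMemoryAddressSpace = 5;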

@@ -15,46 +15,6 @@
using namespace mlir;
using namespace mlir::nvgpu;

Operation::operand_range nvgpu::getIndices(Operation *op) {
Contributor

... this could be an interface or something, idk

Or an annotation on arguments

Or perhaps refactored out to GPU Common Indexing Utils or something
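
A rough C++ shape of the interface idea, purely hypothetical — IndexedAccessOpInterface and its methods are invented names, not an existing MLIR interface:

// If load/store-like ops implemented a shared interface, the dyn_cast
// chains in getIndices/setIndices would collapse to a single query.
if (auto accessOp = dyn_cast<IndexedAccessOpInterface>(op)) {
  Operation::operand_range indices = accessOp.getAccessIndices();
  accessOp.setAccessIndices(newIndices);
}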

@erman-gurses
Contributor Author

Thanks for the comments; based on those and some discussions, it will be better to bring a new PR that includes the swizzling implementation as well.
