Conversation

@amd-eochoalo (Contributor) commented Dec 9, 2025

  • changes the workgroup mask's type from i16 to vector<16xi1> (see the usage sketch below)
  • changes pad_amount and pad_interval from Index to I32
  • adds lit tests for padEnable, iteration, and dynamic cases
  • adds a TODO for a future instrumentation pass to validate inputs
  • adds descriptor groups 2 and 3
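
A minimal sketch of what these operand changes look like in IR. The padShared syntax and constant ranges are taken from the patch below; the %global/%lds memrefs are borrowed from the AMDGPU.td examples, and the standalone %mask constant is hypothetical, since the mask's custom assembly syntax is not visible in the truncated patch:

```mlir
// The workgroup mask is now a vector<16xi1> rather than an i16; only its
// type is illustrated here.
%mask = arith.constant dense<true> : vector<16xi1>
// pad_amount and pad_interval are now i32 rather than index.
%pad_amount = arith.constant 4 : i32    // must be in [1, 128]
%pad_interval = arith.constant 64 : i32 // must be a power of two in [2, 256]
%base = amdgpu.make_dma_base %global[0, 0], %lds[0, 0]
    : memref<32x32xi32>, memref<64x64xi32, #gpu.address_space<workgroup>>
    -> !amdgpu.tdm_base<i32>
%descriptor = amdgpu.make_dma_descriptor %base globalSize [32, 32]
    globalStride [32, 1] sharedSize [64, 64]
    padShared(%pad_amount every %pad_interval)
    : !amdgpu.tdm_base<i32> -> !amdgpu.tdm_descriptor
amdgpu.tensor_load_to_lds %descriptor : !amdgpu.tdm_descriptor
```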

@llvmbot (Member) commented Dec 10, 2025

@llvm/pr-subscribers-mlir-gpu
@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-mlir

@llvm/pr-subscribers-mlir-amdgpu

Author: Erick Ochoa Lopez (amd-eochoalo)


Patch is 59.59 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171498.diff

5 Files Affected:

  • (modified) mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td (+12-7)
  • (modified) mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp (+306-56)
  • (modified) mlir/test/Conversion/AMDGPUToROCDL/gfx1250.mlir (+310-27)
  • (modified) mlir/test/Dialect/AMDGPU/amdgpu-make-dma-descriptor-fold.mlir (+3-3)
  • (modified) mlir/test/Dialect/AMDGPU/ops.mlir (+6-6)
diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index 56160d3e8fe85..6fbc90ded5824 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -110,9 +110,14 @@ def AMDGPU_TDMDescriptorType : AMDGPU_Type<"TDMDescriptor", "tdm_descriptor"> {
     This type is opaque and corresponds to the two or four descriptor groups
     used in tensor_load_to_lds or tensor_store_from_lds.
   }];
-
 }
 
+class AMDGPU_ConcreteVector<Type elem, int length> :
+  FixedVectorOfLengthAndType<[length], [elem]>,
+  BuildableType<
+    "::mlir::VectorType::get({" # length # "} ,"
+      # elem.builderCall # ")">;
+
 //===----------------------------------------------------------------------===//
 // AMDGPU Op definitions
 //===----------------------------------------------------------------------===//
@@ -1296,14 +1301,14 @@ def AMDGPU_MakeDmaDescriptorOp :
     DenseI64ArrayAttr: $global_static_strides,
     Variadic<Index>: $shared_dynamic_sizes,
     DenseI64ArrayAttr: $shared_static_sizes,
-    Optional<I16>: $workgroup_mask,
+    Optional<AMDGPU_ConcreteVector<I1, 16>>: $workgroup_mask,
     Optional<I1>: $early_timeout,
-    Optional<Index>: $pad_amount,
-    Optional<Index>: $pad_interval,
+    Optional<I32>: $pad_amount,
+    Optional<I32>: $pad_interval,
     Optional<AnyMemRef>: $atomic_barrier_address,
     Variadic<Index>: $atomic_barrier_indices,
     Optional<Index>: $global_increment,
-    Optional<Index>: $lds_increment,
+    Optional<I32>: $lds_increment,
     Optional<Index>: $iteration_count)>,
   Results<(outs AMDGPU_TDMDescriptorType: $desc)> {
 
@@ -1335,7 +1340,7 @@ def AMDGPU_MakeDmaDescriptorOp :
      2D and 3D tensors may be iterated over by setting $global_increment, $lds_increment, and $iteration_count.
      $global_increment determines how much to increment the starting global memory address per iteration in units of the $base's element type.
      $lds_increment determines how much to increment the starting LDS address per iteration in units of the $base's element type.
-     $iterate_count determines how many times to iterate.
+     $iteration_count determines how many times to iterate; it must be a value in the inclusive interval [1, 256].
 
      ```mlir
       // Example of moving a two-dimensional tensor to LDS.
@@ -1345,7 +1350,7 @@ def AMDGPU_MakeDmaDescriptorOp :
 
       // Example of moving a two dimension tensor to LDS where padding is applied after every integer.
       %base = amdgpu.make_dma_base %global[0, 0], %lds[0, 0] : memref<32x32xi32>, memref<64x64xi32, #gpu.address_space<workgroup>> -> !amdgpu.tdm_base<i32>
-      %descriptor = amdgpu.make_dma_descriptor %base globalSize [32, 32] globalStride [32, 1] sharedSize [64, 64] padding(%pad_amount pad_every %pad_interval) : !amdgpu.tdm_base<i32> -> !amdgpu.tdm_descriptor
+      %descriptor = amdgpu.make_dma_descriptor %base globalSize [32, 32] globalStride [32, 1] sharedSize [64, 64] padShared(%pad_amount every %pad_interval) : !amdgpu.tdm_base<i32> -> !amdgpu.tdm_descriptor
       amdgpu.tensor_load_to_lds %descriptor : !amdgpu.tdm_descriptor
      ```
   }];
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index 7584b17075225..592731778a8e4 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -2381,6 +2381,8 @@ struct AMDGPUMakeDmaDescriptorLowering
     if (!mask)
       return sgpr0;
 
+    Type i16 = rewriter.getI16Type();
+    mask = LLVM::BitcastOp::create(rewriter, loc, i16, mask);
     Type i32 = rewriter.getI32Type();
     Value extendedMask = LLVM::ZExtOp::create(rewriter, loc, i32, mask);
     return setValueAtOffset(rewriter, loc, sgpr0, extendedMask, 0);
@@ -2389,21 +2391,19 @@ struct AMDGPUMakeDmaDescriptorLowering
   Value setDataSize(MakeDmaDescriptorOp op, OpAdaptor adaptor,
                     ConversionPatternRewriter &rewriter, Location loc,
                     Value sgpr0, ArrayRef<Value> consts) const {
-    // Compute data_size.
     unsigned elementTypeWidthInBits = op.getElementTypeWidth();
     assert(
         llvm::is_contained<unsigned>({8, 16, 32, 64}, elementTypeWidthInBits) &&
         "expected type width to be 8, 16, 32, or 64.");
-    int64_t dataSize = llvm::Log2_32(elementTypeWidthInBits / 8);
-    Value size = createI32Constant(rewriter, loc, dataSize);
+    int64_t idx = llvm::Log2_32(elementTypeWidthInBits / 8);
+    Value size = consts[idx];
     return setValueAtOffset(rewriter, loc, sgpr0, size, 16);
   }
 
   Value setAtomicBarrier(MakeDmaDescriptorOp op, OpAdaptor adaptor,
                          ConversionPatternRewriter &rewriter, Location loc,
                          Value sgpr0, ArrayRef<Value> consts) const {
-    bool atomic_barrier_enable = adaptor.getAtomicBarrierAddress() != nullptr;
-    if (!atomic_barrier_enable)
+    if (!adaptor.getAtomicBarrierAddress())
       return sgpr0;
 
     return setValueAtOffset(rewriter, loc, sgpr0, consts[1], 18);
@@ -2412,19 +2412,16 @@ struct AMDGPUMakeDmaDescriptorLowering
   Value setIterateEnable(MakeDmaDescriptorOp op, OpAdaptor adaptor,
                          ConversionPatternRewriter &rewriter, Location loc,
                          Value sgpr0, ArrayRef<Value> consts) const {
-    bool iterate_enable = adaptor.getGlobalIncrement() != nullptr;
-    if (!iterate_enable)
+    if (!adaptor.getGlobalIncrement())
       return sgpr0;
 
-    // TODO: In future PR, add other required fields for iteration.
     return setValueAtOffset(rewriter, loc, sgpr0, consts[1], 19);
   }
 
   Value setPadEnable(MakeDmaDescriptorOp op, OpAdaptor adaptor,
                      ConversionPatternRewriter &rewriter, Location loc,
                      Value sgpr0, ArrayRef<Value> consts) const {
-    bool pad_enable = op.getPadAmount() != nullptr;
-    if (!pad_enable)
+    if (!op.getPadAmount())
       return sgpr0;
 
     return setValueAtOffset(rewriter, loc, sgpr0, consts[1], 20);
@@ -2442,13 +2439,16 @@ struct AMDGPUMakeDmaDescriptorLowering
   Value setPadInterval(MakeDmaDescriptorOp op, OpAdaptor adaptor,
                        ConversionPatternRewriter &rewriter, Location loc,
                        Value sgpr0, ArrayRef<Value> consts) const {
-    bool pad_enable = op.getPadAmount() != nullptr;
-    if (!pad_enable)
+    if (!op.getPadAmount())
       return sgpr0;
 
+    // pre-condition: padInterval must be a power of two between 2 and 256.
+    // TODO: Validate that this pre-condition holds.
+    // If it does not, there is a possibility of
+    // affecting the higher bits. In a follow-up PR, add a flag
+    // that instruments conditions that need to be checked at runtime.
     IntegerType i32 = rewriter.getI32Type();
     Value padInterval = adaptor.getPadInterval();
-    // pre-condition: padInterval can be a power of two between 2 and 256.
     padInterval = LLVM::CountTrailingZerosOp::create(rewriter, loc, i32,
                                                      padInterval, false);
     padInterval = LLVM::SubOp::create(rewriter, loc, padInterval, consts[1]);
@@ -2459,12 +2459,15 @@ struct AMDGPUMakeDmaDescriptorLowering
   Value setPadAmount(MakeDmaDescriptorOp op, OpAdaptor adaptor,
                      ConversionPatternRewriter &rewriter, Location loc,
                      Value sgpr0, ArrayRef<Value> consts) const {
-    bool pad_enable = op.getPadAmount() != nullptr;
-    if (!pad_enable)
+    if (!op.getPadAmount())
       return sgpr0;
 
-    Value padAmount = adaptor.getPadAmount();
     // pre-condition: padAmount is a value between 1-128.
+    // TODO: Validate that this pre-condition holds.
+    // If it does not, there is a possibility of
+    // affecting the higher bits. In a follow-up PR, add a flag
+    // that instruments conditions that need to be checked at runtime.
+    Value padAmount = adaptor.getPadAmount();
     padAmount = LLVM::SubOp::create(rewriter, loc, padAmount, consts[1]);
     // post-condition: padAmount is a value between 0-127.
     return setValueAtOffset(rewriter, loc, sgpr0, padAmount, 25);
@@ -2474,8 +2477,7 @@ struct AMDGPUMakeDmaDescriptorLowering
                                 ConversionPatternRewriter &rewriter,
                                 Location loc, Value sgpr1,
                                 ArrayRef<Value> consts) const {
-    bool atomic_barrier_enable = adaptor.getAtomicBarrierAddress() != nullptr;
-    if (!atomic_barrier_enable)
+    if (!adaptor.getAtomicBarrierAddress())
       return sgpr1;
 
     Value atomicBarrierAddress = adaptor.getAtomicBarrierAddress();
@@ -2488,6 +2490,9 @@ struct AMDGPUMakeDmaDescriptorLowering
     IntegerType i32 = rewriter.getI32Type();
     // pre-condition: atomicBarrierAddress is aligned to 8 bytes which implies
     // that the 3 LSBs are zero.
+    // TODO: Validate that this pre-condition holds.
+    // In a follow-up PR, add a flag that instruments conditions that need to
+    // be checked at runtime.
     atomicBarrierAddress =
         LLVM::PtrToIntOp::create(rewriter, loc, i32, atomicBarrierAddress);
     atomicBarrierAddress =
@@ -2498,65 +2503,91 @@ struct AMDGPUMakeDmaDescriptorLowering
     return setValueAtOffset(rewriter, loc, sgpr1, atomicBarrierAddress, 32);
   }
 
-  std::pair<Value, Value> setTensorDim0(MakeDmaDescriptorOp op,
+  std::pair<Value, Value> setTensorDimX(MakeDmaDescriptorOp op,
                                         OpAdaptor adaptor,
                                         ConversionPatternRewriter &rewriter,
                                         Location loc, Value sgpr1, Value sgpr2,
-                                        ArrayRef<Value> consts) const {
-    SmallVector<OpFoldResult> mixedGlobalSizes = op.getMixedGlobalSizes();
-    OpFoldResult tensorDim0OpFoldResult = mixedGlobalSizes.back();
-    Value tensorDim0;
-    if (auto attr = dyn_cast<Attribute>(tensorDim0OpFoldResult))
-      tensorDim0 =
+                                        ArrayRef<Value> consts, uint64_t dimX,
+                                        uint32_t offset) const {
+    ArrayRef<int64_t> globalStaticSizes = adaptor.getGlobalStaticSizes();
+    ValueRange globalDynamicSizes = adaptor.getGlobalDynamicSizes();
+    SmallVector<OpFoldResult> mixedGlobalSizes =
+        getMixedValues(globalStaticSizes, globalDynamicSizes, rewriter);
+    if (mixedGlobalSizes.size() <= dimX)
+      return {sgpr1, sgpr2};
+
+    OpFoldResult tensorDimXOpFoldResult = *(mixedGlobalSizes.rbegin() + dimX);
+    // pre-condition: tensorDimX is less than 2^48-1
+    // TODO: Validate that this pre-condition holds.
+    // If it does not, there is a possibility of
+    // affecting the higher bits. In a follow-up PR, add a flag
+    // that instruments conditions that need to be checked at runtime.
+    // This could also be fixed by declaring mixedGlobalSizes as a
+    // DynamicI48List.
+    Value tensorDimX;
+    if (auto attr = dyn_cast<Attribute>(tensorDimXOpFoldResult))
+      tensorDimX =
           createI32Constant(rewriter, loc, cast<IntegerAttr>(attr).getInt());
-    else
-      tensorDim0 = cast<Value>(tensorDim0OpFoldResult);
+    else {
+      IntegerType i32 = rewriter.getI32Type();
+      tensorDimX = cast<Value>(tensorDimXOpFoldResult);
+      tensorDimX = LLVM::TruncOp::create(rewriter, loc, i32, tensorDimX);
+    }
+
+    sgpr1 = setValueAtOffset(rewriter, loc, sgpr1, tensorDimX, offset);
 
     Value c16 = createI32Constant(rewriter, loc, 16);
-    Value tensorDim0High = LLVM::LShrOp::create(rewriter, loc, tensorDim0, c16);
-    sgpr1 = setValueAtOffset(rewriter, loc, sgpr1, tensorDim0, 48);
-    sgpr2 = setValueAtOffset(rewriter, loc, sgpr2, tensorDim0High, 48 + 16);
+    Value tensorDimXHigh = LLVM::LShrOp::create(rewriter, loc, tensorDimX, c16);
+    sgpr2 = setValueAtOffset(rewriter, loc, sgpr2, tensorDimXHigh, offset + 16);
     return {sgpr1, sgpr2};
   }
 
+  std::pair<Value, Value> setTensorDim0(MakeDmaDescriptorOp op,
+                                        OpAdaptor adaptor,
+                                        ConversionPatternRewriter &rewriter,
+                                        Location loc, Value sgpr1, Value sgpr2,
+                                        ArrayRef<Value> consts) const {
+    return setTensorDimX(op, adaptor, rewriter, loc, sgpr1, sgpr2, consts, 0,
+                         48);
+  }
+
   std::pair<Value, Value> setTensorDim1(MakeDmaDescriptorOp op,
                                         OpAdaptor adaptor,
                                         ConversionPatternRewriter &rewriter,
                                         Location loc, Value sgpr2, Value sgpr3,
                                         ArrayRef<Value> consts) const {
-    // TODO: Generalize to setTensorDimX.
-    SmallVector<OpFoldResult> mixedGlobalSizes = op.getMixedGlobalSizes();
-    OpFoldResult tensorDim1OpFoldResult = *(mixedGlobalSizes.rbegin() + 1);
-    Value tensorDim1;
-    if (auto attr = dyn_cast<Attribute>(tensorDim1OpFoldResult))
-      tensorDim1 =
-          createI32Constant(rewriter, loc, cast<IntegerAttr>(attr).getInt());
-    else
-      tensorDim1 = cast<Value>(tensorDim1OpFoldResult);
-
-    Value c16 = createI32Constant(rewriter, loc, 16);
-    Value tensorDim1High = LLVM::LShrOp::create(rewriter, loc, tensorDim1, c16);
-    sgpr2 = setValueAtOffset(rewriter, loc, sgpr2, tensorDim1, 80);
-    sgpr3 = setValueAtOffset(rewriter, loc, sgpr3, tensorDim1High, 80 + 16);
-    return {sgpr2, sgpr3};
+    return setTensorDimX(op, adaptor, rewriter, loc, sgpr2, sgpr3, consts, 1,
+                         80);
   }
 
   Value setTileDimX(MakeDmaDescriptorOp op, OpAdaptor adaptor,
                     ConversionPatternRewriter &rewriter, Location loc,
                     Value sgpr, ArrayRef<Value> consts, size_t dimX,
                     int64_t offset) const {
-    SmallVector<OpFoldResult> mixedSharedSizes = op.getMixedSharedSizes();
-
+    ArrayRef<int64_t> sharedStaticSizes = adaptor.getSharedStaticSizes();
+    ValueRange sharedDynamicSizes = adaptor.getSharedDynamicSizes();
+    SmallVector<OpFoldResult> mixedSharedSizes =
+        getMixedValues(sharedStaticSizes, sharedDynamicSizes, rewriter);
     if (mixedSharedSizes.size() <= dimX)
       return sgpr;
 
     OpFoldResult tileDimXOpFoldResult = *(mixedSharedSizes.rbegin() + dimX);
+    // pre-condition: tileDimX is less than 2^16-1
+    // TODO: Validate that this pre-condition holds.
+    // If it does not, there is a possibility of
+    // affecting the higher bits. In a follow-up PR, add a flag
+    // that instruments conditions that need to be checked at runtime.
+    // This could also be fixed by declaring mixedSharedSizes as a
+    // DynamicI16List.
     Value tileDimX;
     if (auto attr = dyn_cast<Attribute>(tileDimXOpFoldResult))
       tileDimX =
           createI32Constant(rewriter, loc, cast<IntegerAttr>(attr).getInt());
-    else
+    else {
+      IntegerType i32 = rewriter.getI32Type();
       tileDimX = cast<Value>(tileDimXOpFoldResult);
+      tileDimX = LLVM::TruncOp::create(rewriter, loc, i32, tileDimX);
+    }
 
     return setValueAtOffset(rewriter, loc, sgpr, tileDimX, offset);
   }
@@ -2584,13 +2615,20 @@ struct AMDGPUMakeDmaDescriptorLowering
                       ConversionPatternRewriter &rewriter, Location loc,
                       Value sgprY, Value sgprZ, ArrayRef<Value> consts,
                       size_t dimX, int64_t offset) const {
-    SmallVector<OpFoldResult> mixedGlobalStrides = op.getMixedGlobalStrides();
+    ArrayRef<int64_t> globalStaticStrides = adaptor.getGlobalStaticStrides();
+    ValueRange globalDynamicStrides = adaptor.getGlobalDynamicStrides();
+    SmallVector<OpFoldResult> mixedGlobalStrides =
+        getMixedValues(globalStaticStrides, globalDynamicStrides, rewriter);
 
     if (mixedGlobalStrides.size() <= dimX)
       return {sgprY, sgprZ};
 
     OpFoldResult tensorDimXStrideOpFoldResult =
         *(mixedGlobalStrides.rbegin() + dimX);
+    // pre-condition: tensorDimXStride is less than 2^48-1
+    // TODO: Validate that this pre-condition holds.
+    // In a follow-up PR, add a flag that instruments conditions that need to
+    // be checked at runtime.
     Value tensorDimXStride;
     if (auto attr = dyn_cast<Attribute>(tensorDimXStrideOpFoldResult))
       tensorDimXStride =
@@ -2605,6 +2643,7 @@ struct AMDGPUMakeDmaDescriptorLowering
     IntegerType i32 = rewriter.getI32Type();
     Value tensorDimXStrideLow =
         LLVM::TruncOp::create(rewriter, loc, i32, tensorDimXStride);
+    sgprY = setValueAtOffset(rewriter, loc, sgprY, tensorDimXStrideLow, offset);
 
     int64_t shift = (offset % 32) == 0 ? 32 : offset % 32;
     Value shiftVal = createI64Constant(rewriter, loc, shift);
@@ -2612,8 +2651,6 @@ struct AMDGPUMakeDmaDescriptorLowering
         LLVM::LShrOp::create(rewriter, loc, tensorDimXStride, shiftVal);
     tensorDimXStrideHigh =
         LLVM::TruncOp::create(rewriter, loc, i32, tensorDimXStrideHigh);
-
-    sgprY = setValueAtOffset(rewriter, loc, sgprY, tensorDimXStrideLow, offset);
     sgprZ = setValueAtOffset(rewriter, loc, sgprZ, tensorDimXStrideHigh,
                              offset + shift);
     return {sgprY, sgprZ};
@@ -2680,6 +2717,221 @@ struct AMDGPUMakeDmaDescriptorLowering
     return dgroup1;
   }
 
+  Value setTensorDimX(MakeDmaDescriptorOp op, OpAdaptor adaptor,
+                      ConversionPatternRewriter &rewriter, Location loc,
+                      Value sgpr0, ArrayRef<Value> consts, int64_t dimX,
+                      int64_t offset) const {
+    ArrayRef<int64_t> globalStaticSizes = adaptor.getGlobalStaticSizes();
+    ValueRange globalDynamicSizes = adaptor.getGlobalDynamicSizes();
+    SmallVector<OpFoldResult> mixedGlobalSizes =
+        getMixedValues(globalStaticSizes, globalDynamicSizes, rewriter);
+    if (mixedGlobalSizes.size() <= static_cast<unsigned long>(dimX))
+      return sgpr0;
+
+    OpFoldResult tensorDimXOpFoldResult = *(mixedGlobalSizes.rbegin() + dimX);
+    Value tensorDimX;
+    if (auto attr = dyn_cast<Attribute>(tensorDimXOpFoldResult))
+      tensorDimX =
+          createI32Constant(rewriter, loc, cast<IntegerAttr>(attr).getInt());
+    else {
+      IntegerType i32 = rewriter.getI32Type();
+      tensorDimX = cast<Value>(tensorDimXOpFoldResult);
+      tensorDimX = LLVM::TruncOp::create(rewriter, loc, i32, tensorDimX);
+    }
+
+    return setValueAtOffset(rewriter, loc, sgpr0, tensorDimX, offset);
+  }
+
+  Value setTensorDim2(MakeDmaDescriptorOp op, OpAdaptor adaptor,
+                      ConversionPatternRewriter &rewriter, Location loc,
+                      Value sgpr0, ArrayRef<Value> consts) const {
+    return setTensorDimX(op, adaptor, rewriter, loc, sgpr0, consts, 2, 0);
+  }
+
+  Value truncateAndSetValueAtOffset(ConversionPatternRewriter &rewriter,
+                                    Location loc, Value accumulator,
+                                    Value value, int64_t shift) const {
+
+    IntegerType i32 = rewriter.getI32Type();
+    value = LLVM::TruncOp::create(rewriter, loc, i32, value);
+    return setValueAtOffset(rewriter, loc, accumulator, value, shift);
+  }
+
+  Value setLDSAddrIncrement(MakeDmaDescriptorOp op, OpAdaptor adaptor,
+                            ConversionPatternRewriter &rewriter, Location loc,
+                            Value sgpr1, ArrayRef<Value> consts,
+                            int64_t offset) const {
+    Value ldsAddrIncrement = adaptor.getLdsIncrement();
+    return setValueAtOffset(rewriter, loc, sgpr1, ldsAddrIncrement, offset);
+  }
+
+  std::pair<Value, Value>
+  setGlobalAddrIncrement(MakeDmaDescriptorOp op, OpAdaptor adaptor,
+                         ConversionPatternRewriter &rewriter, Location loc,
+                         Value sgpr2, Value sgpr3, ArrayRef<Value> consts,
+                         int64_t offset) const {
+    Value globalAddrIncrement = adaptor.getGlobalIncrement();
+    sgpr2 = truncateAndSetValueAtOffset(rewriter, loc, sgpr2,
+                                        globalAddrIncrement, offset);
+    Value shift = createI64Constant(rewriter, loc, 32);
+    globalAddrIncrement =
+        LLVM::LShrOp::create(rewriter, loc, glo...
[truncated]
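
For readers skimming the truncated lowering, here is a standalone C++ sketch (not part of the patch; the helper names are invented for illustration) of the field arithmetic the pattern emits for the pad operands and for splitting a dimension into 16-bit halves:

```cpp
#include <bit>
#include <cassert>
#include <cstdint>
#include <iostream>
#include <utility>

// pad_interval must be a power of two in [2, 256]; the lowering encodes it
// as log2(interval) - 1, via LLVM::CountTrailingZerosOp followed by a SubOp.
uint32_t encodePadInterval(uint32_t interval) {
  assert(interval >= 2 && interval <= 256 &&
         (interval & (interval - 1)) == 0 && "power of two in [2, 256]");
  return static_cast<uint32_t>(std::countr_zero(interval)) - 1;
}

// pad_amount must be in [1, 128]; it is stored biased by one, so the encoded
// value occupies [0, 127] and fits in 7 bits.
uint32_t encodePadAmount(uint32_t amount) {
  assert(amount >= 1 && amount <= 128 && "pad amount in [1, 128]");
  return amount - 1;
}

// A global tensor dimension (pre-condition: less than 2^48 - 1) conceptually
// has its low 16 bits placed in one SGPR and the remaining bits in the next;
// the lowering obtains the high part with a logical shift right by 16.
std::pair<uint32_t, uint32_t> splitDimLowHigh(uint32_t dim) {
  return {dim & 0xFFFFu, dim >> 16};
}

int main() {
  std::cout << encodePadInterval(64) << "\n"; // prints 5
  std::cout << encodePadAmount(4) << "\n";    // prints 3
  auto [lo, hi] = splitDimLowHigh(100000);
  std::cout << lo << " " << hi << "\n";       // prints 34464 1
}
```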

@krzysz00 (Contributor) left a comment:

Minor initial notes

@krzysz00 (Contributor) left a comment:

Approved after one change

@amd-eochoalo merged commit 2f9b8b7 into llvm:main on Dec 11, 2025
10 checks passed