Conversation


@Shoreshen Shoreshen commented Nov 7, 2025

This patch infers alignment for uses of the make.buffer.rsrc intrinsic. The rule is that the alignment on a use of the return value of make.buffer.rsrc is capped by the alignment of the intrinsic's base pointer operand.

For example:

define float @foo(ptr addrspace(1) align X %ptr) {
  %fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %ptr, i16 0, i32 C, i32 0)
  %y = load float, ptr addrspace(7) %fat.ptr, align Y
  ret float %y
}

We want the attributor to rewrite the load so that its alignment becomes min(X, Y).
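
Concretely, the load_gt_base test added by this patch (see the new attr-amdgpu-align.ll file below) exercises this with X = 4 and Y = 8:

define float @load_gt_base(ptr align 4 %p) {
  ; The base pointer is only known to be 4-byte aligned, but the load claims align 8.
  %ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p0(ptr %p, i16 0, i64 0, i32 0)
  %loaded = load float, ptr addrspace(7) %ptr, align 8
  ret float %loaded
}

After amdgpu-attributor (per the CHECK lines below), the intrinsic result is annotated align 4 and the load is clamped to align 4.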

@Shoreshen Shoreshen requested review from arsenm and shiltian and removed request for arsenm November 7, 2025 09:57

llvmbot commented Nov 7, 2025

@llvm/pr-subscribers-clang
@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-amdgpu

Author: None (Shoreshen)

Changes

This patch infers alignment for uses of the make.buffer.rsrc intrinsic. The rule is that the alignment on a use of the return value of make.buffer.rsrc is capped by the alignment of the intrinsic's base pointer operand.

For example:

define float @foo(ptr addrspace(1) align X %ptr) {
  %fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %ptr, i16 0, i32 C, i32 0)
  %y = load float, ptr addrspace(7) %fat.ptr, align Y
  ret float %y
}

We want the attributor to rewrite the load so that its alignment becomes min(X, Y).


Full diff: https://github.com/llvm/llvm-project/pull/166914.diff

4 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (+4-1)
  • (modified) llvm/lib/Transforms/IPO/AttributorAttributes.cpp (+34-4)
  • (added) llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll (+26)
  • (modified) llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll (+2-2)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 56ab040706a13..70f2fbae08ada 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1603,7 +1603,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
        &AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID,
        &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID,
        &AANoAliasAddrSpace::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
-       &AAAMDGPUClusterDims::ID});
+       &AAAMDGPUClusterDims::ID, &AAAlign::ID});
 
   AttributorConfig AC(CGUpdater);
   AC.IsClosedWorldModule = Options.IsClosedWorld;
@@ -1657,6 +1657,9 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
         Ptr = RMW->getPointerOperand();
       else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
         Ptr = CmpX->getPointerOperand();
+      else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
+        if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
+          A.getOrCreateAAFor<AAAlign>(IRPosition::value(*II));
 
       if (Ptr) {
         A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index a6ac7610a2c7a..37ff282343889 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -5279,6 +5279,12 @@ struct AAAlignImpl : AAAlign {
 
   /// See AbstractAttribute::initialize(...).
   void initialize(Attributor &A) override {
+    // For make.buffer.rsrc, the alignment strictly equals to the base's
+    // alignment
+    if (Instruction *I = dyn_cast<Instruction>(&getAssociatedValue()))
+      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+        if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
+          return;
     SmallVector<Attribute, 4> Attrs;
     A.getAttrs(getIRPosition(), {Attribute::Alignment}, Attrs);
     for (const Attribute &Attr : Attrs)
@@ -5300,10 +5306,19 @@ struct AAAlignImpl : AAAlign {
     if (isa<ConstantData>(AssociatedValue))
       return ChangeStatus::UNCHANGED;
 
+    // For use of amdgcn.make.buffer.rsrc, the alignment equals to
+    // min(base, load/store)
+    bool IsMakeBufferRsrc = false;
+    if (Instruction *I = dyn_cast<Instruction>(&getAssociatedValue()))
+      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+        if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
+          IsMakeBufferRsrc = true;
     for (const Use &U : AssociatedValue.uses()) {
       if (auto *SI = dyn_cast<StoreInst>(U.getUser())) {
         if (SI->getPointerOperand() == &AssociatedValue)
-          if (SI->getAlign() < getAssumedAlign()) {
+          if (IsMakeBufferRsrc) {
+            SI->setAlignment(std::min(SI->getAlign(), getAssumedAlign()));
+          } else if (SI->getAlign() < getAssumedAlign()) {
             STATS_DECLTRACK(AAAlign, Store,
                             "Number of times alignment added to a store");
             SI->setAlignment(getAssumedAlign());
@@ -5311,14 +5326,18 @@ struct AAAlignImpl : AAAlign {
           }
       } else if (auto *LI = dyn_cast<LoadInst>(U.getUser())) {
         if (LI->getPointerOperand() == &AssociatedValue)
-          if (LI->getAlign() < getAssumedAlign()) {
+          if (IsMakeBufferRsrc) {
+            LI->setAlignment(std::min(LI->getAlign(), getAssumedAlign()));
+          } else if (LI->getAlign() < getAssumedAlign()) {
             LI->setAlignment(getAssumedAlign());
             STATS_DECLTRACK(AAAlign, Load,
                             "Number of times alignment added to a load");
             InstrChanged = ChangeStatus::CHANGED;
           }
       } else if (auto *RMW = dyn_cast<AtomicRMWInst>(U.getUser())) {
-        if (RMW->getPointerOperand() == &AssociatedValue) {
+        if (IsMakeBufferRsrc) {
+          RMW->setAlignment(std::min(RMW->getAlign(), getAssumedAlign()));
+        } else if (RMW->getPointerOperand() == &AssociatedValue) {
           if (RMW->getAlign() < getAssumedAlign()) {
             STATS_DECLTRACK(AAAlign, AtomicRMW,
                             "Number of times alignment added to atomicrmw");
@@ -5328,7 +5347,9 @@ struct AAAlignImpl : AAAlign {
           }
         }
       } else if (auto *CAS = dyn_cast<AtomicCmpXchgInst>(U.getUser())) {
-        if (CAS->getPointerOperand() == &AssociatedValue) {
+        if (IsMakeBufferRsrc) {
+          CAS->setAlignment(std::min(CAS->getAlign(), getAssumedAlign()));
+        } else if (CAS->getPointerOperand() == &AssociatedValue) {
           if (CAS->getAlign() < getAssumedAlign()) {
             STATS_DECLTRACK(AAAlign, AtomicCmpXchg,
                             "Number of times alignment added to cmpxchg");
@@ -5554,6 +5575,15 @@ struct AAAlignCallSiteReturned final
               std::min(this->getAssumedAlign(), Alignment).value());
         break;
       }
+      case Intrinsic::amdgcn_make_buffer_rsrc: {
+        const auto *AlignAA =
+            A.getAAFor<AAAlign>(*this, IRPosition::value(*(II->getOperand(0))),
+                                DepClassTy::REQUIRED);
+        if (AlignAA && AlignAA->isValidState())
+          return clampStateAndIndicateChange<StateType>(
+              this->getState(), AlignAA->getAssumedAlign().value());
+        break;
+      }
       default:
         break;
       }
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll
new file mode 100644
index 0000000000000..8d2bfab09460b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s -o - | FileCheck %s
+
+define float @load_gt_base(ptr align 4 %p) {
+; CHECK-LABEL: define float @load_gt_base(
+; CHECK-SAME: ptr align 4 [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[PTR:%.*]] = call align 4 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p0(ptr align 4 [[P]], i16 0, i64 0, i32 0)
+; CHECK-NEXT:    [[LOADED:%.*]] = load float, ptr addrspace(7) [[PTR]], align 4
+; CHECK-NEXT:    ret float [[LOADED]]
+;
+  %ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p0(ptr %p, i16 0, i64 0, i32 0)
+  %loaded = load float, ptr addrspace(7) %ptr, align 8
+  ret float %loaded
+}
+
+define float @load_lt_base(ptr align 8 %p) {
+; CHECK-LABEL: define float @load_lt_base(
+; CHECK-SAME: ptr align 8 [[P:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[PTR:%.*]] = call align 8 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p0(ptr align 8 [[P]], i16 0, i64 0, i32 0)
+; CHECK-NEXT:    [[LOADED:%.*]] = load float, ptr addrspace(7) [[PTR]], align 4
+; CHECK-NEXT:    ret float [[LOADED]]
+;
+  %ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p0(ptr %p, i16 0, i64 0, i32 0)
+  %loaded = load float, ptr addrspace(7) %ptr, align 4
+  ret float %loaded
+}
diff --git a/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll b/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll
index 1ab607465dbbb..34bbfa8974747 100644
--- a/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll
+++ b/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll
@@ -306,7 +306,7 @@ define amdgpu_kernel void @test_call_untouched_ptr() {
 define amdgpu_kernel void @test_make_buffer(ptr addrspace(1) %ptr) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_make_buffer(
 ; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
-; AMDGCN-NEXT:    [[RSRC:%.*]] = call align 4 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i64 noundef 0, i32 noundef 0) #[[ATTR11:[0-9]+]]
+; AMDGCN-NEXT:    [[RSRC:%.*]] = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i64 noundef 0, i32 noundef 0) #[[ATTR11:[0-9]+]]
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(7) [[RSRC]], align 4
 ; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR7]]
 ; AMDGCN-NEXT:    ret void
@@ -321,7 +321,7 @@ define amdgpu_kernel void @test_make_buffer(ptr addrspace(1) %ptr) {
 define amdgpu_kernel void @test_make_buffer_noalias(ptr addrspace(1) noalias %ptr) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_make_buffer_noalias(
 ; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
-; AMDGCN-NEXT:    [[RSRC:%.*]] = call align 4 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i64 noundef 0, i32 noundef 0) #[[ATTR11]]
+; AMDGCN-NEXT:    [[RSRC:%.*]] = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i64 noundef 0, i32 noundef 0) #[[ATTR11]]
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(7) [[RSRC]], align 4, !invariant.load [[META0]]
 ; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR7]]
 ; AMDGCN-NEXT:    ret void

@Shoreshen Shoreshen requested review from arsenm and krzysz00 November 7, 2025 09:57

@shiltian shiltian left a comment


I don't think we should put the handling of AMDGPU-specific code into the generic code. We could potentially create a class in AMDGPUAttributor, inheriting from the existing ones, dedicated to handling AMDGPU-specific cases.


arsenm commented Nov 7, 2025

I don't think we should put the handling of AMDGPU-specific code into the generic code. We could potentially create a class in AMDGPUAttributor, inheriting from the existing ones, dedicated to handling AMDGPU-specific cases.

AMDGPUAttributor isn't really the place for it either. It's not an AMDGPU-specific attribute.


arsenm commented Nov 7, 2025

The title should make it clear that this is about inferring the alignment, not changing the intrinsic definition.

const auto *AlignAA =
A.getAAFor<AAAlign>(*this, IRPosition::value(*(II->getOperand(0))),
DepClassTy::REQUIRED);
if (AlignAA && AlignAA->isValidState())
Contributor


Does TargetTransformInfo have some kind of alignment propagation already? I thought it did

Contributor Author


Hi @arsenm, I had a look but can't be sure. There are lots of alignment-related functions, but they are mainly used for legality checks and cost computation.

I'm not really familiar with the structure; could you be more specific, so that I have some direction to search?

Thanks


shiltian commented Nov 7, 2025

I don't think we should put the handling of AMDGPU-specific code into the generic code. We could potentially create a class in AMDGPUAttributor, inheriting from the existing ones, dedicated to handling AMDGPU-specific cases.

AMDGPUAttributor isn't really the place for it either. It's not an AMDGPU-specific attribute.

It is an AMDGPU-specific intrinsic.

@llvmbot added the clang label Nov 10, 2025
@krzysz00

I'd like to flag #167553 as related (and to make sure we're on the same page as to what we mean by align on these complex pointers)

@Shoreshen Shoreshen requested review from arsenm and shiltian November 12, 2025 01:02
llvm_unreachable("AAAMDGPUClusterDims is only valid for function position");
}

struct AAAMDGPUMakeBufferRsrcAlign
Contributor


I'd make it something like AAAMDGPUAlign, and then use it to deal with all AMDGPU-related alignments.

Contributor


Also, document the new class.


arsenm commented Nov 12, 2025

It is an AMDGPU-specific intrinsic.

That does not make the transform AMDGPU-specific. This is applying target intrinsic knowledge within a generic attribute. The handling should be directly in AAAlign. I would rank handling it in an AMDGPU-specific Attributor attribute as significantly worse than just hardcoding the target intrinsic in AAAlign. Better would be to have TTI handle the relevant parsing.


arsenm commented Nov 12, 2025

We probably could use the TTI information from #140802 to get the alignment


@krzysz00 krzysz00 left a comment


I'm with @arsenm on "this goes in the generic align handling"

Overall, can we see a few more test cases: a load/store that is underaligned and a function argument with align attributes on it, for example.
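
For instance (illustrative only, not part of the patch), the store side of the clamping could be exercised with something like:

define void @store_gt_base(ptr align 2 %p, float %v) {
  ; Hypothetical extra test: the base is only known align 2, so the store's
  ; claimed align 8 should be reduced to align 2 by amdgpu-attributor.
  %fat = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p0(ptr %p, i16 0, i64 0, i32 0)
  store float %v, ptr addrspace(7) %fat, align 8
  ret void
}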

I'd also like to see (especially after my docs PR lands) a note somewhere in the code that this is a conservatively correct approach to this alignment inference.


shiltian commented Nov 13, 2025

This shouldn't go into generic handling because the target intrinsic handling rules are highly target-dependent, and I don't see a TTI hook improving that. In this case, we expect that when a pointer in AS7 has alignment X and a load has alignment Y, it should be Y = min(X, Y), but in other cases, it could be completely different. For example, a target intrinsic might need to look at a broader or different set of uses to decide, which is very hard to express through TTI.

On the other hand, with the AAAMDGPUAlign I suggested, we can handle all alignment-related logic for AMDGPU ourselves. Some cases might not need special treatment, so we can just use AAAlign; others might need special handling but still depend on AAAlign, which is fine; and some might need other AAs. It would be very flexible.

Lastly, based on the description, this logic doesn't seem to belong in the attributor because it doesn't require iterative inference.

Ptr = CmpX->getPointerOperand();
else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
A.getOrCreateAAFor<AAAMDGPUMakeBufferRsrcAlign>(
Contributor


The creation of the AA is weird here as well. I'd expect that when we want to know the alignment of a load/store instruction and we see that its pointer is a buffer pointer, then we do something. Here it looks like it is completely upside down.

@krzysz00

I'll note that the property we're trying to capture here is "buffer fat pointers have component-wise alignment guarantees". That is, per documentation I just landed, an align(4) access to a buffer fat pointer is a claim that the base pointer and the offset are align(4).

The can't-be-incorrect thing to do here is to clamp the alignment of these accesses to the inferrable alignment of the base pointer.

(You could also go the other way, assume that all the loads and stores are aligned as specified, and infer an alignment for the base pointer from that, but ... that feels like a footgun)
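
To make that concrete, here is a hand-written illustration of the component-wise rule (not from the patch; values chosen for illustration):

define float @component_wise(ptr addrspace(1) align 4 %base) {
  ; %base is only known to be 4-byte aligned, and the offset below is 4.
  ; Even if %base + 4 happened to be 8-byte aligned at runtime, an `align 8`
  ; access through %q would still be an unjustified claim under the
  ; component-wise rule, since neither the base nor the offset is individually
  ; known to be 8-byte aligned. Clamping to the base's known alignment
  ; (align 4 here) is the conservatively correct choice.
  %fat = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %base, i16 0, i64 0, i32 0)
  %q = getelementptr inbounds i8, ptr addrspace(7) %fat, i32 4
  %v = load float, ptr addrspace(7) %q, align 4
  ret float %v
}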


And re target-specific intrinsic handling ... we already put in infrastructure for handling ptrmask in the generic attributor, and part of the motivation for that work was to have a reasonable "slot" into which to put this PR. And this won't be the first or the last time a target intrinsic name shows up in the analyses, no? (See the "does this intrinsic return an aliasing pointer" switch statement, that's got ARM and AMD mentioned by name, for example)

@shiltian

ptrmask is a generic LLVM intrinsic, not a target one. Its semantics are defined by the LLVM LangRef, so no target should have a different interpretation of it; it belongs in the generic attributor. This intrinsic, on the other hand, is not. We could certainly put it into the generic logic and keep all the AMDGPU handling there, but that is not good design in the first place: other targets don't need this code, and they shouldn't have to run a check for whether something is an AMDGPU intrinsic at all. Also, what if in the future we need to pull in the AMDGPU subtarget when handling an intrinsic?


arsenm commented Nov 13, 2025

This shouldn't go into generic handling because the target intrinsic handling rules are highly target-dependent, and I don't see a TTI hook improving that

Not really, the range of what is possible is quite bounded. The mechanics are universal, and then the target information is just how to evaluate within those constraints.
