[AMDGPU] Remove AAInstanceInfo from the AMDGPUAttributor #150232

jmmartinez · 2025-07-23T14:59:03Z

Related to compile-time issue SWDEV-543240 and functional issue
SWDEV-544256

llvmbot · 2025-07-23T14:59:43Z

@llvm/pr-subscribers-backend-amdgpu

Author: Juan Manuel Martinez Caamaño (jmmartinez)

Changes

This reverts commit 1ca9fe6.

Related to compile-time issue SWDEV-543240 and functional issue SWDEV-544256

For the unrelated tests that changed, see #150231

Patch is 34.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150232.diff

8 Files Affected:

(modified) llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (+1-8)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-attributor-accesslist-offsetbins-out-of-sync.ll (+5-10)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll (+1-13)
(modified) llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll (+42-54)
(modified) llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll (+2-3)
(modified) llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll (+14-45)
(modified) llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll (+6-5)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index dedee46a44237..f5c79a9c442e2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1382,20 +1382,13 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
        &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
        &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
        &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
-       &AAUnderlyingObjects::ID, &AANoAliasAddrSpace::ID, &AAAddressSpace::ID,
-       &AAIndirectCallInfo::ID, &AAInstanceInfo::ID});
+       &AAUnderlyingObjects::ID, &AANoAliasAddrSpace::ID, &AAAddressSpace::ID});
 
   AttributorConfig AC(CGUpdater);
   AC.IsClosedWorldModule = Options.IsClosedWorld;
   AC.Allowed = &Allowed;
   AC.IsModulePass = true;
   AC.DefaultInitializeLiveInternals = false;
-  AC.IndirectCalleeSpecializationCallback =
-      [](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
-         Function &Callee, unsigned NumAssumedCallees) {
-        return !AMDGPU::isEntryFunctionCC(Callee.getCallingConv()) &&
-               (NumAssumedCallees <= IndirectCallSpecializationThreshold);
-      };
   AC.IPOAmendableCB = [](const Function &F) {
     return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
   };
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-accesslist-offsetbins-out-of-sync.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-accesslist-offsetbins-out-of-sync.ll
index d58a62408427d..18ec3ab64298b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-accesslist-offsetbins-out-of-sync.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-accesslist-offsetbins-out-of-sync.ll
@@ -14,8 +14,7 @@ define internal fastcc void @foo(ptr %kg) {
 ; CHECK-NEXT:    [[NUM_CLOSURE_I26_I:%.*]] = getelementptr i8, ptr [[KG]], i64 276
 ; CHECK-NEXT:    br label %[[WHILE_COND:.*]]
 ; CHECK:       [[WHILE_COND]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[KG]] to ptr addrspace(5)
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[KG]], align 4
 ; CHECK-NEXT:    [[IDXPROM_I:%.*]] = zext i32 [[TMP1]] to i64
 ; CHECK-NEXT:    switch i32 0, label %[[SW_BB92:.*]] [
 ; CHECK-NEXT:      i32 1, label %[[SW_BB92]]
@@ -23,22 +22,18 @@ define internal fastcc void @foo(ptr %kg) {
 ; CHECK-NEXT:    ]
 ; CHECK:       [[SUBD_TRIANGLE_PATCH_EXIT_I_I35]]:
 ; CHECK-NEXT:    [[ARRAYIDX_I27_I:%.*]] = getelementptr float, ptr [[KG]], i64 [[IDXPROM_I]]
-; CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr [[ARRAYIDX_I27_I]] to ptr addrspace(5)
-; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[TMP2]], align 4
+; CHECK-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX_I27_I]], align 4
 ; CHECK-NEXT:    br label %[[WHILE_COND]]
 ; CHECK:       [[SW_BB92]]:
 ; CHECK-NEXT:    [[INSERT:%.*]] = insertelement <3 x i32> zeroinitializer, i32 [[TMP1]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = bitcast <3 x i32> [[INSERT]] to <3 x float>
 ; CHECK-NEXT:    [[SHFL:%.*]] = shufflevector <3 x float> [[SPLAT_SPLATINSERT_I]], <3 x float> zeroinitializer, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = addrspacecast ptr [[NUM_CLOSURE_I26_I]] to ptr addrspace(5)
-; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(5) [[TMP3]], align 4
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[NUM_CLOSURE_I26_I]], align 4
 ; CHECK-NEXT:    [[IDXPROM_I27_I:%.*]] = sext i32 [[LOAD]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX_I28_I:%.*]] = getelementptr [64 x %struct.ShaderClosure], ptr [[CLOSURE_I25_I]], i64 0, i64 [[IDXPROM_I27_I]]
-; CHECK-NEXT:    [[TMP4:%.*]] = addrspacecast ptr [[ARRAYIDX_I28_I]] to ptr addrspace(5)
-; CHECK-NEXT:    store <4 x float> [[SHFL]], ptr addrspace(5) [[TMP4]], align 16
+; CHECK-NEXT:    store <4 x float> [[SHFL]], ptr [[ARRAYIDX_I28_I]], align 16
 ; CHECK-NEXT:    [[INC_I30_I:%.*]] = or i32 [[LOAD]], 1
-; CHECK-NEXT:    [[TMP5:%.*]] = addrspacecast ptr [[NUM_CLOSURE_I26_I]] to ptr addrspace(5)
-; CHECK-NEXT:    store i32 [[INC_I30_I]], ptr addrspace(5) [[TMP5]], align 4
+; CHECK-NEXT:    store i32 [[INC_I30_I]], ptr [[NUM_CLOSURE_I26_I]], align 4
 ; CHECK-NEXT:    br label %[[WHILE_COND]]
 ;
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
index 181dab8d4ca79..66b6910ff6db5 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
@@ -231,19 +231,7 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
 ; CHECK-LABEL: define amdgpu_kernel void @indirect_calls_none_agpr(
 ; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty
-; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]]
-; CHECK:       2:
-; CHECK-NEXT:    call void @also_empty()
-; CHECK-NEXT:    br label [[TMP6:%.*]]
-; CHECK:       3:
-; CHECK-NEXT:    br i1 true, label [[TMP4:%.*]], label [[TMP5:%.*]]
-; CHECK:       4:
-; CHECK-NEXT:    call void @empty()
-; CHECK-NEXT:    br label [[TMP6]]
-; CHECK:       5:
-; CHECK-NEXT:    unreachable
-; CHECK:       6:
+; CHECK-NEXT:    call void [[FPTR]]()
 ; CHECK-NEXT:    ret void
 ;
   %fptr = select i1 %cond, ptr @empty, ptr @also_empty
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll
index 7f450ed2b9aa4..29d54f11f6b94 100644
--- a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll
+++ b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll
@@ -44,13 +44,13 @@ define void @with_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) {
 ; GFX9-LABEL: define void @with_global_to_flat_addrspacecast(
 ; GFX9-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; GFX9-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META0:![0-9]+]]
 ; GFX9-NEXT:    ret void
 ;
 ; GFX10-LABEL: define void @with_global_to_flat_addrspacecast(
 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; GFX10-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META0:![0-9]+]]
 ; GFX10-NEXT:    ret void
 ;
   %stof = addrspacecast ptr addrspace(1) %ptr to ptr
@@ -62,13 +62,13 @@ define amdgpu_kernel void @with_global_to_flat_addrspacecast_cc_kernel(ptr addrs
 ; GFX9-LABEL: define amdgpu_kernel void @with_global_to_flat_addrspacecast_cc_kernel(
 ; GFX9-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; GFX9-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META0]]
 ; GFX9-NEXT:    ret void
 ;
 ; GFX10-LABEL: define amdgpu_kernel void @with_global_to_flat_addrspacecast_cc_kernel(
 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; GFX10-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META0]]
 ; GFX10-NEXT:    ret void
 ;
   %stof = addrspacecast ptr addrspace(1) %ptr to ptr
@@ -110,13 +110,13 @@ define void @with_region_to_flat_addrspacecast(ptr addrspace(2) %ptr) {
 ; GFX9-LABEL: define void @with_region_to_flat_addrspacecast(
 ; GFX9-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR0]] {
 ; GFX9-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(2) [[PTR]] to ptr
-; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META1:![0-9]+]]
 ; GFX9-NEXT:    ret void
 ;
 ; GFX10-LABEL: define void @with_region_to_flat_addrspacecast(
 ; GFX10-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR0]] {
 ; GFX10-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(2) [[PTR]] to ptr
-; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META1:![0-9]+]]
 ; GFX10-NEXT:    ret void
 ;
   %stof = addrspacecast ptr addrspace(2) %ptr to ptr
@@ -128,13 +128,13 @@ define amdgpu_kernel void @with_region_to_flat_addrspacecast_cc_kernel(ptr addrs
 ; GFX9-LABEL: define amdgpu_kernel void @with_region_to_flat_addrspacecast_cc_kernel(
 ; GFX9-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR0]] {
 ; GFX9-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(2) [[PTR]] to ptr
-; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META1]]
 ; GFX9-NEXT:    ret void
 ;
 ; GFX10-LABEL: define amdgpu_kernel void @with_region_to_flat_addrspacecast_cc_kernel(
 ; GFX10-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR0]] {
 ; GFX10-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(2) [[PTR]] to ptr
-; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META1]]
 ; GFX10-NEXT:    ret void
 ;
   %stof = addrspacecast ptr addrspace(2) %ptr to ptr
@@ -176,13 +176,13 @@ define void @with_group_to_flat_addrspacecast(ptr addrspace(3) %ptr) {
 ; GFX9-LABEL: define void @with_group_to_flat_addrspacecast(
 ; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
 ; GFX9-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr
-; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META2:![0-9]+]]
 ; GFX9-NEXT:    ret void
 ;
 ; GFX10-LABEL: define void @with_group_to_flat_addrspacecast(
 ; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
 ; GFX10-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr
-; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META2:![0-9]+]]
 ; GFX10-NEXT:    ret void
 ;
   %stof = addrspacecast ptr addrspace(3) %ptr to ptr
@@ -194,13 +194,13 @@ define amdgpu_kernel void @with_group_to_flat_addrspacecast_cc_kernel(ptr addrsp
 ; GFX9-LABEL: define amdgpu_kernel void @with_group_to_flat_addrspacecast_cc_kernel(
 ; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
 ; GFX9-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr
-; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META2]]
 ; GFX9-NEXT:    ret void
 ;
 ; GFX10-LABEL: define amdgpu_kernel void @with_group_to_flat_addrspacecast_cc_kernel(
 ; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
 ; GFX10-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr
-; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META2]]
 ; GFX10-NEXT:    ret void
 ;
   %stof = addrspacecast ptr addrspace(3) %ptr to ptr
@@ -242,13 +242,13 @@ define void @with_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) {
 ; GFX9-LABEL: define void @with_constant_to_flat_addrspacecast(
 ; GFX9-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR0]] {
 ; GFX9-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr
-; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META3:![0-9]+]]
 ; GFX9-NEXT:    ret void
 ;
 ; GFX10-LABEL: define void @with_constant_to_flat_addrspacecast(
 ; GFX10-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR0]] {
 ; GFX10-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr
-; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META3:![0-9]+]]
 ; GFX10-NEXT:    ret void
 ;
   %stof = addrspacecast ptr addrspace(4) %ptr to ptr
@@ -260,13 +260,13 @@ define amdgpu_kernel void @with_constant_to_flat_addrspacecast_cc_kernel(ptr add
 ; GFX9-LABEL: define amdgpu_kernel void @with_constant_to_flat_addrspacecast_cc_kernel(
 ; GFX9-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR0]] {
 ; GFX9-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr
-; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META3]]
 ; GFX9-NEXT:    ret void
 ;
 ; GFX10-LABEL: define amdgpu_kernel void @with_constant_to_flat_addrspacecast_cc_kernel(
 ; GFX10-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR0]] {
 ; GFX10-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr
-; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META3]]
 ; GFX10-NEXT:    ret void
 ;
   %stof = addrspacecast ptr addrspace(4) %ptr to ptr
@@ -308,13 +308,13 @@ define void @with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) {
 ; GFX9-LABEL: define void @with_private_to_flat_addrspacecast(
 ; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
 ; GFX9-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
-; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META4:![0-9]+]]
 ; GFX9-NEXT:    ret void
 ;
 ; GFX10-LABEL: define void @with_private_to_flat_addrspacecast(
 ; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
 ; GFX10-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
-; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META4:![0-9]+]]
 ; GFX10-NEXT:    ret void
 ;
   %stof = addrspacecast ptr addrspace(5) %ptr to ptr
@@ -326,13 +326,13 @@ define amdgpu_kernel void @with_private_to_flat_addrspacecast_cc_kernel(ptr addr
 ; GFX9-LABEL: define amdgpu_kernel void @with_private_to_flat_addrspacecast_cc_kernel(
 ; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
 ; GFX9-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
-; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META4]]
 ; GFX9-NEXT:    ret void
 ;
 ; GFX10-LABEL: define amdgpu_kernel void @with_private_to_flat_addrspacecast_cc_kernel(
 ; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
 ; GFX10-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
-; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META4]]
 ; GFX10-NEXT:    ret void
 ;
   %stof = addrspacecast ptr addrspace(5) %ptr to ptr
@@ -530,14 +530,14 @@ define void @with_cast_call_without_private_to_flat_addrspacecast(ptr addrspace(
 ; GFX9-LABEL: define void @with_cast_call_without_private_to_flat_addrspacecast(
 ; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
 ; GFX9-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
-; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META4]]
 ; GFX9-NEXT:    call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
 ; GFX9-NEXT:    ret void
 ;
 ; GFX10-LABEL: define void @with_cast_call_without_private_to_flat_addrspacecast(
 ; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
 ; GFX10-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
-; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META4]]
 ; GFX10-NEXT:    call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
 ; GFX10-NEXT:    ret void
 ;
@@ -551,14 +551,14 @@ define amdgpu_kernel void @with_cast_call_without_private_to_flat_addrspacecast_
 ; GFX9-LABEL: define amdgpu_kernel void @with_cast_call_without_private_to_flat_addrspacecast_cc_kernel(
 ; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
 ; GFX9-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
-; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META4]]
 ; GFX9-NEXT:    call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
 ; GFX9-NEXT:    ret void
 ;
 ; GFX10-LABEL: define amdgpu_kernel void @with_cast_call_without_private_to_flat_addrspacecast_cc_kernel(
 ; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
 ; GFX10-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
-; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META4]]
 ; GFX10-NEXT:    call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
 ; GFX10-NEXT:    ret void
 ;
@@ -572,14 +572,14 @@ define void @with_cast_call_with_private_to_flat_addrspacecast(ptr addrspace(5)
 ; GFX9-LABEL: define void @with_cast_call_with_private_to_flat_addrspacecast(
 ; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
 ; GFX9-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
-; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META4]]
 ; GFX9-NEXT:    call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
 ; GFX9-NEXT:    ret void
 ;
 ; GFX10-LABEL: define void @with_cast_call_with_private_to_flat_addrspacecast(
 ; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
 ; GFX10-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
-; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META4]]
 ; GFX10-NEXT:    call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
 ; GFX10-NEXT:    ret void
 ;
@@ -593,14 +593,14 @@ define amdgpu_kernel void @with_cast_call_with_private_to_flat_addrspacecast_cc_
 ; GFX9-LABEL: define amdgpu_kernel void @with_cast_call_with_private_to_flat_addrspacecast_cc_kernel(
 ; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
 ; GFX9-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
-; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META4]]
 ; GFX9-NEXT:    call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
 ; GFX9-NEXT:    ret void
 ;
 ; GFX10-LABEL: define amdgpu_kernel void @with_cast_call_with_private_to_flat_addrspacecast_cc_kernel(
 ; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
 ; GFX10-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
-; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT:    store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META4]]
 ; GFX10-NEXT:    call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
 ; GFX10-NEXT:    ret void
 ;
@@ -725,37 +725,13 @@ define amdgpu_kernel void @indirect_call_known_callees(i1 %cond) {
 ; GFX9-LABEL: define amdgpu_kernel void @indirect_call_known_callees(
 ; GFX9-SAME: i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] {
 ; GFX9-NEXT:    [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
-; GFX9-NEXT:    [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty
-; GFX9-NEXT:    br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB3:.*]]
-; GFX9:       [[BB2]]:
-; GFX9-NEXT:    call void @also_empty()
-; GFX9-NEXT:    br label %[[BB6:.*]]
-; GFX9:       [[BB3]]:
-; GFX9-NEXT:    br i1 true, label %[[BB4:.*]], label %[[BB5:.*]]
-; GFX9:       [[BB4]]:
-; GFX9-NEXT:    call void @empty()
-; GFX9-NEXT:    br label %[[BB6]]
-; GFX9:       [[BB...
[truncated]

shiltian · 2025-07-23T15:00:04Z

What is the functional issue here?

I'm not sure about the revert. I'm fine with reverting it downstream.

jmmartinez · 2025-07-23T15:01:50Z

@shiltian

I'll file a proper ticket to reapply this.

I haven't checked the root cause of SWDEV-544256 maybe @changpeng has some more info.

I propose to revert this in the meantime while we find a fix for all of this.

changpeng · 2025-07-23T17:39:06Z

SWDEV-544256

SWDEV-544256 is the correctness issue with wrong output. I am not sure about the root cause yet.
If there is no known side-effect of reverting this patch, can we just revert in the downstream branch(es)?
Another option maybe we can introduce a flag to turn it off.

shiltian · 2025-07-23T17:47:52Z

There is a side effect of not using AAInstanceInfo. Other AAs will not work in the best state. However, I'm not sure if that's gonna cause performance regression (not the compile time one). I'd do a full psdb cycle.

FWIW, if AAInstanceInfo causes the compile time issue, we can always just remove it from the filter but still keep AAIndirectCallInfo.

shiltian · 2025-07-23T17:48:48Z

SWDEV-544256 is the correctness issue with wrong output.

Does the issue still exist when AAInstanceInfo is filtered out but still keep the AAIndirectCallInfo?

changpeng · 2025-07-23T18:22:53Z

SWDEV-544256 is the correctness issue with wrong output.

Does the issue still exist when AAInstanceInfo is filtered out but still keep the AAIndirectCallInfo?

NO. The issue disappeared with AAInstanceInfo filtered out

shiltian · 2025-07-23T19:08:01Z

I'm okay with removing AAInstanceInfo for now, but still keeping the AAIndirectCallInfo. It will make the AAs less powerful. Meanwhile, can you also create a ticket (internally) and assign to me? @jmmartinez @changpeng

jmmartinez · 2025-07-24T07:02:18Z

Sounds good to me. I'll update the patch and create the ticket.

Related to compile-time issue SWDEV-543240 and functional issue SWDEV-544256

jmmartinez · 2025-07-24T07:39:24Z

Updated !

shiltian

As the test changes show, it actually makes things worse.

llvm-ci · 2025-07-25T06:36:01Z

LLVM Buildbot has detected a new failure on builder llvm-clang-x86_64-expensive-checks-debian running on gribozavr4 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/16/builds/23186

Here is the relevant piece of the build log for the reference

Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: TableGen/RuntimeLibcallEmitter.td' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
/b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/llvm-tblgen -gen-runtime-libcalls -I /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/TableGen/../../include /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/TableGen/RuntimeLibcallEmitter.td | /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/FileCheck /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/TableGen/RuntimeLibcallEmitter.td # RUN: at line 1
+ /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/llvm-tblgen -gen-runtime-libcalls -I /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/TableGen/../../include /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/TableGen/RuntimeLibcallEmitter.td
+ /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/FileCheck /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/TableGen/RuntimeLibcallEmitter.td
/b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/TableGen/RuntimeLibcallEmitter.td:98:16: error: CHECK-NEXT: expected string not found in input
// CHECK-NEXT: sqrtl_f80 = 7, // sqrtl
               ^
<stdin>:32:23: note: scanning from here
 calloc = 6, // calloc
                      ^
<stdin>:34:2: note: possible intended match here
 sqrtl_f80 = 8, // sqrtl
 ^

Input file: <stdin>
Check file: /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/TableGen/RuntimeLibcallEmitter.td

-dump-input=help explains the following input dump.

Input was:
<<<<<<
           .
           .
           .
          27:  ___memcpy = 1, // ___memcpy 
          28:  ___memset = 2, // ___memset 
          29:  __ashlsi3 = 3, // __ashlsi3 
          30:  __lshrdi3 = 4, // __lshrdi3 
          31:  bzero = 5, // bzero 
          32:  calloc = 6, // calloc 
next:98'0                           X error: no match found
          33:  sqrtl_f128 = 7, // sqrtl 
next:98'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~
          34:  sqrtl_f80 = 8, // sqrtl 
next:98'0     ~~~~~~~~~~~~~~~~~~~~~~~~~
next:98'1      ?                        possible intended match
          35:  NumLibcallImpls = 9 
next:98'0     ~~~~~~~~~~~~~~~~~~~~~
          36: }; 
next:98'0     ~~~
          37: } // End namespace RTLIB 
next:98'0     ~~~~~~~~~~~~~~~~~~~~~~~~~
          38: } // End namespace llvm 
next:98'0     ~~~~~~~~~~~~~~~~~~~~~~~~
          39: #endif 
next:98'0     ~~~~~~~
...

Related to compile-time issue SWDEV-543240 and functional issue SWDEV-544256

llvm#3289)

jmmartinez requested review from shiltian and kzhuravl July 23, 2025 14:59

jmmartinez self-assigned this Jul 23, 2025

llvmbot added the backend:AMDGPU label Jul 23, 2025

jmmartinez requested a review from changpeng July 23, 2025 15:01

jmmartinez force-pushed the revert/aaindirect-call-info branch from fb2299c to 5616fa8 Compare July 23, 2025 15:38

[AMDGPU] Remove AAInstanceInfo from the AMDGPUAttributor

0ea8fd9

Related to compile-time issue SWDEV-543240 and functional issue SWDEV-544256

jmmartinez force-pushed the revert/aaindirect-call-info branch from 5616fa8 to 0ea8fd9 Compare July 24, 2025 07:38

jmmartinez changed the title ~~Revert "Reapply "[Attributor][AMDGPU] Enable AAIndirectCallInfo for AMDAttributor (#100952)""~~ [AMDGPU] Remove AAInstanceInfo from the AMDGPUAttributor Jul 24, 2025

shiltian approved these changes Jul 24, 2025

View reviewed changes

jmmartinez merged commit 862b9ea into llvm:main Jul 24, 2025
9 checks passed

mahesh-attarde pushed a commit to mahesh-attarde/llvm-project that referenced this pull request Jul 28, 2025

[AMDGPU] Remove AAInstanceInfo from the AMDGPUAttributor (llvm#150232)

55b52a9

Related to compile-time issue SWDEV-543240 and functional issue SWDEV-544256

rahulc-gh pushed a commit to ROCm/llvm-project that referenced this pull request Aug 1, 2025

[AMDGPU] Remove AAInstanceInfo from the AMDGPUAttributor (llvm#150232)

c66b0a4

Related to compile-time issue SWDEV-543240 and functional issue SWDEV-544256

rahulc-gh pushed a commit to ROCm/llvm-project that referenced this pull request Aug 1, 2025

[AMDGPU] Remove AAInstanceInfo from the AMDGPUAttributor (llvm#150232) (

82aed4e

llvm#3289)

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AMDGPU] Remove AAInstanceInfo from the AMDGPUAttributor #150232

[AMDGPU] Remove AAInstanceInfo from the AMDGPUAttributor #150232

Uh oh!

jmmartinez commented Jul 23, 2025 •

edited

Loading

Uh oh!

llvmbot commented Jul 23, 2025

Uh oh!

shiltian commented Jul 23, 2025 •

edited

Loading

Uh oh!

jmmartinez commented Jul 23, 2025

Uh oh!

changpeng commented Jul 23, 2025

Uh oh!

shiltian commented Jul 23, 2025

Uh oh!

shiltian commented Jul 23, 2025 •

edited

Loading

Uh oh!

changpeng commented Jul 23, 2025

Uh oh!

shiltian commented Jul 23, 2025

Uh oh!

jmmartinez commented Jul 24, 2025

Uh oh!

jmmartinez commented Jul 24, 2025

Uh oh!

shiltian left a comment

Uh oh!

Uh oh!

llvm-ci commented Jul 25, 2025

Uh oh!

Uh oh!

[AMDGPU] Remove AAInstanceInfo from the AMDGPUAttributor #150232

[AMDGPU] Remove AAInstanceInfo from the AMDGPUAttributor #150232

Uh oh!

Conversation

jmmartinez commented Jul 23, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Jul 23, 2025

Uh oh!

shiltian commented Jul 23, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

jmmartinez commented Jul 23, 2025

Uh oh!

changpeng commented Jul 23, 2025

Uh oh!

shiltian commented Jul 23, 2025

Uh oh!

shiltian commented Jul 23, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

changpeng commented Jul 23, 2025

Uh oh!

shiltian commented Jul 23, 2025

Uh oh!

jmmartinez commented Jul 24, 2025

Uh oh!

jmmartinez commented Jul 24, 2025

Uh oh!

shiltian left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

llvm-ci commented Jul 25, 2025

Uh oh!

Uh oh!

jmmartinez commented Jul 23, 2025 •

edited

Loading

shiltian commented Jul 23, 2025 •

edited

Loading

shiltian commented Jul 23, 2025 •

edited

Loading