-
Notifications
You must be signed in to change notification settings - Fork 14.8k
[AMDGPU] Remove AAInstanceInfo from the AMDGPUAttributor #150232
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Juan Manuel Martinez Caamaño (jmmartinez) ChangesThis reverts commit 1ca9fe6. Related to compile-time issue SWDEV-543240 and functional issue SWDEV-544256 For the unrelated tests that changed, see #150231 Patch is 34.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150232.diff 8 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index dedee46a44237..f5c79a9c442e2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1382,20 +1382,13 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
&AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
&AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
&AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
- &AAUnderlyingObjects::ID, &AANoAliasAddrSpace::ID, &AAAddressSpace::ID,
- &AAIndirectCallInfo::ID, &AAInstanceInfo::ID});
+ &AAUnderlyingObjects::ID, &AANoAliasAddrSpace::ID, &AAAddressSpace::ID});
AttributorConfig AC(CGUpdater);
AC.IsClosedWorldModule = Options.IsClosedWorld;
AC.Allowed = &Allowed;
AC.IsModulePass = true;
AC.DefaultInitializeLiveInternals = false;
- AC.IndirectCalleeSpecializationCallback =
- [](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
- Function &Callee, unsigned NumAssumedCallees) {
- return !AMDGPU::isEntryFunctionCC(Callee.getCallingConv()) &&
- (NumAssumedCallees <= IndirectCallSpecializationThreshold);
- };
AC.IPOAmendableCB = [](const Function &F) {
return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
};
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-accesslist-offsetbins-out-of-sync.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-accesslist-offsetbins-out-of-sync.ll
index d58a62408427d..18ec3ab64298b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-accesslist-offsetbins-out-of-sync.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-accesslist-offsetbins-out-of-sync.ll
@@ -14,8 +14,7 @@ define internal fastcc void @foo(ptr %kg) {
; CHECK-NEXT: [[NUM_CLOSURE_I26_I:%.*]] = getelementptr i8, ptr [[KG]], i64 276
; CHECK-NEXT: br label %[[WHILE_COND:.*]]
; CHECK: [[WHILE_COND]]:
-; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[KG]] to ptr addrspace(5)
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[KG]], align 4
; CHECK-NEXT: [[IDXPROM_I:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-NEXT: switch i32 0, label %[[SW_BB92:.*]] [
; CHECK-NEXT: i32 1, label %[[SW_BB92]]
@@ -23,22 +22,18 @@ define internal fastcc void @foo(ptr %kg) {
; CHECK-NEXT: ]
; CHECK: [[SUBD_TRIANGLE_PATCH_EXIT_I_I35]]:
; CHECK-NEXT: [[ARRAYIDX_I27_I:%.*]] = getelementptr float, ptr [[KG]], i64 [[IDXPROM_I]]
-; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[ARRAYIDX_I27_I]] to ptr addrspace(5)
-; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[TMP2]], align 4
+; CHECK-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX_I27_I]], align 4
; CHECK-NEXT: br label %[[WHILE_COND]]
; CHECK: [[SW_BB92]]:
; CHECK-NEXT: [[INSERT:%.*]] = insertelement <3 x i32> zeroinitializer, i32 [[TMP1]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = bitcast <3 x i32> [[INSERT]] to <3 x float>
; CHECK-NEXT: [[SHFL:%.*]] = shufflevector <3 x float> [[SPLAT_SPLATINSERT_I]], <3 x float> zeroinitializer, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[NUM_CLOSURE_I26_I]] to ptr addrspace(5)
-; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(5) [[TMP3]], align 4
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[NUM_CLOSURE_I26_I]], align 4
; CHECK-NEXT: [[IDXPROM_I27_I:%.*]] = sext i32 [[LOAD]] to i64
; CHECK-NEXT: [[ARRAYIDX_I28_I:%.*]] = getelementptr [64 x %struct.ShaderClosure], ptr [[CLOSURE_I25_I]], i64 0, i64 [[IDXPROM_I27_I]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ARRAYIDX_I28_I]] to ptr addrspace(5)
-; CHECK-NEXT: store <4 x float> [[SHFL]], ptr addrspace(5) [[TMP4]], align 16
+; CHECK-NEXT: store <4 x float> [[SHFL]], ptr [[ARRAYIDX_I28_I]], align 16
; CHECK-NEXT: [[INC_I30_I:%.*]] = or i32 [[LOAD]], 1
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[NUM_CLOSURE_I26_I]] to ptr addrspace(5)
-; CHECK-NEXT: store i32 [[INC_I30_I]], ptr addrspace(5) [[TMP5]], align 4
+; CHECK-NEXT: store i32 [[INC_I30_I]], ptr [[NUM_CLOSURE_I26_I]], align 4
; CHECK-NEXT: br label %[[WHILE_COND]]
;
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
index 181dab8d4ca79..66b6910ff6db5 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
@@ -231,19 +231,7 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
; CHECK-LABEL: define amdgpu_kernel void @indirect_calls_none_agpr(
; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
-; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty
-; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]]
-; CHECK: 2:
-; CHECK-NEXT: call void @also_empty()
-; CHECK-NEXT: br label [[TMP6:%.*]]
-; CHECK: 3:
-; CHECK-NEXT: br i1 true, label [[TMP4:%.*]], label [[TMP5:%.*]]
-; CHECK: 4:
-; CHECK-NEXT: call void @empty()
-; CHECK-NEXT: br label [[TMP6]]
-; CHECK: 5:
-; CHECK-NEXT: unreachable
-; CHECK: 6:
+; CHECK-NEXT: call void [[FPTR]]()
; CHECK-NEXT: ret void
;
%fptr = select i1 %cond, ptr @empty, ptr @also_empty
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll
index 7f450ed2b9aa4..29d54f11f6b94 100644
--- a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll
+++ b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll
@@ -44,13 +44,13 @@ define void @with_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) {
; GFX9-LABEL: define void @with_global_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META0:![0-9]+]]
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @with_global_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META0:![0-9]+]]
; GFX10-NEXT: ret void
;
%stof = addrspacecast ptr addrspace(1) %ptr to ptr
@@ -62,13 +62,13 @@ define amdgpu_kernel void @with_global_to_flat_addrspacecast_cc_kernel(ptr addrs
; GFX9-LABEL: define amdgpu_kernel void @with_global_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META0]]
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @with_global_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META0]]
; GFX10-NEXT: ret void
;
%stof = addrspacecast ptr addrspace(1) %ptr to ptr
@@ -110,13 +110,13 @@ define void @with_region_to_flat_addrspacecast(ptr addrspace(2) %ptr) {
; GFX9-LABEL: define void @with_region_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(2) [[PTR]] to ptr
-; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META1:![0-9]+]]
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @with_region_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(2) [[PTR]] to ptr
-; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META1:![0-9]+]]
; GFX10-NEXT: ret void
;
%stof = addrspacecast ptr addrspace(2) %ptr to ptr
@@ -128,13 +128,13 @@ define amdgpu_kernel void @with_region_to_flat_addrspacecast_cc_kernel(ptr addrs
; GFX9-LABEL: define amdgpu_kernel void @with_region_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(2) [[PTR]] to ptr
-; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META1]]
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @with_region_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(2) [[PTR]] to ptr
-; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META1]]
; GFX10-NEXT: ret void
;
%stof = addrspacecast ptr addrspace(2) %ptr to ptr
@@ -176,13 +176,13 @@ define void @with_group_to_flat_addrspacecast(ptr addrspace(3) %ptr) {
; GFX9-LABEL: define void @with_group_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr
-; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META2:![0-9]+]]
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @with_group_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr
-; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META2:![0-9]+]]
; GFX10-NEXT: ret void
;
%stof = addrspacecast ptr addrspace(3) %ptr to ptr
@@ -194,13 +194,13 @@ define amdgpu_kernel void @with_group_to_flat_addrspacecast_cc_kernel(ptr addrsp
; GFX9-LABEL: define amdgpu_kernel void @with_group_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr
-; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META2]]
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @with_group_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr
-; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META2]]
; GFX10-NEXT: ret void
;
%stof = addrspacecast ptr addrspace(3) %ptr to ptr
@@ -242,13 +242,13 @@ define void @with_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) {
; GFX9-LABEL: define void @with_constant_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr
-; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META3:![0-9]+]]
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @with_constant_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr
-; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META3:![0-9]+]]
; GFX10-NEXT: ret void
;
%stof = addrspacecast ptr addrspace(4) %ptr to ptr
@@ -260,13 +260,13 @@ define amdgpu_kernel void @with_constant_to_flat_addrspacecast_cc_kernel(ptr add
; GFX9-LABEL: define amdgpu_kernel void @with_constant_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr
-; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META3]]
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @with_constant_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr
-; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META3]]
; GFX10-NEXT: ret void
;
%stof = addrspacecast ptr addrspace(4) %ptr to ptr
@@ -308,13 +308,13 @@ define void @with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) {
; GFX9-LABEL: define void @with_private_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
-; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META4:![0-9]+]]
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @with_private_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
-; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META4:![0-9]+]]
; GFX10-NEXT: ret void
;
%stof = addrspacecast ptr addrspace(5) %ptr to ptr
@@ -326,13 +326,13 @@ define amdgpu_kernel void @with_private_to_flat_addrspacecast_cc_kernel(ptr addr
; GFX9-LABEL: define amdgpu_kernel void @with_private_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
-; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META4]]
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @with_private_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
-; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META4]]
; GFX10-NEXT: ret void
;
%stof = addrspacecast ptr addrspace(5) %ptr to ptr
@@ -530,14 +530,14 @@ define void @with_cast_call_without_private_to_flat_addrspacecast(ptr addrspace(
; GFX9-LABEL: define void @with_cast_call_without_private_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
-; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META4]]
; GFX9-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @with_cast_call_without_private_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
-; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META4]]
; GFX10-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX10-NEXT: ret void
;
@@ -551,14 +551,14 @@ define amdgpu_kernel void @with_cast_call_without_private_to_flat_addrspacecast_
; GFX9-LABEL: define amdgpu_kernel void @with_cast_call_without_private_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
-; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META4]]
; GFX9-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @with_cast_call_without_private_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
-; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META4]]
; GFX10-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX10-NEXT: ret void
;
@@ -572,14 +572,14 @@ define void @with_cast_call_with_private_to_flat_addrspacecast(ptr addrspace(5)
; GFX9-LABEL: define void @with_cast_call_with_private_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
-; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META4]]
; GFX9-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @with_cast_call_with_private_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
-; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META4]]
; GFX10-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX10-NEXT: ret void
;
@@ -593,14 +593,14 @@ define amdgpu_kernel void @with_cast_call_with_private_to_flat_addrspacecast_cc_
; GFX9-LABEL: define amdgpu_kernel void @with_cast_call_with_private_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
-; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META4]]
; GFX9-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @with_cast_call_with_private_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
-; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META4]]
; GFX10-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX10-NEXT: ret void
;
@@ -725,37 +725,13 @@ define amdgpu_kernel void @indirect_call_known_callees(i1 %cond) {
; GFX9-LABEL: define amdgpu_kernel void @indirect_call_known_callees(
; GFX9-SAME: i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] {
; GFX9-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
-; GFX9-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty
-; GFX9-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB3:.*]]
-; GFX9: [[BB2]]:
-; GFX9-NEXT: call void @also_empty()
-; GFX9-NEXT: br label %[[BB6:.*]]
-; GFX9: [[BB3]]:
-; GFX9-NEXT: br i1 true, label %[[BB4:.*]], label %[[BB5:.*]]
-; GFX9: [[BB4]]:
-; GFX9-NEXT: call void @empty()
-; GFX9-NEXT: br label %[[BB6]]
-; GFX9: [[BB...
[truncated]
|
What is the functional issue here? I'm not sure about the revert. I'm fine with reverting it downstream. |
I'll file a proper ticket to reapply this. I haven't checked the root cause of SWDEV-544256 maybe @changpeng has some more info. I propose to revert this in the meantime while we find a fix for all of this. |
fb2299c
to
5616fa8
Compare
SWDEV-544256 is the correctness issue with wrong output. I am not sure about the root cause yet. |
There is a side effect of not using FWIW, if |
Does the issue still exist when |
NO. The issue disappeared with |
I'm okay with removing |
Sounds good to me. I'll update the patch and create the ticket. |
Related to compile-time issue SWDEV-543240 and functional issue SWDEV-544256
5616fa8
to
0ea8fd9
Compare
Updated ! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As the test changes show, it actually makes things worse.
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/16/builds/23186 Here is the relevant piece of the build log for the reference
|
Related to compile-time issue SWDEV-543240 and functional issue SWDEV-544256
Related to compile-time issue SWDEV-543240 and functional issue SWDEV-544256
Related to compile-time issue SWDEV-543240 and functional issue
SWDEV-544256