diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 82ae5b7edff577..9e4b65709659e3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -347,7 +347,6 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { } bool NeedsQueuePtr = false; - bool HasCall = false; for (Function *Callee : AAEdges.getOptimisticEdges()) { Intrinsic::ID IID = Callee->getIntrinsicID(); if (IID != Intrinsic::not_intrinsic) { @@ -361,7 +360,6 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { continue; } - HasCall = true; const AAAMDAttributes &AAAMD = A.getAAFor( *this, IRPosition::function(*Callee), DepClassTy::REQUIRED); const DenseSet &CalleeAttributes = AAAMD.getAttributes(); @@ -371,20 +369,6 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { AddAttribute(AttrName); } - HasCall |= AAEdges.hasUnknownCallee(); - if (!IsNonEntryFunc && HasCall) - AddAttribute("amdgpu-calls"); - - // Check the function body. - auto CheckAlloca = [&](Instruction &I) { - AddAttribute("amdgpu-stack-objects"); - return false; - }; - - bool UsedAssumedInformation = false; - A.checkForAllInstructions(CheckAlloca, *this, {Instruction::Alloca}, - UsedAssumedInformation); - // If we found that we need amdgpu-queue-ptr, nothing else to do. if (NeedsQueuePtr || Attributes.count("amdgpu-queue-ptr")) { AddAttribute("amdgpu-queue-ptr"); @@ -406,10 +390,12 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { // instructions, try it first. // amdgpu-queue-ptr is not needed if aperture regs is present. - if (!HasApertureRegs) + if (!HasApertureRegs) { + bool UsedAssumedInformation = false; A.checkForAllInstructions(CheckAddrSpaceCasts, *this, {Instruction::AddrSpaceCast}, UsedAssumedInformation); + } // If we found that we need amdgpu-queue-ptr, nothing else to do. if (NeedsQueuePtr) { diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll index d92a0085a0b967..3ebe08381259a7 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll @@ -201,7 +201,7 @@ define void @func_indirect_use_workitem_id_x() #1 { ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_x ; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_workitem_id_x() #[[ATTR10:[0-9]+]] +; ATTRIBUTOR_HSA-NEXT: call void @use_workitem_id_x() #[[ATTR9:[0-9]+]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_workitem_id_x() @@ -216,7 +216,7 @@ define void @kernel_indirect_use_workitem_id_x() #1 { ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workitem_id_x ; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_workitem_id_x() #[[ATTR10]] +; ATTRIBUTOR_HSA-NEXT: call void @use_workitem_id_x() #[[ATTR9]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_workitem_id_x() @@ -231,7 +231,7 @@ define void @func_indirect_use_workitem_id_y() #1 { ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_y ; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_workitem_id_y() #[[ATTR10]] +; ATTRIBUTOR_HSA-NEXT: call void @use_workitem_id_y() #[[ATTR9]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_workitem_id_y() @@ -246,7 +246,7 @@ define void @func_indirect_use_workitem_id_z() #1 { ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_z ; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_workitem_id_z() #[[ATTR10]] +; ATTRIBUTOR_HSA-NEXT: call void @use_workitem_id_z() #[[ATTR9]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_workitem_id_z() @@ -261,7 +261,7 @@ define void @func_indirect_use_workgroup_id_x() #1 { ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_x ; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_x() #[[ATTR10]] +; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_x() #[[ATTR9]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_workgroup_id_x() @@ -276,7 +276,7 @@ define void @kernel_indirect_use_workgroup_id_x() #1 { ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workgroup_id_x ; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_x() #[[ATTR10]] +; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_x() #[[ATTR9]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_workgroup_id_x() @@ -291,7 +291,7 @@ define void @func_indirect_use_workgroup_id_y() #1 { ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_y ; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_y() #[[ATTR10]] +; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_y() #[[ATTR9]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_workgroup_id_y() @@ -306,7 +306,7 @@ define void @func_indirect_use_workgroup_id_z() #1 { ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_z ; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_z() #[[ATTR10]] +; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_z() #[[ATTR9]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_workgroup_id_z() @@ -321,7 +321,7 @@ define void @func_indirect_indirect_use_workgroup_id_y() #1 { ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_indirect_use_workgroup_id_y ; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { -; ATTRIBUTOR_HSA-NEXT: call void @func_indirect_use_workgroup_id_y() #[[ATTR10]] +; ATTRIBUTOR_HSA-NEXT: call void @func_indirect_use_workgroup_id_y() #[[ATTR9]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @func_indirect_use_workgroup_id_y() @@ -336,7 +336,7 @@ define void @indirect_x2_use_workgroup_id_y() #1 { ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_x2_use_workgroup_id_y ; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { -; ATTRIBUTOR_HSA-NEXT: call void @func_indirect_indirect_use_workgroup_id_y() #[[ATTR10]] +; ATTRIBUTOR_HSA-NEXT: call void @func_indirect_indirect_use_workgroup_id_y() #[[ATTR9]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @func_indirect_indirect_use_workgroup_id_y() @@ -351,7 +351,7 @@ define void @func_indirect_use_dispatch_ptr() #1 { ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr ; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_dispatch_ptr() #[[ATTR10]] +; ATTRIBUTOR_HSA-NEXT: call void @use_dispatch_ptr() #[[ATTR9]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_dispatch_ptr() @@ -366,7 +366,7 @@ define void @func_indirect_use_queue_ptr() #1 { ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_queue_ptr ; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_queue_ptr() #[[ATTR10]] +; ATTRIBUTOR_HSA-NEXT: call void @use_queue_ptr() #[[ATTR9]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_queue_ptr() @@ -381,7 +381,7 @@ define void @func_indirect_use_dispatch_id() #1 { ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_id ; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_dispatch_id() #[[ATTR10]] +; ATTRIBUTOR_HSA-NEXT: call void @use_dispatch_id() #[[ATTR9]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_dispatch_id() @@ -413,7 +413,7 @@ define void @recursive_use_workitem_id_y() #1 { ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@recursive_use_workitem_id_y ; ATTRIBUTOR_HSA-SAME: () #[[ATTR3:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: store volatile i32 undef, i32 addrspace(1)* undef, align 4 -; ATTRIBUTOR_HSA-NEXT: call void @recursive_use_workitem_id_y() #[[ATTR11:[0-9]+]] +; ATTRIBUTOR_HSA-NEXT: call void @recursive_use_workitem_id_y() #[[ATTR10:[0-9]+]] ; ATTRIBUTOR_HSA-NEXT: unreachable ; %val = call i32 @llvm.amdgcn.workitem.id.y() @@ -430,7 +430,7 @@ define void @call_recursive_use_workitem_id_y() #1 { ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@call_recursive_use_workitem_id_y ; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { -; ATTRIBUTOR_HSA-NEXT: call void @recursive_use_workitem_id_y() #[[ATTR11]] +; ATTRIBUTOR_HSA-NEXT: call void @recursive_use_workitem_id_y() #[[ATTR10]] ; ATTRIBUTOR_HSA-NEXT: unreachable ; call void @recursive_use_workitem_id_y() @@ -486,7 +486,7 @@ define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %p ; ATTRIBUTOR_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR5]] { ; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)* ; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4 -; ATTRIBUTOR_HSA-NEXT: call void @func_indirect_use_queue_ptr() #[[ATTR10]] +; ATTRIBUTOR_HSA-NEXT: call void @func_indirect_use_queue_ptr() #[[ATTR9]] ; ATTRIBUTOR_HSA-NEXT: ret void ; %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)* @@ -503,7 +503,7 @@ define void @indirect_use_group_to_flat_addrspacecast() #1 { ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast ; ATTRIBUTOR_HSA-SAME: () #[[ATTR4]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_group_to_flat_addrspacecast(i32 addrspace(3)* null) #[[ATTR10]] +; ATTRIBUTOR_HSA-NEXT: call void @use_group_to_flat_addrspacecast(i32 addrspace(3)* null) #[[ATTR9]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_group_to_flat_addrspacecast(i32 addrspace(3)* null) @@ -518,7 +518,7 @@ define void @indirect_use_group_to_flat_addrspacecast_gfx9() #1 { ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_gfx9 ; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* null) #[[ATTR10]] +; ATTRIBUTOR_HSA-NEXT: call void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* null) #[[ATTR9]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* null) @@ -533,7 +533,7 @@ define void @indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9() #1 { ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9 ; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* null) #[[ATTR10]] +; ATTRIBUTOR_HSA-NEXT: call void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* null) #[[ATTR9]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* null) @@ -564,7 +564,7 @@ define void @func_indirect_use_kernarg_segment_ptr() #1 { ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_kernarg_segment_ptr ; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_kernarg_segment_ptr() #[[ATTR10]] +; ATTRIBUTOR_HSA-NEXT: call void @use_kernarg_segment_ptr() #[[ATTR9]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_kernarg_segment_ptr() @@ -613,7 +613,7 @@ define void @func_indirect_use_implicitarg_ptr() #1 { ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_implicitarg_ptr ; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_implicitarg_ptr() #[[ATTR10]] +; ATTRIBUTOR_HSA-NEXT: call void @use_implicitarg_ptr() #[[ATTR9]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_implicitarg_ptr() @@ -639,7 +639,7 @@ define void @func_call_external() #3 { ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_external ; ATTRIBUTOR_HSA-SAME: () #[[ATTR6:[0-9]+]] { -; ATTRIBUTOR_HSA-NEXT: call void @external.func() #[[ATTR10]] +; ATTRIBUTOR_HSA-NEXT: call void @external.func() #[[ATTR9]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @external.func() @@ -667,7 +667,7 @@ define void @func_call_asm() #3 { ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_asm ; ATTRIBUTOR_HSA-SAME: () #[[ATTR7]] { -; ATTRIBUTOR_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR10]] +; ATTRIBUTOR_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR9]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void asm sideeffect "", ""() #3 @@ -681,8 +681,8 @@ define amdgpu_kernel void @kern_call_external() #3 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_external -; ATTRIBUTOR_HSA-SAME: () #[[ATTR8:[0-9]+]] { -; ATTRIBUTOR_HSA-NEXT: call void @external.func() #[[ATTR10]] +; ATTRIBUTOR_HSA-SAME: () #[[ATTR6]] { +; ATTRIBUTOR_HSA-NEXT: call void @external.func() #[[ATTR9]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @external.func() @@ -711,7 +711,7 @@ define i32 @use_dispatch_ptr_ret_type() #1 { ; AKF_HSA-NEXT: ret i32 0 ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr_ret_type -; ATTRIBUTOR_HSA-SAME: () #[[ATTR9:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR8:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: store volatile i8 addrspace(4)* undef, i8 addrspace(4)* addrspace(1)* undef, align 8 ; ATTRIBUTOR_HSA-NEXT: ret i32 0 ; @@ -728,7 +728,7 @@ define float @func_indirect_use_dispatch_ptr_constexpr_cast_func() #1 { ; AKF_HSA-NEXT: ret float [[FADD]] ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr_constexpr_cast_func -; ATTRIBUTOR_HSA-SAME: () #[[ATTR9]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR8]] { ; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float bitcast (i32 ()* @use_dispatch_ptr_ret_type to float ()*)() ; ATTRIBUTOR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 ; ATTRIBUTOR_HSA-NEXT: ret float [[FADD]] @@ -766,7 +766,7 @@ define float @func_extern_call() #3 { ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_extern_call ; ATTRIBUTOR_HSA-SAME: () #[[ATTR6]] { -; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float @extern() #[[ATTR10]] +; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float @extern() #[[ATTR9]] ; ATTRIBUTOR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 ; ATTRIBUTOR_HSA-NEXT: ret float [[FADD]] ; @@ -850,8 +850,7 @@ attributes #3 = { nounwind } ; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "target-cpu"="gfx900" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind } -; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { noreturn nounwind } +; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind } +; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { noreturn nounwind } ;. diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll index b8cb070a0c1591..c9e115cf7f90e1 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll @@ -448,8 +448,9 @@ define amdgpu_kernel void @use_alloca() #1 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_alloca -; ATTRIBUTOR_HSA-SAME: () #[[ATTR12:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { ; ATTRIBUTOR_HSA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5) +; ATTRIBUTOR_HSA-NEXT: store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; %alloca = alloca i32, addrspace(5) @@ -468,11 +469,12 @@ define amdgpu_kernel void @use_alloca_non_entry_block() #1 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_alloca_non_entry_block -; ATTRIBUTOR_HSA-SAME: () #[[ATTR12]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { ; ATTRIBUTOR_HSA-NEXT: entry: ; ATTRIBUTOR_HSA-NEXT: br label [[BB:%.*]] ; ATTRIBUTOR_HSA: bb: ; ATTRIBUTOR_HSA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5) +; ATTRIBUTOR_HSA-NEXT: store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; entry: @@ -492,8 +494,9 @@ define void @use_alloca_func() #1 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_alloca_func -; ATTRIBUTOR_HSA-SAME: () #[[ATTR12]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { ; ATTRIBUTOR_HSA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5) +; ATTRIBUTOR_HSA-NEXT: store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; %alloca = alloca i32, addrspace(5) @@ -531,5 +534,4 @@ attributes #1 = { nounwind } ; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-dispatch-ptr" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-queue-ptr" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-stack-objects" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll index 5ea0579c984ab8..94b5f80f053c55 100644 --- a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll @@ -20,7 +20,7 @@ define internal void @direct() { ; AKF_GCN-NEXT: ret void ; ; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@direct -; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] { +; ATTRIBUTOR_GCN-SAME: () #[[ATTR0]] { ; ATTRIBUTOR_GCN-NEXT: call void @indirect() ; ATTRIBUTOR_GCN-NEXT: ret void ; @@ -32,10 +32,15 @@ define internal void @direct() { } define amdgpu_kernel void @test_direct_indirect_call() { -; GCN-LABEL: define {{[^@]+}}@test_direct_indirect_call -; GCN-SAME: () #[[ATTR2:[0-9]+]] { -; GCN-NEXT: call void @direct() -; GCN-NEXT: ret void +; AKF_GCN-LABEL: define {{[^@]+}}@test_direct_indirect_call +; AKF_GCN-SAME: () #[[ATTR2:[0-9]+]] { +; AKF_GCN-NEXT: call void @direct() +; AKF_GCN-NEXT: ret void +; +; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_direct_indirect_call +; ATTRIBUTOR_GCN-SAME: () #[[ATTR0]] { +; ATTRIBUTOR_GCN-NEXT: call void @direct() +; ATTRIBUTOR_GCN-NEXT: ret void ; call void @direct() ret void @@ -46,6 +51,4 @@ define amdgpu_kernel void @test_direct_indirect_call() { ; AKF_GCN: attributes #[[ATTR2]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } ;. ; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_GCN: attributes #[[ATTR2]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll index db37347414bcdf..2f932b90ccdd31 100644 --- a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll +++ b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll @@ -23,8 +23,11 @@ define amdgpu_kernel void @test_simple_indirect_call() #0 { ; AKF_GCN-NEXT: ret void ; ; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call -; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] { -; ATTRIBUTOR_GCN-NEXT: call void @indirect() +; ATTRIBUTOR_GCN-SAME: () #[[ATTR0]] { +; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8 +; ATTRIBUTOR_GCN-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8 +; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR]], align 8 +; ATTRIBUTOR_GCN-NEXT: call void [[FP]]() ; ATTRIBUTOR_GCN-NEXT: ret void ; ; CHECK-LABEL: define {{[^@]+}}@test_simple_indirect_call @@ -48,5 +51,4 @@ attributes #0 = { "amdgpu-dispatch-id" } ; AKF_GCN: attributes #[[ATTR1]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } ;. ; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll index 55264563d5a63b..756c48f69ac881 100644 --- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll @@ -53,8 +53,12 @@ define amdgpu_kernel void @test_simple_indirect_call() { ; AKF_GCN-NEXT: ret void ; ; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call -; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] { -; ATTRIBUTOR_GCN-NEXT: call void @indirect() +; ATTRIBUTOR_GCN-SAME: () #[[ATTR0]] { +; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8, addrspace(5) +; ATTRIBUTOR_GCN-NEXT: [[FPTR_CAST:%.*]] = addrspacecast void ()* addrspace(5)* [[FPTR]] to void ()** +; ATTRIBUTOR_GCN-NEXT: store void ()* @indirect, void ()** [[FPTR_CAST]], align 8 +; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR_CAST]], align 8 +; ATTRIBUTOR_GCN-NEXT: call void [[FP]]() ; ATTRIBUTOR_GCN-NEXT: ret void ; %fptr = alloca void()*, addrspace(5) @@ -70,5 +74,4 @@ define amdgpu_kernel void @test_simple_indirect_call() { ; AKF_GCN: attributes #[[ATTR1]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } ;. ; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll index 84a96d9c8a5ac1..6bef52f5d726cf 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll @@ -41,6 +41,6 @@ attributes #0 = { "uniform-work-group-size"="true" } ; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" } ;. ; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind writeonly "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind writeonly } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll index fe7d1d942e62b3..a46b749526cefb 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll @@ -148,7 +148,7 @@ attributes #0 = { "uniform-work-group-size"="true" } ;. ; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="true" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { "uniform-work-group-size"="true" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nofree nosync nounwind "uniform-work-group-size"="true" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn writeonly "uniform-work-group-size"="true" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll index 24121882d688c7..e9991dc121c6be 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll @@ -63,6 +63,6 @@ attributes #2 = { "uniform-work-group-size"="true" } ; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" } ;. ; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind writeonly "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="true" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind writeonly } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll index 23e2759ff2f6d9..d22f1e2a977b6a 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll @@ -59,7 +59,7 @@ attributes #1 = { "uniform-work-group-size"="true" } ; AKF_CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="false" } ;. ; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind writeonly "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="true" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { "uniform-work-group-size"="false" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind writeonly } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll index 9dc89708aad7c2..04ee89da28d201 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll @@ -73,9 +73,9 @@ attributes #2 = { "uniform-work-group-size"="true" } ; AKF_CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="true" } ;. ; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind writeonly "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { "amdgpu-calls" "uniform-work-group-size"="true" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { "uniform-work-group-size"="true" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nounwind writeonly } ; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll index 2397c2b0e3f81b..c6e276c9d11b4d 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll @@ -150,7 +150,7 @@ attributes #1 = { "uniform-work-group-size"="true" } ;. ; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind readnone "uniform-work-group-size"="false" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nofree nosync nounwind readnone "uniform-work-group-size"="true" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="true" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { "uniform-work-group-size"="true" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind readnone } ; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nofree nounwind readnone } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll index 3f99e234b7205b..9aee29be8dc200 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll @@ -83,6 +83,6 @@ attributes #0 = { "uniform-work-group-size"="false" } ; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" } ;. ; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind writeonly "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind writeonly } ;.