diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index ee8852903eda52..aec170ae557066 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -9520,15 +9520,6 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes( if (NumVGPR != 0) F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR)); } - - if (IsHIPKernel) { - // FIXME: This is a dirty, dirty hack to fix bot failures at -O0 and should - // be removed. The HIP runtime currently fails to handle the case where one - // of these fields fails to optimize out. The runtime should tolerate all - // requested implicit inputs regardless of language. - F->addFnAttr("amdgpu-no-default-queue"); - F->addFnAttr("amdgpu-no-completion-action"); - } } void AMDGPUTargetCodeGenInfo::setTargetAttributes( diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip index b4f4a620195676..37e0b988e79965 100644 --- a/clang/test/CodeGenHIP/default-attributes.hip +++ b/clang/test/CodeGenHIP/default-attributes.hip @@ -41,7 +41,8 @@ __global__ void kernel() { } //. // OPTNONE: attributes #0 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// OPTNONE: attributes #1 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } +// OPTNONE: attributes #1 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } //. // OPT: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } +// OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } +//. diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index e26d4be3c109d7..7e5c0c0be14e17 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -982,15 +982,6 @@ The AMDGPU backend supports the following LLVM IR attributes. "amdgpu-no-multigrid-sync-arg" Similar to amdgpu-no-implicitarg-ptr, except specific to the implicit kernel argument that holds the multigrid synchronization pointer. If this attribute is absent, then the amdgpu-no-implicitarg-ptr is also removed. - - "amdgpu-no-default-queue" Similar to amdgpu-no-implicitarg-ptr, except specific to the implicit - kernel argument that holds the default queue pointer. If this - attribute is absent, then the amdgpu-no-implicitarg-ptr is also removed. - - "amdgpu-no-completion-action" Similar to amdgpu-no-implicitarg-ptr, except specific to the implicit - kernel argument that holds the completion action pointer. If this - attribute is absent, then the amdgpu-no-implicitarg-ptr is also removed. - ======================================= ========================================================== .. _amdgpu-elf-code-object: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def index bacc8e4e821e5d..c7a060c5db5b71 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def @@ -28,7 +28,5 @@ AMDGPU_ATTRIBUTE(WORKITEM_ID_X, "amdgpu-no-workitem-id-x") AMDGPU_ATTRIBUTE(WORKITEM_ID_Y, "amdgpu-no-workitem-id-y") AMDGPU_ATTRIBUTE(WORKITEM_ID_Z, "amdgpu-no-workitem-id-z") AMDGPU_ATTRIBUTE(LDS_KERNEL_ID, "amdgpu-no-lds-kernel-id") -AMDGPU_ATTRIBUTE(DEFAULT_QUEUE, "amdgpu-no-default-queue") -AMDGPU_ATTRIBUTE(COMPLETION_ACTION, "amdgpu-no-completion-action") #undef AMDGPU_ATTRIBUTE diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index f7298b59f0b909..6feceb0b42e617 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -223,10 +223,9 @@ class AMDGPUInformationCache : public InformationCache { DenseMap ConstantStatus; }; -struct AAAMDAttributes - : public StateWrapper, - AbstractAttribute> { - using Base = StateWrapper, +struct AAAMDAttributes : public StateWrapper< + BitIntegerState, AbstractAttribute> { + using Base = StateWrapper, AbstractAttribute>; AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {} @@ -469,12 +468,6 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { removeAssumedBits(LDS_KERNEL_ID); } - if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A)) - removeAssumedBits(DEFAULT_QUEUE); - - if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A)) - removeAssumedBits(COMPLETION_ACTION); - return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED; } @@ -569,18 +562,6 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { return funcRetrievesImplicitKernelArg(A, Range); } - bool funcRetrievesDefaultQueue(Attributor &A) { - auto Pos = llvm::AMDGPU::getDefaultQueueImplicitArgPosition(); - AA::RangeTy Range(Pos, 8); - return funcRetrievesImplicitKernelArg(A, Range); - } - - bool funcRetrievesCompletionAction(Attributor &A) { - auto Pos = llvm::AMDGPU::getCompletionActionImplicitArgPosition(); - AA::RangeTy Range(Pos, 8); - return funcRetrievesImplicitKernelArg(A, Range); - } - bool funcRetrievesHeapPtr(Attributor &A) { if (AMDGPU::getAmdhsaCodeObjectVersion() != 5) return false; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp index 9836de43467c4c..bd594342786cf6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -409,19 +409,13 @@ void MetadataStreamerYamlV2::emitHiddenKernelArgs(const Function &Func, // Emit "default queue" and "completion action" arguments if enqueue kernel is // used, otherwise emit dummy "none" arguments. - if (HiddenArgNumBytes >= 40) { - if (!Func.hasFnAttribute("amdgpu-no-default-queue")) { - emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenDefaultQueue); - } else { - emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); - } - } - if (HiddenArgNumBytes >= 48) { - if (!Func.hasFnAttribute("amdgpu-no-completion-action")) { + if (Func.hasFnAttribute("calls-enqueue-kernel")) { + emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenDefaultQueue); emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenCompletionAction); } else { emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); + emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); } } @@ -842,21 +836,15 @@ void MetadataStreamerMsgPackV3::emitHiddenKernelArgs( // Emit "default queue" and "completion action" arguments if enqueue kernel is // used, otherwise emit dummy "none" arguments. - if (HiddenArgNumBytes >= 40) { - if (!Func.hasFnAttribute("amdgpu-no-default-queue")) { + if (HiddenArgNumBytes >= 48) { + if (Func.hasFnAttribute("calls-enqueue-kernel")) { emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset, Args); - } else { - emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); - } - } - - if (HiddenArgNumBytes >= 48) { - if (!Func.hasFnAttribute("amdgpu-no-completion-action")) { emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset, Args); } else { emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); + emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); } } @@ -1068,18 +1056,13 @@ void MetadataStreamerMsgPackV5::emitHiddenKernelArgs( else Offset += 8; // Skipped. - if (!Func.hasFnAttribute("amdgpu-no-default-queue")) { + if (Func.hasFnAttribute("calls-enqueue-kernel")) { emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset, Args); - } else { - Offset += 8; // Skipped. - } - - if (!Func.hasFnAttribute("amdgpu-no-completion-action")) { emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset, Args); } else { - Offset += 8; // Skipped. + Offset += 16; // Skipped. } Offset += 72; // Reserved. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp index f242cbc494e4ef..7e824335cdfcc5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp @@ -24,6 +24,11 @@ // linkage does not work since optimization passes will try to replace loads // of the global variable with its initialization value. // +// It also identifies the kernels directly or indirectly enqueues kernels +// and adds "calls-enqueue-kernel" function attribute to them, which will +// be used to determine whether to emit runtime metadata for the kernel +// enqueue related hidden kernel arguments. +// //===----------------------------------------------------------------------===// #include "AMDGPU.h" @@ -67,7 +72,35 @@ ModulePass* llvm::createAMDGPUOpenCLEnqueuedBlockLoweringPass() { return new AMDGPUOpenCLEnqueuedBlockLowering(); } +/// Collect direct or indirect callers of \p F and save them +/// to \p Callers. +static void collectCallers(Function *F, DenseSet &Callers) { + for (auto *U : F->users()) { + if (auto *CI = dyn_cast(&*U)) { + auto *Caller = CI->getParent()->getParent(); + if (Callers.insert(Caller).second) + collectCallers(Caller, Callers); + } + } +} + +/// If \p U is instruction or constant, collect functions which directly or +/// indirectly use it. +static void collectFunctionUsers(User *U, DenseSet &Funcs) { + if (auto *I = dyn_cast(U)) { + auto *F = I->getParent()->getParent(); + if (Funcs.insert(F).second) + collectCallers(F, Funcs); + return; + } + if (!isa(U)) + return; + for (auto *UU : U->users()) + collectFunctionUsers(&*UU, Funcs); +} + bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) { + DenseSet Callers; auto &C = M.getContext(); bool Changed = false; @@ -100,6 +133,13 @@ bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) { /*isExternallyInitialized=*/false); LLVM_DEBUG(dbgs() << "runtime handle created: " << *GV << '\n'); + for (auto *U : F.users()) { + auto *UU = &*U; + + if (isa(UU)) + collectFunctionUsers(UU, Callers); + } + F.replaceAllUsesWith(ConstantExpr::getAddrSpaceCast(GV, F.getType())); F.addFnAttr("runtime-handle", RuntimeHandle); F.setLinkage(GlobalValue::ExternalLinkage); @@ -107,5 +147,11 @@ bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) { } } + for (auto *F : Callers) { + if (F->getCallingConv() != CallingConv::AMDGPU_KERNEL) + continue; + F->addFnAttr("calls-enqueue-kernel"); + LLVM_DEBUG(dbgs() << "mark enqueue_kernel caller:" << F->getName() << '\n'); + } return Changed; } diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index e2bf95561638d0..1325afb68ca9a2 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -903,10 +903,6 @@ enum Offset_COV5 : unsigned { HOSTCALL_PTR_OFFSET = 80, MULTIGRID_SYNC_ARG_OFFSET = 88, HEAP_PTR_OFFSET = 96, - - DEFAULT_QUEUE_OFFSET = 104, - COMPLETION_ACTION_OFFSET = 112, - PRIVATE_BASE_OFFSET = 192, SHARED_BASE_OFFSET = 196, QUEUE_PTR_OFFSET = 200, diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 6a1573c5b898d9..00efd462bc36f9 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -181,30 +181,6 @@ unsigned getHostcallImplicitArgPosition() { } } -unsigned getDefaultQueueImplicitArgPosition() { - switch (AmdhsaCodeObjectVersion) { - case 2: - case 3: - case 4: - return 32; - case 5: - default: - return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET; - } -} - -unsigned getCompletionActionImplicitArgPosition() { - switch (AmdhsaCodeObjectVersion) { - case 2: - case 3: - case 4: - return 40; - case 5: - default: - return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET; - } -} - #define GET_MIMGBaseOpcodesTable_IMPL #define GET_MIMGDimInfoTable_IMPL #define GET_MIMGInfoTable_IMPL diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 635f7f55798849..be52b736d7569f 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -66,9 +66,6 @@ unsigned getMultigridSyncArgImplicitArgPosition(); /// \returns The offset of the hostcall pointer argument from implicitarg_ptr unsigned getHostcallImplicitArgPosition(); -unsigned getDefaultQueueImplicitArgPosition(); -unsigned getCompletionActionImplicitArgPosition(); - /// \returns Code object version. unsigned getAmdhsaCodeObjectVersion(); diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll index d69107103da48e..66f249fe604b6c 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll @@ -230,6 +230,6 @@ attributes #1 = { nounwind } ; AKF_HSA: attributes #[[ATTR1]] = { nounwind } ;. ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } -; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll index e2829f17d99977..401d219d7acca1 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll @@ -688,7 +688,7 @@ define void @func_call_asm() #3 { ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_asm ; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] { -; ATTRIBUTOR_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR24:[0-9]+]] +; ATTRIBUTOR_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR20:[0-9]+]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void asm sideeffect "", ""() #3 @@ -920,95 +920,12 @@ define amdgpu_kernel void @kern_decl_sanitize_address() #3 { ret void } -declare void @enqueue_block_decl() #6 - -define internal void @enqueue_block_def() #6 { -; AKF_HSA-LABEL: define {{[^@]+}}@enqueue_block_def -; AKF_HSA-SAME: () #[[ATTR7:[0-9]+]] { -; AKF_HSA-NEXT: ret void -; -; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@enqueue_block_def -; ATTRIBUTOR_HSA-SAME: () #[[ATTR21:[0-9]+]] { -; ATTRIBUTOR_HSA-NEXT: ret void -; - ret void -} - -define amdgpu_kernel void @kern_call_enqueued_block_decl() { -; AKF_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_decl -; AKF_HSA-SAME: () #[[ATTR8:[0-9]+]] { -; AKF_HSA-NEXT: call void @enqueue_block_decl() -; AKF_HSA-NEXT: ret void -; -; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_decl -; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] { -; ATTRIBUTOR_HSA-NEXT: call void @enqueue_block_decl() -; ATTRIBUTOR_HSA-NEXT: ret void -; - call void @enqueue_block_decl() - ret void -} - -define amdgpu_kernel void @kern_call_enqueued_block_def() { -; AKF_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_def -; AKF_HSA-SAME: () #[[ATTR8]] { -; AKF_HSA-NEXT: call void @enqueue_block_def() -; AKF_HSA-NEXT: ret void -; -; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_def -; ATTRIBUTOR_HSA-SAME: () #[[ATTR23:[0-9]+]] { -; ATTRIBUTOR_HSA-NEXT: call void @enqueue_block_def() -; ATTRIBUTOR_HSA-NEXT: ret void -; - call void @enqueue_block_def() - ret void -} - -define void @unused_enqueue_block() { -; AKF_HSA-LABEL: define {{[^@]+}}@unused_enqueue_block() { -; AKF_HSA-NEXT: ret void -; -; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@unused_enqueue_block -; ATTRIBUTOR_HSA-SAME: () #[[ATTR23]] { -; ATTRIBUTOR_HSA-NEXT: ret void -; - ret void -} - -define internal void @known_func() { -; AKF_HSA-LABEL: define {{[^@]+}}@known_func() { -; AKF_HSA-NEXT: ret void -; -; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@known_func -; ATTRIBUTOR_HSA-SAME: () #[[ATTR23]] { -; ATTRIBUTOR_HSA-NEXT: ret void -; - ret void -} - -; Should never happen -define amdgpu_kernel void @kern_callsite_enqueue_block() { -; AKF_HSA-LABEL: define {{[^@]+}}@kern_callsite_enqueue_block -; AKF_HSA-SAME: () #[[ATTR8]] { -; AKF_HSA-NEXT: call void @known_func() #[[ATTR7]] -; AKF_HSA-NEXT: ret void -; -; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_callsite_enqueue_block -; ATTRIBUTOR_HSA-SAME: () #[[ATTR23]] { -; ATTRIBUTOR_HSA-NEXT: call void @known_func() #[[ATTR25:[0-9]+]] -; ATTRIBUTOR_HSA-NEXT: ret void -; - call void @known_func() #6 - ret void -} - attributes #0 = { nounwind readnone speculatable } attributes #1 = { nounwind "target-cpu"="fiji" } attributes #2 = { nounwind "target-cpu"="gfx900" } attributes #3 = { nounwind } attributes #4 = { nounwind sanitize_address } attributes #5 = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" } -attributes #6 = { "enqueued-block" } ;. ; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } @@ -1018,33 +935,26 @@ attributes #6 = { "enqueued-block" } ; AKF_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-calls" } ; AKF_HSA: attributes #[[ATTR5]] = { nounwind sanitize_address } ; AKF_HSA: attributes #[[ATTR6:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" } -; AKF_HSA: attributes #[[ATTR7]] = { "enqueued-block" } -; AKF_HSA: attributes #[[ATTR8]] = { "amdgpu-calls" } ;. ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR15]] = { nounwind "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR19:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR20:[0-9]+]] = { "enqueued-block" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "enqueued-block" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR23]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR24]] = { nounwind } -; ATTRIBUTOR_HSA: attributes #[[ATTR25]] = { "enqueued-block" } +; ATTRIBUTOR_HSA: attributes #[[ATTR20]] = { nounwind } ;. diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll index f2cf2e5c062f95..8e3965b0f11298 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll @@ -15,7 +15,6 @@ declare i32 @llvm.amdgcn.workitem.id.z() #0 declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0 declare ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0 declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0 -declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0 declare i1 @llvm.amdgcn.is.shared(ptr nocapture) #2 declare i1 @llvm.amdgcn.is.private(ptr nocapture) #2 @@ -640,15 +639,15 @@ attributes #1 = { nounwind } ; AKF_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-stack-objects" } ;. ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll index 8d84590616ccd3..2f899440b74d6a 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll @@ -418,13 +418,13 @@ attributes #1 = { nounwind } ; AKF_CHECK: attributes #[[ATTR1]] = { nounwind } ;. ; ATTRIBUTOR_CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll index 2dcccdab7c40f7..f6403d381bf705 100644 --- a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll @@ -35,6 +35,6 @@ define amdgpu_kernel void @test_direct_indirect_call() { ret void } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll index 37feeb44513519..4e7b68c3889592 100644 --- a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll +++ b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll @@ -42,6 +42,6 @@ attributes #0 = { "amdgpu-no-dispatch-id" } ;. ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-no-dispatch-id" "amdgpu-stack-objects" } ;. -; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll b/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll index c4549286753e2a..43d4ceb7009d75 100644 --- a/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll +++ b/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll @@ -164,7 +164,7 @@ attributes #0 = { "enqueued-block" } ; ; ; CHECK-LABEL: define {{[^@]+}}@inlined_caller -; CHECK-SAME: (ptr addrspace(1) [[A:%.*]], i8 [[B:%.*]], ptr addrspace(1) [[C:%.*]], i64 [[D:%.*]]) { +; CHECK-SAME: (ptr addrspace(1) [[A:%.*]], i8 [[B:%.*]], ptr addrspace(1) [[C:%.*]], i64 [[D:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[INST:%.*]] = load i64, ptr addrspace(1) @__test_block_invoke_kernel.runtime_handle, align 4 ; CHECK-NEXT: store i64 [[INST]], ptr addrspace(1) [[C]], align 4 @@ -172,7 +172,7 @@ attributes #0 = { "enqueued-block" } ; ; ; CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_kernel -; CHECK-SAME: (<{ i32, i32, ptr addrspace(1), i8 }> [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-SAME: (<{ i32, i32, ptr addrspace(1), i8 }> [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue <{ i32, i32, ptr addrspace(1), i8 }> [[ARG]], 2 ; CHECK-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue <{ i32, i32, ptr addrspace(1), i8 }> [[ARG]], 3 @@ -181,7 +181,7 @@ attributes #0 = { "enqueued-block" } ; ; ; CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_2_kernel -; CHECK-SAME: (<{ i32, i32, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-SAME: (<{ i32, i32, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[ARG:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue <{ i32, i32, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[ARG]], 2 ; CHECK-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue <{ i32, i32, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[ARG]], 3 @@ -193,7 +193,7 @@ attributes #0 = { "enqueued-block" } ; ; ; CHECK-LABEL: define {{[^@]+}}@block_has_used_kernel_address -; CHECK-SAME: (<{ i32, i32, ptr addrspace(1), i8 }> [[ARG:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-SAME: (<{ i32, i32, ptr addrspace(1), i8 }> [[ARG:%.*]]) #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue <{ i32, i32, ptr addrspace(1), i8 }> [[ARG]], 2 ; CHECK-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue <{ i32, i32, ptr addrspace(1), i8 }> [[ARG]], 3 @@ -208,18 +208,19 @@ attributes #0 = { "enqueued-block" } ; ; ; CHECK-LABEL: define {{[^@]+}}@__amdgpu_enqueued_kernel -; CHECK-SAME: (<{ i32, i32, ptr addrspace(1), i8 }> [[ARG:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-SAME: (<{ i32, i32, ptr addrspace(1), i8 }> [[ARG:%.*]]) #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@__amdgpu_enqueued_kernel.1 -; CHECK-SAME: (<{ i32, i32, ptr addrspace(1), i8 }> [[ARG:%.*]]) #[[ATTR4:[0-9]+]] { +; CHECK-SAME: (<{ i32, i32, ptr addrspace(1), i8 }> [[ARG:%.*]]) #[[ATTR5:[0-9]+]] { ; CHECK-NEXT: ret void ; ;. -; CHECK: attributes #[[ATTR0]] = { "enqueued-block" "runtime-handle"="__test_block_invoke_kernel.runtime_handle" } -; CHECK: attributes #[[ATTR1]] = { "enqueued-block" "runtime-handle"="__test_block_invoke_2_kernel.runtime_handle" } -; CHECK: attributes #[[ATTR2]] = { "enqueued-block" "runtime-handle"="block_has_used_kernel_address.runtime_handle" } -; CHECK: attributes #[[ATTR3]] = { "enqueued-block" "runtime-handle"="__amdgpu_enqueued_kernel.runtime_handle" } -; CHECK: attributes #[[ATTR4]] = { "enqueued-block" "runtime-handle"="__amdgpu_enqueued_kernel.1.runtime_handle" } +; CHECK: attributes #[[ATTR0]] = { "calls-enqueue-kernel" } +; CHECK: attributes #[[ATTR1]] = { "enqueued-block" "runtime-handle"="__test_block_invoke_kernel.runtime_handle" } +; CHECK: attributes #[[ATTR2]] = { "enqueued-block" "runtime-handle"="__test_block_invoke_2_kernel.runtime_handle" } +; CHECK: attributes #[[ATTR3]] = { "enqueued-block" "runtime-handle"="block_has_used_kernel_address.runtime_handle" } +; CHECK: attributes #[[ATTR4]] = { "enqueued-block" "runtime-handle"="__amdgpu_enqueued_kernel.runtime_handle" } +; CHECK: attributes #[[ATTR5]] = { "enqueued-block" "runtime-handle"="__amdgpu_enqueued_kernel.1.runtime_handle" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll index 992c88c2c7f78d..c223d661ac7c8a 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll @@ -68,87 +68,13 @@ define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #1 ret void } -; CHECK: - .args: -; CHECK-NEXT: - .name: a -; CHECK-NEXT: .offset: 0 -; CHECK-NEXT: .size: 1 -; CHECK-NEXT: .type_name: char -; CHECK-NEXT: .value_kind: by_value -; CHECK-NEXT: - .offset: 8 -; CHECK-NEXT: .size: 8 -; CHECK-NEXT: .value_kind: hidden_global_offset_x -; CHECK-NEXT: - .offset: 16 -; CHECK-NEXT: .size: 8 -; CHECK-NEXT: .value_kind: hidden_global_offset_y -; CHECK-NEXT: - .offset: 24 -; CHECK-NEXT: .size: 8 -; CHECK-NEXT: .value_kind: hidden_global_offset_z -; CHECK-NEXT: - .offset: 32 -; CHECK-NEXT: .size: 8 -; CHECK-NEXT: .value_kind: hidden_hostcall_buffer -; CHECK-NEXT: - .offset: 40 -; CHECK-NEXT: .size: 8 -; CHECK-NEXT: .value_kind: hidden_default_queue -; CHECK-NEXT: - .offset: 48 -; CHECK-NEXT: .size: 8 -; CHECK-NEXT: .value_kind: hidden_none -; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: -; CHECK-NEXT: - 2 -; CHECK-NEXT: - 0 -; CHECK: .name: test_no_completion_action -; CHECK: .symbol: test_no_completion_action.kd -define amdgpu_kernel void @test_no_completion_action(i8 %a) #2 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 - !kernel_arg_base_type !3 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: - .args: -; CHECK-NEXT: - .name: a -; CHECK-NEXT: .offset: 0 -; CHECK-NEXT: .size: 1 -; CHECK-NEXT: .type_name: char -; CHECK-NEXT: .value_kind: by_value -; CHECK-NEXT: - .offset: 8 -; CHECK-NEXT: .size: 8 -; CHECK-NEXT: .value_kind: hidden_global_offset_x -; CHECK-NEXT: - .offset: 16 -; CHECK-NEXT: .size: 8 -; CHECK-NEXT: .value_kind: hidden_global_offset_y -; CHECK-NEXT: - .offset: 24 -; CHECK-NEXT: .size: 8 -; CHECK-NEXT: .value_kind: hidden_global_offset_z -; CHECK-NEXT: - .offset: 32 -; CHECK-NEXT: .size: 8 -; CHECK-NEXT: .value_kind: hidden_hostcall_buffer -; CHECK-NEXT: - .offset: 40 -; CHECK-NEXT: .size: 8 -; CHECK-NEXT: .value_kind: hidden_none -; CHECK-NEXT: - .offset: 48 -; CHECK-NEXT: .size: 8 -; CHECK-NEXT: .value_kind: hidden_completion_action -; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: -; CHECK-NEXT: - 2 -; CHECK-NEXT: - 0 -; CHECK: .name: test_no_default_queue -; CHECK: .symbol: test_no_default_queue.kd -define amdgpu_kernel void @test_no_default_queue(i8 %a) #3 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 - !kernel_arg_base_type !3 !kernel_arg_type_qual !4 { - ret void -} - ; CHECK: amdhsa.version: ; CHECK-NEXT: - 1 ; CHECK-NEXT: - 0 ; CHECK-NOT: amdhsa.printf: -attributes #0 = { optnone noinline "amdgpu-no-default-queue" "amdgpu-no-completion-action" "amdgpu-implicitarg-num-bytes"="48" } -attributes #1 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" } -attributes #2 = { optnone noinline "amdgpu-no-completion-action" "amdgpu-implicitarg-num-bytes"="48" } -attributes #3 = { optnone noinline "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="48" } +attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" } +attributes #1 = { optnone noinline "calls-enqueue-kernel" "amdgpu-implicitarg-num-bytes"="48" } !1 = !{i32 0} !2 = !{!"none"} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll index 4d58fb00680281..29da063b6de478 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll @@ -75,8 +75,8 @@ define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #1 ret void } -attributes #0 = { optnone noinline "amdgpu-no-default-queue" "amdgpu-no-completion-action" "amdgpu-implicitarg-num-bytes"="48" } -attributes #1 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" } +attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" } +attributes #1 = { optnone noinline "calls-enqueue-kernel" "amdgpu-implicitarg-num-bytes"="48" } !1 = !{i32 0} !2 = !{!"none"} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll index b8768ad5be34b1..d9958947841fc2 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll @@ -1741,9 +1741,9 @@ define amdgpu_kernel void @unknown_addrspace_kernarg(ptr addrspace(12345) %ptr) ; CHECK-NEXT: - 1 ; CHECK-NEXT: - 0 -attributes #0 = { optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56" } -attributes #1 = { optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56" "runtime-handle"="__test_block_invoke_kernel_runtime_handle" } -attributes #2 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" } +attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" } +attributes #1 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" "runtime-handle"="__test_block_invoke_kernel_runtime_handle" } +attributes #2 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" "calls-enqueue-kernel" } !llvm.printf.fmts = !{!100, !101} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll index d628eeed6d9c90..93e1241cb851f5 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll @@ -1866,9 +1866,9 @@ define amdgpu_kernel void @unknown_addrspace_kernarg(ptr addrspace(12345) %ptr) ret void } -attributes #0 = { optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56" } -attributes #1 = { optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56" "runtime-handle"="__test_block_invoke_kernel_runtime_handle" } -attributes #2 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" } +attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" } +attributes #1 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" "runtime-handle"="__test_block_invoke_kernel_runtime_handle" } +attributes #2 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" "calls-enqueue-kernel" } !llvm.printf.fmts = !{!100, !101} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll index 91df59eb8002b5..cde62ebd5a16e2 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll @@ -215,10 +215,10 @@ entry: ; CHECK-NEXT: .value_kind: hidden_hostcall_buffer ; CHECK-NEXT: - .offset: 56 ; CHECK-NEXT: .size: 8 -; CHECK-NEXT: .value_kind: hidden_default_queue +; CHECK-NEXT: .value_kind: hidden_none ; CHECK-NEXT: - .offset: 64 ; CHECK-NEXT: .size: 8 -; CHECK-NEXT: .value_kind: hidden_completion_action +; CHECK-NEXT: .value_kind: hidden_none ; CHECK: .name: test48 ; CHECK: .symbol: test48.kd define amdgpu_kernel void @test48( @@ -263,10 +263,10 @@ entry: ; CHECK-NEXT: .value_kind: hidden_hostcall_buffer ; CHECK-NEXT: - .offset: 56 ; CHECK-NEXT: .size: 8 -; CHECK-NEXT: .value_kind: hidden_default_queue +; CHECK-NEXT: .value_kind: hidden_none ; CHECK-NEXT: - .offset: 64 ; CHECK-NEXT: .size: 8 -; CHECK-NEXT: .value_kind: hidden_completion_action +; CHECK-NEXT: .value_kind: hidden_none ; CHECK-NEXT: - .offset: 72 ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll index 8f51c5f06dfc8b..464f2ad4993fcd 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll @@ -114,5 +114,5 @@ entry: !1 = !{!"1:1:4:%d\5Cn"} !2 = !{!"2:1:8:%g\5Cn"} -attributes #0 = { optnone noinline } +attributes #0 = { optnone noinline "calls-enqueue-kernel" } diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll index 49b64a9eb25bc3..1a80f9d7518d8b 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll @@ -225,11 +225,11 @@ entry: ; CHECK-NEXT: AddrSpaceQual: Global ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenDefaultQueue +; CHECK-NEXT: ValueKind: HiddenNone ; CHECK-NEXT: AddrSpaceQual: Global ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenCompletionAction +; CHECK-NEXT: ValueKind: HiddenNone ; CHECK-NEXT: AddrSpaceQual: Global ; CHECK-NEXT: CodeProps: define amdgpu_kernel void @test48( @@ -277,11 +277,11 @@ entry: ; CHECK-NEXT: AddrSpaceQual: Global ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenDefaultQueue +; CHECK-NEXT: ValueKind: HiddenNone ; CHECK-NEXT: AddrSpaceQual: Global ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenCompletionAction +; CHECK-NEXT: ValueKind: HiddenNone ; CHECK-NEXT: AddrSpaceQual: Global ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 diff --git a/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll b/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll deleted file mode 100644 index 639d1da4549d5c..00000000000000 --- a/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll +++ /dev/null @@ -1,240 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals -; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-attributor --amdhsa-code-object-version=4 < %s | FileCheck -check-prefixes=CHECK,V4 %s -; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-attributor --amdhsa-code-object-version=5 < %s | FileCheck -check-prefixes=CHECK,V5 %s - -declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0 - -declare i32 @llvm.amdgcn.workgroup.id.x() #0 -declare i32 @llvm.amdgcn.workgroup.id.y() #0 -declare i32 @llvm.amdgcn.workgroup.id.z() #0 - -declare i32 @llvm.amdgcn.workitem.id.x() #0 -declare i32 @llvm.amdgcn.workitem.id.y() #0 -declare i32 @llvm.amdgcn.workitem.id.z() #0 -declare i32 @llvm.amdgcn.lds.kernel.id() #0 -declare i64 @llvm.amdgcn.dispatch.id() #0 - - -declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0 -declare ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0 -declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0 - -; Avoid adding all of these to the output attribute sets -define void @use_everything_else() { -; CHECK-LABEL: define {{[^@]+}}@use_everything_else -; CHECK-SAME: () #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() -; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() -; CHECK-NEXT: [[VAL2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() -; CHECK-NEXT: [[VAL3:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x() -; CHECK-NEXT: [[VAL4:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y() -; CHECK-NEXT: [[VAL5:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z() -; CHECK-NEXT: store volatile i32 [[VAL0]], ptr addrspace(1) null, align 4 -; CHECK-NEXT: store volatile i32 [[VAL1]], ptr addrspace(1) null, align 4 -; CHECK-NEXT: store volatile i32 [[VAL2]], ptr addrspace(1) null, align 4 -; CHECK-NEXT: store volatile i32 [[VAL3]], ptr addrspace(1) null, align 4 -; CHECK-NEXT: store volatile i32 [[VAL4]], ptr addrspace(1) null, align 4 -; CHECK-NEXT: store volatile i32 [[VAL5]], ptr addrspace(1) null, align 4 -; CHECK-NEXT: [[DISPATCH_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -; CHECK-NEXT: [[QUEUE_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.queue.ptr() -; CHECK-NEXT: [[VAL6:%.*]] = load volatile ptr, ptr addrspace(4) [[DISPATCH_PTR]], align 8 -; CHECK-NEXT: [[VAL7:%.*]] = load volatile ptr, ptr addrspace(4) [[QUEUE_PTR]], align 8 -; CHECK-NEXT: [[VAL8:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() -; CHECK-NEXT: store volatile i32 [[VAL8]], ptr addrspace(1) null, align 4 -; CHECK-NEXT: [[VAL9:%.*]] = call i64 @llvm.amdgcn.dispatch.id() -; CHECK-NEXT: store volatile i64 [[VAL9]], ptr addrspace(1) null, align 4 -; CHECK-NEXT: ret void -; - %val0 = call i32 @llvm.amdgcn.workitem.id.x() - %val1 = call i32 @llvm.amdgcn.workitem.id.y() - %val2 = call i32 @llvm.amdgcn.workitem.id.z() - %val3 = call i32 @llvm.amdgcn.workgroup.id.x() - %val4 = call i32 @llvm.amdgcn.workgroup.id.y() - %val5 = call i32 @llvm.amdgcn.workgroup.id.z() - store volatile i32 %val0, ptr addrspace(1) null - store volatile i32 %val1, ptr addrspace(1) null - store volatile i32 %val2, ptr addrspace(1) null - store volatile i32 %val3, ptr addrspace(1) null - store volatile i32 %val4, ptr addrspace(1) null - store volatile i32 %val5, ptr addrspace(1) null - %dispatch.ptr = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() - %queue.ptr = call ptr addrspace(4) @llvm.amdgcn.queue.ptr() - %val6 = load volatile ptr, ptr addrspace(4) %dispatch.ptr - %val7 = load volatile ptr, ptr addrspace(4) %queue.ptr - %val8 = call i32 @llvm.amdgcn.lds.kernel.id() - store volatile i32 %val8, ptr addrspace(1) null - %val9 = call i64 @llvm.amdgcn.dispatch.id() - store volatile i64 %val9, ptr addrspace(1) null - ret void -} - -define amdgpu_kernel void @test_default_queue_offset_v4_0(ptr addrspace(1) %kernarg) { -; V4-LABEL: define {{[^@]+}}@test_default_queue_offset_v4_0 -; V4-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2:[0-9]+]] { -; V4-NEXT: call void @use_everything_else() -; V4-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -; V4-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32 -; V4-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 -; V4-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 -; V4-NEXT: ret void -; -; V5-LABEL: define {{[^@]+}}@test_default_queue_offset_v4_0 -; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR1]] { -; V5-NEXT: call void @use_everything_else() -; V5-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -; V5-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32 -; V5-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 -; V5-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 -; V5-NEXT: ret void -; - call void @use_everything_else() - %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() - %gep = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 32 - %load = load ptr, ptr addrspace(4) %gep - store ptr %load, ptr addrspace(1) %kernarg - ret void -} - -define amdgpu_kernel void @test_default_queue_offset_v5_0(ptr addrspace(1) %kernarg) { -; V4-LABEL: define {{[^@]+}}@test_default_queue_offset_v5_0 -; V4-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR3:[0-9]+]] { -; V4-NEXT: call void @use_everything_else() -; V4-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -; V4-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104 -; V4-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 -; V4-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 -; V4-NEXT: ret void -; -; V5-LABEL: define {{[^@]+}}@test_default_queue_offset_v5_0 -; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2:[0-9]+]] { -; V5-NEXT: call void @use_everything_else() -; V5-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -; V5-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104 -; V5-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 -; V5-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 -; V5-NEXT: ret void -; - call void @use_everything_else() - %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() - %gep = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 104 - %load = load ptr, ptr addrspace(4) %gep - store ptr %load, ptr addrspace(1) %kernarg - ret void -} - -define amdgpu_kernel void @test_completion_action_offset_v4_0(ptr addrspace(1) %kernarg) { -; V4-LABEL: define {{[^@]+}}@test_completion_action_offset_v4_0 -; V4-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR4:[0-9]+]] { -; V4-NEXT: call void @use_everything_else() -; V4-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -; V4-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 40 -; V4-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 -; V4-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 -; V4-NEXT: ret void -; -; V5-LABEL: define {{[^@]+}}@test_completion_action_offset_v4_0 -; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR1]] { -; V5-NEXT: call void @use_everything_else() -; V5-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -; V5-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 40 -; V5-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 -; V5-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 -; V5-NEXT: ret void -; - call void @use_everything_else() - %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() - %gep = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 40 - %load = load ptr, ptr addrspace(4) %gep - store ptr %load, ptr addrspace(1) %kernarg - ret void -} - -define amdgpu_kernel void @test_completion_action_offset_v5_0(ptr addrspace(1) %kernarg) { -; CHECK-LABEL: define {{[^@]+}}@test_completion_action_offset_v5_0 -; CHECK-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR3:[0-9]+]] { -; CHECK-NEXT: call void @use_everything_else() -; CHECK-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 112 -; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 -; CHECK-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 -; CHECK-NEXT: ret void -; - call void @use_everything_else() - %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() - %gep = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 112 - %load = load ptr, ptr addrspace(4) %gep - store ptr %load, ptr addrspace(1) %kernarg - ret void -} - -define amdgpu_kernel void @test_default_queue_completion_action_offset_v3_0(ptr addrspace(1) %kernarg) { -; V4-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v3_0 -; V4-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR5:[0-9]+]] { -; V4-NEXT: call void @use_everything_else() -; V4-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -; V4-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32 -; V4-NEXT: [[LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[GEP]], align 16 -; V4-NEXT: store <2 x ptr> [[LOAD]], ptr addrspace(1) [[KERNARG]], align 16 -; V4-NEXT: ret void -; -; V5-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v3_0 -; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR1]] { -; V5-NEXT: call void @use_everything_else() -; V5-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -; V5-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32 -; V5-NEXT: [[LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[GEP]], align 16 -; V5-NEXT: store <2 x ptr> [[LOAD]], ptr addrspace(1) [[KERNARG]], align 16 -; V5-NEXT: ret void -; - call void @use_everything_else() - %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() - %gep = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 32 - %load = load <2 x ptr>, ptr addrspace(4) %gep - store <2 x ptr> %load, ptr addrspace(1) %kernarg - ret void -} - -define amdgpu_kernel void @test_default_queue_completion_action_offset_v5_0(ptr addrspace(1) %kernarg) { -; V4-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v5_0 -; V4-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR3]] { -; V4-NEXT: call void @use_everything_else() -; V4-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -; V4-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104 -; V4-NEXT: [[LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[GEP]], align 16 -; V4-NEXT: store <2 x ptr> [[LOAD]], ptr addrspace(1) [[KERNARG]], align 16 -; V4-NEXT: ret void -; -; V5-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v5_0 -; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR4:[0-9]+]] { -; V5-NEXT: call void @use_everything_else() -; V5-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -; V5-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104 -; V5-NEXT: [[LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[GEP]], align 16 -; V5-NEXT: store <2 x ptr> [[LOAD]], ptr addrspace(1) [[KERNARG]], align 16 -; V5-NEXT: ret void -; - - call void @use_everything_else()%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() - %gep = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 104 - %load = load <2 x ptr>, ptr addrspace(4) %gep - store <2 x ptr> %load, ptr addrspace(1) %kernarg - ret void -} - - -attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } - -;. -; V4: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -; V4: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } -; V4: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } -; V4: attributes #[[ATTR3]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } -; V4: attributes #[[ATTR4]] = { "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } -; V4: attributes #[[ATTR5]] = { "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } -;. -; V5: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -; V5: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } -; V5: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } -; V5: attributes #[[ATTR3]] = { "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } -; V5: attributes #[[ATTR4]] = { "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } -;. diff --git a/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll index 9e4f57f60556c2..67b5ae74e943ad 100644 --- a/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll +++ b/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll @@ -202,13 +202,13 @@ attributes #5 = { "amdgpu-flat-work-group-size"="128,512" } attributes #6 = { "amdgpu-flat-work-group-size"="512,512" } attributes #7 = { "amdgpu-flat-work-group-size"="64,256" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,128" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll index d2c2601d8f67a9..04ae40800b363c 100644 --- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll @@ -73,6 +73,6 @@ define amdgpu_kernel void @test_simple_indirect_call() { ;. ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" } ;. -; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll index 4fe64e6860cdef..284dc5630d62e7 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll @@ -31,5 +31,5 @@ define amdgpu_kernel void @kernel1() #1 { attributes #0 = { "uniform-work-group-size"="true" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll index 6fc5fb4941b395..7f248e2ec7f606 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll @@ -97,6 +97,6 @@ define amdgpu_kernel void @kernel2() #0 { attributes #0 = { "uniform-work-group-size"="true" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll index ac29588558c273..f82b468542e74e 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll @@ -41,6 +41,6 @@ define amdgpu_kernel void @kernel3() #2 { attributes #2 = { "uniform-work-group-size"="true" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll index 878e1616e05b33..27e1876842381c 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll @@ -41,6 +41,6 @@ define amdgpu_kernel void @kernel2() #2 { attributes #1 = { "uniform-work-group-size"="true" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll index 4b0541c43c8dad..6012ca681e1d18 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll @@ -51,8 +51,3 @@ define amdgpu_kernel void @kernel2() #2 { attributes #0 = { nounwind } attributes #1 = { "uniform-work-group-size"="false" } attributes #2 = { "uniform-work-group-size"="true" } -;. -; CHECK: attributes #[[ATTR0]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } -;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll index 4f9c7fdb34d9c0..be5be1b28d2786 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll @@ -101,7 +101,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %m) #1 { attributes #0 = { nounwind readnone } attributes #1 = { "uniform-work-group-size"="true" } ;. -; CHECK: attributes #[[ATTR0]] = { nounwind memory(none) "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { nounwind memory(none) "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } -; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR0]] = { nounwind memory(none) "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { nounwind memory(none) "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll index cb4086b930a369..f67b10f111a5ff 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll @@ -61,5 +61,5 @@ define amdgpu_kernel void @kernel3() #0 { attributes #0 = { "uniform-work-group-size"="false" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;.