diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index aec170ae557066..ee8852903eda52 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -9520,6 +9520,15 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes( if (NumVGPR != 0) F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR)); } + + if (IsHIPKernel) { + // FIXME: This is a dirty, dirty hack to fix bot failures at -O0 and should + // be removed. The HIP runtime currently fails to handle the case where one + // of these fields fails to optimize out. The runtime should tolerate all + // requested implicit inputs regardless of language. + F->addFnAttr("amdgpu-no-default-queue"); + F->addFnAttr("amdgpu-no-completion-action"); + } } void AMDGPUTargetCodeGenInfo::setTargetAttributes( diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip new file mode 100644 index 00000000000000..b4f4a620195676 --- /dev/null +++ b/clang/test/CodeGenHIP/default-attributes.hip @@ -0,0 +1,47 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -fcuda-is-device \ +// RUN: -emit-llvm -o - %s | FileCheck -check-prefix=OPTNONE %s + +// RUN: %clang_cc1 -O3 -triple amdgcn-amd-amdhsa -x hip -fcuda-is-device \ +// RUN: -emit-llvm -o - %s | FileCheck -check-prefix=OPT %s + +#define __device__ __attribute__((device)) +#define __global__ __attribute__((global)) + +// OPTNONE: Function Attrs: convergent mustprogress noinline nounwind optnone +// OPTNONE-LABEL: define {{[^@]+}}@_Z4funcv +// OPTNONE-SAME: () #[[ATTR0:[0-9]+]] { +// OPTNONE-NEXT: entry: +// OPTNONE-NEXT: ret void +// +// OPT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +// OPT-LABEL: define {{[^@]+}}@_Z4funcv +// OPT-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] { +// OPT-NEXT: entry: +// OPT-NEXT: ret void +// +__device__ void func() { + +} + +// OPTNONE: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone +// OPTNONE-LABEL: define {{[^@]+}}@_Z6kernelv +// OPTNONE-SAME: () #[[ATTR1:[0-9]+]] { +// OPTNONE-NEXT: entry: +// OPTNONE-NEXT: ret void +// +// OPT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +// OPT-LABEL: define {{[^@]+}}@_Z6kernelv +// OPT-SAME: () local_unnamed_addr #[[ATTR1:[0-9]+]] { +// OPT-NEXT: entry: +// OPT-NEXT: ret void +// +__global__ void kernel() { + +} +//. +// OPTNONE: attributes #0 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// OPTNONE: attributes #1 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } +//. +// OPT: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }