diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index d2e5bd284d350..da391082eba5b 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -401,6 +401,7 @@ Bug Fixes to Attribute Support - Using ``[[gnu::cleanup(some_func)]]`` where some_func is annotated with ``[[gnu::error("some error")]]`` now correctly triggers an error. (#GH146520) - Fix a crash when the function name is empty in the `swift_name` attribute. (#GH157075) +- Fixes crashes or missing diagnostics with the `device_kernel` attribute. (#GH161905) Bug Fixes to C++ Support ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 3c697ed8dd882..6ec0eac529245 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1599,7 +1599,7 @@ def CUDAShared : InheritableAttr { } def : MutualExclusions<[CUDAConstant, CUDAShared, HIPManaged]>; -def DeviceKernel : DeclOrTypeAttr { +def DeviceKernel : InheritableAttr { let Spellings = [Clang<"device_kernel">, Clang<"sycl_kernel">, Clang<"nvptx_kernel">, Clang<"amdgpu_kernel">, CustomKeyword<"__kernel">, CustomKeyword<"kernel">]; diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index 66a1b684ec68b..568d56ab0b911 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -2147,9 +2147,6 @@ void TypePrinter::printAttributedAfter(const AttributedType *T, } case attr::AArch64VectorPcs: OS << "aarch64_vector_pcs"; break; case attr::AArch64SVEPcs: OS << "aarch64_sve_pcs"; break; - case attr::DeviceKernel: - OS << T->getAttr()->getSpelling(); - break; case attr::IntelOclBicc: OS << "inteloclbicc"; break; diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h index 33c29586359be..f5c8396f398aa 100644 --- a/clang/lib/Basic/Targets/NVPTX.h +++ b/clang/lib/Basic/Targets/NVPTX.h @@ -200,7 +200,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo { // a host function. if (HostTarget) return HostTarget->checkCallingConvention(CC); - return CCCR_Warning; + return CC == CC_DeviceKernel ? CCCR_OK : CCCR_Warning; } bool hasBitIntType() const override { return true; } diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index 0fcbf7e458a34..d9e3c9969686d 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -419,9 +419,11 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( return; const FunctionDecl *FD = dyn_cast_or_null(D); - if (FD) + if (FD) { setFunctionDeclAttributes(FD, F, M); - + if (FD->hasAttr() && !M.getLangOpts().OpenCL) + F->setCallingConv(getDeviceKernelCallingConv()); + } if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts) F->addFnAttr("amdgpu-ieee", "false"); } @@ -634,7 +636,7 @@ llvm::Value *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel( // kernel address (only the kernel descriptor). auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name, &Mod); - F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); + F->setCallingConv(getDeviceKernelCallingConv()); llvm::AttrBuilder KernelAttrs(C); // FIXME: The invoke isn't applying the right attributes either diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp index 53f2fc4a040a7..f6715861d91bc 100644 --- a/clang/lib/CodeGen/Targets/NVPTX.cpp +++ b/clang/lib/CodeGen/Targets/NVPTX.cpp @@ -264,7 +264,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( // And kernel functions are not subject to inlining F->addFnAttr(llvm::Attribute::NoInline); if (FD->hasAttr()) { - F->setCallingConv(llvm::CallingConv::PTX_Kernel); + F->setCallingConv(getDeviceKernelCallingConv()); for (auto IV : llvm::enumerate(FD->parameters())) if (IV.value()->hasAttr()) @@ -278,7 +278,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( } // Attach kernel metadata directly if compiling for NVPTX. if (FD->hasAttr()) - F->setCallingConv(llvm::CallingConv::PTX_Kernel); + F->setCallingConv(getDeviceKernelCallingConv()); } void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV, diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index 4aa63143a66cd..b3b688891bf07 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -61,6 +61,8 @@ class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo { QualType SampledType, CodeGenModule &CGM) const; void setOCLKernelStubCallingConvention(const FunctionType *&FT) const override; + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &M) const override; }; class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo { public: @@ -240,6 +242,22 @@ void CommonSPIRTargetCodeGenInfo::setOCLKernelStubCallingConvention( FT, FT->getExtInfo().withCallingConv(CC_SpirFunction)); } +void CommonSPIRTargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { + if (M.getLangOpts().OpenCL || GV->isDeclaration()) + return; + + const FunctionDecl *FD = dyn_cast(D); + if (!FD) + return; + + llvm::Function *F = dyn_cast(GV); + assert(F && "Expected GlobalValue to be a Function"); + + if (FD->hasAttr()) + F->setCallingConv(getDeviceKernelCallingConv()); +} + LangAS SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, const VarDecl *D) const { @@ -264,19 +282,23 @@ SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, void SPIRVTargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { - if (!M.getLangOpts().HIP || - M.getTarget().getTriple().getVendor() != llvm::Triple::AMD) - return; if (GV->isDeclaration()) return; - auto F = dyn_cast(GV); - if (!F) + const FunctionDecl *FD = dyn_cast_or_null(D); + if (!FD) return; - auto FD = dyn_cast_or_null(D); - if (!FD) + llvm::Function *F = dyn_cast(GV); + assert(F && "Expected GlobalValue to be a Function"); + + if (FD->hasAttr()) + F->setCallingConv(getDeviceKernelCallingConv()); + + if (!M.getLangOpts().HIP || + M.getTarget().getTriple().getVendor() != llvm::Triple::AMD) return; + if (!FD->hasAttr()) return; diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 328ccf6694073..f08e1da400459 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -5204,25 +5204,53 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) { static void handleDeviceKernelAttr(Sema &S, Decl *D, const ParsedAttr &AL) { const auto *FD = dyn_cast_or_null(D); bool IsFunctionTemplate = FD && FD->getDescribedFunctionTemplate(); - if (S.getLangOpts().SYCLIsDevice) { + llvm::Triple Triple = S.getASTContext().getTargetInfo().getTriple(); + const LangOptions &LangOpts = S.getLangOpts(); + + if (LangOpts.SYCLIsDevice) { if (!IsFunctionTemplate) { S.Diag(AL.getLoc(), diag::warn_attribute_wrong_decl_type_str) << AL << AL.isRegularKeywordAttribute() << "function templates"; - } else { - S.SYCL().handleKernelAttr(D, AL); + AL.setInvalid(); + return; } + S.SYCL().handleKernelAttr(D, AL); } else if (DeviceKernelAttr::isSYCLSpelling(AL)) { S.Diag(AL.getLoc(), diag::warn_attribute_ignored) << AL; - } else if (S.getASTContext().getTargetInfo().getTriple().isNVPTX()) { + AL.setInvalid(); + return; + } else if (Triple.isNVPTX()) { handleGlobalAttr(S, D, AL); } else { // OpenCL C++ will throw a more specific error. - if (!S.getLangOpts().OpenCLCPlusPlus && (!FD || IsFunctionTemplate)) { + if (!LangOpts.OpenCLCPlusPlus && (!FD || IsFunctionTemplate)) { S.Diag(AL.getLoc(), diag::err_attribute_wrong_decl_type_str) << AL << AL.isRegularKeywordAttribute() << "functions"; + AL.setInvalid(); + return; } handleSimpleAttribute(S, D, AL); } + // TODO: isGPU() should probably return true for SPIR. + bool TargetDeviceEnvironment = Triple.isGPU() || Triple.isSPIR() || + LangOpts.isTargetDevice() || LangOpts.OpenCL; + bool IsAMDGPUMismatch = + DeviceKernelAttr::isAMDGPUSpelling(AL) && !Triple.isAMDGPU(); + bool IsNVPTXMismatch = + DeviceKernelAttr::isNVPTXSpelling(AL) && !Triple.isNVPTX(); + if (IsAMDGPUMismatch || IsNVPTXMismatch || !TargetDeviceEnvironment) { + // While both are just different spellings of the same underlying + // attribute, it makes more sense to the user if amdgpu_kernel can only + // be used on AMDGPU and the equivalent for NVPTX, so warn and ignore + // the attribute if there's a mismatch. + // Also warn if this is not an environment where a device kernel makes + // sense. + S.Diag(AL.getLoc(), diag::warn_cconv_unsupported) + << AL << (int)Sema::CallingConventionIgnoredReason::ForThisTarget; + AL.setInvalid(); + return; + } + // Make sure we validate the CC with the target // and warn/error if necessary. handleCallConvAttr(S, D, AL); diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index bee613aa5f1c5..0d5b0e7e842b3 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -134,7 +134,6 @@ static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr, case ParsedAttr::AT_VectorCall: \ case ParsedAttr::AT_AArch64VectorPcs: \ case ParsedAttr::AT_AArch64SVEPcs: \ - case ParsedAttr::AT_DeviceKernel: \ case ParsedAttr::AT_MSABI: \ case ParsedAttr::AT_SysVABI: \ case ParsedAttr::AT_Pcs: \ @@ -3781,7 +3780,8 @@ static CallingConv getCCForDeclaratorChunk( } } if (!S.getLangOpts().isSYCL()) { - for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) { + for (const ParsedAttr &AL : llvm::concat( + D.getDeclSpec().getAttributes(), D.getAttributes())) { if (AL.getKind() == ParsedAttr::AT_DeviceKernel) { CC = CC_DeviceKernel; break; @@ -7565,8 +7565,6 @@ static Attr *getCCTypeAttr(ASTContext &Ctx, ParsedAttr &Attr) { return createSimpleAttr(Ctx, Attr); case ParsedAttr::AT_ArmStreaming: return createSimpleAttr(Ctx, Attr); - case ParsedAttr::AT_DeviceKernel: - return createSimpleAttr(Ctx, Attr); case ParsedAttr::AT_Pcs: { // The attribute may have had a fixit applied where we treated an // identifier as a string literal. The contents of the string are valid, @@ -8805,16 +8803,6 @@ static void HandleHLSLParamModifierAttr(TypeProcessingState &State, } } -static bool isMultiSubjectAttrAllowedOnType(const ParsedAttr &Attr) { - // The DeviceKernel attribute is shared for many targets, and - // it is only allowed to be a type attribute with the AMDGPU - // spelling, so skip processing the attr as a type attr - // unless it has that spelling. - if (Attr.getKind() != ParsedAttr::AT_DeviceKernel) - return true; - return DeviceKernelAttr::isAMDGPUSpelling(Attr); -} - static void processTypeAttrs(TypeProcessingState &state, QualType &type, TypeAttrLocation TAL, const ParsedAttributesView &attrs, @@ -9068,8 +9056,6 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type, break; [[fallthrough]]; FUNCTION_TYPE_ATTRS_CASELIST: - if (!isMultiSubjectAttrAllowedOnType(attr)) - break; attr.setUsedAsTypeAttr(); diff --git a/clang/test/Sema/callingconv-devicekernel.c b/clang/test/Sema/callingconv-devicekernel.c new file mode 100644 index 0000000000000..869687f8ca65d --- /dev/null +++ b/clang/test/Sema/callingconv-devicekernel.c @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s 2>&1 -o -| FileCheck -check-prefix=CHECK-AMDGPU %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda- -emit-llvm %s 2>&1 -o -| FileCheck -check-prefix=CHECK-NVPTX %s +// RUN: %clang_cc1 -triple spir64 -emit-llvm %s 2>&1 -o - | FileCheck -check-prefix=CHECK-SPIR %s +// RUN: %clang_cc1 -triple spirv64 -emit-llvm %s 2>&1 -o - | FileCheck -check-prefix=CHECK-SPIR %s + +// CHECK-AMDGPU-DAG: amdgpu_kernel void @kernel1() +// CHECK-NVPTX-DAG: ptx_kernel void @kernel1() +// CHECK-SPIR-DAG: spir_kernel void @kernel1() +[[clang::device_kernel]] void kernel1() {} + +// CHECK-AMDGPU-DAG: amdgpu_kernel void @kernel2() +// CHECK-NVPTX-DAG: 14:3: warning: 'clang::amdgpu_kernel' calling convention is not supported for this target +// CHECK-SPIR-DAG: 14:3: warning: 'clang::amdgpu_kernel' calling convention is not supported for this target +[[clang::amdgpu_kernel]] void kernel2() {} + +// CHECK-AMDGPU-DAG: 19:3: warning: 'clang::nvptx_kernel' calling convention is not supported for this target +// CHECK-NVPTX-DAG: ptx_kernel void @kernel3() +// CHECK-SPIR-DAG: 19:3: warning: 'clang::nvptx_kernel' calling convention is not supported for this target +[[clang::nvptx_kernel]] void kernel3() {} + +// CHECK-AMDGPU-DAG: 24:3: warning: 'clang::sycl_kernel' attribute ignored +// CHECK-NVPTX-DAG: 24:3: warning: 'clang::sycl_kernel' attribute ignored +// CHECK-SPIR-DAG: 24:3: warning: 'clang::sycl_kernel' attribute ignored +[[clang::sycl_kernel]] void kernel4() {} diff --git a/clang/test/Sema/callingconv.c b/clang/test/Sema/callingconv.c index f0b8b80a32974..28342b56af39a 100644 --- a/clang/test/Sema/callingconv.c +++ b/clang/test/Sema/callingconv.c @@ -55,6 +55,10 @@ int __attribute__((aarch64_vector_pcs)) aavpcs(void); // expected-warning {{'aar int __attribute__((aarch64_sve_pcs)) aasvepcs(void); // expected-warning {{'aarch64_sve_pcs' calling convention is not supported for this target}} int __attribute__((amdgpu_kernel)) amdgpu_kernel(void); // expected-warning {{'amdgpu_kernel' calling convention is not supported for this target}} +int __attribute__((device_kernel)) device_kernel(void) { // expected-warning {{'device_kernel' calling convention is not supported for this target}} +} +int __attribute__((sycl_kernel)) sycl_kernel(void) { // expected-warning {{'sycl_kernel' attribute ignored}} +} // PR6361 void ctest3();