Summary (text before the first "diff --git" line is ignored by patch tools):

  With -fgpu-rdc device compilation, an undefined kernel (SemaCUDA.cpp) or an
  external device variable (SemaExpr.cpp) is now inserted into
  CUDAExternalDeviceDeclODRUsedByHost only when there is no using function
  (Caller / FD is null), or when that function is not a described function
  template and its GVA linkage is GVA_StrongExternal.

  Rationale, per the test comment below: a host function with weak_odr
  linkage may be dropped by the linker, so a kernel it calls should not be
  marked as used.

  NOTE(review): this patch was recovered from a copy whose newlines,
  indentation and angle-bracketed template argument lists were lost.
  `hasAttr<CUDAGlobalAttr>` follows from the "external kernels" comment;
  `template<class T>` / `template_caller<int>` in the test and the exact
  leading whitespace of every line are best-effort reconstructions --
  confirm against the upstream tree before applying.

diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index 6a66ecf6f94c1..4d4f4b6a2d4d9 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -895,7 +895,10 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
   if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
     // For -fgpu-rdc, keep track of external kernels used by host functions.
     if (LangOpts.CUDAIsDevice && LangOpts.GPURelocatableDeviceCode &&
-        Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined())
+        Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined() &&
+        (!Caller || (!Caller->getDescribedFunctionTemplate() &&
+                     getASTContext().GetGVALinkageForFunction(Caller) ==
+                         GVA_StrongExternal)))
       getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee);
     return true;
   }
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 47bb263f56aad..93f82e68ab644 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -19218,7 +19218,10 @@ MarkVarDeclODRUsed(ValueDecl *V, SourceLocation Loc, Sema &SemaRef,
       // externalize the static device side variable ODR-used by host code.
       if (!Var->hasExternalStorage())
         SemaRef.getASTContext().CUDADeviceVarODRUsedByHost.insert(Var);
-      else if (SemaRef.LangOpts.GPURelocatableDeviceCode)
+      else if (SemaRef.LangOpts.GPURelocatableDeviceCode &&
+               (!FD || (!FD->getDescribedFunctionTemplate() &&
+                        SemaRef.getASTContext().GetGVALinkageForFunction(FD) ==
+                            GVA_StrongExternal)))
         SemaRef.getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Var);
     }
   }
diff --git a/clang/test/CodeGenCUDA/host-used-extern.cu b/clang/test/CodeGenCUDA/host-used-extern.cu
index e8f8e12aad47d..1ae644ae981aa 100644
--- a/clang/test/CodeGenCUDA/host-used-extern.cu
+++ b/clang/test/CodeGenCUDA/host-used-extern.cu
@@ -24,6 +24,7 @@
 
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel2v
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel3v
+// NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel5v
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @var2
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @var3
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @ext_shvar
@@ -44,6 +45,10 @@ __global__ void kernel3();
 // kernel4 is marked as used even though it is not called.
 __global__ void kernel4();
 
+// kernel5 is not marked as used since it is called by host function
+// with weak_odr linkage, which may be dropped by linker.
+__global__ void kernel5();
+
 extern __device__ int var1;
 
 __device__ int var2;
@@ -67,3 +72,11 @@ __global__ void test_lambda_using_extern_shared() {
   };
   lambda();
 }
+
+template<class T>
+void template_caller() {
+  kernel5<<<1, 1>>>();
+  var1 = 1;
+}
+
+template void template_caller<int>();