diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index ec6b579870abb..4ee32c76a95d8 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -1141,6 +1141,9 @@ class ASTContext : public RefCountedBase { mutable TagDecl *MSGuidTagDecl = nullptr; /// Keep track of CUDA/HIP device-side variables ODR-used by host code. + /// This does not include extern shared variables used by device host + /// functions as addresses of shared variables are per warp, therefore + /// cannot be accessed by host code. llvm::DenseSet CUDADeviceVarODRUsedByHost; /// Keep track of CUDA/HIP external kernels or device variables ODR-used by diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 3e9c3fad03918..92496b03ecabe 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -19136,6 +19136,7 @@ MarkVarDeclODRUsed(ValueDecl *V, SourceLocation Loc, Sema &SemaRef, : diag::note_cuda_host_var); } } else if (VarTarget == Sema::CVT_Device && + !Var->hasAttr() && (UserTarget == Sema::CFT_Host || UserTarget == Sema::CFT_HostDevice)) { // Record a CUDA/HIP device side variable if it is ODR-used diff --git a/clang/test/CodeGenCUDA/host-used-extern.cu b/clang/test/CodeGenCUDA/host-used-extern.cu index c7edabf5c582b..e8f8e12aad47d 100644 --- a/clang/test/CodeGenCUDA/host-used-extern.cu +++ b/clang/test/CodeGenCUDA/host-used-extern.cu @@ -9,6 +9,11 @@ // RUN: -std=c++11 -emit-llvm -o - -target-cpu gfx906 \ // RUN: | FileCheck -check-prefixes=NEG,NORDC %s +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x hip %s \ +// RUN: -fgpu-rdc -std=c++11 -emit-llvm -o - \ +// RUN: | FileCheck -check-prefix=HOST-NEG %s + + #include "Inputs/cuda.h" // CHECK-LABEL: @__clang_gpu_used_external = internal {{.*}}global @@ -21,10 +26,13 @@ // NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel3v // NEG-NOT: @__clang_gpu_used_external = {{.*}} @var2 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @var3 +// NEG-NOT: @__clang_gpu_used_external = {{.*}} @ext_shvar +// NEG-NOT: @__clang_gpu_used_external = {{.*}} @shvar // NORDC-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel1v // NORDC-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel4v // NORDC-NOT: @__clang_gpu_used_external = {{.*}} @var1 - +// HOST-NEG-NOT: call void @__hipRegisterVar({{.*}}, ptr @ext_shvar +// HOST-NEG-NOT: call void @__hipRegisterVar({{.*}}, ptr @shvar __global__ void kernel1(); // kernel2 is not marked as used since it is a definition. @@ -49,3 +57,13 @@ void test() { void *p = (void*)kernel4; use(&var1); } + +__global__ void test_lambda_using_extern_shared() { + extern __shared__ int ext_shvar[]; + __shared__ int shvar[10]; + auto lambda = [&]() { + ext_shvar[0] = 1; + shvar[0] = 2; + }; + lambda(); +}