Skip to content

Commit

Permalink
[CUDA][HIP] Fix device variables used by host
Browse files Browse the repository at this point in the history
variables emitted on both host and device side with different addresses
when ODR-used by host function should not cause device side counter-part
to be force emitted.

This fixes the regression caused by https://reviews.llvm.org/D102237

Reviewed by: Artem Belevich

Differential Revision: https://reviews.llvm.org/D102801
  • Loading branch information
yxsamliu committed May 20, 2021
1 parent dccf5c7 commit 4cb4256
Show file tree
Hide file tree
Showing 9 changed files with 247 additions and 27 deletions.
9 changes: 9 additions & 0 deletions clang/include/clang/Sema/Sema.h
Expand Up @@ -12066,6 +12066,15 @@ class Sema final {
bool IgnoreImplicitHDAttr = false);
CUDAFunctionTarget IdentifyCUDATarget(const ParsedAttributesView &Attrs);

enum CUDAVariableTarget {
CVT_Device, /// Emitted on device side with a shadow variable on host side
CVT_Host, /// Emitted on host side only
CVT_Both, /// Emitted on both sides with different addresses
CVT_Unified, /// Emitted as a unified address, e.g. managed variables
};
/// Determines whether the given variable is emitted on host or device side.
CUDAVariableTarget IdentifyCUDATarget(const VarDecl *D);

/// Gets the CUDA target for the current context.
CUDAFunctionTarget CurrentCUDATarget() {
return IdentifyCUDATarget(dyn_cast<FunctionDecl>(CurContext));
Expand Down
4 changes: 3 additions & 1 deletion clang/lib/CodeGen/CGDeclCXX.cpp
Expand Up @@ -644,7 +644,9 @@ CodeGenModule::EmitCXXGlobalInitFunc() {
Fn->setCallingConv(llvm::CallingConv::SPIR_KERNEL);
}

if (getLangOpts().HIP) {
assert(!getLangOpts().CUDA || !getLangOpts().CUDAIsDevice ||
getLangOpts().GPUAllowDeviceInit);
if (getLangOpts().HIP && getLangOpts().CUDAIsDevice) {
Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
Fn->addFnAttr("device-init");
}
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/CodeGen/CodeGenModule.cpp
Expand Up @@ -2368,6 +2368,8 @@ void CodeGenModule::EmitDeferred() {
}

// Emit CUDA/HIP static device variables referenced by host code only.
// Note we should not clear CUDADeviceVarODRUsedByHost since it is still
// needed for further handling.
if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice)
for (const auto *V : getContext().CUDADeviceVarODRUsedByHost)
DeferredDeclsToEmit.push_back(V);
Expand Down
40 changes: 39 additions & 1 deletion clang/lib/Sema/SemaCUDA.cpp
Expand Up @@ -26,6 +26,14 @@
#include "llvm/ADT/SmallVector.h"
using namespace clang;

template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) {
if (!D)
return false;
if (auto *A = D->getAttr<AttrT>())
return !A->isImplicit();
return false;
}

void Sema::PushForceCUDAHostDevice() {
assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
ForceCUDAHostDeviceDepth++;
Expand Down Expand Up @@ -133,6 +141,35 @@ Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
return CFT_Host;
}

/// IdentifyTarget - Determine the CUDA compilation target for this variable.
Sema::CUDAVariableTarget Sema::IdentifyCUDATarget(const VarDecl *Var) {
if (Var->hasAttr<HIPManagedAttr>())
return CVT_Unified;
if (Var->isConstexpr() && !hasExplicitAttr<CUDAConstantAttr>(Var))
return CVT_Both;
if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
Var->hasAttr<CUDASharedAttr>() ||
Var->getType()->isCUDADeviceBuiltinSurfaceType() ||
Var->getType()->isCUDADeviceBuiltinTextureType())
return CVT_Device;
// Function-scope static variable without explicit device or constant
// attribute are emitted
// - on both sides in host device functions
// - on device side in device or global functions
if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) {
switch (IdentifyCUDATarget(FD)) {
case CFT_HostDevice:
return CVT_Both;
case CFT_Device:
case CFT_Global:
return CVT_Device;
default:
return CVT_Host;
}
}
return CVT_Host;
}

// * CUDA Call preference table
//
// F - from,
Expand Down Expand Up @@ -637,7 +674,8 @@ void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,

void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) {
if (getLangOpts().CUDAIsDevice && VD->isConstexpr() &&
(VD->isFileVarDecl() || VD->isStaticDataMember())) {
(VD->isFileVarDecl() || VD->isStaticDataMember()) &&
!VD->hasAttr<CUDAConstantAttr>()) {
VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext()));
}
}
Expand Down
7 changes: 7 additions & 0 deletions clang/lib/Sema/SemaDeclAttr.cpp
Expand Up @@ -4419,6 +4419,13 @@ static void handleConstantAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
S.Diag(AL.getLoc(), diag::err_cuda_nonstatic_constdev);
return;
}
// constexpr variable may already get an implicit constant attr, which should
// be replaced by the explicit constant attr.
if (auto *A = D->getAttr<CUDAConstantAttr>()) {
if (!A->isImplicit())
return;
D->dropAttr<CUDAConstantAttr>();
}
D->addAttr(::new (S.Context) CUDAConstantAttr(S.Context, AL));
}

Expand Down
26 changes: 9 additions & 17 deletions clang/lib/Sema/SemaExpr.cpp
Expand Up @@ -17146,28 +17146,20 @@ MarkVarDeclODRUsed(VarDecl *Var, SourceLocation Loc, Sema &SemaRef,

if (SemaRef.LangOpts.CUDA && Var && Var->hasGlobalStorage()) {
auto *FD = dyn_cast_or_null<FunctionDecl>(SemaRef.CurContext);
auto Target = SemaRef.IdentifyCUDATarget(FD);
auto IsEmittedOnDeviceSide = [](VarDecl *Var) {
if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
Var->hasAttr<CUDASharedAttr>() ||
Var->getType()->isCUDADeviceBuiltinSurfaceType() ||
Var->getType()->isCUDADeviceBuiltinTextureType())
return true;
// Function-scope static variable in device functions or kernels are
// emitted on device side.
if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) {
return FD->hasAttr<CUDADeviceAttr>() || FD->hasAttr<CUDAGlobalAttr>();
}
return false;
};
if (!IsEmittedOnDeviceSide(Var)) {
auto VarTarget = SemaRef.IdentifyCUDATarget(Var);
auto UserTarget = SemaRef.IdentifyCUDATarget(FD);
if (VarTarget == Sema::CVT_Host &&
(UserTarget == Sema::CFT_Device || UserTarget == Sema::CFT_HostDevice ||
UserTarget == Sema::CFT_Global)) {
// Diagnose ODR-use of host global variables in device functions.
// Reference of device global variables in host functions is allowed
// through shadow variables therefore it is not diagnosed.
if (SemaRef.LangOpts.CUDAIsDevice)
SemaRef.targetDiag(Loc, diag::err_ref_bad_target)
<< /*host*/ 2 << /*variable*/ 1 << Var << Target;
} else if ((Target == Sema::CFT_Host || Target == Sema::CFT_HostDevice) &&
<< /*host*/ 2 << /*variable*/ 1 << Var << UserTarget;
} else if (VarTarget == Sema::CVT_Device &&
(UserTarget == Sema::CFT_Host ||
UserTarget == Sema::CFT_HostDevice) &&
!Var->hasExternalStorage()) {
// Record a CUDA/HIP device side variable if it is ODR-used
// by host code. This is done conservatively, when the variable is
Expand Down
32 changes: 32 additions & 0 deletions clang/test/AST/ast-dump-constant-var.cu
@@ -0,0 +1,32 @@
// RUN: %clang_cc1 -std=c++14 -ast-dump -x hip %s | FileCheck -check-prefixes=CHECK,HOST %s
// RUN: %clang_cc1 -std=c++14 -ast-dump -fcuda-is-device -x hip %s | FileCheck -check-prefixes=CHECK,DEV %s

#include "Inputs/cuda.h"

// CHECK-LABEL: VarDecl {{.*}} m1 'int'
// CHECK-NEXT: CUDAConstantAttr {{.*}}cuda.h
__constant__ int m1;

// CHECK-LABEL: VarDecl {{.*}} m2 'int'
// CHECK-NEXT: CUDAConstantAttr {{.*}}cuda.h
// CHECK-NOT: CUDAConstantAttr
__constant__ __constant__ int m2;

// CHECK-LABEL: VarDecl {{.*}} m3 'const int'
// HOST-NOT: CUDAConstantAttr
// DEV-NOT: CUDAConstantAttr {{.*}}cuda.h
// DEV: CUDAConstantAttr {{.*}}Implicit
// DEV-NOT: CUDAConstantAttr {{.*}}cuda.h
constexpr int m3 = 1;

// CHECK-LABEL: VarDecl {{.*}} m3a 'const int'
// CHECK-NOT: CUDAConstantAttr {{.*}}Implicit
// CHECK: CUDAConstantAttr {{.*}}cuda.h
// CHECK-NOT: CUDAConstantAttr {{.*}}Implicit
constexpr __constant__ int m3a = 2;

// CHECK-LABEL: VarDecl {{.*}} m3b 'const int'
// CHECK-NOT: CUDAConstantAttr {{.*}}Implicit
// CHECK: CUDAConstantAttr {{.*}}cuda.h
// CHECK-NOT: CUDAConstantAttr {{.*}}Implicit
__constant__ constexpr int m3b = 3;
122 changes: 120 additions & 2 deletions clang/test/CodeGenCUDA/host-used-device-var.cu
Expand Up @@ -66,30 +66,148 @@ __device__ T add_func (T x, T y)
template <typename T>
__device__ func_t<T> p_add_func = add_func<T>;

// Check non-constant constexpr variables ODR-used by host code only is not emitted.
// DEV-NEG-NOT: constexpr_var1a
// DEV-NEG-NOT: constexpr_var1b
constexpr int constexpr_var1a = 1;
inline constexpr int constexpr_var1b = 1;

// Check constant constexpr variables ODR-used by host code only.
// Non-inline constexpr variable has internal linkage, therefore it is not accessible by host and not kept.
// Inline constexpr variable has linkonce_ord linkage, therefore it can be accessed by host and kept.
// DEV-NEG-NOT: constexpr_var2a
// DEV-DAG: @constexpr_var2b = linkonce_odr addrspace(4) externally_initialized constant i32 2
__constant__ constexpr int constexpr_var2a = 2;
inline __constant__ constexpr int constexpr_var2b = 2;

void use(func_t<int> p);
void use(int *p);
__host__ __device__ void use(const int *p);

// Check static device variable in host function.
// DEV-DAG: @_ZZ4fun1vE11static_var1 = addrspace(1) externally_initialized global i32 3
void fun1() {
static __device__ int static_var1 = 3;
use(&u1);
use(&u2);
use(&u3);
use(&ext_var);
use(&inline_var);
use(p_add_func<int>);
use(&constexpr_var1a);
use(&constexpr_var1b);
use(&constexpr_var2a);
use(&constexpr_var2b);
use(&static_var1);
}

// Check static variable in host device function.
// DEV-DAG: @_ZZ4fun2vE11static_var2 = internal addrspace(1) global i32 4
// DEV-DAG: @_ZZ4fun2vE11static_var3 = addrspace(1) global i32 4
__host__ __device__ void fun2() {
static int static_var2 = 4;
static __device__ int static_var3 = 4;
use(&static_var2);
use(&static_var3);
}

__global__ void kern1(int **x) {
*x = &u4;
fun2();
}

// Check static variables of lambda functions.

// Lambda functions are implicit host device functions.
// Default static variables in lambda functions should be treated
// as host variables on host side, therefore should not be forced
// to be emitted on device.

// DEV-DAG: @_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var2 = addrspace(1) externally_initialized global i32 5
// DEV-NEG-NOT: @_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var1
namespace TestStaticVarInLambda {
class A {
public:
A(char *);
};
void fun() {
(void) [](char *c) {
static A var1(c);
static __device__ int var2 = 5;
(void) var1;
(void) var2;
};
}
}

// Check implicit constant variable ODR-used by host code is not emitted.

// AST contains instantiation of al<ar>, which triggers AST instantiation
// of x::al<ar>::am, which triggers AST instatiation of x::ap<ar>,
// which triggers AST instantiation of aw<ar>::c, which has type
// ar. ar has base class x which has member ah. x::ah is initialized
// with function pointer pointing to ar:as, which returns an object
// of type ou. The constexpr aw<ar>::c is an implicit constant variable
// which is ODR-used by host function x::ap<ar>. An incorrect implementation
// will force aw<ar>::c to be emitted on device side, which will trigger
// emit of x::as and further more ctor of ou and variable o.
// The ODR-use of aw<ar>::c in x::ap<ar> should be treated as a host variable
// instead of device variable.

// DEV-NEG-NOT: _ZN16TestConstexprVar1oE
namespace TestConstexprVar {
char o;
class ou {
public:
ou(char) { __builtin_strlen(&o); }
};
template < typename ao > struct aw { static constexpr ao c; };
class x {
protected:
typedef ou (*y)(const x *);
constexpr x(y ag) : ah(ag) {}
template < bool * > struct ak;
template < typename > struct al {
static bool am;
static ak< &am > an;
};
template < typename ao > static x ap() { (void)aw< ao >::c; return x(nullptr); }
y ah;
};
template < typename ao > bool x::al< ao >::am(&ap< ao >);
class ar : x {
public:
constexpr ar() : x(as) {}
static ou as(const x *) { return 0; }
al< ar > av;
};
}

// Check the exact list of variables to ensure @_ZL2u4 is not among them.
// DEV: @llvm.compiler.used = {{[^@]*}} @_Z10p_add_funcIiE {{[^@]*}} @_ZL2u3 {{[^@]*}} @inline_var {{[^@]*}} @u1 {{[^@]*}} @u2 {{[^@]*}} @u5
// DEV: @llvm.compiler.used = {{[^@]*}} @_Z10p_add_funcIiE
// DEV-SAME: {{^[^@]*}} @_ZL2u3
// DEV-SAME: {{^[^@]*}} @_ZZ4fun1vE11static_var1
// DEV-SAME: {{^[^@]*}} @_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var2
// DEV-SAME: {{^[^@]*}} @constexpr_var2b
// DEV-SAME: {{^[^@]*}} @inline_var
// DEV-SAME: {{^[^@]*}} @u1
// DEV-SAME: {{^[^@]*}} @u2
// DEV-SAME: {{^[^@]*}} @u5
// DEV-SAME: {{^[^@]*$}}

// HOST-DAG: hipRegisterVar{{.*}}@u1
// HOST-DAG: hipRegisterVar{{.*}}@u2
// HOST-DAG: hipRegisterVar{{.*}}@_ZL2u3
// HOST-DAG: hipRegisterVar{{.*}}@constexpr_var2b
// HOST-DAG: hipRegisterVar{{.*}}@u5
// HOST-DAG: hipRegisterVar{{.*}}@inline_var
// HOST-DAG: hipRegisterVar{{.*}}@_Z10p_add_funcIiE
// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZZ4fun1vE11static_var1
// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZZ4fun2vE11static_var2
// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZZ4fun2vE11static_var3
// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var2
// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var1
// HOST-NEG-NOT: hipRegisterVar{{.*}}@ext_var
// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZL2u4
// HOST-NEG-NOT: hipRegisterVar{{.*}}@constexpr_var1a
// HOST-NEG-NOT: hipRegisterVar{{.*}}@constexpr_var1b
// HOST-NEG-NOT: hipRegisterVar{{.*}}@constexpr_var2a
32 changes: 26 additions & 6 deletions clang/test/SemaCUDA/static-device-var.cu
@@ -1,16 +1,14 @@
// REQUIRES: x86-registered-target
// REQUIRES: amdgpu-registered-target

// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \
// RUN: -emit-llvm -o - %s -fsyntax-only -verify=dev
// RUN: %clang_cc1 -triple nvptx -fcuda-is-device -std=c++11 \
// RUN: -emit-llvm -o - %s -fsyntax-only -verify=dev,com

// RUN: %clang_cc1 -triple x86_64-gnu-linux \
// RUN: -emit-llvm -o - %s -fsyntax-only -verify=host
// RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \
// RUN: -emit-llvm -o - %s -fsyntax-only -verify=host,com

// Checks allowed usage of file-scope and function-scope static variables.

// host-no-diagnostics

#include "Inputs/cuda.h"

// Checks static variables are allowed in device functions.
Expand Down Expand Up @@ -42,6 +40,28 @@ __global__ void kernel(int *a) {
// dev-error@-1 {{reference to __host__ variable 'z' in __global__ function}}
}

// Check dynamic initialization of static device variable is not allowed.

namespace TestStaticVarInLambda {
class A {
public:
A(char *);
};
class B {
public:
__device__ B(char *);
};
void fun() {
(void) [](char *c) {
static A var1(c);
static __device__ B var2(c);
// com-error@-1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}}
(void) var1;
(void) var2;
};
}
}

int* getDeviceSymbol(int *x);

void foo() {
Expand Down

0 comments on commit 4cb4256

Please sign in to comment.