CUDA host device code with two code paths
Summary:
Allow CUDA host device functions with two code paths, using __CUDA_ARCH__
to differentiate between the host and device code paths being compiled.

For example:
  __host__ __device__ void host_device_function(void) {
  #ifdef __CUDA_ARCH__
    device_only_function();
  #else
    host_only_function();
  #endif
  }

Patch by Jacques Pienaar.

Reviewed By: rnk

Differential Revision: http://reviews.llvm.org/D6457

llvm-svn: 223271
rnk committed Dec 3, 2014
1 parent d34e4d2 commit bbc0178
Showing 6 changed files with 97 additions and 17 deletions.
1 change: 1 addition & 0 deletions clang/include/clang/Basic/LangOptions.def
@@ -157,6 +157,7 @@ LANGOPT(NativeHalfType , 1, 0, "Native half type support")
LANGOPT(HalfArgsAndReturns, 1, 0, "half args and returns")
LANGOPT(CUDA , 1, 0, "CUDA")
LANGOPT(OpenMP , 1, 0, "OpenMP support")
LANGOPT(CUDAIsDevice , 1, 0, "Compiling for CUDA device")

LANGOPT(AssumeSaneOperatorNew , 1, 1, "implicit __attribute__((malloc)) for C++'s new operators")
LANGOPT(SizedDeallocation , 1, 0, "enable sized deallocation functions")
49 changes: 41 additions & 8 deletions clang/lib/Basic/Targets.cpp
@@ -1377,6 +1377,16 @@ namespace {
class NVPTXTargetInfo : public TargetInfo {
static const char * const GCCRegNames[];
static const Builtin::Info BuiltinInfo[];

// The GPU profiles supported by the NVPTX backend
enum GPUKind {
GK_NONE,
GK_SM20,
GK_SM21,
GK_SM30,
GK_SM35,
} GPU;

public:
NVPTXTargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {
BigEndian = false;
@@ -1387,11 +1397,34 @@ namespace {
// Define available target features
// These must be defined in sorted order!
NoAsmVariants = true;
// Set the default GPU to sm_20
GPU = GK_SM20;
}
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override {
Builder.defineMacro("__PTX__");
Builder.defineMacro("__NVPTX__");
if (Opts.CUDAIsDevice) {
// Set __CUDA_ARCH__ for the GPU specified.
std::string CUDAArchCode;
switch (GPU) {
case GK_SM20:
CUDAArchCode = "200";
break;
case GK_SM21:
CUDAArchCode = "210";
break;
case GK_SM30:
CUDAArchCode = "300";
break;
case GK_SM35:
CUDAArchCode = "350";
break;
default:
llvm_unreachable("Unhandled target CPU");
}
Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
}
}
void getTargetBuiltins(const Builtin::Info *&Records,
unsigned &NumRecords) const override {
@@ -1434,14 +1467,14 @@ namespace {
return TargetInfo::CharPtrBuiltinVaList;
}
bool setCPU(const std::string &Name) override {
- bool Valid = llvm::StringSwitch<bool>(Name)
-                  .Case("sm_20", true)
-                  .Case("sm_21", true)
-                  .Case("sm_30", true)
-                  .Case("sm_35", true)
-                  .Default(false);
-
- return Valid;
+ GPU = llvm::StringSwitch<GPUKind>(Name)
+           .Case("sm_20", GK_SM20)
+           .Case("sm_21", GK_SM21)
+           .Case("sm_30", GK_SM30)
+           .Case("sm_35", GK_SM35)
+           .Default(GK_NONE);
+
+ return GPU != GK_NONE;
}
};
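
With the per-GPU values above, __CUDA_ARCH__ carries 200, 210, 300, or 350 for sm_20 through sm_35, so device code can also branch on compute capability, not just on the host/device split. A minimal sketch, not part of this patch (path_selector is a hypothetical function; the thresholds follow the usual numeric meaning of __CUDA_ARCH__):

  // Sketch only: picks a code path per compilation pass and GPU generation.
  __host__ __device__ int path_selector(void) {
  #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
    return 2; // device pass, compiled for sm_30 or newer
  #elif defined(__CUDA_ARCH__)
    return 1; // device pass, compiled for sm_20 or sm_21
  #else
    return 0; // host pass: __CUDA_ARCH__ is not defined
  #endif
  }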

3 changes: 3 additions & 0 deletions clang/lib/Frontend/CompilerInvocation.cpp
@@ -1349,6 +1349,9 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
if (Args.hasArg(OPT_fno_operator_names))
Opts.CXXOperatorNames = 0;

if (Args.hasArg(OPT_fcuda_is_device))
Opts.CUDAIsDevice = 1;

if (Opts.ObjC1) {
if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) {
StringRef value = arg->getValue();
7 changes: 7 additions & 0 deletions clang/lib/Frontend/InitPreprocessor.cpp
@@ -870,6 +870,13 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
Builder.defineMacro("_OPENMP", "201307");
}

// CUDA device-path compilation
if (LangOpts.CUDAIsDevice) {
// The CUDA_ARCH value is set for the GPU target specified in the NVPTX
// backend's target defines.
Builder.defineMacro("__CUDA_ARCH__");
}

// Get other target #defines.
TI.getTargetDefines(LangOpts, Builder);
}
19 changes: 14 additions & 5 deletions clang/lib/Sema/SemaCUDA.cpp
@@ -14,6 +14,7 @@
#include "clang/Sema/Sema.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
@@ -72,21 +73,29 @@ bool Sema::CheckCUDATarget(CUDAFunctionTarget CallerTarget,
if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
return true;

- // CUDA B.1.1 "The __device__ qualifier declares a function that is...
+ // CUDA B.1.1 "The __device__ qualifier declares a function that is [...]
// Callable from the device only."
if (CallerTarget == CFT_Host && CalleeTarget == CFT_Device)
return true;

- // CUDA B.1.2 "The __global__ qualifier declares a function that is...
+ // CUDA B.1.2 "The __global__ qualifier declares a function that is [...]
// Callable from the host only."
- // CUDA B.1.3 "The __host__ qualifier declares a function that is...
+ // CUDA B.1.3 "The __host__ qualifier declares a function that is [...]
// Callable from the host only."
if ((CallerTarget == CFT_Device || CallerTarget == CFT_Global) &&
(CalleeTarget == CFT_Host || CalleeTarget == CFT_Global))
return true;

- if (CallerTarget == CFT_HostDevice && CalleeTarget != CFT_HostDevice)
-   return true;
+ // CUDA B.1.3 "The __device__ and __host__ qualifiers can be used together
+ // however, in which case the function is compiled for both the host and the
+ // device. The __CUDA_ARCH__ macro [...] can be used to differentiate code
+ // paths between host and device."
+ bool InDeviceMode = getLangOpts().CUDAIsDevice;
+ if (CallerTarget == CFT_HostDevice && CalleeTarget != CFT_HostDevice) {
+   if ((InDeviceMode && CalleeTarget != CFT_Device) ||
+       (!InDeviceMode && CalleeTarget != CFT_Host))
+     return true;
+ }

return false;
}
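
In effect, a __host__ __device__ function may now call a single-target function only during the compilation pass for that target, so an unguarded cross-target call is still rejected in exactly one of the two passes. A minimal illustration of the rule, using hypothetical function names (the updated test below exercises the same pattern):

  __host__ void host_helper(void);
  __device__ void device_helper(void);

  __host__ __device__ void caller(void) {
    host_helper();   // accepted in the host pass, rejected under -fcuda-is-device
    device_helper(); // accepted under -fcuda-is-device, rejected in the host pass
  #ifdef __CUDA_ARCH__
    device_helper(); // correctly guarded: accepted in both passes
  #else
    host_helper();   // correctly guarded: accepted in both passes
  #endif
  }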
35 changes: 31 additions & 4 deletions clang/test/SemaCUDA/function-target.cu
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -fsyntax-only -verify %s
+ // RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s

#include "Inputs/cuda.h"

@@ -31,14 +32,40 @@ __device__ void d1(void) {
d1g<<<1, 1>>>(); // expected-error {{reference to __global__ function 'd1g' in __device__ function}}
}

- __host__ void hd1h(void); // expected-note {{candidate function not viable: call to __host__ function from __host__ __device__ function}}
- __device__ void hd1d(void); // expected-note {{candidate function not viable: call to __device__ function from __host__ __device__ function}}
+ // Expected 0-1 times: this is an error in only one of the host/device
+ // compilation passes.
+ __host__ void hd1h(void); // expected-note 0-1 {{candidate function not viable: call to __host__ function from __host__ __device__ function}}
+ __device__ void hd1d(void); // expected-note 0-1 {{candidate function not viable: call to __device__ function from __host__ __device__ function}}
+ __host__ void hd1hg(void);
+ __device__ void hd1dg(void);
+ #ifdef __CUDA_ARCH__
+ __host__ void hd1hig(void); // expected-note {{candidate function not viable: call to __host__ function from __host__ __device__ function}}
+ #else
+ __device__ void hd1dig(void); // expected-note {{candidate function not viable: call to __device__ function from __host__ __device__ function}}
+ #endif
__host__ __device__ void hd1hd(void);
__global__ void hd1g(void); // expected-note {{'hd1g' declared here}}

__host__ __device__ void hd1(void) {
- hd1h(); // expected-error {{no matching function}}
- hd1d(); // expected-error {{no matching function}}
+ // Expected 0-1 times: this is an error in only one of the host/device
+ // compilation passes.
+ hd1d(); // expected-error 0-1 {{no matching function}}
+ hd1h(); // expected-error 0-1 {{no matching function}}
+
+ // No errors: the calls are correctly guarded.
+ #ifdef __CUDA_ARCH__
+ hd1d();
+ #else
+ hd1h();
+ #endif
+
+ // Errors: the calls are incorrectly guarded.
+ #ifndef __CUDA_ARCH__
+ hd1dig(); // expected-error {{no matching function}}
+ #else
+ hd1hig(); // expected-error {{no matching function}}
+ #endif

hd1hd();
hd1g<<<1, 1>>>(); // expected-error {{reference to __global__ function 'hd1g' in __host__ __device__ function}}
}
