[HIP][Clang][Sema] Add Sema support for hipstdpar
This patch adds the Sema changes needed for enabling HIP parallel algorithm offload on AMDGPU targets. This change impacts the CUDA / HIP language-specific checks, and only manifests when compiling in `hipstdpar` mode. In that case, we essentially do three things (a sketch of the kind of code this accepts follows the list):

1. Allow device-side callers to call host-side callees - since the user-visible HLL would be standard C++, with no annotation / restriction mechanisms, we cannot unambiguously establish that such a call is an error, so we conservatively allow all such calls and defer actual cleanup to a subsequent pass over IR;
2. Allow host-formed lambdas to capture by reference;
3. Allow device functions to use host global variables.
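A minimal sketch of standard C++ that relies on all three relaxations when offloaded via `hipstdpar` (illustrative only, not part of this commit; the names and the `std::for_each` offload path are assumptions):

```cpp
#include <algorithm>
#include <execution>
#include <vector>

int bias = 1;                                  // (3) host global, used from offloaded code

int host_helper(int x) { return x + bias; }    // unannotated, i.e. host-only

void scale_all(std::vector<int> &v, int factor) {
  // (2) a host-formed lambda capturing `factor` by reference;
  // (1) it calls the unannotated host_helper() from what becomes device code.
  std::for_each(std::execution::par_unseq, v.begin(), v.end(),
                [&factor](int &x) { x = host_helper(x) * factor; });
}
```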

Reviewed by: yaxunl

Differential Revision: https://reviews.llvm.org/D155833
AlexVlx committed Oct 3, 2023
1 parent 5ec9faf commit 4d680f5
Showing 4 changed files with 106 additions and 3 deletions.
11 changes: 10 additions & 1 deletion clang/lib/Sema/SemaCUDA.cpp
@@ -249,6 +249,15 @@ Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
(CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
return CFP_Native;

+  // HipStdPar mode is special, in that assessing whether a device side call to
+  // a host target is deferred to a subsequent pass, and cannot unambiguously be
+  // adjudicated in the AST, hence we optimistically allow them to pass here.
+  if (getLangOpts().HIPStdPar &&
+      (CallerTarget == CFT_Global || CallerTarget == CFT_Device ||
+       CallerTarget == CFT_HostDevice) &&
+      CalleeTarget == CFT_Host)
+    return CFP_HostDevice;

// (d) HostDevice behavior depends on compilation mode.
if (CallerTarget == CFT_HostDevice) {
// It's OK to call a compilation-mode matching function from an HD one.
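For reference, a minimal hypothetical example (names are illustrative; the new test file at the end of this commit exercises the pattern exhaustively) of the device-to-host call that this hunk now maps to CFP_HostDevice instead of treating as a failure:

```cpp
#define __device__ __attribute__((device))

void plain_host_fn();            // implicitly host-only: no target attributes

__device__ void device_caller() {
  // Device-side caller, host-side callee: with --hipstdpar this is allowed to
  // pass Sema; whether it is actually reachable is resolved later, over IR.
  plain_host_fn();
}
```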
@@ -895,7 +904,7 @@ void Sema::CUDACheckLambdaCapture(CXXMethodDecl *Callee,
if (!ShouldCheck || !Capture.isReferenceCapture())
return;
auto DiagKind = SemaDiagnosticBuilder::K_Deferred;
-  if (Capture.isVariableCapture()) {
+  if (Capture.isVariableCapture() && !getLangOpts().HIPStdPar) {
SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
diag::err_capture_bad_target, Callee, *this)
<< Capture.getVariable();
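A hypothetical sketch of the pattern this relaxation targets (not taken from the commit; `offload_kernel` stands in for the hipstdpar library's dispatch machinery, and a real TU would pull in the HIP runtime headers for the launch syntax): a host-formed lambda whose by-reference capture would otherwise be diagnosed with err_capture_bad_target once it is instantiated into device code.

```cpp
#define __global__ __attribute__((global))

template <typename F>
__global__ void offload_kernel(F f) { f(); }  // illustrative stand-in

void algorithm_impl(int n) {
  int bound = n;
  // By-reference capture in a host-formed lambda; previously rejected when the
  // lambda reaches the __global__ kernel, now the variable-capture diagnostic
  // is skipped under --hipstdpar.
  auto body = [&bound] { (void)bound; };
  offload_kernel<<<1, 1>>>(body);
}
```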
2 changes: 1 addition & 1 deletion clang/lib/Sema/SemaExpr.cpp
@@ -19157,7 +19157,7 @@ MarkVarDeclODRUsed(ValueDecl *V, SourceLocation Loc, Sema &SemaRef,
// Diagnose ODR-use of host global variables in device functions.
// Reference of device global variables in host functions is allowed
// through shadow variables therefore it is not diagnosed.
-  if (SemaRef.LangOpts.CUDAIsDevice) {
+  if (SemaRef.LangOpts.CUDAIsDevice && !SemaRef.LangOpts.HIPStdPar) {
SemaRef.targetDiag(Loc, diag::err_ref_bad_target)
<< /*host*/ 2 << /*variable*/ 1 << Var << UserTarget;
SemaRef.targetDiag(Var->getLocation(),
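For context, a minimal hypothetical example (names are illustrative) of what this hunk stops diagnosing during the hipstdpar device pass: a device function ODR-using a host global variable.

```cpp
#define __device__ __attribute__((device))

int host_counter = 0;            // host global, no device-side annotation

__device__ int read_counter() {
  // ODR-use of a host global from device code: normally err_ref_bad_target
  // when compiling for the device; with --hipstdpar it is accepted here and
  // left for the later IR pass to resolve or remove.
  return host_counter;
}
```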
3 changes: 2 additions & 1 deletion clang/lib/Sema/SemaStmtAsm.cpp
@@ -271,7 +271,8 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
OutputName = Names[i]->getName();

TargetInfo::ConstraintInfo Info(Literal->getString(), OutputName);
-    if (!Context.getTargetInfo().validateOutputConstraint(Info)) {
+    if (!Context.getTargetInfo().validateOutputConstraint(Info) &&
+        !(LangOpts.HIPStdPar && LangOpts.CUDAIsDevice)) {
targetDiag(Literal->getBeginLoc(),
diag::err_asm_invalid_output_constraint)
<< Info.getConstraintStr();
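A hypothetical illustration of why the constraint check is relaxed (not from the commit): host-only code that is still parsed during the hipstdpar device pass can contain inline asm whose constraints are only meaningful on the host target; validating them against AMDGPU would yield spurious errors for code the IR pass will discard anyway.

```cpp
// x86-only helper that is never meant to run on the accelerator.
unsigned long long host_only_tsc() {
  unsigned lo, hi;
  // "=a" / "=d" are x86 output constraints; they do not validate for AMDGPU,
  // but under --hipstdpar the device pass no longer rejects them here.
  __asm__ volatile("rdtsc" : "=a"(lo), "=d"(hi));
  return ((unsigned long long)hi << 32) | lo;
}
```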
93 changes: 93 additions & 0 deletions clang/test/SemaHipStdPar/device-can-call-host.cpp
@@ -0,0 +1,93 @@
// RUN: %clang_cc1 -x hip %s --hipstdpar -triple amdgcn-amd-amdhsa --std=c++17 \
// RUN: -fcuda-is-device -emit-llvm -o /dev/null -verify

// Note: These would happen implicitly, within the implementation of the
// accelerator specific algorithm library, and not from user code.

// Calls from the accelerator side to implicitly host (i.e. unannotated)
// functions are fine.

// expected-no-diagnostics

#define __device__ __attribute__((device))
#define __global__ __attribute__((global))

extern "C" void host_fn() {}

struct Dummy {};

struct S {
  S() {}
  ~S() { host_fn(); }

  int x;
};

struct T {
  __device__ void hd() { host_fn(); }

  __device__ void hd3();

  void h() {}

  void operator+();
  void operator-(const T&) {}

  operator Dummy() { return Dummy(); }
};

__device__ void T::hd3() { host_fn(); }

template <typename T> __device__ void hd2() { host_fn(); }

__global__ void kernel() { hd2<int>(); }

__device__ void hd() { host_fn(); }

template <typename T> __device__ void hd3() { host_fn(); }
__device__ void device_fn() { hd3<int>(); }

__device__ void local_var() {
  S s;
}

__device__ void explicit_destructor(S *s) {
  s->~S();
}

__device__ void hd_member_fn() {
  T t;

  t.hd();
}

__device__ void h_member_fn() {
  T t;
  t.h();
}

__device__ void unaryOp() {
  T t;
  (void) +t;
}

__device__ void binaryOp() {
  T t;
  (void) (t - t);
}

__device__ void implicitConversion() {
  T t;
  Dummy d = t;
}

template <typename T>
struct TmplStruct {
  template <typename U> __device__ void fn() {}
};

template <>
template <>
__device__ void TmplStruct<int>::fn<int>() { host_fn(); }

__device__ void double_specialization() { TmplStruct<int>().fn<int>(); }
