70 changes: 65 additions & 5 deletions clang/lib/Sema/SemaOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1608,6 +1608,26 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
Params);
break;
}
case OMPD_target_parallel: {
Sema::CapturedParamNameType ParamsTarget[] = {
std::make_pair(StringRef(), QualType()) // __context with shared vars
};
// Start a captured region for 'target' with no implicit parameters.
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
ParamsTarget);
QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
QualType KmpInt32PtrTy =
Context.getPointerType(KmpInt32Ty).withConst().withRestrict();
Sema::CapturedParamNameType ParamsParallel[] = {
std::make_pair(".global_tid.", KmpInt32PtrTy),
std::make_pair(".bound_tid.", KmpInt32PtrTy),
std::make_pair(StringRef(), QualType()) // __context with shared vars
};
// Start a captured region for 'parallel'.
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
ParamsParallel);
break;
}
case OMPD_simd:
case OMPD_for:
case OMPD_for_simd:
Expand All @@ -1622,7 +1642,6 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
case OMPD_atomic:
case OMPD_target_data:
case OMPD_target:
case OMPD_target_parallel:
case OMPD_target_parallel_for:
case OMPD_target_parallel_for_simd:
case OMPD_target_simd: {
Expand Down Expand Up @@ -1737,6 +1756,12 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
}
}

int Sema::getOpenMPCaptureLevels(OpenMPDirectiveKind DKind) {
SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
getOpenMPCaptureRegions(CaptureRegions, DKind);
return CaptureRegions.size();
}

static OMPCapturedExprDecl *buildCaptureDecl(Sema &S, IdentifierInfo *Id,
Expr *CaptureExpr, bool WithInit,
bool AsExpression) {
Expand Down Expand Up @@ -1796,10 +1821,42 @@ static ExprResult buildCapture(Sema &S, Expr *CaptureExpr, DeclRefExpr *&Ref) {
return CaptureExpr->isGLValue() ? Res : S.DefaultLvalueConversion(Res.get());
}

namespace {
// OpenMP directives parsed in this section are represented as a
// CapturedStatement with an associated statement. If a syntax error
// is detected during the parsing of the associated statement, the
// compiler must abort processing and close the CapturedStatement.
//
// Combined directives such as 'target parallel' have more than one
// nested CapturedStatements. This RAII ensures that we unwind out
// of all the nested CapturedStatements when an error is found.
class CaptureRegionUnwinderRAII {
private:
Sema &S;
bool &ErrorFound;
OpenMPDirectiveKind DKind;

public:
CaptureRegionUnwinderRAII(Sema &S, bool &ErrorFound,
OpenMPDirectiveKind DKind)
: S(S), ErrorFound(ErrorFound), DKind(DKind) {}
~CaptureRegionUnwinderRAII() {
if (ErrorFound) {
int ThisCaptureLevel = S.getOpenMPCaptureLevels(DKind);
while (--ThisCaptureLevel >= 0)
S.ActOnCapturedRegionError();
}
}
};
} // namespace

StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S,
ArrayRef<OMPClause *> Clauses) {
bool ErrorFound = false;
CaptureRegionUnwinderRAII CaptureRegionUnwinder(
*this, ErrorFound, DSAStack->getCurrentDirective());
if (!S.isUsable()) {
ActOnCapturedRegionError();
ErrorFound = true;
return StmtError();
}

Expand Down Expand Up @@ -1843,7 +1900,6 @@ StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S,
else if (Clause->getClauseKind() == OMPC_linear)
LCs.push_back(cast<OMPLinearClause>(Clause));
}
bool ErrorFound = false;
// OpenMP, 2.7.1 Loop Construct, Restrictions
// The nonmonotonic modifier cannot be specified if an ordered clause is
// specified.
Expand Down Expand Up @@ -1874,10 +1930,14 @@ StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S,
ErrorFound = true;
}
if (ErrorFound) {
ActOnCapturedRegionError();
return StmtError();
}
return ActOnCapturedRegionEnd(S.get());
StmtResult SR = S;
int ThisCaptureLevel =
getOpenMPCaptureLevels(DSAStack->getCurrentDirective());
while (--ThisCaptureLevel >= 0)
SR = ActOnCapturedRegionEnd(SR.get());
return SR;
}

static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
Expand Down
8 changes: 6 additions & 2 deletions clang/lib/Sema/TreeTransform.h
Original file line number Diff line number Diff line change
Expand Up @@ -7238,8 +7238,12 @@ StmtResult TreeTransform<Derived>::TransformOMPExecutableDirective(
StmtResult Body;
{
Sema::CompoundScopeRAII CompoundScope(getSema());
Body = getDerived().TransformStmt(
cast<CapturedStmt>(D->getAssociatedStmt())->getCapturedStmt());
int ThisCaptureLevel =
Sema::getOpenMPCaptureLevels(D->getDirectiveKind());
Stmt *CS = D->getAssociatedStmt();
while (--ThisCaptureLevel >= 0)
CS = cast<CapturedStmt>(CS)->getCapturedStmt();
Body = getDerived().TransformStmt(CS);
}
AssociatedStmt =
getDerived().getSema().ActOnOpenMPRegionEnd(Body, TClauses);
Expand Down
802 changes: 802 additions & 0 deletions clang/test/OpenMP/target_parallel_codegen.cpp

Large diffs are not rendered by default.

437 changes: 437 additions & 0 deletions clang/test/OpenMP/target_parallel_codegen_registration.cpp

Large diffs are not rendered by default.

66 changes: 66 additions & 0 deletions clang/test/OpenMP/target_parallel_codegen_registration_naming.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// Test host codegen.
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s

// Test target parallel codegen - host bc file has to be created first.
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK

// expected-no-diagnostics
#ifndef HEADER
#define HEADER

// CHECK: [[CA:%.+]] = type { i32* }

// CHECK: define {{.*}}i32 @[[NNAME:.+]](i32 {{.*}}%{{.+}})
int nested(int a){
// CHECK: call void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME]]_l[[T1L:[0-9]+]](
#pragma omp target parallel
++a;

// CHECK: call void @"[[LNAME:.+]]"([[CA]]*
auto F = [&](){
#pragma omp parallel
{
#pragma omp target parallel
++a;
}
};

F();

return a;
}

// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T1L]](
// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME:.+]]_l[[T1L:[0-9]+]](

// CHECK: define {{.*}}void @"[[LNAME]]"(
// CHECK: call void {{.*}}@__kmpc_fork_call{{.+}}[[PNAME:@.+]] to

// CHECK: define {{.*}}void [[PNAME]](
// CHECK: call void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L:[0-9]+]](

// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L]](
// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME:.+]]_l[[T2L:[0-9]+]](


// Check metadata is properly generated:
// CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}}
// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}}
// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}}

// TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}}
// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}}
// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}}
#endif