188 changes: 187 additions & 1 deletion clang/lib/CodeGen/CGOpenMPRuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,9 @@ llvm::Value *CGOpenMPRuntime::EmitOpenMPUpdateLocation(
auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
if (I != OpenMPLocThreadIDMap.end())
LocValue = I->second.DebugLoc;
else {
// OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
// GetOpenMPThreadID was called before this routine.
if (LocValue == nullptr) {
// Generate "ident_t .kmpc_loc.addr;"
llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr");
AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy));
Expand Down Expand Up @@ -332,6 +334,95 @@ CGOpenMPRuntime::CreateRuntimeFunction(OpenMPRTLFunction Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
break;
}
// Build __kmpc_for_static_init*(
// ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
// kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
// kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
// kmp_int[32|64] incr, kmp_int[32|64] chunk);
case OMPRTL__kmpc_for_static_init_4: {
auto ITy = CGM.Int32Ty;
auto PtrTy = llvm::PointerType::getUnqual(ITy);
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), // loc
CGM.Int32Ty, // tid
CGM.Int32Ty, // schedtype
llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
PtrTy, // p_lower
PtrTy, // p_upper
PtrTy, // p_stride
ITy, // incr
ITy // chunk
};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4");
break;
}
case OMPRTL__kmpc_for_static_init_4u: {
auto ITy = CGM.Int32Ty;
auto PtrTy = llvm::PointerType::getUnqual(ITy);
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), // loc
CGM.Int32Ty, // tid
CGM.Int32Ty, // schedtype
llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
PtrTy, // p_lower
PtrTy, // p_upper
PtrTy, // p_stride
ITy, // incr
ITy // chunk
};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4u");
break;
}
case OMPRTL__kmpc_for_static_init_8: {
auto ITy = CGM.Int64Ty;
auto PtrTy = llvm::PointerType::getUnqual(ITy);
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), // loc
CGM.Int32Ty, // tid
CGM.Int32Ty, // schedtype
llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
PtrTy, // p_lower
PtrTy, // p_upper
PtrTy, // p_stride
ITy, // incr
ITy // chunk
};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8");
break;
}
case OMPRTL__kmpc_for_static_init_8u: {
auto ITy = CGM.Int64Ty;
auto PtrTy = llvm::PointerType::getUnqual(ITy);
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), // loc
CGM.Int32Ty, // tid
CGM.Int32Ty, // schedtype
llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
PtrTy, // p_lower
PtrTy, // p_upper
PtrTy, // p_stride
ITy, // incr
ITy // chunk
};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8u");
break;
}
case OMPRTL__kmpc_for_static_fini: {
// Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
break;
}
case OMPRTL__kmpc_push_num_threads: {
// Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
// kmp_int32 num_threads)
Expand Down Expand Up @@ -713,6 +804,101 @@ void CGOpenMPRuntime::EmitOMPBarrierCall(CodeGenFunction &CGF,
CGF.EmitRuntimeCall(RTLFn, Args);
}

/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
///
/// The selected value is passed to the runtime as the 'schedtype' argument of
/// the loop initialization call, so the numbers are expected to match the
/// runtime's own definitions.
enum OpenMPSchedType {
/// \brief Lower bound for default (unordered) versions.
OMP_sch_lower = 32,
// Chosen for 'schedule(static, chunk)'.
OMP_sch_static_chunked = 33,
// Chosen for 'schedule(static)' and when no schedule kind is known.
OMP_sch_static = 34,
// Chosen for 'schedule(dynamic)'.
OMP_sch_dynamic_chunked = 35,
// Chosen for 'schedule(guided)'.
OMP_sch_guided_chunked = 36,
// Chosen for 'schedule(runtime)'.
OMP_sch_runtime = 37,
// Chosen for 'schedule(auto)'.
OMP_sch_auto = 38,
/// \brief Lower bound for 'ordered' versions.
OMP_ord_lower = 64,
/// \brief Lower bound for 'nomerge' versions.
OMP_nm_lower = 160,
};

/// \brief Translate the schedule kind of a 'schedule' clause into the schedule
/// type constant understood by the runtime.
///
/// \param ScheduleKind Kind taken from the clause, or OMPC_SCHEDULE_unknown
/// when no kind is available.
/// \param Chunked True if a chunk size accompanies the schedule kind; it only
/// influences the result for the static schedule.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_unknown:
    // Without a known schedule kind the static schedule is used; a chunk
    // cannot be present in that situation.
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return OMP_sch_static;
  case OMPC_SCHEDULE_static:
    if (Chunked)
      return OMP_sch_static_chunked;
    return OMP_sch_static;
  case OMPC_SCHEDULE_dynamic:
    return OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return OMP_sch_auto;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// \brief Tell whether the given schedule maps to the runtime's static
/// schedule without a chunk size.
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  return getRuntimeSchedule(ScheduleKind, Chunked) == OMP_sch_static;
}

/// \brief Emit the call to __kmpc_for_static_init_{4,4u,8,8u} that sets up a
/// statically scheduled 'omp for' loop.
///
/// \param CGF Function the call is emitted into.
/// \param Loc Source location used for the ident_t argument and the thread id.
/// \param ScheduleKind Schedule kind from the 'schedule' clause; only static
/// (or unknown, which is treated as static) is handled here.
/// \param IVSize Width of the iteration variable in bits; must be 32 or 64.
/// \param IVSigned Selects the signed or the unsigned ('u') runtime entry.
/// \param IL, LB, UB, ST Values passed as the p_lastiter, p_lower, p_upper and
/// p_stride arguments.
/// \param Chunk Chunk value, or nullptr to pass the default chunk of 1.
void CGOpenMPRuntime::EmitOMPForInit(CodeGenFunction &CGF, SourceLocation Loc,
                                     OpenMPScheduleClauseKind ScheduleKind,
                                     unsigned IVSize, bool IVSigned,
                                     llvm::Value *IL, llvm::Value *LB,
                                     llvm::Value *UB, llvm::Value *ST,
                                     llvm::Value *Chunk) {
  // Check the preconditions up front, before IVSize is used to build any
  // constant and before a static-only entry point is selected.
  assert((IVSize == 32 || IVSize == 64) &&
         "Index size is not compatible with the omp runtime");
  assert((ScheduleKind == OMPC_SCHEDULE_static ||
          ScheduleKind == OMPC_SCHEDULE_unknown) &&
         "Non-static schedule kinds are not yet implemented");

  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr);
  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  // TODO: Implement dynamic schedule.

  // If the Chunk was not specified in the clause - use default value 1.
  if (Chunk == nullptr)
    Chunk = CGF.Builder.getIntN(IVSize, /*C*/ 1);

  llvm::Value *Args[] = {
      EmitOpenMPUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
      GetOpenMPThreadID(CGF, Loc),
      CGF.Builder.getInt32(Schedule), // Schedule type
      IL,                             // &isLastIter
      LB,                             // &LB
      UB,                             // &UB
      ST,                             // &Stride
      CGF.Builder.getIntN(IVSize, 1), // Incr
      Chunk                           // Chunk
  };
  // Pick the entry point matching the width and signedness of the index.
  auto F = IVSize == 32 ? (IVSigned ? OMPRTL__kmpc_for_static_init_4
                                    : OMPRTL__kmpc_for_static_init_4u)
                        : (IVSigned ? OMPRTL__kmpc_for_static_init_8
                                    : OMPRTL__kmpc_for_static_init_8u);
  auto RTLFn = CreateRuntimeFunction(F);
  CGF.EmitRuntimeCall(RTLFn, Args);
}

/// \brief Emit the call to __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid)
/// that ends a statically scheduled loop.
///
/// \param CGF Function the call is emitted into.
/// \param Loc Source location used for the ident_t argument and the thread id.
/// \param ScheduleKind Schedule kind of the loop; only static (or unknown) is
/// accepted.
void CGOpenMPRuntime::EmitOMPForFinish(CodeGenFunction &CGF, SourceLocation Loc,
                                       OpenMPScheduleClauseKind ScheduleKind) {
  assert((ScheduleKind == OMPC_SCHEDULE_static ||
          ScheduleKind == OMPC_SCHEDULE_unknown) &&
         "Non-static schedule kinds are not yet implemented");
  // Gather the two arguments in the order the runtime expects them.
  llvm::Value *UpdatedLoc = EmitOpenMPUpdateLocation(CGF, Loc, OMP_IDENT_KMPC);
  llvm::Value *ThreadID = GetOpenMPThreadID(CGF, Loc);
  llvm::Value *FiniArgs[] = {UpdatedLoc, ThreadID};
  auto FiniFn = CreateRuntimeFunction(OMPRTL__kmpc_for_static_fini);
  CGF.EmitRuntimeCall(FiniFn, FiniArgs);
}

void CGOpenMPRuntime::EmitOMPNumThreadsClause(CodeGenFunction &CGF,
llvm::Value *NumThreads,
SourceLocation Loc) {
Expand Down
54 changes: 54 additions & 0 deletions clang/lib/CodeGen/CGOpenMPRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H

#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/OpenMPKinds.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringMap.h"
Expand Down Expand Up @@ -66,6 +67,12 @@ class CGOpenMPRuntime {
// Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
// global_tid);
OMPRTL__kmpc_cancel_barrier,
// Calls for static scheduling 'omp for' loops.
OMPRTL__kmpc_for_static_init_4,
OMPRTL__kmpc_for_static_init_4u,
OMPRTL__kmpc_for_static_init_8,
OMPRTL__kmpc_for_static_init_8u,
OMPRTL__kmpc_for_static_fini,
// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
// global_tid);
OMPRTL__kmpc_serialized_parallel,
Expand Down Expand Up @@ -305,6 +312,53 @@ class CGOpenMPRuntime {
virtual void EmitOMPBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
bool IsExplicit = true);

/// \brief Check if the specified \a ScheduleKind is static non-chunked.
/// This kind of worksharing directive is emitted without outer loop.
/// \param ScheduleKind Schedule kind specified in the 'schedule' clause.
/// \param Chunked True if chunk is specified in the clause.
/// \return True if the schedule is static and no chunk is specified.
///
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
bool Chunked) const;

/// \brief Call the appropriate runtime routine to initialize it before start
/// of loop.
///
/// Depending on the loop schedule, it is necessary to call some runtime
/// routine before start of the OpenMP loop to get the loop upper / lower
/// bounds \a LB and \a UB and stride \a ST.
///
/// \param CGF Reference to current CodeGenFunction.
/// \param Loc Clang source location.
/// \param SchedKind Schedule kind, specified by the 'schedule' clause.
/// \param IVSize Size of the iteration variable in bits.
/// \param IVSigned Sign of the iteration variable.
/// \param IL Address of the output variable in which the flag of the
/// last iteration is returned.
/// \param LB Address of the output variable in which the lower iteration
/// number is returned.
/// \param UB Address of the output variable in which the upper iteration
/// number is returned.
/// \param ST Address of the output variable in which the stride value is
/// returned; it is necessary to generate the static_chunked scheduled loop.
/// \param Chunk Value of the chunk for the static_chunked scheduled loop.
/// For the default (nullptr) value, the chunk 1 will be used.
///
virtual void EmitOMPForInit(CodeGenFunction &CGF, SourceLocation Loc,
OpenMPScheduleClauseKind SchedKind,
unsigned IVSize, bool IVSigned, llvm::Value *IL,
llvm::Value *LB, llvm::Value *UB, llvm::Value *ST,
llvm::Value *Chunk = nullptr);

/// \brief Call the appropriate runtime routine to notify that we finished
/// all the work with current loop.
///
/// \param CGF Reference to current CodeGenFunction.
/// \param Loc Clang source location.
/// \param ScheduleKind Schedule kind, specified by the 'schedule' clause.
/// The base implementation asserts that this is the static or the unknown
/// kind; other schedule kinds are not implemented yet.
///
virtual void EmitOMPForFinish(CodeGenFunction &CGF, SourceLocation Loc,
OpenMPScheduleClauseKind ScheduleKind);

/// \brief Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
/// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
/// clause.
Expand Down
106 changes: 104 additions & 2 deletions clang/lib/CodeGen/CGStmtOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -470,8 +470,110 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd());
}

void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &) {
llvm_unreachable("CodeGen for 'omp for' is not supported yet.");
/// \brief Emit the variable declaration that \a Helper refers to, then emit
/// \a Helper itself as an lvalue and return it.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  const VarDecl &HelperDecl = *cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(HelperDecl);
  return CGF.EmitLValue(Helper);
}

/// \brief Emit the code for a worksharing loop described by \a S.
///
/// The iteration variable and (if it is a variable) the iteration count are
/// emitted first. The rest is guarded by the loop precondition: the helper
/// variables are emitted, the schedule clause is inspected and, for a static
/// schedule without a chunk, the runtime init call, the inner loop and the
/// runtime finish call are emitted. Any other schedule is reported through
/// ErrorUnsupported.
void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
// Emit the loop iteration variable.
auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
EmitVarDecl(*IVDecl);

// Emit the iterations count variable.
// If it is not a variable, Sema decided to calculate iterations count on each
// iteration (e.g., it is foldable into a constant).
if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
// Emit calculation of the iterations count.
EmitIgnoredExpr(S.getCalcLastIteration());
}

auto &RT = CGM.getOpenMPRuntime();

// Check pre-condition.
{
// Skip the entire loop if we don't meet the precondition.
RegionCounter Cnt = getPGORegionCounter(&S);
auto ThenBlock = createBasicBlock("omp.precond.then");
auto ContBlock = createBasicBlock("omp.precond.end");
EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
EmitBlock(ThenBlock);
Cnt.beginRegion(Builder);
// Emit 'then' code.
{
// Emit helper vars inits.
// Their addresses are handed to the runtime init call below.
LValue LB =
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
LValue UB =
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
LValue ST =
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
LValue IL =
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

OMPPrivateScope LoopScope(*this);
EmitPrivateLoopCounters(*this, LoopScope, S.counters());

// Detect the loop schedule kind and chunk.
// Without a 'schedule' clause the kind stays unknown and Chunk stays null.
auto ScheduleKind = OMPC_SCHEDULE_unknown;
llvm::Value *Chunk = nullptr;
if (auto C = cast_or_null<OMPScheduleClause>(
S.getSingleClause(OMPC_schedule))) {
ScheduleKind = C->getScheduleKind();
if (auto Ch = C->getChunkSize()) {
// Evaluate the chunk expression and convert it to the type of the
// iteration variable.
Chunk = EmitScalarExpr(Ch);
Chunk = EmitScalarConversion(Chunk, Ch->getType(),
S.getIterationVariable()->getType());
}
}
// Width and signedness of the iteration variable are forwarded to the
// runtime init call.
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
if (RT.isStaticNonchunked(ScheduleKind,
/* Chunked */ Chunk != nullptr)) {
// OpenMP [2.7.1, Loop Construct, Description, table 2-1]
// When no chunk_size is specified, the iteration space is divided into
// chunks that are approximately equal in size, and at most one chunk is
// distributed to each thread. Note that the size of the chunks is
// unspecified in this case.
RT.EmitOMPForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
IL.getAddress(), LB.getAddress(), UB.getAddress(),
ST.getAddress());
// UB = min(UB, GlobalUB);
EmitIgnoredExpr(S.getEnsureUpperBound());
// IV = LB;
EmitIgnoredExpr(S.getInit());
// while (idx <= UB) { BODY; ++idx; }
EmitOMPInnerLoop(S, LoopScope);
// Tell the runtime we are done.
RT.EmitOMPForFinish(*this, S.getLocStart(), ScheduleKind);
} else
// Every schedule other than static without a chunk is rejected here.
ErrorUnsupported(&S, "OpenMP loop with requested schedule");
}
// We're now done with the loop, so jump to the continuation block.
EmitBranch(ContBlock);
EmitBlock(ContBlock, true);
}
}

void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
RunCleanupsScope DirectiveScope(*this);

CGDebugInfo *DI = getDebugInfo();
if (DI)
DI->EmitLexicalBlockStart(Builder, S.getSourceRange().getBegin());

EmitOMPWorksharingLoop(S);

// Emit an implicit barrier at the end.
CGM.getOpenMPRuntime().EmitOMPBarrierCall(*this, S.getLocStart(),
/*IsExplicit*/ false);
if (DI)
DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd());
}

void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) {
Expand Down
7 changes: 6 additions & 1 deletion clang/lib/CodeGen/CodeGenFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -2043,12 +2043,17 @@ class CodeGenFunction : public CodeGenTypeCache {
void EmitOMPTargetDirective(const OMPTargetDirective &S);
void EmitOMPTeamsDirective(const OMPTeamsDirective &S);

/// Helpers for 'omp simd' directive.
private:

/// Helpers for the OpenMP loop directives.
void EmitOMPLoopBody(const OMPLoopDirective &Directive,
bool SeparateIter = false);
void EmitOMPInnerLoop(const OMPLoopDirective &S, OMPPrivateScope &LoopScope,
bool SeparateIter = false);
void EmitOMPSimdFinal(const OMPLoopDirective &S);
/// Emit a worksharing loop for \a S, including the runtime calls needed by
/// its schedule.
void EmitOMPWorksharingLoop(const OMPLoopDirective &S);

public:

//===--------------------------------------------------------------------===//
// LValue Expression Emission
Expand Down
189 changes: 113 additions & 76 deletions clang/lib/Sema/SemaOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2357,46 +2357,6 @@ struct LoopIterationSpace {
SourceRange IncSrcRange;
};

/// \brief Collection of the helper expressions that Sema builds for the whole
/// collapsed loop nest of an OpenMP loop directive and hands over to CodeGen.
/// See class OMPLoopDirective for the meaning of each expression.
struct BuiltLoopExprs {
  Expr *IterationVarRef;
  Expr *LastIteration;
  Expr *CalcLastIteration;
  Expr *PreCond;
  Expr *Cond;
  Expr *SeparatedCond;
  Expr *Init;
  Expr *Inc;
  SmallVector<Expr *, 4> Counters;
  SmallVector<Expr *, 4> Updates;
  SmallVector<Expr *, 4> Finals;

  /// \brief Report whether the checked expressions have all been built.
  /// CalcLastIteration and the per-loop vectors are not part of this check.
  bool builtAll() {
    if (!IterationVarRef || !LastIteration || !PreCond)
      return false;
    if (!Cond || !SeparatedCond)
      return false;
    return Init && Inc;
  }

  /// \brief Reset every expression to null and give each per-loop vector
  /// exactly \a size null entries.
  void clear(unsigned size) {
    IterationVarRef = LastIteration = CalcLastIteration = nullptr;
    PreCond = Cond = SeparatedCond = nullptr;
    Init = Inc = nullptr;
    Counters.assign(size, nullptr);
    Updates.assign(size, nullptr);
    Finals.assign(size, nullptr);
  }
};

} // namespace

/// \brief Called on a for stmt to check and extract its iteration space
Expand Down Expand Up @@ -2613,7 +2573,7 @@ static unsigned
CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *NestedLoopCountExpr,
Stmt *AStmt, Sema &SemaRef, DSAStackTy &DSA,
llvm::DenseMap<VarDecl *, Expr *> &VarsWithImplicitDSA,
BuiltLoopExprs &Built) {
OMPLoopDirective::HelperExprs &Built) {
unsigned NestedLoopCount = 1;
if (NestedLoopCountExpr) {
// Found 'collapse' clause - calculate collapse number.
Expand Down Expand Up @@ -2750,23 +2710,71 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *NestedLoopCountExpr,
CurScope, InitLoc, BO_GT, LastIteration.get(),
SemaRef.ActOnIntegerConstant(SourceLocation(), 0).get());

// Build the iteration variable and its initialization to zero before loop.
QualType VType = LastIteration.get()->getType();
// Build variables passed into runtime, necessary for worksharing directives.
ExprResult LB, UB, IL, ST, EUB;
if (isOpenMPWorksharingDirective(DKind)) {
// Lower bound variable, initialized with zero.
VarDecl *LBDecl = BuildVarDecl(SemaRef, InitLoc, VType, ".omp.lb");
LB = SemaRef.BuildDeclRefExpr(LBDecl, VType, VK_LValue, InitLoc);
SemaRef.AddInitializerToDecl(
LBDecl, SemaRef.ActOnIntegerConstant(InitLoc, 0).get(),
/*DirectInit*/ false, /*TypeMayContainAuto*/ false);

// Upper bound variable, initialized with last iteration number.
VarDecl *UBDecl = BuildVarDecl(SemaRef, InitLoc, VType, ".omp.ub");
UB = SemaRef.BuildDeclRefExpr(UBDecl, VType, VK_LValue, InitLoc);
SemaRef.AddInitializerToDecl(UBDecl, LastIteration.get(),
/*DirectInit*/ false,
/*TypeMayContainAuto*/ false);

// A 32-bit variable-flag where runtime returns 1 for the last iteration.
// This will be used to implement clause 'lastprivate'.
QualType Int32Ty = SemaRef.Context.getIntTypeForBitwidth(32, true);
VarDecl *ILDecl = BuildVarDecl(SemaRef, InitLoc, Int32Ty, ".omp.is_last");
IL = SemaRef.BuildDeclRefExpr(ILDecl, Int32Ty, VK_LValue, InitLoc);
SemaRef.AddInitializerToDecl(
ILDecl, SemaRef.ActOnIntegerConstant(InitLoc, 0).get(),
/*DirectInit*/ false, /*TypeMayContainAuto*/ false);

// Stride variable returned by runtime (we initialize it to 1 by default).
VarDecl *STDecl = BuildVarDecl(SemaRef, InitLoc, VType, ".omp.stride");
ST = SemaRef.BuildDeclRefExpr(STDecl, VType, VK_LValue, InitLoc);
SemaRef.AddInitializerToDecl(
STDecl, SemaRef.ActOnIntegerConstant(InitLoc, 1).get(),
/*DirectInit*/ false, /*TypeMayContainAuto*/ false);

// Build expression: UB = min(UB, LastIteration)
// It is necessary for CodeGen of directives with static scheduling.
ExprResult IsUBGreater = SemaRef.BuildBinOp(CurScope, InitLoc, BO_GT,
UB.get(), LastIteration.get());
ExprResult CondOp = SemaRef.ActOnConditionalOp(
InitLoc, InitLoc, IsUBGreater.get(), LastIteration.get(), UB.get());
EUB = SemaRef.BuildBinOp(CurScope, InitLoc, BO_Assign, UB.get(),
CondOp.get());
EUB = SemaRef.ActOnFinishFullExpr(EUB.get());
}

// Build the iteration variable and its initialization before loop.
ExprResult IV;
ExprResult Init;
{
VarDecl *IVDecl = BuildVarDecl(SemaRef, InitLoc,
LastIteration.get()->getType(), ".omp.iv");
IV = SemaRef.BuildDeclRefExpr(IVDecl, LastIteration.get()->getType(),
VK_LValue, InitLoc);
Init = SemaRef.BuildBinOp(
CurScope, InitLoc, BO_Assign, IV.get(),
SemaRef.ActOnIntegerConstant(SourceLocation(), 0).get());
VarDecl *IVDecl = BuildVarDecl(SemaRef, InitLoc, VType, ".omp.iv");
IV = SemaRef.BuildDeclRefExpr(IVDecl, VType, VK_LValue, InitLoc);
Expr *RHS = isOpenMPWorksharingDirective(DKind)
? LB.get()
: SemaRef.ActOnIntegerConstant(SourceLocation(), 0).get();
Init = SemaRef.BuildBinOp(CurScope, InitLoc, BO_Assign, IV.get(), RHS);
Init = SemaRef.ActOnFinishFullExpr(Init.get());
}

// Loop condition (IV < NumIterations)
// Loop condition (IV < NumIterations) or (IV <= UB) for worksharing loops.
SourceLocation CondLoc;
ExprResult Cond = SemaRef.BuildBinOp(CurScope, CondLoc, BO_LT, IV.get(),
NumIterations.get());
ExprResult Cond =
isOpenMPWorksharingDirective(DKind)
? SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), UB.get())
: SemaRef.BuildBinOp(CurScope, CondLoc, BO_LT, IV.get(),
NumIterations.get());
// Loop condition with 1 iteration separated (IV < LastIteration)
ExprResult SeparatedCond = SemaRef.BuildBinOp(CurScope, CondLoc, BO_LT,
IV.get(), LastIteration.get());
Expand All @@ -2779,6 +2787,35 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *NestedLoopCountExpr,
if (!Inc.isUsable())
return 0;
Inc = SemaRef.BuildBinOp(CurScope, IncLoc, BO_Assign, IV.get(), Inc.get());
Inc = SemaRef.ActOnFinishFullExpr(Inc.get());
if (!Inc.isUsable())
return 0;

// Increments for worksharing loops (LB = LB + ST; UB = UB + ST).
// Used for directives with static scheduling.
ExprResult NextLB, NextUB;
if (isOpenMPWorksharingDirective(DKind)) {
// LB + ST
NextLB = SemaRef.BuildBinOp(CurScope, IncLoc, BO_Add, LB.get(), ST.get());
if (!NextLB.isUsable())
return 0;
// LB = LB + ST
NextLB =
SemaRef.BuildBinOp(CurScope, IncLoc, BO_Assign, LB.get(), NextLB.get());
NextLB = SemaRef.ActOnFinishFullExpr(NextLB.get());
if (!NextLB.isUsable())
return 0;
// UB + ST
NextUB = SemaRef.BuildBinOp(CurScope, IncLoc, BO_Add, UB.get(), ST.get());
if (!NextUB.isUsable())
return 0;
// UB = UB + ST
NextUB =
SemaRef.BuildBinOp(CurScope, IncLoc, BO_Assign, UB.get(), NextUB.get());
NextUB = SemaRef.ActOnFinishFullExpr(NextUB.get());
if (!NextUB.isUsable())
return 0;
}

// Build updates and final values of the loop counters.
bool HasErrors = false;
Expand Down Expand Up @@ -2868,6 +2905,13 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *NestedLoopCountExpr,
Built.SeparatedCond = SeparatedCond.get();
Built.Init = Init.get();
Built.Inc = Inc.get();
Built.LB = LB.get();
Built.UB = UB.get();
Built.IL = IL.get();
Built.ST = ST.get();
Built.EUB = EUB.get();
Built.NLB = NextLB.get();
Built.NUB = NextUB.get();

return NestedLoopCount;
}
Expand All @@ -2887,7 +2931,7 @@ StmtResult Sema::ActOnOpenMPSimdDirective(
ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
SourceLocation EndLoc,
llvm::DenseMap<VarDecl *, Expr *> &VarsWithImplicitDSA) {
BuiltLoopExprs B;
OMPLoopDirective::HelperExprs B;
// In presence of clause 'collapse', it will define the nested loops number.
unsigned NestedLoopCount =
CheckOpenMPLoop(OMPD_simd, GetCollapseNumberExpr(Clauses), AStmt, *this,
Expand All @@ -2899,17 +2943,15 @@ StmtResult Sema::ActOnOpenMPSimdDirective(
"omp simd loop exprs were not built");

getCurFunction()->setHasBranchProtectedScope();
return OMPSimdDirective::Create(
Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt,
B.IterationVarRef, B.LastIteration, B.CalcLastIteration, B.PreCond,
B.Cond, B.SeparatedCond, B.Init, B.Inc, B.Counters, B.Updates, B.Finals);
return OMPSimdDirective::Create(Context, StartLoc, EndLoc, NestedLoopCount,
Clauses, AStmt, B);
}

StmtResult Sema::ActOnOpenMPForDirective(
ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
SourceLocation EndLoc,
llvm::DenseMap<VarDecl *, Expr *> &VarsWithImplicitDSA) {
BuiltLoopExprs B;
OMPLoopDirective::HelperExprs B;
// In presence of clause 'collapse', it will define the nested loops number.
unsigned NestedLoopCount =
CheckOpenMPLoop(OMPD_for, GetCollapseNumberExpr(Clauses), AStmt, *this,
Expand All @@ -2921,29 +2963,28 @@ StmtResult Sema::ActOnOpenMPForDirective(
"omp for loop exprs were not built");

getCurFunction()->setHasBranchProtectedScope();
return OMPForDirective::Create(
Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt,
B.IterationVarRef, B.LastIteration, B.CalcLastIteration, B.PreCond,
B.Cond, B.SeparatedCond, B.Init, B.Inc, B.Counters, B.Updates, B.Finals);
return OMPForDirective::Create(Context, StartLoc, EndLoc, NestedLoopCount,
Clauses, AStmt, B);
}

StmtResult Sema::ActOnOpenMPForSimdDirective(
ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
SourceLocation EndLoc,
llvm::DenseMap<VarDecl *, Expr *> &VarsWithImplicitDSA) {
BuiltLoopExprs B;
OMPLoopDirective::HelperExprs B;
// In presence of clause 'collapse', it will define the nested loops number.
unsigned NestedLoopCount =
CheckOpenMPLoop(OMPD_for_simd, GetCollapseNumberExpr(Clauses), AStmt,
*this, *DSAStack, VarsWithImplicitDSA, B);
if (NestedLoopCount == 0)
return StmtError();

assert((CurContext->isDependentContext() || B.builtAll()) &&
"omp for simd loop exprs were not built");

getCurFunction()->setHasBranchProtectedScope();
return OMPForSimdDirective::Create(
Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt,
B.IterationVarRef, B.LastIteration, B.CalcLastIteration, B.PreCond,
B.Cond, B.SeparatedCond, B.Init, B.Inc, B.Counters, B.Updates, B.Finals);
return OMPForSimdDirective::Create(Context, StartLoc, EndLoc, NestedLoopCount,
Clauses, AStmt, B);
}

StmtResult Sema::ActOnOpenMPSectionsDirective(ArrayRef<OMPClause *> Clauses,
Expand Down Expand Up @@ -3036,7 +3077,7 @@ StmtResult Sema::ActOnOpenMPParallelForDirective(
// longjmp() and throw() must not violate the entry/exit criteria.
CS->getCapturedDecl()->setNothrow();

BuiltLoopExprs B;
OMPLoopDirective::HelperExprs B;
// In presence of clause 'collapse', it will define the nested loops number.
unsigned NestedLoopCount =
CheckOpenMPLoop(OMPD_parallel_for, GetCollapseNumberExpr(Clauses), AStmt,
Expand All @@ -3048,10 +3089,8 @@ StmtResult Sema::ActOnOpenMPParallelForDirective(
"omp parallel for loop exprs were not built");

getCurFunction()->setHasBranchProtectedScope();
return OMPParallelForDirective::Create(
Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt,
B.IterationVarRef, B.LastIteration, B.CalcLastIteration, B.PreCond,
B.Cond, B.SeparatedCond, B.Init, B.Inc, B.Counters, B.Updates, B.Finals);
return OMPParallelForDirective::Create(Context, StartLoc, EndLoc,
NestedLoopCount, Clauses, AStmt, B);
}

StmtResult Sema::ActOnOpenMPParallelForSimdDirective(
Expand All @@ -3067,7 +3106,7 @@ StmtResult Sema::ActOnOpenMPParallelForSimdDirective(
// longjmp() and throw() must not violate the entry/exit criteria.
CS->getCapturedDecl()->setNothrow();

BuiltLoopExprs B;
OMPLoopDirective::HelperExprs B;
// In presence of clause 'collapse', it will define the nested loops number.
unsigned NestedLoopCount =
CheckOpenMPLoop(OMPD_parallel_for_simd, GetCollapseNumberExpr(Clauses),
Expand All @@ -3077,9 +3116,7 @@ StmtResult Sema::ActOnOpenMPParallelForSimdDirective(

getCurFunction()->setHasBranchProtectedScope();
return OMPParallelForSimdDirective::Create(
Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt,
B.IterationVarRef, B.LastIteration, B.CalcLastIteration, B.PreCond,
B.Cond, B.SeparatedCond, B.Init, B.Inc, B.Counters, B.Updates, B.Finals);
Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
}

StmtResult
Expand Down
9 changes: 9 additions & 0 deletions clang/lib/Serialization/ASTReaderStmt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2002,6 +2002,15 @@ void ASTStmtReader::VisitOMPLoopDirective(OMPLoopDirective *D) {
D->setCond(Fst, Snd);
D->setInit(Reader.ReadSubExpr());
D->setInc(Reader.ReadSubExpr());
if (isOpenMPWorksharingDirective(D->getDirectiveKind())) {
D->setIsLastIterVariable(Reader.ReadSubExpr());
D->setLowerBoundVariable(Reader.ReadSubExpr());
D->setUpperBoundVariable(Reader.ReadSubExpr());
D->setStrideVariable(Reader.ReadSubExpr());
D->setEnsureUpperBound(Reader.ReadSubExpr());
D->setNextLowerBound(Reader.ReadSubExpr());
D->setNextUpperBound(Reader.ReadSubExpr());
}
SmallVector<Expr *, 4> Sub;
unsigned CollapsedNum = D->getCollapsedNumber();
Sub.reserve(CollapsedNum);
Expand Down
9 changes: 9 additions & 0 deletions clang/lib/Serialization/ASTWriterStmt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1882,6 +1882,15 @@ void ASTStmtWriter::VisitOMPLoopDirective(OMPLoopDirective *D) {
Writer.AddStmt(D->getCond(/* SeparateIter */ true));
Writer.AddStmt(D->getInit());
Writer.AddStmt(D->getInc());
if (isOpenMPWorksharingDirective(D->getDirectiveKind())) {
Writer.AddStmt(D->getIsLastIterVariable());
Writer.AddStmt(D->getLowerBoundVariable());
Writer.AddStmt(D->getUpperBoundVariable());
Writer.AddStmt(D->getStrideVariable());
Writer.AddStmt(D->getEnsureUpperBound());
Writer.AddStmt(D->getNextLowerBound());
Writer.AddStmt(D->getNextUpperBound());
}
for (auto I : D->counters()) {
Writer.AddStmt(I);
}
Expand Down