diff --git a/clang/bindings/python/clang/cindex.py b/clang/bindings/python/clang/cindex.py index b71f9ed2275e0..a90d48cf6d481 100644 --- a/clang/bindings/python/clang/cindex.py +++ b/clang/bindings/python/clang/cindex.py @@ -1453,6 +1453,9 @@ def is_unexposed(self): # OpenMP fuse directive. OMP_FUSE_DIRECTIVE = 311 + # OpenMP split directive. + OMP_SPLIT_DIRECTIVE = 312 + # OpenACC Compute Construct. OPEN_ACC_COMPUTE_DIRECTIVE = 320 diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index dcf1f4f1b4258..119bd68ff9814 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -2166,6 +2166,10 @@ enum CXCursorKind { */ CXCursor_OMPFuseDirective = 311, + /** OpenMP split directive. + */ + CXCursor_OMPSplitDirective = 312, + /** OpenACC Compute Construct. */ CXCursor_OpenACCComputeConstruct = 320, diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index af5d3f4698eda..ccf2c40bc5efa 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -39,6 +39,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/TrailingObjects.h" #include +#include #include #include #include @@ -1023,6 +1024,106 @@ class OMPSizesClause final } }; +/// This represents the 'counts' clause in the '#pragma omp split' directive. +/// +/// \code +/// #pragma omp split counts(3, omp_fill, 2) +/// for (int i = 0; i < n; ++i) { ... } +/// \endcode +class OMPCountsClause final + : public OMPClause, + private llvm::TrailingObjects { + friend class OMPClauseReader; + friend class llvm::TrailingObjects; + + /// Location of '('. + SourceLocation LParenLoc; + + /// Number of count expressions in the clause. + unsigned NumCounts = 0; + + /// 0-based index of the omp_fill list item. + std::optional OmpFillIndex; + + /// Source location of the omp_fill keyword. + SourceLocation OmpFillLoc; + + /// Build an empty clause. + explicit OMPCountsClause(int NumCounts) + : OMPClause(llvm::omp::OMPC_counts, SourceLocation(), SourceLocation()), + NumCounts(NumCounts) {} + + /// Sets the location of '('. + void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } + void setOmpFillIndex(std::optional Idx) { OmpFillIndex = Idx; } + void setOmpFillLoc(SourceLocation Loc) { OmpFillLoc = Loc; } + + /// Sets the count expressions. + void setCountsRefs(ArrayRef VL) { + assert(VL.size() == NumCounts); + llvm::copy(VL, getCountsRefs().begin()); + } + +public: + /// Build a 'counts' AST node. + /// + /// \param C Context of the AST. + /// \param StartLoc Location of the 'counts' identifier. + /// \param LParenLoc Location of '('. + /// \param EndLoc Location of ')'. + /// \param Counts Content of the clause. + static OMPCountsClause *Create(const ASTContext &C, SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc, ArrayRef Counts, + std::optional FillIdx, + SourceLocation FillLoc); + + /// Build an empty 'counts' AST node for deserialization. + /// + /// \param C Context of the AST. + /// \param NumCounts Number of items in the clause. + static OMPCountsClause *CreateEmpty(const ASTContext &C, unsigned NumCounts); + + /// Returns the location of '('. + SourceLocation getLParenLoc() const { return LParenLoc; } + + /// Returns the number of list items. + unsigned getNumCounts() const { return NumCounts; } + + std::optional getOmpFillIndex() const { return OmpFillIndex; } + SourceLocation getOmpFillLoc() const { return OmpFillLoc; } + bool hasOmpFill() const { return OmpFillIndex.has_value(); } + + /// Returns the count expressions. + MutableArrayRef getCountsRefs() { + return getTrailingObjects(NumCounts); + } + ArrayRef getCountsRefs() const { + return getTrailingObjects(NumCounts); + } + + child_range children() { + MutableArrayRef Counts = getCountsRefs(); + return child_range(reinterpret_cast(Counts.begin()), + reinterpret_cast(Counts.end())); + } + const_child_range children() const { + ArrayRef Counts = getCountsRefs(); + return const_child_range(reinterpret_cast(Counts.begin()), + reinterpret_cast(Counts.end())); + } + child_range used_children() { + return child_range(child_iterator(), child_iterator()); + } + const_child_range used_children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + static bool classof(const OMPClause *T) { + return T->getClauseKind() == llvm::omp::OMPC_counts; + } +}; + /// This class represents the 'permutation' clause in the /// '#pragma omp interchange' directive. /// diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index ce6ad723191e0..1a14dd2c666b5 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3202,6 +3202,9 @@ DEF_TRAVERSE_STMT(OMPFuseDirective, DEF_TRAVERSE_STMT(OMPInterchangeDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) +DEF_TRAVERSE_STMT(OMPSplitDirective, + { TRY_TO(TraverseOMPExecutableDirective(S)); }) + DEF_TRAVERSE_STMT(OMPForDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) @@ -3503,6 +3506,13 @@ bool RecursiveASTVisitor::VisitOMPSizesClause(OMPSizesClause *C) { return true; } +template +bool RecursiveASTVisitor::VisitOMPCountsClause(OMPCountsClause *C) { + for (Expr *E : C->getCountsRefs()) + TRY_TO(TraverseStmt(E)); + return true; +} + template bool RecursiveASTVisitor::VisitOMPPermutationClause( OMPPermutationClause *C) { diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index bc6aeaa8d143c..dbc76e7df8ecd 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -6065,6 +6065,84 @@ class OMPFuseDirective final } }; +/// Represents the '#pragma omp split' loop transformation directive. +/// +/// \code{.c} +/// #pragma omp split counts(3, omp_fill, 2) +/// for (int i = 0; i < n; ++i) +/// ... +/// \endcode +/// +/// This directive transforms a single loop into multiple loops based on +/// index ranges. The transformation splits the iteration space of the loop +/// into multiple contiguous ranges. The \c counts clause is required and +/// exactly one list item must be \c omp_fill. +class OMPSplitDirective final + : public OMPCanonicalLoopNestTransformationDirective { + friend class ASTStmtReader; + friend class OMPExecutableDirective; + + /// Offsets of child members. + enum { + PreInitsOffset = 0, + TransformedStmtOffset, + }; + + explicit OMPSplitDirective(SourceLocation StartLoc, SourceLocation EndLoc, + unsigned NumLoops) + : OMPCanonicalLoopNestTransformationDirective( + OMPSplitDirectiveClass, llvm::omp::OMPD_split, StartLoc, EndLoc, + NumLoops) {} + + void setPreInits(Stmt *PreInits) { + Data->getChildren()[PreInitsOffset] = PreInits; + } + + void setTransformedStmt(Stmt *S) { + Data->getChildren()[TransformedStmtOffset] = S; + } + +public: + /// Create a new AST node representation for '#pragma omp split'. + /// + /// \param C Context of the AST. + /// \param StartLoc Location of the introducer (e.g. the 'omp' token). + /// \param EndLoc Location of the directive's end (e.g. the tok::eod). + /// \param Clauses The directive's clauses (e.g. the required \c counts + /// clause). + /// \param NumLoops Number of affected loops (should be 1 for split). + /// \param AssociatedStmt The outermost associated loop. + /// \param TransformedStmt The loop nest after splitting, or nullptr in + /// dependent contexts. + /// \param PreInits Helper preinits statements for the loop nest. + static OMPSplitDirective *Create(const ASTContext &C, SourceLocation StartLoc, + SourceLocation EndLoc, + ArrayRef Clauses, + unsigned NumLoops, Stmt *AssociatedStmt, + Stmt *TransformedStmt, Stmt *PreInits); + + /// Build an empty '#pragma omp split' AST node for deserialization. + /// + /// \param C Context of the AST. + /// \param NumClauses Number of clauses to allocate. + /// \param NumLoops Number of associated loops to allocate. + static OMPSplitDirective *CreateEmpty(const ASTContext &C, + unsigned NumClauses, unsigned NumLoops); + + /// Gets/sets the associated loops after the transformation, i.e. after + /// de-sugaring. + Stmt *getTransformedStmt() const { + return Data->getChildren()[TransformedStmtOffset]; + } + + /// Return preinits statement. + Stmt *getPreInits() const { return Data->getChildren()[PreInitsOffset]; } + + static bool classof(const Stmt *T) { + return T->getStmtClass() == OMPSplitDirectiveClass; + } +}; + /// This represents '#pragma omp scan' directive. /// /// \code diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h index e8e7643e0dddd..87b6dbefa7a62 100644 --- a/clang/include/clang/ASTMatchers/ASTMatchers.h +++ b/clang/include/clang/ASTMatchers/ASTMatchers.h @@ -8781,6 +8781,26 @@ extern const internal::VariadicDynCastAllOfMatcher ompTargetUpdateDirective; +/// Matches any ``#pragma omp split`` executable directive. +/// +/// Given +/// +/// \code +/// #pragma omp split counts(2, omp_fill) +/// for (int i = 0; i < n; ++i) {} +/// \endcode +/// +/// ``ompSplitDirective()`` matches the split directive. +extern const internal::VariadicDynCastAllOfMatcher + ompSplitDirective; + +/// Matches OpenMP ``counts`` clause used by ``#pragma omp split``. +/// +/// Given ``#pragma omp split counts(1, 2, omp_fill)``, ``ompCountsClause()`` +/// matches the ``counts`` clause node. +extern const internal::VariadicDynCastAllOfMatcher + ompCountsClause; + /// Matches OpenMP ``default`` clause. /// /// Given diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index d5904bd1d6f26..71d504c659cc2 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11176,6 +11176,8 @@ def err_omp_bind_required_on_loop : Error< "construct">; def err_omp_loop_reduction_clause : Error< "'reduction' clause not allowed with '#pragma omp loop bind(teams)'">; +def err_omp_split_counts_not_one_omp_fill : Error< + "exactly one 'omp_fill' must appear in the 'counts' clause">; def warn_break_binds_to_switch : Warning< "'break' is bound to loop, GCC binds it to switch">, InGroup; diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td index 61d76bafdfcde..e166894ea024b 100644 --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -244,6 +244,7 @@ def OMPTileDirective : StmtNode; def OMPStripeDirective : StmtNode; def OMPUnrollDirective : StmtNode; def OMPReverseDirective : StmtNode; +def OMPSplitDirective : StmtNode; def OMPInterchangeDirective : StmtNode; def OMPCanonicalLoopSequenceTransformationDirective diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 08a3d88ee6a36..bd313d37cc4b5 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -6812,6 +6812,9 @@ class Parser : public CodeCompletionHandler { /// Parses the 'sizes' clause of a '#pragma omp tile' directive. OMPClause *ParseOpenMPSizesClause(); + /// Parses the 'counts' clause of a '#pragma omp split' directive. + OMPClause *ParseOpenMPCountsClause(); + /// Parses the 'permutation' clause of a '#pragma omp interchange' directive. OMPClause *ParseOpenMPPermutationClause(); diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index 7853f29f98c25..3621ce96b8724 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -42,6 +42,7 @@ class FunctionScopeInfo; class DeclContext; class DeclGroupRef; +class EnumConstantDecl; class ParsedAttr; class Scope; @@ -457,6 +458,11 @@ class SemaOpenMP : public SemaBase { /// Called on well-formed '#pragma omp reverse'. StmtResult ActOnOpenMPReverseDirective(Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc); + /// Called on well-formed '#pragma omp split' after parsing of its + /// associated statement. + StmtResult ActOnOpenMPSplitDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); /// Called on well-formed '#pragma omp interchange' after parsing of its /// clauses and the associated statement. StmtResult ActOnOpenMPInterchangeDirective(ArrayRef Clauses, @@ -911,6 +917,12 @@ class SemaOpenMP : public SemaBase { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc); + /// Called on well-formed 'counts' clause after parsing its arguments. + OMPClause * + ActOnOpenMPCountsClause(ArrayRef CountExprs, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation EndLoc, + std::optional FillIdx, + SourceLocation FillLoc, unsigned FillCount); /// Called on well-form 'permutation' clause after parsing its arguments. OMPClause *ActOnOpenMPPermutationClause(ArrayRef PermExprs, SourceLocation StartLoc, diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 783cd82895a90..9b798ed484454 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1965,6 +1965,7 @@ enum StmtCode { STMP_OMP_STRIPE_DIRECTIVE, STMT_OMP_UNROLL_DIRECTIVE, STMT_OMP_REVERSE_DIRECTIVE, + STMT_OMP_SPLIT_DIRECTIVE, STMT_OMP_INTERCHANGE_DIRECTIVE, STMT_OMP_FUSE_DIRECTIVE, STMT_OMP_FOR_DIRECTIVE, diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index d4826c3c6edca..3a35e17aff40b 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -15,10 +15,12 @@ #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclOpenMP.h" +#include "clang/AST/Expr.h" #include "clang/AST/ExprOpenMP.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/ErrorHandling.h" #include @@ -986,6 +988,26 @@ OMPSizesClause *OMPSizesClause::CreateEmpty(const ASTContext &C, return new (Mem) OMPSizesClause(NumSizes); } +OMPCountsClause *OMPCountsClause::Create( + const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation EndLoc, ArrayRef Counts, + std::optional FillIdx, SourceLocation FillLoc) { + OMPCountsClause *Clause = CreateEmpty(C, Counts.size()); + Clause->setLocStart(StartLoc); + Clause->setLParenLoc(LParenLoc); + Clause->setLocEnd(EndLoc); + Clause->setCountsRefs(Counts); + Clause->setOmpFillIndex(FillIdx); + Clause->setOmpFillLoc(FillLoc); + return Clause; +} + +OMPCountsClause *OMPCountsClause::CreateEmpty(const ASTContext &C, + unsigned NumCounts) { + void *Mem = C.Allocate(totalSizeToAlloc(NumCounts)); + return new (Mem) OMPCountsClause(NumCounts); +} + OMPPermutationClause *OMPPermutationClause::Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, @@ -1984,6 +2006,19 @@ void OMPClausePrinter::VisitOMPSizesClause(OMPSizesClause *Node) { OS << ")"; } +void OMPClausePrinter::VisitOMPCountsClause(OMPCountsClause *Node) { + OS << "counts("; + std::optional FillIdx = Node->getOmpFillIndex(); + ArrayRef Refs = Node->getCountsRefs(); + llvm::interleaveComma(llvm::seq(Refs.size()), OS, [&](unsigned I) { + if (FillIdx && I == *FillIdx) + OS << "omp_fill"; + else + Refs[I]->printPretty(OS, nullptr, Policy, 0); + }); + OS << ")"; +} + void OMPClausePrinter::VisitOMPPermutationClause(OMPPermutationClause *Node) { OS << "permutation("; llvm::interleaveComma(Node->getArgsRefs(), OS, [&](const Expr *E) { diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index a5b0cd3786a28..9d6b315effb41 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -552,6 +552,27 @@ OMPInterchangeDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses, SourceLocation(), SourceLocation(), NumLoops); } +OMPSplitDirective * +OMPSplitDirective::Create(const ASTContext &C, SourceLocation StartLoc, + SourceLocation EndLoc, ArrayRef Clauses, + unsigned NumLoops, Stmt *AssociatedStmt, + Stmt *TransformedStmt, Stmt *PreInits) { + OMPSplitDirective *Dir = createDirective( + C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc, + NumLoops); + Dir->setTransformedStmt(TransformedStmt); + Dir->setPreInits(PreInits); + return Dir; +} + +OMPSplitDirective *OMPSplitDirective::CreateEmpty(const ASTContext &C, + unsigned NumClauses, + unsigned NumLoops) { + return createEmptyDirective( + C, NumClauses, /*HasAssociatedStmt=*/true, TransformedStmtOffset + 1, + SourceLocation(), SourceLocation(), NumLoops); +} + OMPFuseDirective *OMPFuseDirective::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, ArrayRef Clauses, unsigned NumGeneratedTopLevelLoops, diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index 4d364fdcd5502..e0b930ba0a21a 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -800,6 +800,11 @@ void StmtPrinter::VisitOMPInterchangeDirective(OMPInterchangeDirective *Node) { PrintOMPExecutableDirective(Node); } +void StmtPrinter::VisitOMPSplitDirective(OMPSplitDirective *Node) { + Indent() << "#pragma omp split"; + PrintOMPExecutableDirective(Node); +} + void StmtPrinter::VisitOMPFuseDirective(OMPFuseDirective *Node) { Indent() << "#pragma omp fuse"; PrintOMPExecutableDirective(Node); diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index e8c1f8a8ecb5f..c75652e5c1dd3 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -498,6 +498,12 @@ void OMPClauseProfiler::VisitOMPSizesClause(const OMPSizesClause *C) { Profiler->VisitExpr(E); } +void OMPClauseProfiler::VisitOMPCountsClause(const OMPCountsClause *C) { + for (auto *E : C->getCountsRefs()) + if (E) + Profiler->VisitExpr(E); +} + void OMPClauseProfiler::VisitOMPPermutationClause( const OMPPermutationClause *C) { for (Expr *E : C->getArgsRefs()) @@ -1051,6 +1057,10 @@ void StmtProfiler::VisitOMPInterchangeDirective( VisitOMPCanonicalLoopNestTransformationDirective(S); } +void StmtProfiler::VisitOMPSplitDirective(const OMPSplitDirective *S) { + VisitOMPCanonicalLoopNestTransformationDirective(S); +} + void StmtProfiler::VisitOMPCanonicalLoopSequenceTransformationDirective( const OMPCanonicalLoopSequenceTransformationDirective *S) { VisitOMPExecutableDirective(S); diff --git a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp index d6860ca660987..5cbf134620e34 100644 --- a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp +++ b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp @@ -1139,6 +1139,10 @@ const internal::VariadicDynCastAllOfMatcher ompExecutableDirective; const internal::VariadicDynCastAllOfMatcher ompTargetUpdateDirective; +const internal::VariadicDynCastAllOfMatcher + ompSplitDirective; +const internal::VariadicDynCastAllOfMatcher + ompCountsClause; const internal::VariadicDynCastAllOfMatcher ompDefaultClause; const internal::VariadicDynCastAllOfMatcher diff --git a/clang/lib/ASTMatchers/Dynamic/Registry.cpp b/clang/lib/ASTMatchers/Dynamic/Registry.cpp index f31684f93f6f3..a04070971f0eb 100644 --- a/clang/lib/ASTMatchers/Dynamic/Registry.cpp +++ b/clang/lib/ASTMatchers/Dynamic/Registry.cpp @@ -529,10 +529,12 @@ RegistryMaps::RegistryMaps() { REGISTER_MATCHER(objcTryStmt); REGISTER_MATCHER(ofClass); REGISTER_MATCHER(ofKind); + REGISTER_MATCHER(ompCountsClause); REGISTER_MATCHER(ompDefaultClause); REGISTER_MATCHER(ompFromClause); REGISTER_MATCHER(ompToClause); REGISTER_MATCHER(ompExecutableDirective); + REGISTER_MATCHER(ompSplitDirective); REGISTER_MATCHER(ompTargetUpdateDirective); REGISTER_MATCHER(on); REGISTER_MATCHER(onImplicitObjectArgument); diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 2c693b1958ee7..287eb217ba458 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -256,6 +256,7 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str, case OMPC_safelen: case OMPC_simdlen: case OMPC_sizes: + case OMPC_counts: case OMPC_permutation: case OMPC_allocator: case OMPC_collapse: @@ -635,6 +636,7 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind, case OMPC_safelen: case OMPC_simdlen: case OMPC_sizes: + case OMPC_counts: case OMPC_permutation: case OMPC_allocator: case OMPC_collapse: @@ -815,7 +817,8 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) { bool clang::isOpenMPCanonicalLoopNestTransformationDirective( OpenMPDirectiveKind DKind) { return DKind == OMPD_tile || DKind == OMPD_unroll || DKind == OMPD_reverse || - DKind == OMPD_interchange || DKind == OMPD_stripe; + DKind == OMPD_split || DKind == OMPD_interchange || + DKind == OMPD_stripe; } bool clang::isOpenMPCanonicalLoopSequenceTransformationDirective( diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index a75d3dc64c6b4..7b6035a6968b1 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -230,6 +230,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef Attrs) { case Stmt::OMPReverseDirectiveClass: EmitOMPReverseDirective(cast(*S)); break; + case Stmt::OMPSplitDirectiveClass: + EmitOMPSplitDirective(cast(*S)); + break; case Stmt::OMPInterchangeDirectiveClass: EmitOMPInterchangeDirective(cast(*S)); break; diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 990ec47488465..59d0e6825a975 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -197,6 +197,8 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { PreInits = Unroll->getPreInits(); } else if (const auto *Reverse = dyn_cast(&S)) { PreInits = Reverse->getPreInits(); + } else if (const auto *Split = dyn_cast(&S)) { + PreInits = Split->getPreInits(); } else if (const auto *Interchange = dyn_cast(&S)) { PreInits = Interchange->getPreInits(); @@ -3203,6 +3205,12 @@ void CodeGenFunction::EmitOMPReverseDirective(const OMPReverseDirective &S) { EmitStmt(S.getTransformedStmt()); } +void CodeGenFunction::EmitOMPSplitDirective(const OMPSplitDirective &S) { + // Emit the de-sugared statement (the split loops). + OMPTransformDirectiveScopeRAII SplitScope(*this, &S); + EmitStmt(S.getTransformedStmt()); +} + void CodeGenFunction::EmitOMPInterchangeDirective( const OMPInterchangeDirective &S) { // Emit the de-sugared statement. diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index fd474c09044ef..f06c216e0c746 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3930,6 +3930,7 @@ class CodeGenFunction : public CodeGenTypeCache { void EmitOMPStripeDirective(const OMPStripeDirective &S); void EmitOMPUnrollDirective(const OMPUnrollDirective &S); void EmitOMPReverseDirective(const OMPReverseDirective &S); + void EmitOMPSplitDirective(const OMPSplitDirective &S); void EmitOMPInterchangeDirective(const OMPInterchangeDirective &S); void EmitOMPFuseDirective(const OMPFuseDirective &S); void EmitOMPForDirective(const OMPForDirective &S); diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 29397d67b5bcc..0e92c3fa1b572 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -28,6 +28,7 @@ #include "llvm/Frontend/OpenMP/DirectiveNameParser.h" #include "llvm/Frontend/OpenMP/OMPAssume.h" #include "llvm/Frontend/OpenMP/OMPContext.h" +#include #include using namespace clang; @@ -2424,6 +2425,10 @@ StmtResult Parser::ParseOpenMPExecutableDirective( Diag(Loc, diag::err_omp_required_clause) << getOpenMPDirectiveName(DKind, OMPVersion) << "sizes"; } + if (DKind == OMPD_split && !SeenClauses[unsigned(OMPC_counts)]) { + Diag(Loc, diag::err_omp_required_clause) + << getOpenMPDirectiveName(DKind, OMPVersion) << "counts"; + } StmtResult AssociatedStmt; if (HasAssociatedStatement) { @@ -2986,6 +2991,51 @@ OMPClause *Parser::ParseOpenMPSizesClause() { OpenLoc, CloseLoc); } +OMPClause *Parser::ParseOpenMPCountsClause() { + SourceLocation ClauseNameLoc, OpenLoc, CloseLoc; + SmallVector ValExprs; + std::optional FillIdx; + unsigned FillCount = 0; + SourceLocation FillLoc; + + assert(getOpenMPClauseName(OMPC_counts) == PP.getSpelling(Tok) && + "Expected parsing to start at clause name"); + ClauseNameLoc = ConsumeToken(); + + BalancedDelimiterTracker T(*this, tok::l_paren, tok::annot_pragma_openmp_end); + if (T.consumeOpen()) { + Diag(Tok, diag::err_expected) << tok::l_paren; + return nullptr; + } + + do { + if (Tok.is(tok::identifier) && + Tok.getIdentifierInfo()->getName() == "omp_fill") { + if (FillCount == 0) + FillIdx = ValExprs.size(); + ++FillCount; + FillLoc = Tok.getLocation(); + ConsumeToken(); + ValExprs.push_back(nullptr); + } else { + ExprResult Val = ParseConstantExpression(); + if (!Val.isUsable()) { + T.skipToEnd(); + return nullptr; + } + ValExprs.push_back(Val.get()); + } + } while (TryConsumeToken(tok::comma)); + + if (T.consumeClose()) + return nullptr; + OpenLoc = T.getOpenLocation(); + CloseLoc = T.getCloseLocation(); + + return Actions.OpenMP().ActOnOpenMPCountsClause( + ValExprs, ClauseNameLoc, OpenLoc, CloseLoc, FillIdx, FillLoc, FillCount); +} + OMPClause *Parser::ParseOpenMPLoopRangeClause() { SourceLocation ClauseNameLoc = ConsumeToken(); SourceLocation FirstLoc, CountLoc; @@ -3432,6 +3482,15 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, } Clause = ParseOpenMPPermutationClause(); break; + case OMPC_counts: + if (!FirstClause) { + Diag(Tok, diag::err_omp_more_one_clause) + << getOpenMPDirectiveName(DKind, OMPVersion) + << getOpenMPClauseName(CKind) << 0; + ErrorFound = true; + } + Clause = ParseOpenMPCountsClause(); + break; case OMPC_uses_allocators: Clause = ParseOpenMPUsesAllocatorClause(DKind); break; diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp index 56079ea8e1bf8..40d530a1f3925 100644 --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp @@ -1508,6 +1508,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) { case Stmt::OMPUnrollDirectiveClass: case Stmt::OMPReverseDirectiveClass: case Stmt::OMPInterchangeDirectiveClass: + case Stmt::OMPSplitDirectiveClass: case Stmt::OMPFuseDirectiveClass: case Stmt::OMPSingleDirectiveClass: case Stmt::OMPTargetDataDirectiveClass: diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index fada37ba45755..4a99689e794e1 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -4627,6 +4627,7 @@ void SemaOpenMP::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, case OMPD_stripe: case OMPD_unroll: case OMPD_reverse: + case OMPD_split: case OMPD_interchange: case OMPD_fuse: case OMPD_assume: @@ -6466,6 +6467,10 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective( "reverse directive does not support any clauses"); Res = ActOnOpenMPReverseDirective(AStmt, StartLoc, EndLoc); break; + case OMPD_split: + Res = + ActOnOpenMPSplitDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc); + break; case OMPD_interchange: Res = ActOnOpenMPInterchangeDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc); @@ -15911,6 +15916,235 @@ StmtResult SemaOpenMP::ActOnOpenMPReverseDirective(Stmt *AStmt, buildPreInits(Context, PreInits)); } +/// Build the AST for \#pragma omp split counts(c1, c2, ...). +/// +/// Splits the single associated loop into N consecutive loops, where N is the +/// number of count expressions. +StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(ArrayRef Clauses, + Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc) { + ASTContext &Context = getASTContext(); + Scope *CurScope = SemaRef.getCurScope(); + + // Empty statement should only be possible if there already was an error. + if (!AStmt) + return StmtError(); + + const auto *CountsClause = + OMPExecutableDirective::getSingleClause(Clauses); + if (!CountsClause) + return StmtError(); + + // Split applies to a single loop; check it is transformable and get helpers. + constexpr unsigned NumLoops = 1; + Stmt *Body = nullptr; + SmallVector LoopHelpers( + NumLoops); + SmallVector, NumLoops + 1> OriginalInits; + if (!checkTransformableLoopNest(OMPD_split, AStmt, NumLoops, LoopHelpers, + Body, OriginalInits)) + return StmtError(); + + // Delay applying the transformation to when template is completely + // instantiated. + if (SemaRef.CurContext->isDependentContext()) + return OMPSplitDirective::Create(Context, StartLoc, EndLoc, Clauses, + NumLoops, AStmt, nullptr, nullptr); + + assert(LoopHelpers.size() == NumLoops && + "Expecting a single-dimensional loop iteration space"); + assert(OriginalInits.size() == NumLoops && + "Expecting a single-dimensional loop iteration space"); + OMPLoopBasedDirective::HelperExprs &LoopHelper = LoopHelpers.front(); + + // Find the loop statement. + Stmt *LoopStmt = nullptr; + collectLoopStmts(AStmt, {LoopStmt}); + + // Determine the PreInit declarations. + SmallVector PreInits; + addLoopPreInits(Context, LoopHelper, LoopStmt, OriginalInits[0], PreInits); + + // Type and name of the original loop variable; we create one IV per segment + // and assign it to the original var so the body sees the same name. + auto *IterationVarRef = cast(LoopHelper.IterationVarRef); + QualType IVTy = IterationVarRef->getType(); + uint64_t IVWidth = Context.getTypeSize(IVTy); + auto *OrigVar = cast(LoopHelper.Counters.front()); + + // Iteration variable SourceLocations. + SourceLocation OrigVarLoc = OrigVar->getExprLoc(); + SourceLocation OrigVarLocBegin = OrigVar->getBeginLoc(); + SourceLocation OrigVarLocEnd = OrigVar->getEndLoc(); + // Internal variable names. + std::string OrigVarName = OrigVar->getNameInfo().getAsString(); + + if (!CountsClause->hasOmpFill()) + return StmtError(); + unsigned FillIdx = *CountsClause->getOmpFillIndex(); + + unsigned NumItems = CountsClause->getNumCounts(); + SmallVector CountValues(NumItems, 0); + ArrayRef Refs = CountsClause->getCountsRefs(); + for (unsigned I = 0; I < NumItems; ++I) { + if (I == FillIdx) + continue; + Expr *CountExpr = Refs[I]; + if (!CountExpr) + return OMPSplitDirective::Create(Context, StartLoc, EndLoc, Clauses, + NumLoops, AStmt, nullptr, nullptr); + std::optional OptVal = + CountExpr->getIntegerConstantExpr(Context); + if (!OptVal || OptVal->isNegative()) + return OMPSplitDirective::Create(Context, StartLoc, EndLoc, Clauses, + NumLoops, AStmt, nullptr, nullptr); + CountValues[I] = OptVal->getZExtValue(); + } + + Expr *NumIterExpr = LoopHelper.NumIterations; + + uint64_t RightSum = 0; + for (unsigned I = FillIdx + 1; I < NumItems; ++I) + RightSum += CountValues[I]; + + auto MakeIntLit = [&](uint64_t Val) { + return IntegerLiteral::Create(Context, llvm::APInt(IVWidth, Val), IVTy, + OrigVarLoc); + }; + + size_t NumSegments = NumItems; + SmallVector SplitLoops; + + auto *IterVarDecl = cast(IterationVarRef->getDecl()); + SplitLoops.push_back(new (Context) DeclStmt(DeclGroupRef(IterVarDecl), + IterationVarRef->getBeginLoc(), + IterationVarRef->getEndLoc())); + + uint64_t LeftAccum = 0; + uint64_t RightRemaining = RightSum; + + for (size_t Seg = 0; Seg < NumSegments; ++Seg) { + Expr *StartExpr = nullptr; + Expr *EndExpr = nullptr; + + if (Seg < FillIdx) { + StartExpr = MakeIntLit(LeftAccum); + LeftAccum += CountValues[Seg]; + EndExpr = MakeIntLit(LeftAccum); + } else if (Seg == FillIdx) { + StartExpr = MakeIntLit(LeftAccum); + if (RightRemaining == 0) { + EndExpr = NumIterExpr; + } else { + ExprResult Sub = + SemaRef.BuildBinOp(CurScope, OrigVarLoc, BO_Sub, NumIterExpr, + MakeIntLit(RightRemaining)); + if (!Sub.isUsable()) + return StmtError(); + EndExpr = Sub.get(); + } + } else { + if (RightRemaining == RightSum) { + if (RightSum == 0) + StartExpr = NumIterExpr; + else { + ExprResult Sub = + SemaRef.BuildBinOp(CurScope, OrigVarLoc, BO_Sub, NumIterExpr, + MakeIntLit(RightRemaining)); + if (!Sub.isUsable()) + return StmtError(); + StartExpr = Sub.get(); + } + } else { + ExprResult Sub = + SemaRef.BuildBinOp(CurScope, OrigVarLoc, BO_Sub, NumIterExpr, + MakeIntLit(RightRemaining)); + if (!Sub.isUsable()) + return StmtError(); + StartExpr = Sub.get(); + } + RightRemaining -= CountValues[Seg]; + if (RightRemaining == 0) + EndExpr = NumIterExpr; + else { + ExprResult Sub = + SemaRef.BuildBinOp(CurScope, OrigVarLoc, BO_Sub, NumIterExpr, + MakeIntLit(RightRemaining)); + if (!Sub.isUsable()) + return StmtError(); + EndExpr = Sub.get(); + } + } + + SmallString<64> IVName(".split.iv."); + IVName += (Twine(Seg) + "." + OrigVarName).str(); + VarDecl *IVDecl = buildVarDecl(SemaRef, {}, IVTy, IVName, nullptr, OrigVar); + auto MakeIVRef = [&SemaRef = this->SemaRef, IVDecl, IVTy, OrigVarLoc]() { + return buildDeclRefExpr(SemaRef, IVDecl, IVTy, OrigVarLoc); + }; + + SemaRef.AddInitializerToDecl(IVDecl, StartExpr, /*DirectInit=*/false); + StmtResult InitStmt = new (Context) + DeclStmt(DeclGroupRef(IVDecl), OrigVarLocBegin, OrigVarLocEnd); + if (!InitStmt.isUsable()) + return StmtError(); + + ExprResult CondExpr = SemaRef.BuildBinOp( + CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, MakeIVRef(), EndExpr); + if (!CondExpr.isUsable()) + return StmtError(); + + ExprResult IncrExpr = SemaRef.BuildUnaryOp( + CurScope, LoopHelper.Inc->getExprLoc(), UO_PreInc, MakeIVRef()); + if (!IncrExpr.isUsable()) + return StmtError(); + + ExprResult IVAssign = SemaRef.BuildBinOp(CurScope, OrigVarLoc, BO_Assign, + IterationVarRef, MakeIVRef()); + if (!IVAssign.isUsable()) + return StmtError(); + + SmallVector BodyStmts; + BodyStmts.push_back(IVAssign.get()); + BodyStmts.append(LoopHelper.Updates.begin(), LoopHelper.Updates.end()); + if (auto *CXXRangeFor = dyn_cast(LoopStmt)) { + if (Seg == 0) { + BodyStmts.push_back(CXXRangeFor->getLoopVarStmt()); + } else { + VarDecl *LoopVar = CXXRangeFor->getLoopVariable(); + DeclRefExpr *LVRef = buildDeclRefExpr( + SemaRef, LoopVar, LoopVar->getType().getNonReferenceType(), + OrigVarLoc); + ExprResult LVAssign = SemaRef.BuildBinOp( + CurScope, OrigVarLoc, BO_Assign, LVRef, LoopVar->getInit()); + if (!LVAssign.isUsable()) + return StmtError(); + BodyStmts.push_back(LVAssign.get()); + } + } + BodyStmts.push_back(Body); + + auto *LoopBody = + CompoundStmt::Create(Context, BodyStmts, FPOptionsOverride(), + Body->getBeginLoc(), Body->getEndLoc()); + + auto *For = new (Context) + ForStmt(Context, InitStmt.get(), CondExpr.get(), nullptr, + IncrExpr.get(), LoopBody, LoopHelper.Init->getBeginLoc(), + LoopHelper.Init->getBeginLoc(), LoopHelper.Inc->getEndLoc()); + SplitLoops.push_back(For); + } + + auto *SplitStmt = CompoundStmt::Create( + Context, SplitLoops, FPOptionsOverride(), + SplitLoops.front()->getBeginLoc(), SplitLoops.back()->getEndLoc()); + + return OMPSplitDirective::Create(Context, StartLoc, EndLoc, Clauses, NumLoops, + AStmt, SplitStmt, + buildPreInits(Context, PreInits)); +} + StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc) { @@ -17859,6 +18093,43 @@ OMPClause *SemaOpenMP::ActOnOpenMPSizesClause(ArrayRef SizeExprs, SanitizedSizeExprs); } +OMPClause *SemaOpenMP::ActOnOpenMPCountsClause(ArrayRef CountExprs, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc, + std::optional FillIdx, + SourceLocation FillLoc, + unsigned FillCount) { + SmallVector SanitizedCountExprs(CountExprs); + + // OpenMP 6.0: each list item in counts(...) is either the omp_fill keyword + // or an integral constant expression (non-negative). Runtime variables are + // not permitted; this matches split codegen, which needs segment sizes at + // compile time. + for (unsigned I = 0; I < SanitizedCountExprs.size(); ++I) { + Expr *&CountExpr = SanitizedCountExprs[I]; + if (FillIdx && I == *FillIdx) + continue; + if (!CountExpr) + continue; + + ExprResult Verified = VerifyPositiveIntegerConstantInClause( + CountExpr, OMPC_counts, /*StrictlyPositive=*/false); + if (Verified.isInvalid()) + CountExpr = nullptr; + else + CountExpr = Verified.get(); + } + + if (FillCount != 1) { + Diag(FillCount == 0 ? StartLoc : FillLoc, + diag::err_omp_split_counts_not_one_omp_fill); + } + + return OMPCountsClause::Create(getASTContext(), StartLoc, LParenLoc, EndLoc, + SanitizedCountExprs, FillIdx, FillLoc); +} + OMPClause *SemaOpenMP::ActOnOpenMPPermutationClause(ArrayRef PermExprs, SourceLocation StartLoc, SourceLocation LParenLoc, diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 8ae5df367e0dd..9c6b4ecfe58b3 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -1769,6 +1769,17 @@ class TreeTransform { EndLoc); } + OMPClause *RebuildOMPCountsClause(ArrayRef Counts, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc, + std::optional FillIdx, + SourceLocation FillLoc) { + unsigned FillCount = FillIdx ? 1 : 0; + return getSema().OpenMP().ActOnOpenMPCountsClause( + Counts, StartLoc, LParenLoc, EndLoc, FillIdx, FillLoc, FillCount); + } + /// Build a new OpenMP 'permutation' clause. OMPClause *RebuildOMPPermutationClause(ArrayRef PermExprs, SourceLocation StartLoc, @@ -9759,6 +9770,17 @@ StmtResult TreeTransform::TransformOMPInterchangeDirective( return Res; } +template +StmtResult +TreeTransform::TransformOMPSplitDirective(OMPSplitDirective *D) { + DeclarationNameInfo DirName; + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + D->getDirectiveKind(), DirName, nullptr, D->getBeginLoc()); + StmtResult Res = getDerived().TransformOMPExecutableDirective(D); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); + return Res; +} + template StmtResult TreeTransform::TransformOMPFuseDirective(OMPFuseDirective *D) { @@ -10615,6 +10637,28 @@ OMPClause *TreeTransform::TransformOMPSizesClause(OMPSizesClause *C) { C->getLParenLoc(), C->getEndLoc()); } +template +OMPClause * +TreeTransform::TransformOMPCountsClause(OMPCountsClause *C) { + SmallVector TransformedCounts; + TransformedCounts.reserve(C->getNumCounts()); + for (Expr *E : C->getCountsRefs()) { + if (!E) { + TransformedCounts.push_back(nullptr); + continue; + } + + ExprResult T = getDerived().TransformExpr(E); + if (T.isInvalid()) + return nullptr; + TransformedCounts.push_back(T.get()); + } + + return RebuildOMPCountsClause(TransformedCounts, C->getBeginLoc(), + C->getLParenLoc(), C->getEndLoc(), + C->getOmpFillIndex(), C->getOmpFillLoc()); +} + template OMPClause * TreeTransform::TransformOMPPermutationClause(OMPPermutationClause *C) { diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index b211b0d32e1de..328bc9a08b114 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11437,6 +11437,11 @@ OMPClause *OMPClauseReader::readClause() { C = OMPSizesClause::CreateEmpty(Context, NumSizes); break; } + case llvm::omp::OMPC_counts: { + unsigned NumCounts = Record.readInt(); + C = OMPCountsClause::CreateEmpty(Context, NumCounts); + break; + } case llvm::omp::OMPC_permutation: { unsigned NumLoops = Record.readInt(); C = OMPPermutationClause::CreateEmpty(Context, NumLoops); @@ -11850,6 +11855,16 @@ void OMPClauseReader::VisitOMPSizesClause(OMPSizesClause *C) { C->setLParenLoc(Record.readSourceLocation()); } +void OMPClauseReader::VisitOMPCountsClause(OMPCountsClause *C) { + bool HasFill = Record.readBool(); + if (HasFill) + C->setOmpFillIndex(Record.readInt()); + C->setOmpFillLoc(Record.readSourceLocation()); + for (Expr *&E : C->getCountsRefs()) + E = Record.readSubExpr(); + C->setLParenLoc(Record.readSourceLocation()); +} + void OMPClauseReader::VisitOMPPermutationClause(OMPPermutationClause *C) { for (Expr *&E : C->getArgsRefs()) E = Record.readSubExpr(); diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 801eed43c2440..fb81e4fefdebb 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2529,6 +2529,10 @@ void ASTStmtReader::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) { VisitOMPCanonicalLoopNestTransformationDirective(D); } +void ASTStmtReader::VisitOMPSplitDirective(OMPSplitDirective *D) { + VisitOMPCanonicalLoopNestTransformationDirective(D); +} + void ASTStmtReader::VisitOMPFuseDirective(OMPFuseDirective *D) { VisitOMPCanonicalLoopSequenceTransformationDirective(D); } @@ -3687,6 +3691,13 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { break; } + case STMT_OMP_SPLIT_DIRECTIVE: { + unsigned NumLoops = Record[ASTStmtReader::NumStmtFields]; + unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1]; + S = OMPSplitDirective::CreateEmpty(Context, NumClauses, NumLoops); + break; + } + case STMT_OMP_FUSE_DIRECTIVE: { unsigned NumClauses = Record[ASTStmtReader::NumStmtFields]; S = OMPFuseDirective::CreateEmpty(Context, NumClauses); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 4b3adce07f10c..f22536a81941a 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -8064,6 +8064,17 @@ void OMPClauseWriter::VisitOMPSizesClause(OMPSizesClause *C) { Record.AddSourceLocation(C->getLParenLoc()); } +void OMPClauseWriter::VisitOMPCountsClause(OMPCountsClause *C) { + Record.push_back(C->getNumCounts()); + Record.push_back(C->hasOmpFill()); + if (C->hasOmpFill()) + Record.push_back(*C->getOmpFillIndex()); + Record.AddSourceLocation(C->getOmpFillLoc()); + for (Expr *Count : C->getCountsRefs()) + Record.AddStmt(Count); + Record.AddSourceLocation(C->getLParenLoc()); +} + void OMPClauseWriter::VisitOMPPermutationClause(OMPPermutationClause *C) { Record.push_back(C->getNumLoops()); for (Expr *Size : C->getArgsRefs()) diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index 934a95df1be7e..4612cd2a7944d 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2546,6 +2546,11 @@ void ASTStmtWriter::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) { Code = serialization::STMT_OMP_INTERCHANGE_DIRECTIVE; } +void ASTStmtWriter::VisitOMPSplitDirective(OMPSplitDirective *D) { + VisitOMPCanonicalLoopNestTransformationDirective(D); + Code = serialization::STMT_OMP_SPLIT_DIRECTIVE; +} + void ASTStmtWriter::VisitOMPCanonicalLoopSequenceTransformationDirective( OMPCanonicalLoopSequenceTransformationDirective *D) { VisitStmt(D); diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 4b24ac61337da..ca544aad46e0a 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -1812,6 +1812,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::OMPStripeDirectiveClass: case Stmt::OMPTileDirectiveClass: case Stmt::OMPInterchangeDirectiveClass: + case Stmt::OMPSplitDirectiveClass: case Stmt::OMPFuseDirectiveClass: case Stmt::OMPInteropDirectiveClass: case Stmt::OMPDispatchDirectiveClass: diff --git a/clang/test/AST/ast-dump-openmp-split.c b/clang/test/AST/ast-dump-openmp-split.c new file mode 100644 index 0000000000000..821badae55e66 --- /dev/null +++ b/clang/test/AST/ast-dump-openmp-split.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=60 -ast-dump %s | FileCheck %s +// +// OMPSplitDirective / OMPCountsClause; + +void body(int); + +void test(void) { +#pragma omp split counts(3, omp_fill) + for (int i = 0; i < 10; ++i) + body(i); +} + +// CHECK: OMPSplitDirective +// CHECK: OMPCountsClause +// CHECK: IntegerLiteral{{.*}}3 +// CHECK: <<>> +// CHECK: ForStmt +// CHECK: <<>> +// CHECK: CallExpr diff --git a/clang/test/Analysis/split_analyze.c b/clang/test/Analysis/split_analyze.c new file mode 100644 index 0000000000000..60e9be477b919 --- /dev/null +++ b/clang/test/Analysis/split_analyze.c @@ -0,0 +1,11 @@ +// Static analyzer invocation on split loop. +// RUN: %clang_analyze_cc1 -triple x86_64-unknown-linux-gnu -fopenmp -fopenmp-version=60 -analyzer-checker=core.builtin -verify %s +// expected-no-diagnostics + +void g(int); + +void f(int n) { +#pragma omp split counts(2, omp_fill) + for (int i = 0; i < n; ++i) + g(i); +} diff --git a/clang/test/Index/openmp-split.c b/clang/test/Index/openmp-split.c new file mode 100644 index 0000000000000..0c63f12297930 --- /dev/null +++ b/clang/test/Index/openmp-split.c @@ -0,0 +1,11 @@ +// RUN: c-index-test -test-load-source local %s -fopenmp=libomp -fopenmp-version=60 | FileCheck %s + +void test(void) { +#pragma omp split counts(3, omp_fill) + for (int i = 0; i < 20; i += 1) + ; +} + +// CHECK: openmp-split.c:4:1: OMPSplitDirective= Extent=[4:1 - 4:38] +// CHECK: openmp-split.c:4:26: IntegerLiteral= Extent=[4:26 - 4:27] +// CHECK: openmp-split.c:5:3: ForStmt= Extent=[5:3 - 6:6] diff --git a/clang/test/OpenMP/split_ast_print.cpp b/clang/test/OpenMP/split_ast_print.cpp new file mode 100644 index 0000000000000..9673882bc6778 --- /dev/null +++ b/clang/test/OpenMP/split_ast_print.cpp @@ -0,0 +1,71 @@ +// AST dump + ast-print round-trip for omp_fill at every position in counts(). +// +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -fsyntax-only -verify %s +// expected-no-diagnostics +// +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -ast-dump %s | FileCheck %s --check-prefix=DUMP +// +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -ast-print %s | FileCheck %s --check-prefix=PRINT + +#ifndef HEADER +#define HEADER + +extern "C" void body(...); + +// --- omp_fill at last position: counts(2, omp_fill) --- +// PRINT-LABEL: void fill_last( +// DUMP-LABEL: FunctionDecl {{.*}} fill_last +void fill_last(int n) { + // PRINT: #pragma omp split counts(2, omp_fill) + // DUMP: OMPSplitDirective + // DUMP: OMPCountsClause + #pragma omp split counts(2, omp_fill) + // PRINT: for (int i = 0; i < n; ++i) + // DUMP: ForStmt + for (int i = 0; i < n; ++i) + body(i); +} + +// --- omp_fill at first position: counts(omp_fill, 3) --- +// PRINT-LABEL: void fill_first( +// DUMP-LABEL: FunctionDecl {{.*}} fill_first +void fill_first(int n) { + // PRINT: #pragma omp split counts(omp_fill, 3) + // DUMP: OMPSplitDirective + // DUMP: OMPCountsClause + #pragma omp split counts(omp_fill, 3) + // PRINT: for (int i = 0; i < n; ++i) + // DUMP: ForStmt + for (int i = 0; i < n; ++i) + body(i); +} + +// --- omp_fill at middle position: counts(1, omp_fill, 1) --- +// PRINT-LABEL: void fill_mid( +// DUMP-LABEL: FunctionDecl {{.*}} fill_mid +void fill_mid(int n) { + // PRINT: #pragma omp split counts(1, omp_fill, 1) + // DUMP: OMPSplitDirective + // DUMP: OMPCountsClause + #pragma omp split counts(1, omp_fill, 1) + // PRINT: for (int i = 0; i < n; ++i) + // DUMP: ForStmt + for (int i = 0; i < n; ++i) + body(i); +} + +// --- omp_fill as sole item: counts(omp_fill) --- +// PRINT-LABEL: void fill_only( +// DUMP-LABEL: FunctionDecl {{.*}} fill_only +void fill_only(int n) { + // PRINT: #pragma omp split counts(omp_fill) + // DUMP: OMPSplitDirective + // DUMP: OMPCountsClause + #pragma omp split counts(omp_fill) + // PRINT: for (int i = 0; i < n; ++i) + // DUMP: ForStmt + for (int i = 0; i < n; ++i) + body(i); +} + +#endif diff --git a/clang/test/OpenMP/split_codegen.cpp b/clang/test/OpenMP/split_codegen.cpp new file mode 100644 index 0000000000000..9c739c013c2fc --- /dev/null +++ b/clang/test/OpenMP/split_codegen.cpp @@ -0,0 +1,1986 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --prefix-filecheck-ir-name _ --version 4 +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fclang-abi-compat=latest -std=c++20 -fopenmp -fopenmp-version=60 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fclang-abi-compat=latest -std=c++20 -fopenmp -fopenmp-version=60 -emit-pch -o %t %s +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fclang-abi-compat=latest -std=c++20 -fopenmp -fopenmp-version=60 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK2 +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +extern "C" void body(...) {} + +struct S { + int i; + S() { +#pragma omp split counts(5, omp_fill) + for (i = 0; i < 20; i++) + body(i); + } +} s; + +extern "C" void split_two_const_trip() { +#pragma omp split counts(3, omp_fill) + for (int i = 0; i < 10; ++i) + body(i); +} + +extern "C" void split_var_trip(int n) { +#pragma omp split counts(3, omp_fill) + for (int i = 0; i < n; ++i) + body(i); +} + +extern "C" void split_mid_fill(int n) { +#pragma omp split counts(2, omp_fill, 3) + for (int i = 0; i < n; ++i) + body(i); +} + +extern "C" void split_first_fill(int n) { +#pragma omp split counts(omp_fill, 4) + for (int i = 0; i < n; ++i) + body(i); +} + +extern "C" void split_only_fill(int n) { +#pragma omp split counts(omp_fill) + for (int i = 0; i < n; ++i) + body(i); +} + +extern "C" void split_neg_start(int n) { +#pragma omp split counts(1, omp_fill, 1) + for (int i = -1; i <= n; ++i) + body(i); +} + +extern "C" void split_zero_first() { +#pragma omp split counts(0, omp_fill) + for (int i = 0; i < 10; ++i) + body(i); +} + +extern "C" void split_three_const(int n) { +#pragma omp split counts(2, 3, omp_fill) + for (int i = 0; i < n; ++i) + body(i); +} + +extern "C" void split_step2(int n) { +#pragma omp split counts(3, omp_fill) + for (int i = 0; i < n; i += 2) + body(i); +} + +extern "C" void split_decrement(int n) { +#pragma omp split counts(omp_fill, 2) + for (int i = n; i > 0; --i) + body(i); +} + +void split_range_for() { + int a[] = {10, 20, 30, 40}; +#pragma omp split counts(2, omp_fill) + for (int x : a) + body(x); +} + +#endif +// CHECK1-LABEL: define dso_local void @body( +// CHECK1-SAME: ...) #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define internal void @__cxx_global_var_init( +// CHECK1-SAME: ) #[[ATTR1:[0-9]+]] section ".text.startup" { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @_ZN1SC1Ev(ptr noundef nonnull align 4 dereferenceable(4) @s) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define linkonce_odr void @_ZN1SC1Ev( +// CHECK1-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define linkonce_odr void @_ZN1SC2Ev( +// CHECK1-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[I2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[I:%.*]] = getelementptr inbounds nuw [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: [[I3:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[I3]], ptr [[I2]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 5 +// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[I2]], align 8, !nonnull [[META2:![0-9]+]], !align [[META3:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I2]], align 8, !nonnull [[META2]], !align [[META3]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP5]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: store i32 5, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND4:%.*]] +// CHECK1: for.cond4: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP7]], 20 +// CHECK1-NEXT: br i1 [[CMP5]], label [[FOR_BODY6:%.*]], label [[FOR_END11:%.*]] +// CHECK1: for.body6: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[I2]], align 8, !nonnull [[META2]], !align [[META3]] +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[I2]], align 8, !nonnull [[META2]], !align [[META3]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP12]]) +// CHECK1-NEXT: br label [[FOR_INC9:%.*]] +// CHECK1: for.inc9: +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: [[INC10:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[INC10]], ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND4]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK1: for.end11: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @split_two_const_trip( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 3 +// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP3]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: store i32 3, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND1:%.*]] +// CHECK1: for.cond1: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK1-NEXT: br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END8:%.*]] +// CHECK1: for.body3: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP8]]) +// CHECK1-NEXT: br label [[FOR_INC6:%.*]] +// CHECK1: for.inc6: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: [[INC7:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: store i32 [[INC7]], ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND1]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK1: for.end8: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @split_var_trip( +// CHECK1-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 3 +// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP5]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: store i32 3, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND3:%.*]] +// CHECK1: for.cond3: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP7]], [[ADD4]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[FOR_BODY6:%.*]], label [[FOR_END11:%.*]] +// CHECK1: for.body6: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP11]]) +// CHECK1-NEXT: br label [[FOR_INC9:%.*]] +// CHECK1: for.inc9: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: [[INC10:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: store i32 [[INC10]], ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND3]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1: for.end11: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @split_mid_fill( +// CHECK1-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSPLIT_IV_2_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 2 +// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP5]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: store i32 2, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND3:%.*]] +// CHECK1: for.cond3: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[SUB5:%.*]] = sub nsw i32 [[ADD4]], 3 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP7]], [[SUB5]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[FOR_BODY7:%.*]], label [[FOR_END12:%.*]] +// CHECK1: for.body7: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL8]] +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP11]]) +// CHECK1-NEXT: br label [[FOR_INC10:%.*]] +// CHECK1: for.inc10: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: [[INC11:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: store i32 [[INC11]], ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND3]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK1: for.end12: +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i32 [[ADD13]], 3 +// CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND15:%.*]] +// CHECK1: for.cond15: +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[CMP17:%.*]] = icmp slt i32 [[TMP14]], [[ADD16]] +// CHECK1-NEXT: br i1 [[CMP17]], label [[FOR_BODY18:%.*]], label [[FOR_END23:%.*]] +// CHECK1: for.body18: +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL19:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 0, [[MUL19]] +// CHECK1-NEXT: store i32 [[ADD20]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP18]]) +// CHECK1-NEXT: br label [[FOR_INC21:%.*]] +// CHECK1: for.inc21: +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK1-NEXT: [[INC22:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[INC22]], ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND15]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK1: for.end23: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @split_first_fill( +// CHECK1-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[ADD]], 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], [[SUB3]] +// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], 1 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP6]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[ADD5]], 4 +// CHECK1-NEXT: store i32 [[SUB6]], ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND7:%.*]] +// CHECK1: for.cond7: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP9]], [[ADD8]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[FOR_BODY10:%.*]], label [[FOR_END15:%.*]] +// CHECK1: for.body10: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL11:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK1-NEXT: store i32 [[ADD12]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP13]]) +// CHECK1-NEXT: br label [[FOR_INC13:%.*]] +// CHECK1: for.inc13: +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: [[INC14:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: store i32 [[INC14]], ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND7]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK1: for.end15: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @split_only_fill( +// CHECK1-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], [[ADD]] +// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], 1 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP6]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @split_neg_start( +// CHECK1-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSPLIT_IV_2_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 -1, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP1]], -2 +// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP2]], 1 +// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP4]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 -1, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP5]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP6]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: store i32 1, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND3:%.*]] +// CHECK1: for.cond3: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add i32 [[TMP8]], 1 +// CHECK1-NEXT: [[SUB5:%.*]] = sub i32 [[ADD4]], 1 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP7]], [[SUB5]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[FOR_BODY7:%.*]], label [[FOR_END12:%.*]] +// CHECK1: for.body7: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL8:%.*]] = mul i32 [[TMP10]], 1 +// CHECK1-NEXT: [[ADD9:%.*]] = add i32 -1, [[MUL8]] +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP11]]) +// CHECK1-NEXT: br label [[FOR_INC10:%.*]] +// CHECK1: for.inc10: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: [[INC11:%.*]] = add i32 [[TMP12]], 1 +// CHECK1-NEXT: store i32 [[INC11]], ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND3]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK1: for.end12: +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP13]], 1 +// CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[ADD13]], 1 +// CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND15:%.*]] +// CHECK1: for.cond15: +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD16:%.*]] = add i32 [[TMP15]], 1 +// CHECK1-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP14]], [[ADD16]] +// CHECK1-NEXT: br i1 [[CMP17]], label [[FOR_BODY18:%.*]], label [[FOR_END23:%.*]] +// CHECK1: for.body18: +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL19:%.*]] = mul i32 [[TMP17]], 1 +// CHECK1-NEXT: [[ADD20:%.*]] = add i32 -1, [[MUL19]] +// CHECK1-NEXT: store i32 [[ADD20]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP18]]) +// CHECK1-NEXT: br label [[FOR_INC21:%.*]] +// CHECK1: for.inc21: +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK1-NEXT: [[INC22:%.*]] = add i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[INC22]], ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND15]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK1: for.end23: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @split_zero_first( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 0 +// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP3]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: store i32 0, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND1:%.*]] +// CHECK1: for.cond1: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK1-NEXT: br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END8:%.*]] +// CHECK1: for.body3: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP8]]) +// CHECK1-NEXT: br label [[FOR_INC6:%.*]] +// CHECK1: for.inc6: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: [[INC7:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: store i32 [[INC7]], ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND1]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK1: for.end8: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @split_three_const( +// CHECK1-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSPLIT_IV_2_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 2 +// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP5]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: store i32 2, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND3:%.*]] +// CHECK1: for.cond3: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP7]], 5 +// CHECK1-NEXT: br i1 [[CMP4]], label [[FOR_BODY5:%.*]], label [[FOR_END10:%.*]] +// CHECK1: for.body5: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP10]]) +// CHECK1-NEXT: br label [[FOR_INC8:%.*]] +// CHECK1: for.inc8: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: [[INC9:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: store i32 [[INC9]], ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND3]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK1: for.end10: +// CHECK1-NEXT: store i32 5, ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND11:%.*]] +// CHECK1: for.cond11: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[CMP13:%.*]] = icmp slt i32 [[TMP12]], [[ADD12]] +// CHECK1-NEXT: br i1 [[CMP13]], label [[FOR_BODY14:%.*]], label [[FOR_END19:%.*]] +// CHECK1: for.body14: +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] +// CHECK1-NEXT: store i32 [[ADD16]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP16]]) +// CHECK1-NEXT: br label [[FOR_INC17:%.*]] +// CHECK1: for.inc17: +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK1-NEXT: [[INC18:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: store i32 [[INC18]], ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND11]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK1: for.end19: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @split_step2( +// CHECK1-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP1]], -1 +// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 2 +// CHECK1-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP2]], 3 +// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP4]], 2 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP5]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP6]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: store i32 3, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND3:%.*]] +// CHECK1: for.cond3: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add i32 [[TMP8]], 1 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP7]], [[ADD4]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[FOR_BODY6:%.*]], label [[FOR_END11:%.*]] +// CHECK1: for.body6: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL7:%.*]] = mul i32 [[TMP10]], 2 +// CHECK1-NEXT: [[ADD8:%.*]] = add i32 0, [[MUL7]] +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP11]]) +// CHECK1-NEXT: br label [[FOR_INC9:%.*]] +// CHECK1: for.inc9: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: [[INC10:%.*]] = add i32 [[TMP12]], 1 +// CHECK1-NEXT: store i32 [[INC10]], ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND3]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK1: for.end11: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @split_decrement( +// CHECK1-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[ADD]], 2 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP3]], [[SUB3]] +// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP6]], [[MUL]] +// CHECK1-NEXT: store i32 [[SUB4]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP8]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[ADD5]], 2 +// CHECK1-NEXT: store i32 [[SUB6]], ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND7:%.*]] +// CHECK1: for.cond7: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP11]], [[ADD8]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[FOR_BODY10:%.*]], label [[FOR_END15:%.*]] +// CHECK1: for.body10: +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL11:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP14]], [[MUL11]] +// CHECK1-NEXT: store i32 [[SUB12]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP16]]) +// CHECK1-NEXT: br label [[FOR_INC13:%.*]] +// CHECK1: for.inc13: +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: [[INC14:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: store i32 [[INC14]], ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND7]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK1: for.end15: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @_Z15split_range_forv( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[A:%.*]] = alloca [4 x i32], align 16 +// CHECK1-NEXT: [[__RANGE1:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END1:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN1:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTSPLIT_IV_0___BEGIN1:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSPLIT_IV_1___BEGIN1:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[A]], ptr align 16 @__const._Z15split_range_forv.a, i64 16, i1 false) +// CHECK1-NEXT: store ptr [[A]], ptr [[__RANGE1]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !nonnull [[META2]], !align [[META3]] +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAYDECAY]], i64 4 +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END1]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !nonnull [[META2]], !align [[META3]] +// CHECK1-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP1]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY1]], ptr [[__BEGIN1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !nonnull [[META2]], !align [[META3]] +// CHECK1-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY2]], ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__END1]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP4]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP5]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[SUB]], 1 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD]], 1 +// CHECK1-NEXT: [[SUB5:%.*]] = sub nsw i64 [[DIV]], 1 +// CHECK1-NEXT: store i64 [[SUB5]], ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTSPLIT_IV_0___BEGIN1]], align 8 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTSPLIT_IV_0___BEGIN1]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i64 [[TMP6]], 2 +// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTSPLIT_IV_0___BEGIN1]], align 8 +// CHECK1-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP9]], 1 +// CHECK1-NEXT: [[ADD_PTR6:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[MUL]] +// CHECK1-NEXT: store ptr [[ADD_PTR6]], ptr [[__BEGIN1]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__BEGIN1]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[X]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[X]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP12]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTSPLIT_IV_0___BEGIN1]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i64 [[TMP13]], 1 +// CHECK1-NEXT: store i64 [[INC]], ptr [[DOTSPLIT_IV_0___BEGIN1]], align 8 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: store i64 2, ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8 +// CHECK1-NEXT: br label [[FOR_COND7:%.*]] +// CHECK1: for.cond7: +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i64 [[TMP15]], 1 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp slt i64 [[TMP14]], [[ADD8]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[FOR_BODY10:%.*]], label [[FOR_END15:%.*]] +// CHECK1: for.body10: +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8 +// CHECK1-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL11:%.*]] = mul nsw i64 [[TMP18]], 1 +// CHECK1-NEXT: [[ADD_PTR12:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[MUL11]] +// CHECK1-NEXT: store ptr [[ADD_PTR12]], ptr [[__BEGIN1]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__BEGIN1]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[X]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[X]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP21]]) +// CHECK1-NEXT: br label [[FOR_INC13:%.*]] +// CHECK1: for.inc13: +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8 +// CHECK1-NEXT: [[INC14:%.*]] = add nsw i64 [[TMP22]], 1 +// CHECK1-NEXT: store i64 [[INC14]], ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8 +// CHECK1-NEXT: br label [[FOR_COND7]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK1: for.end15: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define internal void @_GLOBAL__sub_I_split_codegen.cpp( +// CHECK1-SAME: ) #[[ATTR1]] section ".text.startup" { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @__cxx_global_var_init() +// CHECK1-NEXT: ret void +// +// +// CHECK2-LABEL: define internal void @__cxx_global_var_init( +// CHECK2-SAME: ) #[[ATTR0:[0-9]+]] section ".text.startup" { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: call void @_ZN1SC1Ev(ptr noundef nonnull align 4 dereferenceable(4) @s) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define linkonce_odr void @_ZN1SC1Ev( +// CHECK2-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: call void @_ZN1SC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define linkonce_odr void @_ZN1SC2Ev( +// CHECK2-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[I2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: [[I:%.*]] = getelementptr inbounds nuw [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: [[I3:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[THIS1]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[I3]], ptr [[I2]], align 8 +// CHECK2-NEXT: store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 5 +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[I2]], align 8, !nonnull [[META2:![0-9]+]], !align [[META3:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I2]], align 8, !nonnull [[META2]], !align [[META3]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP5]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: store i32 5, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND4:%.*]] +// CHECK2: for.cond4: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP7]], 20 +// CHECK2-NEXT: br i1 [[CMP5]], label [[FOR_BODY6:%.*]], label [[FOR_END11:%.*]] +// CHECK2: for.body6: +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[I2]], align 8, !nonnull [[META2]], !align [[META3]] +// CHECK2-NEXT: store i32 [[ADD8]], ptr [[TMP10]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[I2]], align 8, !nonnull [[META2]], !align [[META3]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP12]]) +// CHECK2-NEXT: br label [[FOR_INC9:%.*]] +// CHECK2: for.inc9: +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: [[INC10:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK2-NEXT: store i32 [[INC10]], ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND4]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK2: for.end11: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @body( +// CHECK2-SAME: ...) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @_Z15split_range_forv( +// CHECK2-SAME: ) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[A:%.*]] = alloca [4 x i32], align 16 +// CHECK2-NEXT: [[__RANGE1:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END1:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN1:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTSPLIT_IV_0___BEGIN1:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSPLIT_IV_1___BEGIN1:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[A]], ptr align 16 @__const._Z15split_range_forv.a, i64 16, i1 false) +// CHECK2-NEXT: store ptr [[A]], ptr [[__RANGE1]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !nonnull [[META2]], !align [[META3]] +// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP0]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAYDECAY]], i64 4 +// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END1]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !nonnull [[META2]], !align [[META3]] +// CHECK2-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP1]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY1]], ptr [[__BEGIN1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !nonnull [[META2]], !align [[META3]] +// CHECK2-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP2]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY2]], ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__END1]], align 8 +// CHECK2-NEXT: store ptr [[TMP3]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP4]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP5]] to i64 +// CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i64 [[SUB]], 1 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD]], 1 +// CHECK2-NEXT: [[SUB5:%.*]] = sub nsw i64 [[DIV]], 1 +// CHECK2-NEXT: store i64 [[SUB5]], ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTSPLIT_IV_0___BEGIN1]], align 8 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTSPLIT_IV_0___BEGIN1]], align 8 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i64 [[TMP6]], 2 +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTSPLIT_IV_0___BEGIN1]], align 8 +// CHECK2-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP9]], 1 +// CHECK2-NEXT: [[ADD_PTR6:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[MUL]] +// CHECK2-NEXT: store ptr [[ADD_PTR6]], ptr [[__BEGIN1]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__BEGIN1]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[X]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[X]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP12]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTSPLIT_IV_0___BEGIN1]], align 8 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i64 [[TMP13]], 1 +// CHECK2-NEXT: store i64 [[INC]], ptr [[DOTSPLIT_IV_0___BEGIN1]], align 8 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: store i64 2, ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8 +// CHECK2-NEXT: br label [[FOR_COND7:%.*]] +// CHECK2: for.cond7: +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i64 [[TMP15]], 1 +// CHECK2-NEXT: [[CMP9:%.*]] = icmp slt i64 [[TMP14]], [[ADD8]] +// CHECK2-NEXT: br i1 [[CMP9]], label [[FOR_BODY10:%.*]], label [[FOR_END15:%.*]] +// CHECK2: for.body10: +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8 +// CHECK2-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[MUL11:%.*]] = mul nsw i64 [[TMP18]], 1 +// CHECK2-NEXT: [[ADD_PTR12:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[MUL11]] +// CHECK2-NEXT: store ptr [[ADD_PTR12]], ptr [[__BEGIN1]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__BEGIN1]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK2-NEXT: store i32 [[TMP20]], ptr [[X]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[X]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP21]]) +// CHECK2-NEXT: br label [[FOR_INC13:%.*]] +// CHECK2: for.inc13: +// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8 +// CHECK2-NEXT: [[INC14:%.*]] = add nsw i64 [[TMP22]], 1 +// CHECK2-NEXT: store i64 [[INC14]], ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8 +// CHECK2-NEXT: br label [[FOR_COND7]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK2: for.end15: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @split_decrement( +// CHECK2-SAME: i32 noundef [[N:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[ADD]], 2 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP3]], [[SUB3]] +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP6]], [[MUL]] +// CHECK2-NEXT: store i32 [[SUB4]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP8]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK2-NEXT: [[SUB6:%.*]] = sub nsw i32 [[ADD5]], 2 +// CHECK2-NEXT: store i32 [[SUB6]], ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND7:%.*]] +// CHECK2: for.cond7: +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP11]], [[ADD8]] +// CHECK2-NEXT: br i1 [[CMP9]], label [[FOR_BODY10:%.*]], label [[FOR_END15:%.*]] +// CHECK2: for.body10: +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL11:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK2-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP14]], [[MUL11]] +// CHECK2-NEXT: store i32 [[SUB12]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP16]]) +// CHECK2-NEXT: br label [[FOR_INC13:%.*]] +// CHECK2: for.inc13: +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: [[INC14:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK2-NEXT: store i32 [[INC14]], ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND7]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2: for.end15: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @split_first_fill( +// CHECK2-SAME: i32 noundef [[N:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[ADD]], 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], [[SUB3]] +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], 1 +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD4]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP6]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK2-NEXT: [[SUB6:%.*]] = sub nsw i32 [[ADD5]], 4 +// CHECK2-NEXT: store i32 [[SUB6]], ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND7:%.*]] +// CHECK2: for.cond7: +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK2-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP9]], [[ADD8]] +// CHECK2-NEXT: br i1 [[CMP9]], label [[FOR_BODY10:%.*]], label [[FOR_END15:%.*]] +// CHECK2: for.body10: +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL11:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK2-NEXT: store i32 [[ADD12]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP13]]) +// CHECK2-NEXT: br label [[FOR_INC13:%.*]] +// CHECK2: for.inc13: +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: [[INC14:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK2-NEXT: store i32 [[INC14]], ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND7]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK2: for.end15: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @split_mid_fill( +// CHECK2-SAME: i32 noundef [[N:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSPLIT_IV_2_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 2 +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP5]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: store i32 2, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND3:%.*]] +// CHECK2: for.cond3: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK2-NEXT: [[SUB5:%.*]] = sub nsw i32 [[ADD4]], 3 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP7]], [[SUB5]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[FOR_BODY7:%.*]], label [[FOR_END12:%.*]] +// CHECK2: for.body7: +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL8]] +// CHECK2-NEXT: store i32 [[ADD9]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP11]]) +// CHECK2-NEXT: br label [[FOR_INC10:%.*]] +// CHECK2: for.inc10: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: [[INC11:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: store i32 [[INC11]], ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND3]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK2: for.end12: +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i32 [[ADD13]], 3 +// CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND15:%.*]] +// CHECK2: for.cond15: +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK2-NEXT: [[CMP17:%.*]] = icmp slt i32 [[TMP14]], [[ADD16]] +// CHECK2-NEXT: br i1 [[CMP17]], label [[FOR_BODY18:%.*]], label [[FOR_END23:%.*]] +// CHECK2: for.body18: +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL19:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 0, [[MUL19]] +// CHECK2-NEXT: store i32 [[ADD20]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP18]]) +// CHECK2-NEXT: br label [[FOR_INC21:%.*]] +// CHECK2: for.inc21: +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK2-NEXT: [[INC22:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK2-NEXT: store i32 [[INC22]], ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND15]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK2: for.end23: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @split_neg_start( +// CHECK2-SAME: i32 noundef [[N:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSPLIT_IV_2_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 -1, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP1]], -2 +// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP2]], 1 +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP4]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add i32 -1, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP5]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP6]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: store i32 1, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND3:%.*]] +// CHECK2: for.cond3: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD4:%.*]] = add i32 [[TMP8]], 1 +// CHECK2-NEXT: [[SUB5:%.*]] = sub i32 [[ADD4]], 1 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP7]], [[SUB5]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[FOR_BODY7:%.*]], label [[FOR_END12:%.*]] +// CHECK2: for.body7: +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL8:%.*]] = mul i32 [[TMP10]], 1 +// CHECK2-NEXT: [[ADD9:%.*]] = add i32 -1, [[MUL8]] +// CHECK2-NEXT: store i32 [[ADD9]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP11]]) +// CHECK2-NEXT: br label [[FOR_INC10:%.*]] +// CHECK2: for.inc10: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: [[INC11:%.*]] = add i32 [[TMP12]], 1 +// CHECK2-NEXT: store i32 [[INC11]], ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND3]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK2: for.end12: +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD13:%.*]] = add i32 [[TMP13]], 1 +// CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[ADD13]], 1 +// CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND15:%.*]] +// CHECK2: for.cond15: +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD16:%.*]] = add i32 [[TMP15]], 1 +// CHECK2-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP14]], [[ADD16]] +// CHECK2-NEXT: br i1 [[CMP17]], label [[FOR_BODY18:%.*]], label [[FOR_END23:%.*]] +// CHECK2: for.body18: +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL19:%.*]] = mul i32 [[TMP17]], 1 +// CHECK2-NEXT: [[ADD20:%.*]] = add i32 -1, [[MUL19]] +// CHECK2-NEXT: store i32 [[ADD20]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP18]]) +// CHECK2-NEXT: br label [[FOR_INC21:%.*]] +// CHECK2: for.inc21: +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK2-NEXT: [[INC22:%.*]] = add i32 [[TMP19]], 1 +// CHECK2-NEXT: store i32 [[INC22]], ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND15]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK2: for.end23: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @split_only_fill( +// CHECK2-SAME: i32 noundef [[N:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], [[ADD]] +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], 1 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD3]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP6]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @split_step2( +// CHECK2-SAME: i32 noundef [[N:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP1]], -1 +// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 2 +// CHECK2-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP2]], 3 +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP4]], 2 +// CHECK2-NEXT: [[ADD:%.*]] = add i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP5]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP6]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: store i32 3, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND3:%.*]] +// CHECK2: for.cond3: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD4:%.*]] = add i32 [[TMP8]], 1 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP7]], [[ADD4]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[FOR_BODY6:%.*]], label [[FOR_END11:%.*]] +// CHECK2: for.body6: +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL7:%.*]] = mul i32 [[TMP10]], 2 +// CHECK2-NEXT: [[ADD8:%.*]] = add i32 0, [[MUL7]] +// CHECK2-NEXT: store i32 [[ADD8]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP11]]) +// CHECK2-NEXT: br label [[FOR_INC9:%.*]] +// CHECK2: for.inc9: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: [[INC10:%.*]] = add i32 [[TMP12]], 1 +// CHECK2-NEXT: store i32 [[INC10]], ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND3]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK2: for.end11: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @split_three_const( +// CHECK2-SAME: i32 noundef [[N:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSPLIT_IV_2_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 2 +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP5]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: store i32 2, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND3:%.*]] +// CHECK2: for.cond3: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP7]], 5 +// CHECK2-NEXT: br i1 [[CMP4]], label [[FOR_BODY5:%.*]], label [[FOR_END10:%.*]] +// CHECK2: for.body5: +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK2-NEXT: store i32 [[ADD7]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP10]]) +// CHECK2-NEXT: br label [[FOR_INC8:%.*]] +// CHECK2: for.inc8: +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: [[INC9:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK2-NEXT: store i32 [[INC9]], ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND3]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK2: for.end10: +// CHECK2-NEXT: store i32 5, ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND11:%.*]] +// CHECK2: for.cond11: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK2-NEXT: [[CMP13:%.*]] = icmp slt i32 [[TMP12]], [[ADD12]] +// CHECK2-NEXT: br i1 [[CMP13]], label [[FOR_BODY14:%.*]], label [[FOR_END19:%.*]] +// CHECK2: for.body14: +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] +// CHECK2-NEXT: store i32 [[ADD16]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP16]]) +// CHECK2-NEXT: br label [[FOR_INC17:%.*]] +// CHECK2: for.inc17: +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK2-NEXT: [[INC18:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK2-NEXT: store i32 [[INC18]], ptr [[DOTSPLIT_IV_2_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND11]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK2: for.end19: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @split_two_const_trip( +// CHECK2-SAME: ) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 3 +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP3]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: store i32 3, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND1:%.*]] +// CHECK2: for.cond1: +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK2-NEXT: br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END8:%.*]] +// CHECK2: for.body3: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL4:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] +// CHECK2-NEXT: store i32 [[ADD5]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP8]]) +// CHECK2-NEXT: br label [[FOR_INC6:%.*]] +// CHECK2: for.inc6: +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: [[INC7:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK2-NEXT: store i32 [[INC7]], ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND1]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK2: for.end8: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @split_var_trip( +// CHECK2-SAME: i32 noundef [[N:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 3 +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP5]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: store i32 3, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND3:%.*]] +// CHECK2: for.cond3: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP7]], [[ADD4]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[FOR_BODY6:%.*]], label [[FOR_END11:%.*]] +// CHECK2: for.body6: +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] +// CHECK2-NEXT: store i32 [[ADD8]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP11]]) +// CHECK2-NEXT: br label [[FOR_INC9:%.*]] +// CHECK2: for.inc9: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: [[INC10:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: store i32 [[INC10]], ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND3]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK2: for.end11: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @split_zero_first( +// CHECK2-SAME: ) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 0 +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP3]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: store i32 0, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND1:%.*]] +// CHECK2: for.cond1: +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK2-NEXT: br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END8:%.*]] +// CHECK2: for.body3: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL4:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] +// CHECK2-NEXT: store i32 [[ADD5]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP8]]) +// CHECK2-NEXT: br label [[FOR_INC6:%.*]] +// CHECK2: for.inc6: +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: [[INC7:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK2-NEXT: store i32 [[INC7]], ptr [[DOTSPLIT_IV_1_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND1]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK2: for.end8: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define internal void @_GLOBAL__sub_I_split_codegen.cpp( +// CHECK2-SAME: ) #[[ATTR0]] section ".text.startup" { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: call void @__cxx_global_var_init() +// CHECK2-NEXT: ret void +// +//. +// CHECK1: [[META2]] = !{} +// CHECK1: [[META3]] = !{i64 4} +// CHECK1: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]]} +// CHECK1: [[META5]] = !{!"llvm.loop.mustprogress"} +// CHECK1: [[LOOP6]] = distinct !{[[LOOP6]], [[META5]]} +// CHECK1: [[LOOP7]] = distinct !{[[LOOP7]], [[META5]]} +// CHECK1: [[LOOP8]] = distinct !{[[LOOP8]], [[META5]]} +// CHECK1: [[LOOP9]] = distinct !{[[LOOP9]], [[META5]]} +// CHECK1: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]]} +// CHECK1: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]]} +// CHECK1: [[LOOP12]] = distinct !{[[LOOP12]], [[META5]]} +// CHECK1: [[LOOP13]] = distinct !{[[LOOP13]], [[META5]]} +// CHECK1: [[LOOP14]] = distinct !{[[LOOP14]], [[META5]]} +// CHECK1: [[LOOP15]] = distinct !{[[LOOP15]], [[META5]]} +// CHECK1: [[LOOP16]] = distinct !{[[LOOP16]], [[META5]]} +// CHECK1: [[LOOP17]] = distinct !{[[LOOP17]], [[META5]]} +// CHECK1: [[LOOP18]] = distinct !{[[LOOP18]], [[META5]]} +// CHECK1: [[LOOP19]] = distinct !{[[LOOP19]], [[META5]]} +// CHECK1: [[LOOP20]] = distinct !{[[LOOP20]], [[META5]]} +// CHECK1: [[LOOP21]] = distinct !{[[LOOP21]], [[META5]]} +// CHECK1: [[LOOP22]] = distinct !{[[LOOP22]], [[META5]]} +// CHECK1: [[LOOP23]] = distinct !{[[LOOP23]], [[META5]]} +// CHECK1: [[LOOP24]] = distinct !{[[LOOP24]], [[META5]]} +// CHECK1: [[LOOP25]] = distinct !{[[LOOP25]], [[META5]]} +// CHECK1: [[LOOP26]] = distinct !{[[LOOP26]], [[META5]]} +// CHECK1: [[LOOP27]] = distinct !{[[LOOP27]], [[META5]]} +// CHECK1: [[LOOP28]] = distinct !{[[LOOP28]], [[META5]]} +// CHECK1: [[LOOP29]] = distinct !{[[LOOP29]], [[META5]]} +// CHECK1: [[LOOP30]] = distinct !{[[LOOP30]], [[META5]]} +//. +// CHECK2: [[META2]] = !{} +// CHECK2: [[META3]] = !{i64 4} +// CHECK2: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]]} +// CHECK2: [[META5]] = !{!"llvm.loop.mustprogress"} +// CHECK2: [[LOOP6]] = distinct !{[[LOOP6]], [[META5]]} +// CHECK2: [[LOOP7]] = distinct !{[[LOOP7]], [[META5]]} +// CHECK2: [[LOOP8]] = distinct !{[[LOOP8]], [[META5]]} +// CHECK2: [[LOOP9]] = distinct !{[[LOOP9]], [[META5]]} +// CHECK2: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]]} +// CHECK2: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]]} +// CHECK2: [[LOOP12]] = distinct !{[[LOOP12]], [[META5]]} +// CHECK2: [[LOOP13]] = distinct !{[[LOOP13]], [[META5]]} +// CHECK2: [[LOOP14]] = distinct !{[[LOOP14]], [[META5]]} +// CHECK2: [[LOOP15]] = distinct !{[[LOOP15]], [[META5]]} +// CHECK2: [[LOOP16]] = distinct !{[[LOOP16]], [[META5]]} +// CHECK2: [[LOOP17]] = distinct !{[[LOOP17]], [[META5]]} +// CHECK2: [[LOOP18]] = distinct !{[[LOOP18]], [[META5]]} +// CHECK2: [[LOOP19]] = distinct !{[[LOOP19]], [[META5]]} +// CHECK2: [[LOOP20]] = distinct !{[[LOOP20]], [[META5]]} +// CHECK2: [[LOOP21]] = distinct !{[[LOOP21]], [[META5]]} +// CHECK2: [[LOOP22]] = distinct !{[[LOOP22]], [[META5]]} +// CHECK2: [[LOOP23]] = distinct !{[[LOOP23]], [[META5]]} +// CHECK2: [[LOOP24]] = distinct !{[[LOOP24]], [[META5]]} +// CHECK2: [[LOOP25]] = distinct !{[[LOOP25]], [[META5]]} +// CHECK2: [[LOOP26]] = distinct !{[[LOOP26]], [[META5]]} +// CHECK2: [[LOOP27]] = distinct !{[[LOOP27]], [[META5]]} +// CHECK2: [[LOOP28]] = distinct !{[[LOOP28]], [[META5]]} +// CHECK2: [[LOOP29]] = distinct !{[[LOOP29]], [[META5]]} +// CHECK2: [[LOOP30]] = distinct !{[[LOOP30]], [[META5]]} +//. diff --git a/clang/test/OpenMP/split_composition.cpp b/clang/test/OpenMP/split_composition.cpp new file mode 100644 index 0000000000000..eabe3f8f345f7 --- /dev/null +++ b/clang/test/OpenMP/split_composition.cpp @@ -0,0 +1,17 @@ +// Split nested inside `omp parallel for` outer loop. +// +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s + +extern "C" void body(int, int); + +// CHECK: define {{.*}} @composition( +// CHECK: .split.iv +// CHECK: call void @body +extern "C" void composition(void) { +#pragma omp parallel for + for (int i = 0; i < 4; ++i) { +#pragma omp split counts(2, omp_fill) + for (int j = 0; j < 10; ++j) + body(i, j); + } +} diff --git a/clang/test/OpenMP/split_compound_associated.cpp b/clang/test/OpenMP/split_compound_associated.cpp new file mode 100644 index 0000000000000..7bbc5107ef672 --- /dev/null +++ b/clang/test/OpenMP/split_compound_associated.cpp @@ -0,0 +1,13 @@ +// Associated statement may be a compound `{ for (...) {} }` (split still finds the loop). +// +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x c++ -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s + +// CHECK-LABEL: define {{.*}} @f( +// CHECK: .split.iv +extern "C" void f(void) { +#pragma omp split counts(2, omp_fill) + { + for (int i = 0; i < 10; ++i) { + } + } +} diff --git a/clang/test/OpenMP/split_counts_constexpr.cpp b/clang/test/OpenMP/split_counts_constexpr.cpp new file mode 100644 index 0000000000000..d304a9ca1b5a1 --- /dev/null +++ b/clang/test/OpenMP/split_counts_constexpr.cpp @@ -0,0 +1,19 @@ +/* C++ `constexpr` locals as `counts` operands (distinct from NTTP in split_template_nttp.cpp). */ +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x c++ -std=c++17 -fopenmp -fopenmp-version=60 -fsyntax-only -verify %s +// expected-no-diagnostics +// +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x c++ -std=c++17 -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s + +extern "C" void body(int); + +// CHECK-LABEL: define {{.*}} @from_constexpr +// CHECK: .split.iv.0 +// CHECK: icmp slt i32 {{.*}}, 4 +// CHECK: .split.iv.1 +// CHECK: icmp slt i32 {{.*}}, 10 +extern "C" void from_constexpr(void) { + static constexpr int C0 = 4; +#pragma omp split counts(C0, omp_fill) + for (int i = 0; i < 10; ++i) + body(i); +} diff --git a/clang/test/OpenMP/split_counts_ice.c b/clang/test/OpenMP/split_counts_ice.c new file mode 100644 index 0000000000000..c746ef417f049 --- /dev/null +++ b/clang/test/OpenMP/split_counts_ice.c @@ -0,0 +1,56 @@ +/* `counts` operands as ICEs: macros, enumerators, sizeof (not only raw literals). */ +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -fsyntax-only -verify %s +// expected-no-diagnostics +// +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s + +#define M1 2 + +extern void body(int); + +// CHECK-LABEL: define {{.*}} @from_macros +// CHECK: .split.iv.0 +// CHECK: icmp slt i32 {{.*}}, 2 +// CHECK: .split.iv.1 +// CHECK: icmp slt i32 {{.*}}, 10 +void from_macros(void) { +#pragma omp split counts(M1, omp_fill) + for (int i = 0; i < 10; ++i) + body(i); +} + +enum { EFirst = 3 }; + +// CHECK-LABEL: define {{.*}} @from_enum +// CHECK: .split.iv.0 +// CHECK: icmp slt i32 {{.*}}, 3 +// CHECK: .split.iv.1 +// CHECK: icmp slt i32 {{.*}}, 10 +void from_enum(void) { +#pragma omp split counts(EFirst, omp_fill) + for (int i = 0; i < 10; ++i) + body(i); +} + +// CHECK-LABEL: define {{.*}} @from_sizeof +// CHECK: .split.iv.0 +// CHECK: icmp slt i32 {{.*}}, 1 +// CHECK: .split.iv.1 +// CHECK: icmp slt i32 {{.*}}, 10 +void from_sizeof(void) { +#pragma omp split counts(sizeof(char), omp_fill) + for (int i = 0; i < 10; ++i) + body(i); +} + +// CHECK-LABEL: define {{.*}} @from_macro_expr +// CHECK: .split.iv.0 +// CHECK: icmp slt i32 {{.*}}, 4 +// CHECK: .split.iv.1 +// CHECK: icmp slt i32 {{.*}}, 10 +#define BASE 1 +void from_macro_expr(void) { +#pragma omp split counts(BASE + 3, omp_fill) + for (int i = 0; i < 10; ++i) + body(i); +} diff --git a/clang/test/OpenMP/split_counts_verify.c b/clang/test/OpenMP/split_counts_verify.c new file mode 100644 index 0000000000000..7fec1561d8380 --- /dev/null +++ b/clang/test/OpenMP/split_counts_verify.c @@ -0,0 +1,123 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 +/* + * Verify #pragma omp split counts(c1, c2, ...) at syntax and IR levels. + * counts(3, omp_fill, 2) with n=10 splits into: [0..3), [3..8), [8..10). + * Sum 0+1+...+9 = 45. + * For end-to-end runtime tests see openmp/runtime/test/transform/split/. + */ + +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -fsyntax-only -verify %s +// expected-no-diagnostics + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=60 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR + +int main(void) { + const int n = 10; + int sum = 0; + +#pragma omp split counts(3, omp_fill, 2) + for (int i = 0; i < n; ++i) { + sum += i; + } + + return (sum == 45) ? 0 : 1; +} +// IR-LABEL: define dso_local i32 @main( +// IR-SAME: ) #[[ATTR0:[0-9]+]] { +// IR-NEXT: [[ENTRY:.*:]] +// IR-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// IR-NEXT: [[N:%.*]] = alloca i32, align 4 +// IR-NEXT: [[SUM:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTSPLIT_IV_2_I:%.*]] = alloca i32, align 4 +// IR-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// IR-NEXT: store i32 10, ptr [[N]], align 4 +// IR-NEXT: store i32 0, ptr [[SUM]], align 4 +// IR-NEXT: store i32 0, ptr [[I]], align 4 +// IR-NEXT: store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4 +// IR-NEXT: br label %[[FOR_COND:.*]] +// IR: [[FOR_COND]]: +// IR-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 3 +// IR-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// IR: [[FOR_BODY]]: +// IR-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// IR-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// IR-NEXT: [[TMP3:%.*]] = load i32, ptr [[I]], align 4 +// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[SUM]], align 4 +// IR-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], [[TMP3]] +// IR-NEXT: store i32 [[ADD1]], ptr [[SUM]], align 4 +// IR-NEXT: br label %[[FOR_INC:.*]] +// IR: [[FOR_INC]]: +// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4 +// IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1 +// IR-NEXT: store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4 +// IR-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]] +// IR: [[FOR_END]]: +// IR-NEXT: store i32 3, ptr [[DOTSPLIT_IV_1_I]], align 4 +// IR-NEXT: br label %[[FOR_COND2:.*]] +// IR: [[FOR_COND2]]: +// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// IR-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP6]], 8 +// IR-NEXT: br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END10:.*]] +// IR: [[FOR_BODY4]]: +// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// IR-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP8]], 1 +// IR-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] +// IR-NEXT: store i32 [[ADD6]], ptr [[I]], align 4 +// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[SUM]], align 4 +// IR-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] +// IR-NEXT: store i32 [[ADD7]], ptr [[SUM]], align 4 +// IR-NEXT: br label %[[FOR_INC8:.*]] +// IR: [[FOR_INC8]]: +// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4 +// IR-NEXT: [[INC9:%.*]] = add nsw i32 [[TMP11]], 1 +// IR-NEXT: store i32 [[INC9]], ptr [[DOTSPLIT_IV_1_I]], align 4 +// IR-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP4:![0-9]+]] +// IR: [[FOR_END10]]: +// IR-NEXT: store i32 8, ptr [[DOTSPLIT_IV_2_I]], align 4 +// IR-NEXT: br label %[[FOR_COND11:.*]] +// IR: [[FOR_COND11]]: +// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4 +// IR-NEXT: [[CMP12:%.*]] = icmp slt i32 [[TMP12]], 10 +// IR-NEXT: br i1 [[CMP12]], label %[[FOR_BODY13:.*]], label %[[FOR_END19:.*]] +// IR: [[FOR_BODY13]]: +// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4 +// IR-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[MUL14:%.*]] = mul nsw i32 [[TMP14]], 1 +// IR-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] +// IR-NEXT: store i32 [[ADD15]], ptr [[I]], align 4 +// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 +// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[SUM]], align 4 +// IR-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] +// IR-NEXT: store i32 [[ADD16]], ptr [[SUM]], align 4 +// IR-NEXT: br label %[[FOR_INC17:.*]] +// IR: [[FOR_INC17]]: +// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4 +// IR-NEXT: [[INC18:%.*]] = add nsw i32 [[TMP17]], 1 +// IR-NEXT: store i32 [[INC18]], ptr [[DOTSPLIT_IV_2_I]], align 4 +// IR-NEXT: br label %[[FOR_COND11]], !llvm.loop [[LOOP5:![0-9]+]] +// IR: [[FOR_END19]]: +// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[SUM]], align 4 +// IR-NEXT: [[CMP20:%.*]] = icmp eq i32 [[TMP18]], 45 +// IR-NEXT: [[TMP19:%.*]] = zext i1 [[CMP20]] to i64 +// IR-NEXT: [[COND:%.*]] = select i1 [[CMP20]], i32 0, i32 1 +// IR-NEXT: ret i32 [[COND]] +// +//. +// IR: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]} +// IR: [[META3]] = !{!"llvm.loop.mustprogress"} +// IR: [[LOOP4]] = distinct !{[[LOOP4]], [[META3]]} +// IR: [[LOOP5]] = distinct !{[[LOOP5]], [[META3]]} +//. diff --git a/clang/test/OpenMP/split_diag_errors.c b/clang/test/OpenMP/split_diag_errors.c new file mode 100644 index 0000000000000..98986f3200ff7 --- /dev/null +++ b/clang/test/OpenMP/split_diag_errors.c @@ -0,0 +1,61 @@ +/* + * Error and delayed-transformation cases for #pragma omp split counts(...). + */ +// 1) Required clause missing: err_omp_required_clause +// RUN: not %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -fsyntax-only -DTEST_REQUIRED_CLAUSE %s 2>&1 | FileCheck %s --check-prefix=REQ +// 2) counts(negative): non-negative diagnostic +// RUN: not %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -fsyntax-only -DTEST_NEGATIVE %s 2>&1 | FileCheck %s --check-prefix=NEG +// 3) counts(non-integer): integral type diagnostic +// RUN: not %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -fsyntax-only -DTEST_FLOAT %s 2>&1 | FileCheck %s --check-prefix=FLOAT +// 6) Loop not transformable (while): must be a for loop +// RUN: not %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -fsyntax-only -DTEST_WHILE %s 2>&1 | FileCheck %s --check-prefix=WHILE +// Two invalid counts — two diagnostics on the clause +// RUN: not %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -fsyntax-only -DTEST_DOUBLE_NEGATIVE %s 2>&1 | FileCheck %s --check-prefix=DBLNEG + +#ifdef TEST_REQUIRED_CLAUSE +void test_required_clause_missing(void) { +#pragma omp split + for (int i = 0; i < 10; ++i) { + } +} +// REQ: error: {{.*}}requires the 'counts' clause +#endif + +#ifdef TEST_NEGATIVE +void test_negative_count(void) { +#pragma omp split counts(-1, omp_fill) + for (int i = 0; i < 10; ++i) { + } +} +// NEG: error: {{.*}}counts{{.*}}non-negative integer +#endif + +#ifdef TEST_FLOAT +void test_float_count(void) { +#pragma omp split counts(2.5, omp_fill) + for (int i = 0; i < 10; ++i) { + } +} +// FLOAT: error: {{.*}}integer constant expression must have integer type +#endif + +#ifdef TEST_WHILE +void test_while_not_for(void) { + int i = 0; +#pragma omp split counts(5, omp_fill) + while (i < 10) { + ++i; + } +} +// WHILE: error: {{.*}}must be a for loop +#endif + +#ifdef TEST_DOUBLE_NEGATIVE +void test_two_negative_counts(void) { +#pragma omp split counts(-1, -1, omp_fill) + for (int i = 0; i < 10; ++i) { + } +} +// DBLNEG: error: {{.*}}counts{{.*}}non-negative integer +// DBLNEG: error: {{.*}}counts{{.*}}non-negative integer +#endif diff --git a/clang/test/OpenMP/split_distribute_inner_split.cpp b/clang/test/OpenMP/split_distribute_inner_split.cpp new file mode 100644 index 0000000000000..290d0336f1b03 --- /dev/null +++ b/clang/test/OpenMP/split_distribute_inner_split.cpp @@ -0,0 +1,14 @@ +// `distribute` outer loop with inner `split` (combined-construct interop beyond host `teams` case). +// +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x c++ -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s + +// CHECK-LABEL: define {{.*}} @f( +// CHECK: .split.iv +extern "C" void f(void) { +#pragma omp distribute + for (int i = 0; i < 10; ++i) { +#pragma omp split counts(2, omp_fill) + for (int j = 0; j < 10; ++j) { + } + } +} diff --git a/clang/test/OpenMP/split_driver_smoke.c b/clang/test/OpenMP/split_driver_smoke.c new file mode 100644 index 0000000000000..5969611c9b425 --- /dev/null +++ b/clang/test/OpenMP/split_driver_smoke.c @@ -0,0 +1,12 @@ +// Driver forwards `-fopenmp-version=60` with split source (`###` only — no link). +// REQUIRES: x86-registered-target +// +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-version=60 -c %s -o %t.o 2>&1 | FileCheck %s --check-prefix=INVOC + +void f(int n) { +#pragma omp split counts(2, omp_fill) + for (int i = 0; i < n; ++i) { + } +} + +// INVOC: -fopenmp-version=60 diff --git a/clang/test/OpenMP/split_iv_types.c b/clang/test/OpenMP/split_iv_types.c new file mode 100644 index 0000000000000..76606f09fc427 --- /dev/null +++ b/clang/test/OpenMP/split_iv_types.c @@ -0,0 +1,24 @@ +/* Non-int IV types with split. */ +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s --check-prefix=U32 +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s --check-prefix=I64 + +extern void body(unsigned int); +extern void body64(long); + +// U32-LABEL: define {{.*}} @unsigned_iv +// U32: .split.iv +// U32-DAG: icmp ult i32 +void unsigned_iv(void) { +#pragma omp split counts(2, omp_fill) + for (unsigned i = 0; i < 10U; ++i) + body(i); +} + +// I64-LABEL: define {{.*}} @long_iv +// I64: .split.iv +// I64-DAG: icmp slt i64 +void long_iv(void) { +#pragma omp split counts(2, omp_fill) + for (long i = 0; i < 10L; ++i) + body64(i); +} diff --git a/clang/test/OpenMP/split_loop_styles.cpp b/clang/test/OpenMP/split_loop_styles.cpp new file mode 100644 index 0000000000000..0aa61b20a87bd --- /dev/null +++ b/clang/test/OpenMP/split_loop_styles.cpp @@ -0,0 +1,14 @@ +// Outer-declared iteration variable + split. +// +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s + +extern "C" void body(int); + +// CHECK-LABEL: define {{.*}} @outer_iv( +// CHECK: .split.iv +extern "C" void outer_iv(int n) { + int i; +#pragma omp split counts(3, omp_fill) + for (i = 0; i < n; ++i) + body(i); +} diff --git a/clang/test/OpenMP/split_member_ctor.cpp b/clang/test/OpenMP/split_member_ctor.cpp new file mode 100644 index 0000000000000..e869602e1a84f --- /dev/null +++ b/clang/test/OpenMP/split_member_ctor.cpp @@ -0,0 +1,20 @@ +// Split on loop in constructor using member-related bound. +// +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s + +extern "C" void body(int); + +struct S { + int n; + S() : n(10) { +#pragma omp split counts(3, omp_fill) + for (int i = 0; i < n; ++i) + body(i); + } +}; + +// CHECK-LABEL: define {{.*}} @_ZN1SC1Ev +// CHECK: .split.iv +void use_s() { + S s; +} diff --git a/clang/test/OpenMP/split_messages.cpp b/clang/test/OpenMP/split_messages.cpp new file mode 100644 index 0000000000000..b77a63ada9086 --- /dev/null +++ b/clang/test/OpenMP/split_messages.cpp @@ -0,0 +1,108 @@ +// OpenMP split / counts: parse and semantic diagnostics. +// +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++17 -fopenmp -fopenmp-version=60 -fsyntax-only -verify %s + +void body(int); + +void parse_and_clause_errors() { + + // Malformed `counts` — missing '(' + // expected-error@+1 {{expected '('}} + #pragma omp split counts + ; + + // Empty `counts` list + // expected-error@+1 {{expected expression}} + #pragma omp split counts() + ; + + // Truncated list / missing ')' + // expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} + #pragma omp split counts(3 + for (int i = 0; i < 7; ++i) + ; + + // Trailing comma only + // expected-error@+1 {{expected expression}} + #pragma omp split counts(3,) + ; + + // Expression after comma missing + // expected-error@+2 {{expected expression}} + // expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} + #pragma omp split counts(3, + ; + + // Incomplete arithmetic in count (like `tile_messages` sizes(5+)) + // expected-error@+2 {{expected expression}} + // expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} + #pragma omp split counts(5+ + ; + + // `for` keyword not a constant-expression operand + // expected-error@+1 {{expected expression}} + #pragma omp split counts(for) + ; + + // Duplicate `counts` clauses + // expected-error@+1 {{directive '#pragma omp split' cannot contain more than one 'counts' clause}} + #pragma omp split counts(2, omp_fill) counts(3, omp_fill) + for (int i = 0; i < 7; ++i) + ; + + // Disallowed extra clause + // expected-error@+1 {{unexpected OpenMP clause 'collapse' in directive '#pragma omp split'}} + #pragma omp split counts(2, omp_fill) collapse(2) + for (int i = 0; i < 7; ++i) + ; + + // Non-relational loop condition (canonical loop check) + #pragma omp split counts(omp_fill) + // expected-error@+1 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', '>=', or '!=') of loop variable 'i'}} + for (int i = 0; i / 3 < 7; ++i) + ; + + // More than one `omp_fill` + // expected-error@+1 {{exactly one 'omp_fill' must appear in the 'counts' clause}} + #pragma omp split counts(omp_fill, omp_fill) + for (int i = 0; i < 10; ++i) + body(i); + + // No `omp_fill` at all — also triggers "exactly one" diagnostic. + // expected-error@+1 {{exactly one 'omp_fill' must appear in the 'counts' clause}} + #pragma omp split counts(2, 3) + for (int i = 0; i < 10; ++i) + body(i); + + // Positive: `omp_fill` may appear at any position in `counts` (not required to be last). + #pragma omp split counts(omp_fill, 2) + for (int i = 0; i < 10; ++i) + body(i); + + // OpenMP 6.0: non-`omp_fill` list items must be integral constant expressions. + { + int v = 3; // expected-note {{declared here}} + #pragma omp split counts(v, omp_fill) // expected-error {{expression is not an integral constant expression}} \ + // expected-note {{read of non-const variable 'v' is not allowed in a constant expression}} + for (int i = 0; i < 10; ++i) + body(i); + } +} + +void associated_statement_diagnostics() { + { + // expected-error@+2 {{expected statement}} + #pragma omp split counts(omp_fill) + } + + // Not a `for` loop (contrast `split_diag_errors.c` / `while`) + // expected-error@+2 {{statement after '#pragma omp split' must be a for loop}} + #pragma omp split counts(omp_fill) + int b = 0; + + // expected-warning@+2 {{extra tokens at the end of '#pragma omp split' are ignored}} + // expected-error@+1 {{directive '#pragma omp split' requires the 'counts' clause}} + #pragma omp split foo + for (int i = 0; i < 7; ++i) + ; +} diff --git a/clang/test/OpenMP/split_nested_outer_only.c b/clang/test/OpenMP/split_nested_outer_only.c new file mode 100644 index 0000000000000..578a11212c658 --- /dev/null +++ b/clang/test/OpenMP/split_nested_outer_only.c @@ -0,0 +1,12 @@ +// Split attaches to the outer canonical `for`; inner loop stays unsplit. +// +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s + +// Exactly one split IV — the outer loop; inner `for` uses plain `i`/`j` control flow. +// CHECK-COUNT-1: .split.iv +void f(void) { +#pragma omp split counts(omp_fill) + for (int i = 0; i < 4; ++i) + for (int j = 0; j < 4; ++j) { + } +} diff --git a/clang/test/OpenMP/split_offload_codegen.cpp b/clang/test/OpenMP/split_offload_codegen.cpp new file mode 100644 index 0000000000000..d212fdad14520 --- /dev/null +++ b/clang/test/OpenMP/split_offload_codegen.cpp @@ -0,0 +1,27 @@ +// Split inside `#pragma omp target` — host and device IR show `.split.iv`. +// +// RUN: %clang_cc1 -DCK_SPLIT -verify -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - 2>&1 | FileCheck -check-prefix=HOST %s +// RUN: %clang_cc1 -DCK_SPLIT -verify -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-split-host.bc +// RUN: %clang_cc1 -DCK_SPLIT -verify -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-split-host.bc -o - 2>&1 | FileCheck -check-prefix=DEVICE %s + +// expected-no-diagnostics + +#ifdef CK_SPLIT +extern "C" void body(int); + +void host_split_in_target(int n) { +#pragma omp target map(to : n) + { +#pragma omp split counts(2, omp_fill) + for (int i = 0; i < n; ++i) + body(i); + } +} + +// HOST: define {{.*}}void {{.*}}host_split_in_target +// HOST: .split.iv +// HOST: __tgt_target_kernel + +// DEVICE: define {{.*}}void @__omp_offloading_ +// DEVICE: .split.iv +#endif diff --git a/clang/test/OpenMP/split_omp_fill.c b/clang/test/OpenMP/split_omp_fill.c new file mode 100644 index 0000000000000..f2e9a132dcf1e --- /dev/null +++ b/clang/test/OpenMP/split_omp_fill.c @@ -0,0 +1,36 @@ +/* Split + counts with omp_fill: syntax, AST dump, ast-print, IR. */ +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -fsyntax-only -verify %s +// expected-no-diagnostics +// +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -ast-dump %s | FileCheck %s --check-prefix=DUMP +// +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -ast-print %s | FileCheck %s --check-prefix=PRINT +// +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s --check-prefix=LLVM + +void body(int); + +// PRINT-LABEL: void foo( +// DUMP-LABEL: FunctionDecl {{.*}} foo +void foo(int n) { + // PRINT: #pragma omp split counts(3, omp_fill) + // DUMP: OMPSplitDirective + // DUMP-NEXT: |-OMPCountsClause + // DUMP-NEXT: | |-ConstantExpr {{.*}} 'int' + // DUMP-NEXT: | | |-value: Int 3 + // DUMP-NEXT: | | `-IntegerLiteral {{.*}} 'int' 3 + // DUMP-NEXT: | `-{{.*}} + // DUMP-NEXT: {{.*}}`-ForStmt +#pragma omp split counts(3, omp_fill) + // PRINT: for (int i = 0; i < n; ++i) + for (int i = 0; i < n; ++i) + body(i); +} + +// LLVM-LABEL: define {{.*}}void @foo( +// LLVM: .split.iv.0.i +// LLVM: icmp slt i32 {{.*}}, 3 +// LLVM: call void @body( +// LLVM: store i32 3, ptr %.split.iv.1.i +// LLVM: icmp slt i32 {{.*}}, %{{.*}} +// LLVM: call void @body( diff --git a/clang/test/OpenMP/split_openmp_version.cpp b/clang/test/OpenMP/split_openmp_version.cpp new file mode 100644 index 0000000000000..d49d50970d0db --- /dev/null +++ b/clang/test/OpenMP/split_openmp_version.cpp @@ -0,0 +1,22 @@ +// `#pragma omp split` / `counts` require OpenMP 6.x in this implementation. +// +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++17 -fopenmp -fopenmp-version=60 -fsyntax-only -DONLY_OK -verify %s +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++17 -fopenmp -fopenmp-version=52 -fsyntax-only -DONLY_BAD52 -verify=expected52 %s + +#if defined(ONLY_OK) +void ok60(void) { +#pragma omp split counts(omp_fill) + for (int i = 0; i < 10; ++i) { + } +} +// expected-no-diagnostics +#endif + +#if defined(ONLY_BAD52) +// expected52-error@+2 {{unexpected OpenMP clause 'counts' in directive '#pragma omp split'}} +void bad52(void) { +#pragma omp split counts(omp_fill) + for (int i = 0; i < 10; ++i) { + } +} +#endif diff --git a/clang/test/OpenMP/split_opts_simd_debug.cpp b/clang/test/OpenMP/split_opts_simd_debug.cpp new file mode 100644 index 0000000000000..d378707ee66c7 --- /dev/null +++ b/clang/test/OpenMP/split_opts_simd_debug.cpp @@ -0,0 +1,30 @@ +// Optimized split IR at -O1; split + `-fopenmp-simd` syntax-only; -g debug-info smoke. +// +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -O1 -emit-llvm -DTEST_BODY %s -o - | FileCheck %s --check-prefix=O1 +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp-simd -fopenmp-version=60 -fsyntax-only -verify -DTEST_SIMD %s +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -O0 -emit-llvm -debug-info-kind=limited -DTEST_BODY %s -o - | FileCheck %s --check-prefix=DBG + +extern "C" void body(int); + +#if defined(TEST_SIMD) +// expected-no-diagnostics +void simd_ok(int n) { +#pragma omp split counts(2, omp_fill) + for (int i = 0; i < n; ++i) + body(i); +} +#endif + +#if defined(TEST_BODY) +// O1-LABEL: define {{.*}} @_Z4testi +// O1: .split.iv +// DBG-LABEL: define {{.*}} @_Z4testi +// DBG: .split.iv +// DBG: !dbg +void test(int n) { +#pragma omp split counts(2, omp_fill) + for (int i = 0; i < n; ++i) + body(i); +} +#endif + diff --git a/clang/test/OpenMP/split_parallel_split.cpp b/clang/test/OpenMP/split_parallel_split.cpp new file mode 100644 index 0000000000000..bf30373f9bb8c --- /dev/null +++ b/clang/test/OpenMP/split_parallel_split.cpp @@ -0,0 +1,15 @@ +// Valid nesting — `split` inside `omp parallel` (contrast `teams` rejection in split_teams_nesting.cpp). +// +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x c++ -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s + +// CHECK-LABEL: define {{.*}} @f( +// CHECK: __kmpc_fork_call +// CHECK: .split.iv +extern "C" void f(void) { +#pragma omp parallel + { +#pragma omp split counts(2, omp_fill) + for (int i = 0; i < 10; ++i) { + } + } +} diff --git a/clang/test/OpenMP/split_pch_codegen.cpp b/clang/test/OpenMP/split_pch_codegen.cpp new file mode 100644 index 0000000000000..c31028bebe5e0 --- /dev/null +++ b/clang/test/OpenMP/split_pch_codegen.cpp @@ -0,0 +1,43 @@ +// PCH round-trip for AST dump/print and host IR (split + counts). +// +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -fsyntax-only -verify %s +// expected-no-diagnostics +// +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -ast-dump %s | FileCheck %s --check-prefix=DUMP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -ast-print %s | FileCheck %s --check-prefix=PRINT +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 +// +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -emit-pch -o %t %s +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -include-pch %t -ast-dump-all %s | FileCheck %s --check-prefix=DUMP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -include-pch %t -ast-print %s | FileCheck %s --check-prefix=PRINT +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -O0 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK2 + +#ifndef HEADER +#define HEADER + +extern "C" void body(int); + +// PRINT-LABEL: void foo( +// DUMP-LABEL: FunctionDecl {{.*}} foo +void foo(int n) { + // PRINT: #pragma omp split counts(3, omp_fill) + // DUMP: OMPSplitDirective + // DUMP-NEXT: OMPCountsClause + // DUMP: IntegerLiteral {{.*}} 3 +#pragma omp split counts(3, omp_fill) + // DUMP: ForStmt + for (int i = 0; i < n; ++i) + body(i); +} + +// CHECK1-LABEL: define {{.*}} @_Z3foo +// CHECK1: .split.iv +// CHECK1: icmp +// CHECK1: call void @body + +// CHECK2-LABEL: define {{.*}} @_Z3foo +// CHECK2: .split.iv +// CHECK2: icmp +// CHECK2: call void @body + +#endif /* HEADER */ diff --git a/clang/test/OpenMP/split_range_for_diag.cpp b/clang/test/OpenMP/split_range_for_diag.cpp new file mode 100644 index 0000000000000..2c6a4b50d84bd --- /dev/null +++ b/clang/test/OpenMP/split_range_for_diag.cpp @@ -0,0 +1,25 @@ +// C++ range-for + split: verify syntax, IR, and PreInits (range evaluated once). +// +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x c++ -std=c++17 -fopenmp -fopenmp-version=60 -fsyntax-only -verify %s +// expected-no-diagnostics +// +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x c++ -std=c++17 -fopenmp -fopenmp-version=60 -emit-llvm %s -o - | FileCheck %s + +extern "C" void body(int); + +// CHECK-LABEL: define dso_local void @_Z10range_fillv +// CHECK: __range +// CHECK: __begin +// CHECK: __end +// CHECK: .split.iv.0 +// CHECK: icmp slt i64 {{.*}}, 2 +// CHECK: call void @body +// CHECK: .split.iv.1 +// CHECK: icmp slt +// CHECK: call void @body +void range_fill() { + int a[] = {10, 20, 30, 40}; +#pragma omp split counts(2, omp_fill) + for (int x : a) + body(x); +} diff --git a/clang/test/OpenMP/split_serialize_module.cpp b/clang/test/OpenMP/split_serialize_module.cpp new file mode 100644 index 0000000000000..861e9a8bb8034 --- /dev/null +++ b/clang/test/OpenMP/split_serialize_module.cpp @@ -0,0 +1,24 @@ +// C++20 module interface with `#pragma omp split` — emit BMI + import; AST retains directive. +// +// RUN: rm -rf %t && split-file %s %t && cd %t +// RUN: %clang_cc1 -std=c++20 -fopenmp -fopenmp-version=60 -triple x86_64-unknown-linux-gnu %t/SplitMod.cppm -emit-module-interface -o %t/SplitMod.pcm +// RUN: %clang_cc1 -std=c++20 -fopenmp -fopenmp-version=60 -triple x86_64-unknown-linux-gnu %t/UseSplitMod.cpp -fmodule-file=SplitMod=%t/SplitMod.pcm -ast-dump-all | FileCheck %t/SplitMod.cppm + +// expected-no-diagnostics + +//--- SplitMod.cppm +module; +export module SplitMod; + +export void splitfoo(int n) { +// CHECK: OMPSplitDirective +// CHECK: OMPCountsClause +#pragma omp split counts(2, omp_fill) + for (int i = 0; i < n; ++i) { + } +} + +//--- UseSplitMod.cpp +import SplitMod; + +void g(void) { splitfoo(10); } diff --git a/clang/test/OpenMP/split_teams_nesting.cpp b/clang/test/OpenMP/split_teams_nesting.cpp new file mode 100644 index 0000000000000..1120a7ccae671 --- /dev/null +++ b/clang/test/OpenMP/split_teams_nesting.cpp @@ -0,0 +1,13 @@ +// Split is not valid nested inside `teams` (host diagnostic). +// +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x c++ -fopenmp -fopenmp-version=60 -fsyntax-only -verify %s + +void g(void) { +#pragma omp teams + { +// expected-error@+1 {{region cannot be closely nested inside 'teams' region}} +#pragma omp split counts(2, omp_fill) + for (int i = 0; i < 10; ++i) { + } + } +} diff --git a/clang/test/OpenMP/split_template_nttp.cpp b/clang/test/OpenMP/split_template_nttp.cpp new file mode 100644 index 0000000000000..1cf0ee39c3c73 --- /dev/null +++ b/clang/test/OpenMP/split_template_nttp.cpp @@ -0,0 +1,15 @@ +// Non-type template parameter as counts operand — IR after instantiation. +// +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x c++ -std=c++17 -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s + +// CHECK-LABEL: define {{.*}} @_Z1fILi5EEvv +// CHECK: .split.iv +// CHECK: icmp slt i32{{.*}} 5 +template +void f() { +#pragma omp split counts(N, omp_fill) + for (int i = 0; i < 20; ++i) { + } +} + +template void f<5>(); diff --git a/clang/test/OpenMP/split_templates.cpp b/clang/test/OpenMP/split_templates.cpp new file mode 100644 index 0000000000000..f6a4dfbfdc81b --- /dev/null +++ b/clang/test/OpenMP/split_templates.cpp @@ -0,0 +1,30 @@ +// Dependent template defers transformation; explicit instantiation emits IR. +// +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x c++ -std=c++17 -fopenmp -fopenmp-version=60 -ast-dump -DTEST_DEP %s | FileCheck %s --check-prefix=DEP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x c++ -std=c++17 -fopenmp -fopenmp-version=60 -O0 -emit-llvm -DTEST_INST %s -o - | FileCheck %s --check-prefix=LLVM + +extern "C" void body(int); + +#if defined(TEST_DEP) +template +void dep_split(T n) { +#pragma omp split counts(2, omp_fill) + for (T i = 0; i < n; ++i) + body((int)i); +} +// DEP-LABEL: dep_split +// DEP: OMPSplitDirective +// DEP: ForStmt +#endif + +#if defined(TEST_INST) +template +void dep_split(T n) { +#pragma omp split counts(2, omp_fill) + for (T i = 0; i < n; ++i) + body((int)i); +} +template void dep_split(int); +// LLVM: .split.iv +// LLVM: call void @body +#endif diff --git a/clang/test/OpenMP/split_trip_volatile.c b/clang/test/OpenMP/split_trip_volatile.c new file mode 100644 index 0000000000000..01b5e7f534d98 --- /dev/null +++ b/clang/test/OpenMP/split_trip_volatile.c @@ -0,0 +1,14 @@ +// Volatile trip count — IR shows `load volatile` of bound + split IVs (omp_fill segment). +// +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s + +volatile int n; + +// CHECK-LABEL: define {{.*}} @f +// CHECK: load volatile i32, ptr @n +// CHECK: .split.iv +void f(void) { +#pragma omp split counts(2, omp_fill) + for (int i = 0; i < n; ++i) { + } +} diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 3ee37ed2dfc27..f1532d0b064b3 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2362,6 +2362,11 @@ void OMPClauseEnqueue::VisitOMPSizesClause(const OMPSizesClause *C) { Visitor->AddStmt(E); } +void OMPClauseEnqueue::VisitOMPCountsClause(const OMPCountsClause *C) { + for (auto E : C->getCountsRefs()) + Visitor->AddStmt(E); +} + void OMPClauseEnqueue::VisitOMPPermutationClause( const OMPPermutationClause *C) { for (auto E : C->getArgsRefs()) @@ -6326,6 +6331,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) { return cxstring::createRef("OMPInterchangeDirective"); case CXCursor_OMPFuseDirective: return cxstring::createRef("OMPFuseDirective"); + case CXCursor_OMPSplitDirective: + return cxstring::createRef("OMPSplitDirective"); case CXCursor_OMPForDirective: return cxstring::createRef("OMPForDirective"); case CXCursor_OMPForSimdDirective: diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp index d31d2c0c9bb67..242380c68c667 100644 --- a/clang/tools/libclang/CXCursor.cpp +++ b/clang/tools/libclang/CXCursor.cpp @@ -697,6 +697,9 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent, case Stmt::OMPReverseDirectiveClass: K = CXCursor_OMPReverseDirective; break; + case Stmt::OMPSplitDirectiveClass: + K = CXCursor_OMPSplitDirective; + break; case Stmt::OMPInterchangeDirectiveClass: K = CXCursor_OMPInterchangeDirective; break; diff --git a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp index 7338ff5f302f6..4190d4703e37d 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp +++ b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp @@ -7,7 +7,9 @@ //===----------------------------------------------------------------------===// #include "ASTMatchersTest.h" +#include "clang/AST/OpenMPClause.h" #include "clang/AST/PrettyPrinter.h" +#include "clang/AST/StmtOpenMP.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Tooling/Tooling.h" @@ -3103,6 +3105,66 @@ TEST(ASTMatchersTestOpenMP, OMPTargetUpdateDirective_CountExpression) { } } +// OpenMP 6 split directive / counts clause +TEST(ASTMatchersTestOpenMP, OMPSplitDirective) { + auto Matcher = stmt(ompSplitDirective(hasStructuredBlock(forStmt()))); + + StringRef SplitOk = R"( +void f() { +#pragma omp split counts(2, omp_fill) + for (int i = 0; i < 10; ++i) {} +} +)"; + EXPECT_TRUE(matchesWithOpenMP60(SplitOk, Matcher)); + + StringRef ParallelOnly = R"( +void f() { +#pragma omp parallel + ; +} +)"; + EXPECT_TRUE(notMatchesWithOpenMP60(ParallelOnly, Matcher)); +} + +TEST(ASTMatchersTestOpenMP, OMPSplitDirective_HasCountsClause) { + auto Matcher = stmt(ompSplitDirective(hasAnyClause(ompCountsClause()))); + + StringRef Source0 = R"( +void f() { +#pragma omp split counts(2, omp_fill) + for (int i = 0; i < 10; ++i) {} +} +)"; + EXPECT_TRUE(matchesWithOpenMP60(Source0, Matcher)); +} + +TEST(ASTMatchersTestOpenMP, OMPCountsClause_OmpFillOperand) { + StringRef Source0 = R"( +void f() { +#pragma omp split counts(1, omp_fill) + for (int i = 0; i < 10; ++i) {} +} +)"; + auto AST = tooling::buildASTFromCodeWithArgs( + Source0, {"-std=gnu++11", "-target", "i386-unknown-unknown", + "-fopenmp=libomp", "-fopenmp-version=60"}); + ASSERT_TRUE(AST); + auto Results = match(ompSplitDirective().bind("split"), AST->getASTContext()); + ASSERT_EQ(Results.size(), 1u); + const auto *Dir = Results[0].getNodeAs("split"); + ASSERT_TRUE(Dir); + const OMPCountsClause *Counts = nullptr; + for (OMPClause *C : Dir->clauses()) { + if ((Counts = dyn_cast(C))) + break; + } + ASSERT_TRUE(Counts); + ASSERT_EQ(Counts->getNumCounts(), 2u); + EXPECT_TRUE(Counts->hasOmpFill()); + EXPECT_EQ(*Counts->getOmpFillIndex(), 1u); + EXPECT_FALSE(Counts->getCountsRefs()[1]); +} + TEST(ASTMatchersTest, Finder_DynamicOnlyAcceptsSomeMatchers) { MatchFinder Finder; EXPECT_TRUE(Finder.addDynamicMatcher(decl(), nullptr)); diff --git a/clang/unittests/ASTMatchers/ASTMatchersTest.h b/clang/unittests/ASTMatchers/ASTMatchersTest.h index c1d4daea2c9f1..932e75360405b 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersTest.h +++ b/clang/unittests/ASTMatchers/ASTMatchersTest.h @@ -289,6 +289,20 @@ testing::AssertionResult notMatchesWithOpenMP51(const Twine &Code, {"-fopenmp=libomp", "-fopenmp-version=51"}); } +template +testing::AssertionResult matchesWithOpenMP60(const Twine &Code, + const T &AMatcher) { + return matchesConditionally(Code, AMatcher, true, + {"-fopenmp=libomp", "-fopenmp-version=60"}); +} + +template +testing::AssertionResult notMatchesWithOpenMP60(const Twine &Code, + const T &AMatcher) { + return matchesConditionally(Code, AMatcher, false, + {"-fopenmp=libomp", "-fopenmp-version=60"}); +} + template testing::AssertionResult matchesWithFixedpoint(const std::string &Code, const T &AMatcher) { diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index d1dddf76152ec..0f2074c549c83 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -142,6 +142,7 @@ def OMPC_CopyPrivate : Clause<[Spelling<"copyprivate">]> { let flangClass = "OmpObjectList"; } def OMPC_Counts : Clause<[Spelling<"counts">]> { + let clangClass = "OMPCountsClause"; } def OMPC_Default : Clause<[Spelling<"default">]> { let clangClass = "OMPDefaultClause"; @@ -1203,16 +1204,6 @@ def OMP_EndSingle : Directive<[Spelling<"end single">]> { let category = OMP_Single.category; let languages = [L_Fortran]; } -def OMP_Split : Directive<[Spelling<"split">]> { - let allowedClauses = [ - VersionedClause, - ]; - let allowedOnceClauses = [ - VersionedClause, - ]; - let association = AS_LoopNest; - let category = CA_Executable; -} def OMP_Target : Directive<[Spelling<"target">]> { let allowedClauses = [ VersionedClause, @@ -1435,6 +1426,16 @@ def OMP_Stripe : Directive<[Spelling<"stripe">]> { let association = AS_LoopNest; let category = CA_Executable; } +def OMP_Split : Directive<[Spelling<"split">]> { + let allowedOnceClauses = [ + VersionedClause, + ]; + let requiredClauses = [ + VersionedClause, + ]; + let association = AS_LoopNest; + let category = CA_Executable; +} def OMP_Unknown : Directive<[Spelling<"unknown">]> { let isDefault = true; let association = AS_None; diff --git a/openmp/runtime/test/transform/split/fill_first.c b/openmp/runtime/test/transform/split/fill_first.c new file mode 100644 index 0000000000000..12568f6896d18 --- /dev/null +++ b/openmp/runtime/test/transform/split/fill_first.c @@ -0,0 +1,23 @@ +// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines + +#include +#include + +int main() { + printf("do\n"); +#pragma omp split counts(omp_fill, 2) + for (int i = 0; i < 7; ++i) + printf("i=%d\n", i); + printf("done\n"); + return EXIT_SUCCESS; +} + +// CHECK: do +// CHECK-NEXT: i=0 +// CHECK-NEXT: i=1 +// CHECK-NEXT: i=2 +// CHECK-NEXT: i=3 +// CHECK-NEXT: i=4 +// CHECK-NEXT: i=5 +// CHECK-NEXT: i=6 +// CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/split/foreach.cpp b/openmp/runtime/test/transform/split/foreach.cpp new file mode 100644 index 0000000000000..846449b9d86ca --- /dev/null +++ b/openmp/runtime/test/transform/split/foreach.cpp @@ -0,0 +1,24 @@ +// RUN: %libomp-cxx-compile-and-run | FileCheck %s --match-full-lines + +#include +#include +#include + +int main() { + std::vector v = {10, 20, 30, 40, 50, 60}; + printf("do\n"); +#pragma omp split counts(2, omp_fill) + for (int x : v) + printf("x=%d\n", x); + printf("done\n"); + return EXIT_SUCCESS; +} + +// CHECK: do +// CHECK-NEXT: x=10 +// CHECK-NEXT: x=20 +// CHECK-NEXT: x=30 +// CHECK-NEXT: x=40 +// CHECK-NEXT: x=50 +// CHECK-NEXT: x=60 +// CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/split/intfor.c b/openmp/runtime/test/transform/split/intfor.c new file mode 100644 index 0000000000000..321fa5ca51f08 --- /dev/null +++ b/openmp/runtime/test/transform/split/intfor.c @@ -0,0 +1,26 @@ +// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines + +#include +#include + +int main() { + printf("do\n"); +#pragma omp split counts(3, omp_fill, 2) + for (int i = 0; i < 10; ++i) + printf("i=%d\n", i); + printf("done\n"); + return EXIT_SUCCESS; +} + +// CHECK: do +// CHECK-NEXT: i=0 +// CHECK-NEXT: i=1 +// CHECK-NEXT: i=2 +// CHECK-NEXT: i=3 +// CHECK-NEXT: i=4 +// CHECK-NEXT: i=5 +// CHECK-NEXT: i=6 +// CHECK-NEXT: i=7 +// CHECK-NEXT: i=8 +// CHECK-NEXT: i=9 +// CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/split/intfor_negstart.c b/openmp/runtime/test/transform/split/intfor_negstart.c new file mode 100644 index 0000000000000..1e3860bba2d53 --- /dev/null +++ b/openmp/runtime/test/transform/split/intfor_negstart.c @@ -0,0 +1,27 @@ +// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines + +#include +#include + +int main() { + int n = 8; + printf("do\n"); +#pragma omp split counts(1, omp_fill, 1) + for (int i = -1; i <= n; ++i) + printf("i=%d\n", i); + printf("done\n"); + return EXIT_SUCCESS; +} + +// CHECK: do +// CHECK-NEXT: i=-1 +// CHECK-NEXT: i=0 +// CHECK-NEXT: i=1 +// CHECK-NEXT: i=2 +// CHECK-NEXT: i=3 +// CHECK-NEXT: i=4 +// CHECK-NEXT: i=5 +// CHECK-NEXT: i=6 +// CHECK-NEXT: i=7 +// CHECK-NEXT: i=8 +// CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/split/iterfor.cpp b/openmp/runtime/test/transform/split/iterfor.cpp new file mode 100644 index 0000000000000..60ecbb374f6e3 --- /dev/null +++ b/openmp/runtime/test/transform/split/iterfor.cpp @@ -0,0 +1,139 @@ +// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines + +#include +#include +#include + +struct Reporter { + const char *name; + + Reporter(const char *name) : name(name) { print("ctor"); } + + Reporter() : name("") { print("ctor"); } + + Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); } + + Reporter(Reporter &&that) : name(that.name) { print("move ctor"); } + + ~Reporter() { print("dtor"); } + + const Reporter &operator=(const Reporter &that) { + print("copy assign"); + this->name = that.name; + return *this; + } + + const Reporter &operator=(Reporter &&that) { + print("move assign"); + this->name = that.name; + return *this; + } + + struct Iterator { + const Reporter *owner; + int pos; + + Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {} + + Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) { + owner->print("iterator copy ctor"); + } + + Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) { + owner->print("iterator move ctor"); + } + + ~Iterator() { owner->print("iterator dtor"); } + + const Iterator &operator=(const Iterator &that) { + owner->print("iterator copy assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + const Iterator &operator=(Iterator &&that) { + owner->print("iterator move assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + bool operator==(const Iterator &that) const { + owner->print("iterator %d == %d", this->pos, that.pos); + return this->pos == that.pos; + } + + bool operator!=(const Iterator &that) const { + owner->print("iterator %d != %d", this->pos, that.pos); + return this->pos != that.pos; + } + + Iterator &operator++() { + owner->print("iterator prefix ++"); + pos += 1; + return *this; + } + + Iterator operator++(int) { + owner->print("iterator postfix ++"); + auto result = *this; + pos += 1; + return result; + } + + int operator*() const { + owner->print("iterator deref: %d", pos); + return pos; + } + + size_t operator-(const Iterator &that) const { + int result = this->pos - that.pos; + owner->print("iterator distance: %d", result); + return result; + } + + Iterator operator+(int steps) const { + owner->print("iterator advance: %d += %d", this->pos, steps); + return Iterator(owner, pos + steps); + } + }; + + Iterator begin() const { + print("begin()"); + return Iterator(this, 0); + } + + Iterator end() const { + print("end()"); + return Iterator(this, 4); + } + + void print(const char *msg, ...) const { + va_list args; + va_start(args, msg); + printf("[%s] ", name); + vprintf(msg, args); + printf("\n"); + va_end(args); + } +}; + +int main() { + printf("do\n"); + Reporter range("range"); +#pragma omp split counts(1, omp_fill, 1) + for (auto it = range.begin(); it != range.end(); ++it) + printf("v=%d\n", *it); + printf("done\n"); + return EXIT_SUCCESS; +} + +// CHECK: do +// CHECK: [range] ctor +// CHECK: v=0 +// CHECK: v=1 +// CHECK: v=2 +// CHECK: v=3 +// CHECK: done +// CHECK: [range] dtor diff --git a/openmp/runtime/test/transform/split/leq_bound.c b/openmp/runtime/test/transform/split/leq_bound.c new file mode 100644 index 0000000000000..81061fe430ae4 --- /dev/null +++ b/openmp/runtime/test/transform/split/leq_bound.c @@ -0,0 +1,22 @@ +// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines + +#include +#include + +int main() { + int n = 4; + printf("do\n"); +#pragma omp split counts(2, omp_fill) + for (int i = 0; i <= n; ++i) + printf("i=%d\n", i); + printf("done\n"); + return EXIT_SUCCESS; +} + +// CHECK: do +// CHECK-NEXT: i=0 +// CHECK-NEXT: i=1 +// CHECK-NEXT: i=2 +// CHECK-NEXT: i=3 +// CHECK-NEXT: i=4 +// CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/split/lit.local.cfg b/openmp/runtime/test/transform/split/lit.local.cfg new file mode 100644 index 0000000000000..beb73ea04165d --- /dev/null +++ b/openmp/runtime/test/transform/split/lit.local.cfg @@ -0,0 +1,5 @@ +# The split directive's counts clause requires OpenMP 6.0. +for i, (pattern, replacement) in enumerate(config.substitutions): + if pattern == "%openmp_flags": + config.substitutions[i] = (pattern, replacement + " -fopenmp-version=60") + break diff --git a/openmp/runtime/test/transform/split/negative_incr.c b/openmp/runtime/test/transform/split/negative_incr.c new file mode 100644 index 0000000000000..ce537db067f06 --- /dev/null +++ b/openmp/runtime/test/transform/split/negative_incr.c @@ -0,0 +1,22 @@ +// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines + +#include +#include + +int main() { + printf("do\n"); +#pragma omp split counts(1, omp_fill, 1) + for (int i = 5; i >= 0; --i) + printf("i=%d\n", i); + printf("done\n"); + return EXIT_SUCCESS; +} + +// CHECK: do +// CHECK-NEXT: i=5 +// CHECK-NEXT: i=4 +// CHECK-NEXT: i=3 +// CHECK-NEXT: i=2 +// CHECK-NEXT: i=1 +// CHECK-NEXT: i=0 +// CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/split/nonconstant_incr.c b/openmp/runtime/test/transform/split/nonconstant_incr.c new file mode 100644 index 0000000000000..d594a437f2ab2 --- /dev/null +++ b/openmp/runtime/test/transform/split/nonconstant_incr.c @@ -0,0 +1,22 @@ +// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines + +#include +#include + +int main() { + int n = 19; + int c = 3; + printf("do\n"); +#pragma omp split counts(1, omp_fill, 1) + for (int i = 7; i < n; i += c) + printf("i=%d\n", i); + printf("done\n"); + return EXIT_SUCCESS; +} + +// CHECK: do +// CHECK-NEXT: i=7 +// CHECK-NEXT: i=10 +// CHECK-NEXT: i=13 +// CHECK-NEXT: i=16 +// CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/split/parallel-split-intfor.c b/openmp/runtime/test/transform/split/parallel-split-intfor.c new file mode 100644 index 0000000000000..0b9bd7df5027e --- /dev/null +++ b/openmp/runtime/test/transform/split/parallel-split-intfor.c @@ -0,0 +1,27 @@ +// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines + +#include +#include + +int main() { + printf("do\n"); +#pragma omp parallel num_threads(1) + { +#pragma omp split counts(2, omp_fill, 2) + for (int i = 0; i < 8; ++i) + printf("i=%d\n", i); + } + printf("done\n"); + return EXIT_SUCCESS; +} + +// CHECK: do +// CHECK-NEXT: i=0 +// CHECK-NEXT: i=1 +// CHECK-NEXT: i=2 +// CHECK-NEXT: i=3 +// CHECK-NEXT: i=4 +// CHECK-NEXT: i=5 +// CHECK-NEXT: i=6 +// CHECK-NEXT: i=7 +// CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/split/single_fill.c b/openmp/runtime/test/transform/split/single_fill.c new file mode 100644 index 0000000000000..4ef10bf3b4d26 --- /dev/null +++ b/openmp/runtime/test/transform/split/single_fill.c @@ -0,0 +1,23 @@ +// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines + +#include +#include + +int main() { + int n = 6; + printf("do\n"); +#pragma omp split counts(omp_fill) + for (int i = 0; i < n; ++i) + printf("i=%d\n", i); + printf("done\n"); + return EXIT_SUCCESS; +} + +// CHECK: do +// CHECK-NEXT: i=0 +// CHECK-NEXT: i=1 +// CHECK-NEXT: i=2 +// CHECK-NEXT: i=3 +// CHECK-NEXT: i=4 +// CHECK-NEXT: i=5 +// CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/split/three_segments.c b/openmp/runtime/test/transform/split/three_segments.c new file mode 100644 index 0000000000000..f34b640a86710 --- /dev/null +++ b/openmp/runtime/test/transform/split/three_segments.c @@ -0,0 +1,26 @@ +// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines + +#include +#include + +int main() { + printf("do\n"); +#pragma omp split counts(2, 2, omp_fill) + for (int i = 0; i < 10; ++i) + printf("i=%d\n", i); + printf("done\n"); + return EXIT_SUCCESS; +} + +// CHECK: do +// CHECK-NEXT: i=0 +// CHECK-NEXT: i=1 +// CHECK-NEXT: i=2 +// CHECK-NEXT: i=3 +// CHECK-NEXT: i=4 +// CHECK-NEXT: i=5 +// CHECK-NEXT: i=6 +// CHECK-NEXT: i=7 +// CHECK-NEXT: i=8 +// CHECK-NEXT: i=9 +// CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/split/trip_one.c b/openmp/runtime/test/transform/split/trip_one.c new file mode 100644 index 0000000000000..5f00d25239685 --- /dev/null +++ b/openmp/runtime/test/transform/split/trip_one.c @@ -0,0 +1,32 @@ +// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines + +// Tiny trip counts: trip=1 with counts(1, omp_fill) and trip=0. + +#include +#include + +int main() { + int n; + + n = 1; + printf("trip1\n"); +#pragma omp split counts(1, omp_fill) + for (int i = 0; i < n; ++i) + printf("i=%d\n", i); + printf("end1\n"); + + n = 0; + printf("trip0\n"); +#pragma omp split counts(omp_fill) + for (int i = 0; i < n; ++i) + printf("i=%d\n", i); + printf("end0\n"); + + return EXIT_SUCCESS; +} + +// CHECK: trip1 +// CHECK-NEXT: i=0 +// CHECK-NEXT: end1 +// CHECK-NEXT: trip0 +// CHECK-NEXT: end0 diff --git a/openmp/runtime/test/transform/split/unsigned_iv.c b/openmp/runtime/test/transform/split/unsigned_iv.c new file mode 100644 index 0000000000000..ad096122d8cea --- /dev/null +++ b/openmp/runtime/test/transform/split/unsigned_iv.c @@ -0,0 +1,24 @@ +// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines + +#include +#include + +int main() { + printf("do\n"); +#pragma omp split counts(3, omp_fill) + for (unsigned i = 0; i < 8; ++i) + printf("i=%u\n", i); + printf("done\n"); + return EXIT_SUCCESS; +} + +// CHECK: do +// CHECK-NEXT: i=0 +// CHECK-NEXT: i=1 +// CHECK-NEXT: i=2 +// CHECK-NEXT: i=3 +// CHECK-NEXT: i=4 +// CHECK-NEXT: i=5 +// CHECK-NEXT: i=6 +// CHECK-NEXT: i=7 +// CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/split/zero_first_segment.c b/openmp/runtime/test/transform/split/zero_first_segment.c new file mode 100644 index 0000000000000..09cc526ed81ec --- /dev/null +++ b/openmp/runtime/test/transform/split/zero_first_segment.c @@ -0,0 +1,21 @@ +// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines + +#include +#include + +int main() { + printf("do\n"); +#pragma omp split counts(0, omp_fill) + for (int i = 0; i < 5; ++i) + printf("i=%d\n", i); + printf("done\n"); + return EXIT_SUCCESS; +} + +// CHECK: do +// CHECK-NEXT: i=0 +// CHECK-NEXT: i=1 +// CHECK-NEXT: i=2 +// CHECK-NEXT: i=3 +// CHECK-NEXT: i=4 +// CHECK-NEXT: done