diff --git a/flang/include/flang/Parser/openmp-utils.h b/flang/include/flang/Parser/openmp-utils.h index 36556f8dd7f4a..7396e57144b90 100644 --- a/flang/include/flang/Parser/openmp-utils.h +++ b/flang/include/flang/Parser/openmp-utils.h @@ -123,6 +123,9 @@ template OmpDirectiveName GetOmpDirectiveName(const T &x) { const OpenMPDeclarativeConstruct *GetOmp(const DeclarationConstruct &x); const OpenMPConstruct *GetOmp(const ExecutionPartConstruct &x); +const OpenMPLoopConstruct *GetOmpLoop(const ExecutionPartConstruct &x); +const DoConstruct *GetDoConstruct(const ExecutionPartConstruct &x); + const OmpObjectList *GetOmpObjectList(const OmpClause &clause); template diff --git a/flang/include/flang/Semantics/openmp-directive-sets.h b/flang/include/flang/Semantics/openmp-directive-sets.h index 01e8481e05721..609a7be700c28 100644 --- a/flang/include/flang/Semantics/openmp-directive-sets.h +++ b/flang/include/flang/Semantics/openmp-directive-sets.h @@ -275,10 +275,17 @@ static const OmpDirectiveSet loopConstructSet{ Directive::OMPD_teams_distribute_parallel_do_simd, Directive::OMPD_teams_distribute_simd, Directive::OMPD_teams_loop, + Directive::OMPD_fuse, Directive::OMPD_tile, Directive::OMPD_unroll, }; +static const OmpDirectiveSet loopTransformationSet{ + Directive::OMPD_tile, + Directive::OMPD_unroll, + Directive::OMPD_fuse, +}; + static const OmpDirectiveSet nonPartialVarSet{ Directive::OMPD_allocate, Directive::OMPD_allocators, diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index 4a392381287d5..ab3a174c7ad69 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -279,6 +279,7 @@ bool ClauseProcessor::processCollapse( llvm::SmallVectorImpl &iv) const { int64_t numCollapse = collectLoopRelatedInfo(converter, currentLocation, eval, + eval.getFirstNestedEvaluation(), clauses, loopResult, iv); fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); collapseResult.collapseNumLoops = firOpBuilder.getI64IntegerAttr(numCollapse); diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index b1a3c3d3c5439..f2defc62dce91 100644 --- a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -1063,7 +1063,10 @@ Link make(const parser::OmpClause::Link &inp, LoopRange make(const parser::OmpClause::Looprange &inp, semantics::SemanticsContext &semaCtx) { - llvm_unreachable("Unimplemented: looprange"); + auto &t0 = std::get<0>(inp.v.t); + auto &t1 = std::get<1>(inp.v.t); + return LoopRange{{/*First*/ makeExpr(t0, semaCtx), + /*Count*/ makeExpr(t1, semaCtx)}}; } Map make(const parser::OmpClause::Map &inp, diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp index 83c2eda0a2dc7..da9480123513f 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp @@ -347,7 +347,8 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) { mlir::omp::LoopRelatedClauseOps result; llvm::SmallVector iv; collectLoopRelatedInfo(converter, converter.getCurrentLocation(), eval, - clauses, result, iv); + eval.getFirstNestedEvaluation(), clauses, result, + iv); // Update the original variable just before exiting the worksharing // loop. Conversion as follows: diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index c6487349c4056..2d981f421a4ae 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1982,9 +1982,9 @@ genLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, static void genCanonicalLoopNest( lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, - mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item, size_t numLoops, - llvm::SmallVectorImpl &loops) { + lower::pft::Evaluation &nestedEval, mlir::Location loc, + const ConstructQueue &queue, ConstructQueue::const_iterator item, + size_t numLoops, llvm::SmallVectorImpl &loops) { assert(loops.empty() && "Expecting empty list to fill"); assert(numLoops >= 1 && "Expecting at least one loop"); @@ -1992,7 +1992,8 @@ static void genCanonicalLoopNest( mlir::omp::LoopRelatedClauseOps loopInfo; llvm::SmallVector ivs; - collectLoopRelatedInfo(converter, loc, eval, numLoops, loopInfo, ivs); + collectLoopRelatedInfo(converter, loc, eval, nestedEval, numLoops, loopInfo, + ivs); assert(ivs.size() == numLoops && "Expected to parse as many loop variables as there are loops"); @@ -2014,7 +2015,7 @@ static void genCanonicalLoopNest( // Step 1: Loop prologues // Computing the trip count must happen before entering the outermost loop - lower::pft::Evaluation *innermostEval = &eval.getFirstNestedEvaluation(); + lower::pft::Evaluation *innermostEval = &nestedEval; for ([[maybe_unused]] auto iv : ivs) { if (innermostEval->getIf()->IsDoConcurrent()) { // OpenMP specifies DO CONCURRENT only with the `!omp loop` construct. @@ -2186,7 +2187,8 @@ static void genTileOp(Fortran::lower::AbstractConverter &converter, llvm::SmallVector canonLoops; canonLoops.reserve(numLoops); - genCanonicalLoopNest(converter, symTable, semaCtx, eval, loc, queue, item, + genCanonicalLoopNest(converter, symTable, semaCtx, eval, + eval.getFirstNestedEvaluation(), loc, queue, item, numLoops, canonLoops); assert((canonLoops.size() == numLoops) && "Expecting the predetermined number of loops"); @@ -2217,6 +2219,58 @@ static void genTileOp(Fortran::lower::AbstractConverter &converter, sizesClause.sizes); } +static void genFuseOp(Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, + lower::StatementContext &stmtCtx, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, mlir::Location loc, + const ConstructQueue &queue, + ConstructQueue::const_iterator item) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + + int32_t first = 0; + int32_t count = 0; + auto iter = llvm::find_if(item->clauses, [](const Clause &clause) { + return clause.id == llvm::omp::Clause::OMPC_looprange; + }); + if (iter != item->clauses.end()) { + const auto &looprange = std::get(iter->u); + first = evaluate::ToInt64(std::get<0>(looprange.t)).value(); + count = evaluate::ToInt64(std::get<1>(looprange.t)).value(); + } + + llvm::SmallVector applyees; + for (auto &child : eval.getNestedEvaluations()) { + // Skip OmpEndLoopDirective + if (&child == &eval.getLastNestedEvaluation()) + break; + + // Emit the associated loop + llvm::SmallVector canonLoops; + genCanonicalLoopNest(converter, symTable, semaCtx, eval, child, loc, queue, + item, 1, canonLoops); + + auto cli = llvm::getSingleElement(canonLoops).getCli(); + applyees.push_back(cli); + } + // One generated loop + one for each loop not inside the specified looprange + // if present + llvm::SmallVector generatees; + int64_t numGeneratees = count == 0 ? 1 : applyees.size() - count + 1; + for (int i = 0; i < numGeneratees; i++) { + auto fusedCLI = mlir::omp::NewCliOp::create(firOpBuilder, loc); + generatees.push_back(fusedCLI); + } + auto op = mlir::omp::FuseOp::create(firOpBuilder, loc, generatees, applyees); + + if (count != 0) { + mlir::IntegerAttr firstAttr = firOpBuilder.getI32IntegerAttr(first); + mlir::IntegerAttr countAttr = firOpBuilder.getI32IntegerAttr(count); + op->setAttr("first", firstAttr); + op->setAttr("count", countAttr); + } +} + static void genUnrollOp(Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, lower::StatementContext &stmtCtx, @@ -2233,7 +2287,8 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter, // Emit the associated loop llvm::SmallVector canonLoops; - genCanonicalLoopNest(converter, symTable, semaCtx, eval, loc, queue, item, 1, + genCanonicalLoopNest(converter, symTable, semaCtx, eval, + eval.getFirstNestedEvaluation(), loc, queue, item, 1, canonLoops); llvm::SmallVector applyees; @@ -3507,6 +3562,9 @@ static void genOMPDispatch(lower::AbstractConverter &converter, case llvm::omp::Directive::OMPD_tile: genTileOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; + case llvm::omp::Directive::OMPD_fuse: + genFuseOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); + break; case llvm::omp::Directive::OMPD_unroll: genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; @@ -3962,22 +4020,24 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, mlir::Location currentLocation = converter.genLocation(beginSpec.source); - if (const parser::OpenMPLoopConstruct *ompNestedLoopCons = - loopConstruct.GetNestedConstruct()) { - llvm::omp::Directive nestedDirective = - parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v; - switch (nestedDirective) { - case llvm::omp::Directive::OMPD_tile: - // Skip OMPD_tile since the tile sizes will be retrieved when - // generating the omp.loop_nest op. - break; - default: { - unsigned version = semaCtx.langOptions().OpenMPVersion; - TODO(currentLocation, - "Applying a loop-associated on the loop generated by the " + - llvm::omp::getOpenMPDirectiveName(nestedDirective, version) + - " construct"); - } + for (auto &construct : std::get(loopConstruct.t)) { + if (const parser::OpenMPLoopConstruct *ompNestedLoopCons = + parser::omp::GetOmpLoop(construct)) { + llvm::omp::Directive nestedDirective = + parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v; + switch (nestedDirective) { + case llvm::omp::Directive::OMPD_tile: + // Skip OMPD_tile since the tile sizes will be retrieved when + // generating the omp.loop_nest op. + break; + default: { + unsigned version = semaCtx.langOptions().OpenMPVersion; + TODO(currentLocation, + "Applying a loop-associated on the loop generated by the " + + llvm::omp::getOpenMPDirectiveName(nestedDirective, version) + + " construct"); + } + } } } diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 7d7a4869ab3a6..913e4d1e69500 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -812,13 +812,14 @@ void collectTileSizesFromOpenMPConstruct( int64_t collectLoopRelatedInfo( lower::AbstractConverter &converter, mlir::Location currentLocation, - lower::pft::Evaluation &eval, const omp::List &clauses, + lower::pft::Evaluation &eval, lower::pft::Evaluation &nestedEval, + const omp::List &clauses, mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv) { int64_t numCollapse = 1; // Collect the loops to collapse. - lower::pft::Evaluation *doConstructEval = &eval.getFirstNestedEvaluation(); + lower::pft::Evaluation *doConstructEval = &nestedEval; if (doConstructEval->getIf()->IsDoConcurrent()) { TODO(currentLocation, "Do Concurrent in Worksharing loop construct"); } @@ -830,21 +831,21 @@ int64_t collectLoopRelatedInfo( numCollapse = collapseValue; } - collectLoopRelatedInfo(converter, currentLocation, eval, numCollapse, result, - iv); + collectLoopRelatedInfo(converter, currentLocation, eval, nestedEval, + numCollapse, result, iv); return numCollapse; } void collectLoopRelatedInfo( lower::AbstractConverter &converter, mlir::Location currentLocation, - lower::pft::Evaluation &eval, int64_t numCollapse, - mlir::omp::LoopRelatedClauseOps &result, + lower::pft::Evaluation &eval, lower::pft::Evaluation &nestedEval, + int64_t numCollapse, mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); // Collect the loops to collapse. - lower::pft::Evaluation *doConstructEval = &eval.getFirstNestedEvaluation(); + lower::pft::Evaluation *doConstructEval = &nestedEval; if (doConstructEval->getIf()->IsDoConcurrent()) { TODO(currentLocation, "Do Concurrent in Worksharing loop construct"); } @@ -852,10 +853,15 @@ void collectLoopRelatedInfo( // Collect sizes from tile directive if present. std::int64_t sizesLengthValue = 0l; if (auto *ompCons{eval.getIf()}) { - processTileSizesFromOpenMPConstruct( - ompCons, [&](const parser::OmpClause::Sizes *tclause) { - sizesLengthValue = tclause->v.size(); - }); + if (auto *ompLoop{std::get_if(&ompCons->u)}) { + const parser::OmpDirectiveSpecification &beginSpec{ompLoop->BeginDir()}; + if (beginSpec.DirId() == llvm::omp::Directive::OMPD_tile) { + processTileSizesFromOpenMPConstruct( + ompCons, [&](const parser::OmpClause::Sizes *tclause) { + sizesLengthValue = tclause->v.size(); + }); + } + } } std::int64_t collapseValue = std::max(numCollapse, sizesLengthValue); diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index 2960b663b08b2..886a5c1835f7e 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -169,13 +169,15 @@ void lastprivateModifierNotSupported(const omp::clause::Lastprivate &lastp, int64_t collectLoopRelatedInfo( lower::AbstractConverter &converter, mlir::Location currentLocation, - lower::pft::Evaluation &eval, const omp::List &clauses, + lower::pft::Evaluation &eval, lower::pft::Evaluation &nestedEval, + const omp::List &clauses, mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv); void collectLoopRelatedInfo( lower::AbstractConverter &converter, mlir::Location currentLocation, - lower::pft::Evaluation &eval, std::int64_t collapseValue, + lower::pft::Evaluation &eval, lower::pft::Evaluation &nestedEval, + std::int64_t collapseValue, // const omp::List &clauses, mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv); diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index e2da60ed19de8..231eea8841d4b 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -2260,6 +2260,7 @@ static constexpr DirectiveSet GetLoopDirectives() { unsigned(Directive::OMPD_teams_distribute_parallel_do_simd), unsigned(Directive::OMPD_teams_distribute_simd), unsigned(Directive::OMPD_teams_loop), + unsigned(Directive::OMPD_fuse), unsigned(Directive::OMPD_tile), unsigned(Directive::OMPD_unroll), }; diff --git a/flang/lib/Parser/openmp-utils.cpp b/flang/lib/Parser/openmp-utils.cpp index 2424828293c73..dfe8dbdd5ac9e 100644 --- a/flang/lib/Parser/openmp-utils.cpp +++ b/flang/lib/Parser/openmp-utils.cpp @@ -41,6 +41,23 @@ const OpenMPConstruct *GetOmp(const ExecutionPartConstruct &x) { return nullptr; } +const OpenMPLoopConstruct *GetOmpLoop(const ExecutionPartConstruct &x) { + if (auto *construct{GetOmp(x)}) { + if (auto *omp{std::get_if(&construct->u)}) { + return omp; + } + } + return nullptr; +} +const DoConstruct *GetDoConstruct(const ExecutionPartConstruct &x) { + if (auto *y{std::get_if(&x.u)}) { + if (auto *z{std::get_if>(&y->u)}) { + return &z->value(); + } + } + return nullptr; +} + const OmpObjectList *GetOmpObjectList(const OmpClause &clause) { // Clauses with OmpObjectList as its data member using MemberObjectListClauses = std::tuple(x.t)}; nextIt = it; - while (++nextIt != block.end()) { + nextIt++; + while (nextIt != block.end()) { // Ignore compiler directives. - if (GetConstructIf(*nextIt)) + if (GetConstructIf(*nextIt)) { + nextIt++; continue; + } if (auto *doCons{GetConstructIf(*nextIt)}) { if (doCons->GetLoopControl()) { @@ -160,9 +171,12 @@ class CanonicalizationOfOmp { if (nextIt != block.end()) { if (auto *endDir{ GetConstructIf(*nextIt)}) { - std::get>(x.t) = - std::move(*endDir); - nextIt = block.erase(nextIt); + auto &endDirName = endDir->DirName(); + if (endDirName.v != llvm::omp::Directive::OMPD_fuse) { + std::get>(x.t) = + std::move(*endDir); + nextIt = block.erase(nextIt); + } } } } else { @@ -172,50 +186,45 @@ class CanonicalizationOfOmp { } } else if (auto *ompLoopCons{ GetOmpIf(*nextIt)}) { - // We should allow UNROLL and TILE constructs to be inserted between an - // OpenMP Loop Construct and the DO loop itself + // We should allow loop transformation constructs to be inserted between + // an OpenMP Loop Construct and the DO loop itself auto &nestedBeginDirective = ompLoopCons->BeginDir(); auto &nestedBeginName = nestedBeginDirective.DirName(); - if ((nestedBeginName.v == llvm::omp::Directive::OMPD_unroll || - nestedBeginName.v == llvm::omp::Directive::OMPD_tile) && - !(nestedBeginName.v == llvm::omp::Directive::OMPD_unroll && - beginName.v == llvm::omp::Directive::OMPD_tile)) { - // iterate through the remaining block items to find the end directive - // for the unroll/tile directive. - parser::Block::iterator endIt; - endIt = nextIt; - while (endIt != block.end()) { - if (auto *endDir{ - GetConstructIf(*endIt)}) { - auto &endDirName = endDir->DirName(); - if (endDirName.v == beginName.v) { - std::get>(x.t) = - std::move(*endDir); - endIt = block.erase(endIt); - continue; + if (llvm::omp::loopTransformationSet.test(nestedBeginName.v)) { + if (nestedBeginName.v == llvm::omp::Directive::OMPD_unroll && + llvm::omp::loopTransformationSet.test(beginName.v)) { + // if a loop has been unrolled, the user can not then transform that + // loop as it has been unrolled + const parser::OmpClauseList &unrollClauseList{ + nestedBeginDirective.Clauses()}; + if (unrollClauseList.v.empty()) { + // if the clause list is empty for an unroll construct, we assume + // the loop is being fully unrolled + transformUnrollError(beginName, messages_); + } else { + // parse the clauses for the unroll directive to find the full + // clause + for (auto &clause : unrollClauseList.v) { + if (clause.Id() == llvm::omp::OMPC_full) { + transformUnrollError(beginName, messages_); + } } } - ++endIt; } RewriteOpenMPLoopConstruct(*ompLoopCons, block, nextIt); body.push_back(std::move(*nextIt)); nextIt = block.erase(nextIt); - } else if (nestedBeginName.v == llvm::omp::Directive::OMPD_unroll && - beginName.v == llvm::omp::Directive::OMPD_tile) { - // if a loop has been unrolled, the user can not then tile that loop - // as it has been unrolled - const parser::OmpClauseList &unrollClauseList{ - nestedBeginDirective.Clauses()}; - if (unrollClauseList.v.empty()) { - // if the clause list is empty for an unroll construct, we assume - // the loop is being fully unrolled - tileUnrollError(beginName, messages_); - } else { - // parse the clauses for the unroll directive to find the full - // clause - for (auto &clause : unrollClauseList.v) { - if (clause.Id() == llvm::omp::OMPC_full) { - tileUnrollError(beginName, messages_); + // check the following block item to find the end directive + // for the loop transform directive. + if (nextIt != block.end()) { + if (auto *endDir{ + GetConstructIf(*nextIt)}) { + auto &endDirName = endDir->DirName(); + if (endDirName.v == beginName.v && + endDirName.v != llvm::omp::Directive::OMPD_fuse) { + std::get>(x.t) = + std::move(*endDir); + nextIt = block.erase(nextIt); } } } @@ -227,11 +236,29 @@ class CanonicalizationOfOmp { } else { missingDoConstruct(beginName, messages_); } + + if (endFuseNeeded && nextIt != block.end()) { + if (auto *endDir{ + GetConstructIf(*nextIt)}) { + auto &endDirName = endDir->DirName(); + if (endDirName.v == llvm::omp::Directive::OMPD_fuse) { + endFuseNeeded = false; + std::get>(x.t) = + std::move(*endDir); + nextIt = block.erase(nextIt); + } + } + } + if (endFuseNeeded) + continue; // If we get here, we either found a loop, or issued an error message. return; } if (nextIt == block.end()) { - missingDoConstruct(beginName, messages_); + if (endFuseNeeded) + missingEndFuse(beginName, messages_); + else + missingDoConstruct(beginName, messages_); } } diff --git a/flang/lib/Semantics/check-omp-loop.cpp b/flang/lib/Semantics/check-omp-loop.cpp index 3d3596b500880..13581008433a6 100644 --- a/flang/lib/Semantics/check-omp-loop.cpp +++ b/flang/lib/Semantics/check-omp-loop.cpp @@ -285,9 +285,11 @@ void OmpStructureChecker::Enter(const parser::OpenMPLoopConstruct &x) { } SetLoopInfo(x); - if (const auto *doConstruct{x.GetNestedLoop()}) { - const auto &doBlock{std::get(doConstruct->t)}; - CheckNoBranching(doBlock, beginName.v, beginName.source); + for (auto &construct : std::get(x.t)) { + if (const auto *doConstruct{parser::omp::GetDoConstruct(construct)}) { + const auto &doBlock{std::get(doConstruct->t)}; + CheckNoBranching(doBlock, beginName.v, beginName.source); + } } CheckLoopItrVariableIsInt(x); CheckAssociatedLoopConstraints(x); @@ -301,6 +303,11 @@ void OmpStructureChecker::Enter(const parser::OpenMPLoopConstruct &x) { beginName.v == llvm::omp::Directive::OMPD_distribute_simd) { CheckDistLinear(x); } + if (beginName.v == llvm::omp::Directive::OMPD_fuse) { + CheckLooprangeBounds(x); + } else { + CheckNestedFuse(x); + } } const parser::Name OmpStructureChecker::GetLoopIndex( @@ -320,24 +327,28 @@ void OmpStructureChecker::SetLoopInfo(const parser::OpenMPLoopConstruct &x) { void OmpStructureChecker::CheckLoopItrVariableIsInt( const parser::OpenMPLoopConstruct &x) { - for (const parser::DoConstruct *loop{x.GetNestedLoop()}; loop;) { - if (loop->IsDoNormal()) { - const parser::Name &itrVal{GetLoopIndex(loop)}; - if (itrVal.symbol) { - const auto *type{itrVal.symbol->GetType()}; - if (!type->IsNumeric(TypeCategory::Integer)) { - context_.Say(itrVal.source, - "The DO loop iteration" - " variable must be of the type integer."_err_en_US, - itrVal.ToString()); + for (auto &construct : std::get(x.t)) { + for (const parser::DoConstruct *loop{ + parser::omp::GetDoConstruct(construct)}; + loop;) { + if (loop->IsDoNormal()) { + const parser::Name &itrVal{GetLoopIndex(loop)}; + if (itrVal.symbol) { + const auto *type{itrVal.symbol->GetType()}; + if (!type->IsNumeric(TypeCategory::Integer)) { + context_.Say(itrVal.source, + "The DO loop iteration" + " variable must be of the type integer."_err_en_US, + itrVal.ToString()); + } } } + // Get the next DoConstruct if block is not empty. + const auto &block{std::get(loop->t)}; + const auto it{block.begin()}; + loop = it != block.end() ? parser::Unwrap(*it) + : nullptr; } - // Get the next DoConstruct if block is not empty. - const auto &block{std::get(loop->t)}; - const auto it{block.begin()}; - loop = - it != block.end() ? parser::Unwrap(*it) : nullptr; } } @@ -401,23 +412,28 @@ void OmpStructureChecker::CheckDistLinear( // Match the loop index variables with the collected symbols from linear // clauses. - for (const parser::DoConstruct *loop{x.GetNestedLoop()}; loop;) { - if (loop->IsDoNormal()) { - const parser::Name &itrVal{GetLoopIndex(loop)}; - if (itrVal.symbol) { - // Remove the symbol from the collected set - indexVars.erase(&itrVal.symbol->GetUltimate()); - } - collapseVal--; - if (collapseVal == 0) { - break; + for (auto &construct : std::get(x.t)) { + std::int64_t curCollapseVal{collapseVal}; + for (const parser::DoConstruct *loop{ + parser::omp::GetDoConstruct(construct)}; + loop;) { + if (loop->IsDoNormal()) { + const parser::Name &itrVal{GetLoopIndex(loop)}; + if (itrVal.symbol) { + // Remove the symbol from the collected set + indexVars.erase(&itrVal.symbol->GetUltimate()); + } + curCollapseVal--; + if (curCollapseVal == 0) { + break; + } } + // Get the next DoConstruct if block is not empty. + const auto &block{std::get(loop->t)}; + const auto it{block.begin()}; + loop = it != block.end() ? parser::Unwrap(*it) + : nullptr; } - // Get the next DoConstruct if block is not empty. - const auto &block{std::get(loop->t)}; - const auto it{block.begin()}; - loop = it != block.end() ? parser::Unwrap(*it) - : nullptr; } // Show error for the remaining variables @@ -430,6 +446,63 @@ void OmpStructureChecker::CheckDistLinear( } } +void OmpStructureChecker::CheckLooprangeBounds( + const parser::OpenMPLoopConstruct &x) { + const parser::OmpClauseList &clauseList{x.BeginDir().Clauses()}; + if (clauseList.v.empty()) { + return; + } + for (auto &clause : clauseList.v) { + if (const auto *lrClause{ + std::get_if(&clause.u)}) { + auto first{GetIntValue(std::get<0>((lrClause->v).t))}; + auto count{GetIntValue(std::get<1>((lrClause->v).t))}; + if (!first || !count) { + return; + } + auto &loopConsList{std::get(x.t)}; + if (*first > 0 && *count > 0 && + loopConsList.size() < (unsigned)(*first + *count - 1)) { + context_.Say(clause.source, + "The loop range indicated in the %s clause must not be out of the bounds of the Loop Sequence following the construct."_err_en_US, + parser::ToUpperCaseLetters(clause.source.ToString())); + } + return; + } + } +} + +void OmpStructureChecker::CheckNestedFuse( + const parser::OpenMPLoopConstruct &x) { + auto &loopConsList{std::get(x.t)}; + assert(loopConsList.size() == 1 && "Not Expecting a loop sequence"); + const auto *ompConstruct{parser::omp::GetOmpLoop(loopConsList.front())}; + if (!ompConstruct) { + return; + } + const parser::OmpClauseList &clauseList{ompConstruct->BeginDir().Clauses()}; + if (clauseList.v.empty()) { + return; + } + for (auto &clause : clauseList.v) { + if (const auto *lrClause{ + std::get_if(&clause.u)}) { + auto count{GetIntValue(std::get<1>((lrClause->v).t))}; + if (!count) { + return; + } + auto &nestedLoopConsList{std::get(ompConstruct->t)}; + if (nestedLoopConsList.size() > (unsigned)(*count)) { + context_.Say(x.BeginDir().DirName().source, + "The loop sequence following the %s construct must be fully fused first."_err_en_US, + parser::ToUpperCaseLetters( + x.BeginDir().DirName().source.ToString())); + } + return; + } + } +} + void OmpStructureChecker::Leave(const parser::OpenMPLoopConstruct &x) { const parser::OmpClauseList &clauseList{x.BeginDir().Clauses()}; diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 37b4404cc598f..63751fd0c8abd 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -3401,9 +3401,11 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Sizes &c) { } void OmpStructureChecker::Enter(const parser::OmpClause::Looprange &x) { - context_.Say(GetContext().clauseSource, - "LOOPRANGE clause is not implemented yet"_err_en_US, - ContextDirectiveAsFortran()); + CheckAllowedClause(llvm::omp::Clause::OMPC_looprange); + auto &first = std::get<0>(x.v.t); + auto &count = std::get<1>(x.v.t); + RequiresConstantPositiveParameter(llvm::omp::Clause::OMPC_looprange, count); + RequiresConstantPositiveParameter(llvm::omp::Clause::OMPC_looprange, first); } // Restrictions specific to each clause are implemented apart from the diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index 1b84bc5dda471..a4d74398378d2 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -316,6 +316,8 @@ class OmpStructureChecker : public OmpStructureCheckerBase { void CheckAtomicWrite(const parser::OpenMPAtomicConstruct &x); void CheckAtomicUpdate(const parser::OpenMPAtomicConstruct &x); + void CheckLooprangeBounds(const parser::OpenMPLoopConstruct &x); + void CheckNestedFuse(const parser::OpenMPLoopConstruct &x); void CheckDistLinear(const parser::OpenMPLoopConstruct &x); void CheckSIMDNest(const parser::OpenMPConstruct &x); void CheckTargetNest(const parser::OpenMPConstruct &x); diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index c4d103613b587..48b23ad077626 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -525,7 +525,10 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { void Post(const parser::OpenMPSimpleStandaloneConstruct &) { PopContext(); } bool Pre(const parser::OpenMPLoopConstruct &); - void Post(const parser::OpenMPLoopConstruct &) { PopContext(); } + void Post(const parser::OpenMPLoopConstruct &) { + ordCollapseLevel++; + PopContext(); + } void Post(const parser::OmpBeginLoopDirective &) { GetContext().withinConstruct = true; } @@ -2028,6 +2031,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { case llvm::omp::Directive::OMPD_teams_distribute_parallel_do_simd: case llvm::omp::Directive::OMPD_teams_distribute_simd: case llvm::omp::Directive::OMPD_teams_loop: + case llvm::omp::Directive::OMPD_fuse: case llvm::omp::Directive::OMPD_tile: case llvm::omp::Directive::OMPD_unroll: PushContext(beginName.source, beginName.v); @@ -2205,8 +2209,11 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromInnerLoopContruct( const parser::OpenMPLoopConstruct &x, llvm::SmallVector &levels, llvm::SmallVector &clauses) { - if (auto *innerConstruct{x.GetNestedConstruct()}) { - CollectNumAffectedLoopsFromLoopConstruct(*innerConstruct, levels, clauses); + for (auto &construct : std::get(x.t)) { + if (auto *innerConstruct{parser::omp::GetOmpLoop(construct)}) { + CollectNumAffectedLoopsFromLoopConstruct( + *innerConstruct, levels, clauses); + } } } @@ -2271,74 +2278,74 @@ void OmpAttributeVisitor::CheckPerfectNestAndRectangularLoop( // Find the associated region by skipping nested loop-associated constructs // such as loop transformations - const parser::OpenMPLoopConstruct *innermostConstruct{&x}; - while (auto *nested{innermostConstruct->GetNestedConstruct()}) { - innermostConstruct = nested; - } - - const auto *outer{innermostConstruct->GetNestedLoop()}; - if (!outer) - return; - - llvm::SmallVector ivs; - int curLevel{0}; - const parser::DoConstruct *loop{outer}; - while (true) { - auto [iv, lb, ub, step] = GetLoopBounds(*loop); - - if (lb) - checkExprHasSymbols(ivs, lb); - if (ub) - checkExprHasSymbols(ivs, ub); - if (step) - checkExprHasSymbols(ivs, step); - if (iv) { - if (auto *symbol{currScope().FindSymbol(iv->source)}) - ivs.push_back(symbol); - } + for (auto &construct : std::get(x.t)) { + if (const auto *innermostConstruct{parser::omp::GetOmpLoop(construct)}) { + CheckPerfectNestAndRectangularLoop(*innermostConstruct); + } else if (const auto *doConstruct{ + parser::omp::GetDoConstruct(construct)}) { + + llvm::SmallVector ivs; + int curLevel{0}; + const auto *loop{doConstruct}; + while (true) { + auto [iv, lb, ub, step] = GetLoopBounds(*loop); + + if (lb) + checkExprHasSymbols(ivs, lb); + if (ub) + checkExprHasSymbols(ivs, ub); + if (step) + checkExprHasSymbols(ivs, step); + if (iv) { + if (auto *symbol{currScope().FindSymbol(iv->source)}) + ivs.push_back(symbol); + } - // Stop after processing all affected loops - if (curLevel + 1 >= dirDepth) - break; + // Stop after processing all affected loops + if (curLevel + 1 >= dirDepth) + break; - // Recurse into nested loop - const auto &block{std::get(loop->t)}; - if (block.empty()) { - // Insufficient number of nested loops already reported by - // CheckAssocLoopLevel() - break; - } + // Recurse into nested loop + const auto &block{std::get(loop->t)}; + if (block.empty()) { + // Insufficient number of nested loops already reported by + // CheckAssocLoopLevel() + break; + } - loop = GetDoConstructIf(block.front()); - if (!loop) { - // Insufficient number of nested loops already reported by - // CheckAssocLoopLevel() - break; - } + loop = GetDoConstructIf(block.front()); + if (!loop) { + // Insufficient number of nested loops already reported by + // CheckAssocLoopLevel() + break; + } - auto checkPerfectNest = [&, this]() { - if (block.empty()) - return; - auto last = block.end(); - --last; + auto checkPerfectNest = [&, this]() { + if (block.empty()) + return; + auto last = block.end(); + --last; - // A trailing CONTINUE is not considered part of the loop body - if (parser::Unwrap(*last)) - --last; + // A trailing CONTINUE is not considered part of the loop body + if (parser::Unwrap(*last)) + --last; - // In a perfectly nested loop, the nested loop must be the only statement - if (last == block.begin()) - return; + // In a perfectly nested loop, the nested loop must be the only + // statement + if (last == block.begin()) + return; - // Non-perfectly nested loop - // TODO: Point to non-DO statement, directiveSource as a note - context_.Say(dirContext.directiveSource, - "Canonical loop nest must be perfectly nested."_err_en_US); - }; + // Non-perfectly nested loop + // TODO: Point to non-DO statement, directiveSource as a note + context_.Say(dirContext.directiveSource, + "Canonical loop nest must be perfectly nested."_err_en_US); + }; - checkPerfectNest(); + checkPerfectNest(); - ++curLevel; + ++curLevel; + } + } } } @@ -2372,50 +2379,51 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel( bool hasCollapseClause{ clause ? (clause->Id() == llvm::omp::OMPC_collapse) : false}; - const parser::OpenMPLoopConstruct *innerMostNest = &x; - while (auto *nested{innerMostNest->GetNestedConstruct()}) { - innerMostNest = nested; - } - - if (const auto *outer{innerMostNest->GetNestedLoop()}) { - for (const parser::DoConstruct *loop{&*outer}; loop && level > 0; --level) { - if (loop->IsDoConcurrent()) { - // DO CONCURRENT is explicitly allowed for the LOOP construct so long - // as there isn't a COLLAPSE clause - if (isLoopConstruct) { - if (hasCollapseClause) { - // hasCollapseClause implies clause != nullptr - context_.Say(clause->source, - "DO CONCURRENT loops cannot be used with the COLLAPSE clause."_err_en_US); + for (auto &construct : std::get(x.t)) { + if (const auto *innermostConstruct{parser::omp::GetOmpLoop(construct)}) { + PrivatizeAssociatedLoopIndexAndCheckLoopLevel(*innermostConstruct); + } else if (const auto *doConstruct{ + parser::omp::GetDoConstruct(construct)}) { + for (const parser::DoConstruct *loop{&*doConstruct}; loop && level > 0; + --level) { + if (loop->IsDoConcurrent()) { + // DO CONCURRENT is explicitly allowed for the LOOP construct so long + // as there isn't a COLLAPSE clause + if (isLoopConstruct) { + if (hasCollapseClause) { + // hasCollapseClause implies clause != nullptr + context_.Say(clause->source, + "DO CONCURRENT loops cannot be used with the COLLAPSE clause."_err_en_US); + } + } else { + auto &stmt = + std::get>(loop->t); + context_.Say(stmt.source, + "DO CONCURRENT loops cannot form part of a loop nest."_err_en_US); } - } else { - auto &stmt = - std::get>(loop->t); - context_.Say(stmt.source, - "DO CONCURRENT loops cannot form part of a loop nest."_err_en_US); - } - } - // go through all the nested do-loops and resolve index variables - const parser::Name *iv{GetLoopIndex(*loop)}; - if (iv) { - if (auto *symbol{ResolveOmp(*iv, ivDSA, currScope())}) { - SetSymbolDSA(*symbol, {Symbol::Flag::OmpPreDetermined, ivDSA}); - iv->symbol = symbol; // adjust the symbol within region - AddToContextObjectWithDSA(*symbol, ivDSA); } + // go through all the nested do-loops and resolve index variables + const parser::Name *iv{GetLoopIndex(*loop)}; + if (iv) { + if (auto *symbol{ResolveOmp(*iv, ivDSA, currScope())}) { + SetSymbolDSA(*symbol, {Symbol::Flag::OmpPreDetermined, ivDSA}); + iv->symbol = symbol; // adjust the symbol within region + AddToContextObjectWithDSA(*symbol, ivDSA); + } - const auto &block{std::get(loop->t)}; - const auto it{block.begin()}; - loop = it != block.end() ? GetDoConstructIf(*it) : nullptr; + const auto &block{std::get(loop->t)}; + const auto it{block.begin()}; + loop = it != block.end() ? GetDoConstructIf(*it) : nullptr; + } } + CheckAssocLoopLevel(level, GetAssociatedClause()); + } else { + context_.Say(GetContext().directiveSource, + "A DO loop must follow the %s directive"_err_en_US, + parser::ToUpperCaseLetters( + llvm::omp::getOpenMPDirectiveName(GetContext().directive, version) + .str())); } - CheckAssocLoopLevel(level, GetAssociatedClause()); - } else { - context_.Say(GetContext().directiveSource, - "A DO loop must follow the %s directive"_err_en_US, - parser::ToUpperCaseLetters( - llvm::omp::getOpenMPDirectiveName(GetContext().directive, version) - .str())); } } diff --git a/flang/lib/Semantics/rewrite-parse-tree.cpp b/flang/lib/Semantics/rewrite-parse-tree.cpp index b5a07680a3377..285eaac1e2c8f 100644 --- a/flang/lib/Semantics/rewrite-parse-tree.cpp +++ b/flang/lib/Semantics/rewrite-parse-tree.cpp @@ -9,6 +9,7 @@ #include "rewrite-parse-tree.h" #include "flang/Common/indirection.h" +#include "flang/Parser/openmp-utils.h" #include "flang/Parser/parse-tree-visitor.h" #include "flang/Parser/parse-tree.h" #include "flang/Parser/tools.h" @@ -195,18 +196,24 @@ void RewriteMutator::OpenMPSimdOnly( ++it; continue; } - if (auto *doConstruct = - const_cast(ompLoop->GetNestedLoop())) { - auto &loopBody = std::get(doConstruct->t); - // We can only remove some constructs from a loop when it's _not_ a - // OpenMP simd loop - OpenMPSimdOnly(const_cast(loopBody), - /*isNonSimdLoopBody=*/true); - - auto newLoop = parser::ExecutionPartConstruct{ - parser::ExecutableConstruct{std::move(*doConstruct)}}; + std::list doList; + for (auto &construct : std::get(ompLoop->t)) { + if (auto *doConstruct = const_cast( + parser::omp::GetDoConstruct(construct))) { + auto &loopBody = std::get(doConstruct->t); + // We can only remove some constructs from a loop when it's _not_ + // a OpenMP simd loop + OpenMPSimdOnly(const_cast(loopBody), + /*isNonSimdLoopBody=*/true); + auto newLoop = parser::ExecutionPartConstruct{ + parser::ExecutableConstruct{std::move(*doConstruct)}}; + doList.insert(doList.end(), std::move(newLoop)); + } + } + if (!doList.empty()) { it = block.erase(it); - block.insert(it, std::move(newLoop)); + for (auto &newLoop : doList) + block.insert(it, std::move(newLoop)); continue; } } else if (auto *ompCon{std::get_if( @@ -384,10 +391,12 @@ bool RewriteMutator::Pre(parser::OpenMPLoopConstruct &ompLoop) { // If we're looking at a non-simd OpenMP loop, we need to explicitly // call OpenMPSimdOnly on the nested loop block while indicating where // the block comes from. - if (auto *doConstruct = - const_cast(ompLoop.GetNestedLoop())) { - auto &innerBlock = std::get(doConstruct->t); - OpenMPSimdOnly(innerBlock, /*isNonSimdLoopBody=*/true); + for (auto &construct : std::get(ompLoop.t)) { + if (auto *doConstruct = parser::omp::GetDoConstruct(construct)) { + auto &innerBlock = std::get(doConstruct->t); + OpenMPSimdOnly(const_cast(innerBlock), + /*isNonSimdLoopBody=*/true); + } } } return true; diff --git a/flang/test/Lower/OpenMP/fuse01.f90 b/flang/test/Lower/OpenMP/fuse01.f90 new file mode 100644 index 0000000000000..1377bf3e9c529 --- /dev/null +++ b/flang/test/Lower/OpenMP/fuse01.f90 @@ -0,0 +1,93 @@ +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=60 -o - %s | FileCheck %s + + +subroutine omp_fuse01(lb1, ub1, inc1, lb2, ub2, inc2) + integer res, i, j + integer lb1, ub1, inc1 + integer lb2, ub2, inc2 + + !$omp fuse + do i = lb1, ub1, inc1 + res = i + end do + do j = lb2, ub2, inc2 + res = j + end do + !$omp end fuse + +end subroutine omp_fuse01 + + +! CHECK-LABEL: func.func @_QPomp_fuse01( +! CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "lb1"}, +! CHECK-SAME: %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "ub1"}, +! CHECK-SAME: %[[ARG2:.*]]: !fir.ref {fir.bindc_name = "inc1"}, +! CHECK-SAME: %[[ARG3:.*]]: !fir.ref {fir.bindc_name = "lb2"}, +! CHECK-SAME: %[[ARG4:.*]]: !fir.ref {fir.bindc_name = "ub2"}, +! CHECK-SAME: %[[ARG5:.*]]: !fir.ref {fir.bindc_name = "inc2"}) { +! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[ALLOCA_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_fuse01Ei"} +! CHECK: %[[DECLARE_0:.*]]:2 = hlfir.declare %[[ALLOCA_0]] {uniq_name = "_QFomp_fuse01Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[DECLARE_1:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse01Einc1"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[DECLARE_2:.*]]:2 = hlfir.declare %[[ARG5]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse01Einc2"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[ALLOCA_1:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFomp_fuse01Ej"} +! CHECK: %[[DECLARE_3:.*]]:2 = hlfir.declare %[[ALLOCA_1]] {uniq_name = "_QFomp_fuse01Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[DECLARE_4:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse01Elb1"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[DECLARE_5:.*]]:2 = hlfir.declare %[[ARG3]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse01Elb2"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[ALLOCA_2:.*]] = fir.alloca i32 {bindc_name = "res", uniq_name = "_QFomp_fuse01Eres"} +! CHECK: %[[DECLARE_6:.*]]:2 = hlfir.declare %[[ALLOCA_2]] {uniq_name = "_QFomp_fuse01Eres"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[DECLARE_7:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse01Eub1"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[DECLARE_8:.*]]:2 = hlfir.declare %[[ARG4]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse01Eub2"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[LOAD_0:.*]] = fir.load %[[DECLARE_4]]#0 : !fir.ref +! CHECK: %[[LOAD_1:.*]] = fir.load %[[DECLARE_7]]#0 : !fir.ref +! CHECK: %[[LOAD_2:.*]] = fir.load %[[DECLARE_1]]#0 : !fir.ref +! CHECK: %[[CONSTANT_0:.*]] = arith.constant 0 : i32 +! CHECK: %[[CONSTANT_1:.*]] = arith.constant 1 : i32 +! CHECK: %[[CMPI_0:.*]] = arith.cmpi slt, %[[LOAD_2]], %[[CONSTANT_0]] : i32 +! CHECK: %[[SUBI_0:.*]] = arith.subi %[[CONSTANT_0]], %[[LOAD_2]] : i32 +! CHECK: %[[SELECT_0:.*]] = arith.select %[[CMPI_0]], %[[SUBI_0]], %[[LOAD_2]] : i32 +! CHECK: %[[SELECT_1:.*]] = arith.select %[[CMPI_0]], %[[LOAD_1]], %[[LOAD_0]] : i32 +! CHECK: %[[SELECT_2:.*]] = arith.select %[[CMPI_0]], %[[LOAD_0]], %[[LOAD_1]] : i32 +! CHECK: %[[SUBI_1:.*]] = arith.subi %[[SELECT_2]], %[[SELECT_1]] overflow : i32 +! CHECK: %[[DIVUI_0:.*]] = arith.divui %[[SUBI_1]], %[[SELECT_0]] : i32 +! CHECK: %[[ADDI_0:.*]] = arith.addi %[[DIVUI_0]], %[[CONSTANT_1]] overflow : i32 +! CHECK: %[[CMPI_1:.*]] = arith.cmpi slt, %[[SELECT_2]], %[[SELECT_1]] : i32 +! CHECK: %[[SELECT_3:.*]] = arith.select %[[CMPI_1]], %[[CONSTANT_0]], %[[ADDI_0]] : i32 +! CHECK: %[[NEW_CLI_0:.*]] = omp.new_cli +! CHECK: omp.canonical_loop(%[[NEW_CLI_0]]) %[[VAL_0:.*]] : i32 in range(%[[SELECT_3]]) { +! CHECK: %[[MULI_0:.*]] = arith.muli %[[VAL_0]], %[[LOAD_2]] : i32 +! CHECK: %[[ADDI_1:.*]] = arith.addi %[[LOAD_0]], %[[MULI_0]] : i32 +! CHECK: hlfir.assign %[[ADDI_1]] to %[[DECLARE_0]]#0 : i32, !fir.ref +! CHECK: %[[LOAD_3:.*]] = fir.load %[[DECLARE_0]]#0 : !fir.ref +! CHECK: hlfir.assign %[[LOAD_3]] to %[[DECLARE_6]]#0 : i32, !fir.ref +! CHECK: omp.terminator +! CHECK: } +! CHECK: %[[LOAD_4:.*]] = fir.load %[[DECLARE_5]]#0 : !fir.ref +! CHECK: %[[LOAD_5:.*]] = fir.load %[[DECLARE_8]]#0 : !fir.ref +! CHECK: %[[LOAD_6:.*]] = fir.load %[[DECLARE_2]]#0 : !fir.ref +! CHECK: %[[CONSTANT_2:.*]] = arith.constant 0 : i32 +! CHECK: %[[CONSTANT_3:.*]] = arith.constant 1 : i32 +! CHECK: %[[CMPI_2:.*]] = arith.cmpi slt, %[[LOAD_6]], %[[CONSTANT_2]] : i32 +! CHECK: %[[SUBI_2:.*]] = arith.subi %[[CONSTANT_2]], %[[LOAD_6]] : i32 +! CHECK: %[[SELECT_4:.*]] = arith.select %[[CMPI_2]], %[[SUBI_2]], %[[LOAD_6]] : i32 +! CHECK: %[[SELECT_5:.*]] = arith.select %[[CMPI_2]], %[[LOAD_5]], %[[LOAD_4]] : i32 +! CHECK: %[[SELECT_6:.*]] = arith.select %[[CMPI_2]], %[[LOAD_4]], %[[LOAD_5]] : i32 +! CHECK: %[[SUBI_3:.*]] = arith.subi %[[SELECT_6]], %[[SELECT_5]] overflow : i32 +! CHECK: %[[DIVUI_1:.*]] = arith.divui %[[SUBI_3]], %[[SELECT_4]] : i32 +! CHECK: %[[ADDI_2:.*]] = arith.addi %[[DIVUI_1]], %[[CONSTANT_3]] overflow : i32 +! CHECK: %[[CMPI_3:.*]] = arith.cmpi slt, %[[SELECT_6]], %[[SELECT_5]] : i32 +! CHECK: %[[SELECT_7:.*]] = arith.select %[[CMPI_3]], %[[CONSTANT_2]], %[[ADDI_2]] : i32 +! CHECK: %[[NEW_CLI_1:.*]] = omp.new_cli +! CHECK: omp.canonical_loop(%[[NEW_CLI_1]]) %[[VAL_1:.*]] : i32 in range(%[[SELECT_7]]) { +! CHECK: %[[MULI_1:.*]] = arith.muli %[[VAL_1]], %[[LOAD_6]] : i32 +! CHECK: %[[ADDI_3:.*]] = arith.addi %[[LOAD_4]], %[[MULI_1]] : i32 +! CHECK: hlfir.assign %[[ADDI_3]] to %[[DECLARE_3]]#0 : i32, !fir.ref +! CHECK: %[[LOAD_7:.*]] = fir.load %[[DECLARE_3]]#0 : !fir.ref +! CHECK: hlfir.assign %[[LOAD_7]] to %[[DECLARE_6]]#0 : i32, !fir.ref +! CHECK: omp.terminator +! CHECK: } +! CHECK: %[[NEW_CLI_2:.*]] = omp.new_cli +! CHECK: omp.fuse (%[[NEW_CLI_2]]) <- (%[[NEW_CLI_0]], %[[NEW_CLI_1]]) +! CHECK: return +! CHECK: } + diff --git a/flang/test/Lower/OpenMP/fuse02.f90 b/flang/test/Lower/OpenMP/fuse02.f90 new file mode 100644 index 0000000000000..5a0f37827c36a --- /dev/null +++ b/flang/test/Lower/OpenMP/fuse02.f90 @@ -0,0 +1,123 @@ +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=60 -o - %s | FileCheck %s + + +subroutine omp_fuse02(lb1, ub1, inc1, lb2, ub2, inc2) + integer res, i, j, k + integer lb1, ub1, inc1 + integer lb2, ub2, inc2 + + !$omp fuse looprange(2,2) + do i = lb1, ub1, inc1 + res = i + end do + do j = lb2, ub2, inc2 + res = j + end do + do k = lb1, ub2, inc1 + res = k + end do + !$omp end fuse + +end subroutine omp_fuse02 + + +! CHECK-LABEL: func.func @_QPomp_fuse02( +! CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "lb1"}, +! CHECK-SAME: %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "ub1"}, +! CHECK-SAME: %[[ARG2:.*]]: !fir.ref {fir.bindc_name = "inc1"}, +! CHECK-SAME: %[[ARG3:.*]]: !fir.ref {fir.bindc_name = "lb2"}, +! CHECK-SAME: %[[ARG4:.*]]: !fir.ref {fir.bindc_name = "ub2"}, +! CHECK-SAME: %[[ARG5:.*]]: !fir.ref {fir.bindc_name = "inc2"}) { +! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[ALLOCA_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_fuse02Ei"} +! CHECK: %[[DECLARE_0:.*]]:2 = hlfir.declare %[[ALLOCA_0]] {uniq_name = "_QFomp_fuse02Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[DECLARE_1:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse02Einc1"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[DECLARE_2:.*]]:2 = hlfir.declare %[[ARG5]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse02Einc2"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[ALLOCA_1:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFomp_fuse02Ej"} +! CHECK: %[[DECLARE_3:.*]]:2 = hlfir.declare %[[ALLOCA_1]] {uniq_name = "_QFomp_fuse02Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[ALLOCA_2:.*]] = fir.alloca i32 {bindc_name = "k", uniq_name = "_QFomp_fuse02Ek"} +! CHECK: %[[DECLARE_4:.*]]:2 = hlfir.declare %[[ALLOCA_2]] {uniq_name = "_QFomp_fuse02Ek"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[DECLARE_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse02Elb1"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[DECLARE_6:.*]]:2 = hlfir.declare %[[ARG3]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse02Elb2"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[ALLOCA_3:.*]] = fir.alloca i32 {bindc_name = "res", uniq_name = "_QFomp_fuse02Eres"} +! CHECK: %[[DECLARE_7:.*]]:2 = hlfir.declare %[[ALLOCA_3]] {uniq_name = "_QFomp_fuse02Eres"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[DECLARE_8:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse02Eub1"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[DECLARE_9:.*]]:2 = hlfir.declare %[[ARG4]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse02Eub2"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[LOAD_0:.*]] = fir.load %[[DECLARE_5]]#0 : !fir.ref +! CHECK: %[[LOAD_1:.*]] = fir.load %[[DECLARE_8]]#0 : !fir.ref +! CHECK: %[[LOAD_2:.*]] = fir.load %[[DECLARE_1]]#0 : !fir.ref +! CHECK: %[[CONSTANT_0:.*]] = arith.constant 0 : i32 +! CHECK: %[[CONSTANT_1:.*]] = arith.constant 1 : i32 +! CHECK: %[[CMPI_0:.*]] = arith.cmpi slt, %[[LOAD_2]], %[[CONSTANT_0]] : i32 +! CHECK: %[[SUBI_0:.*]] = arith.subi %[[CONSTANT_0]], %[[LOAD_2]] : i32 +! CHECK: %[[SELECT_0:.*]] = arith.select %[[CMPI_0]], %[[SUBI_0]], %[[LOAD_2]] : i32 +! CHECK: %[[SELECT_1:.*]] = arith.select %[[CMPI_0]], %[[LOAD_1]], %[[LOAD_0]] : i32 +! CHECK: %[[SELECT_2:.*]] = arith.select %[[CMPI_0]], %[[LOAD_0]], %[[LOAD_1]] : i32 +! CHECK: %[[SUBI_1:.*]] = arith.subi %[[SELECT_2]], %[[SELECT_1]] overflow : i32 +! CHECK: %[[DIVUI_0:.*]] = arith.divui %[[SUBI_1]], %[[SELECT_0]] : i32 +! CHECK: %[[ADDI_0:.*]] = arith.addi %[[DIVUI_0]], %[[CONSTANT_1]] overflow : i32 +! CHECK: %[[CMPI_1:.*]] = arith.cmpi slt, %[[SELECT_2]], %[[SELECT_1]] : i32 +! CHECK: %[[SELECT_3:.*]] = arith.select %[[CMPI_1]], %[[CONSTANT_0]], %[[ADDI_0]] : i32 +! CHECK: %[[NEW_CLI_0:.*]] = omp.new_cli +! CHECK: omp.canonical_loop(%[[NEW_CLI_0]]) %[[VAL_0:.*]] : i32 in range(%[[SELECT_3]]) { +! CHECK: %[[MULI_0:.*]] = arith.muli %[[VAL_0]], %[[LOAD_2]] : i32 +! CHECK: %[[ADDI_1:.*]] = arith.addi %[[LOAD_0]], %[[MULI_0]] : i32 +! CHECK: hlfir.assign %[[ADDI_1]] to %[[DECLARE_0]]#0 : i32, !fir.ref +! CHECK: %[[LOAD_3:.*]] = fir.load %[[DECLARE_0]]#0 : !fir.ref +! CHECK: hlfir.assign %[[LOAD_3]] to %[[DECLARE_7]]#0 : i32, !fir.ref +! CHECK: omp.terminator +! CHECK: } +! CHECK: %[[LOAD_4:.*]] = fir.load %[[DECLARE_6]]#0 : !fir.ref +! CHECK: %[[LOAD_5:.*]] = fir.load %[[DECLARE_9]]#0 : !fir.ref +! CHECK: %[[LOAD_6:.*]] = fir.load %[[DECLARE_2]]#0 : !fir.ref +! CHECK: %[[CONSTANT_2:.*]] = arith.constant 0 : i32 +! CHECK: %[[CONSTANT_3:.*]] = arith.constant 1 : i32 +! CHECK: %[[CMPI_2:.*]] = arith.cmpi slt, %[[LOAD_6]], %[[CONSTANT_2]] : i32 +! CHECK: %[[SUBI_2:.*]] = arith.subi %[[CONSTANT_2]], %[[LOAD_6]] : i32 +! CHECK: %[[SELECT_4:.*]] = arith.select %[[CMPI_2]], %[[SUBI_2]], %[[LOAD_6]] : i32 +! CHECK: %[[SELECT_5:.*]] = arith.select %[[CMPI_2]], %[[LOAD_5]], %[[LOAD_4]] : i32 +! CHECK: %[[SELECT_6:.*]] = arith.select %[[CMPI_2]], %[[LOAD_4]], %[[LOAD_5]] : i32 +! CHECK: %[[SUBI_3:.*]] = arith.subi %[[SELECT_6]], %[[SELECT_5]] overflow : i32 +! CHECK: %[[DIVUI_1:.*]] = arith.divui %[[SUBI_3]], %[[SELECT_4]] : i32 +! CHECK: %[[ADDI_2:.*]] = arith.addi %[[DIVUI_1]], %[[CONSTANT_3]] overflow : i32 +! CHECK: %[[CMPI_3:.*]] = arith.cmpi slt, %[[SELECT_6]], %[[SELECT_5]] : i32 +! CHECK: %[[SELECT_7:.*]] = arith.select %[[CMPI_3]], %[[CONSTANT_2]], %[[ADDI_2]] : i32 +! CHECK: %[[NEW_CLI_1:.*]] = omp.new_cli +! CHECK: omp.canonical_loop(%[[NEW_CLI_1]]) %[[VAL_1:.*]] : i32 in range(%[[SELECT_7]]) { +! CHECK: %[[MULI_1:.*]] = arith.muli %[[VAL_1]], %[[LOAD_6]] : i32 +! CHECK: %[[ADDI_3:.*]] = arith.addi %[[LOAD_4]], %[[MULI_1]] : i32 +! CHECK: hlfir.assign %[[ADDI_3]] to %[[DECLARE_3]]#0 : i32, !fir.ref +! CHECK: %[[LOAD_7:.*]] = fir.load %[[DECLARE_3]]#0 : !fir.ref +! CHECK: hlfir.assign %[[LOAD_7]] to %[[DECLARE_7]]#0 : i32, !fir.ref +! CHECK: omp.terminator +! CHECK: } +! CHECK: %[[LOAD_8:.*]] = fir.load %[[DECLARE_5]]#0 : !fir.ref +! CHECK: %[[LOAD_9:.*]] = fir.load %[[DECLARE_9]]#0 : !fir.ref +! CHECK: %[[LOAD_10:.*]] = fir.load %[[DECLARE_1]]#0 : !fir.ref +! CHECK: %[[CONSTANT_4:.*]] = arith.constant 0 : i32 +! CHECK: %[[CONSTANT_5:.*]] = arith.constant 1 : i32 +! CHECK: %[[CMPI_4:.*]] = arith.cmpi slt, %[[LOAD_10]], %[[CONSTANT_4]] : i32 +! CHECK: %[[SUBI_4:.*]] = arith.subi %[[CONSTANT_4]], %[[LOAD_10]] : i32 +! CHECK: %[[SELECT_8:.*]] = arith.select %[[CMPI_4]], %[[SUBI_4]], %[[LOAD_10]] : i32 +! CHECK: %[[SELECT_9:.*]] = arith.select %[[CMPI_4]], %[[LOAD_9]], %[[LOAD_8]] : i32 +! CHECK: %[[SELECT_10:.*]] = arith.select %[[CMPI_4]], %[[LOAD_8]], %[[LOAD_9]] : i32 +! CHECK: %[[SUBI_5:.*]] = arith.subi %[[SELECT_10]], %[[SELECT_9]] overflow : i32 +! CHECK: %[[DIVUI_2:.*]] = arith.divui %[[SUBI_5]], %[[SELECT_8]] : i32 +! CHECK: %[[ADDI_4:.*]] = arith.addi %[[DIVUI_2]], %[[CONSTANT_5]] overflow : i32 +! CHECK: %[[CMPI_5:.*]] = arith.cmpi slt, %[[SELECT_10]], %[[SELECT_9]] : i32 +! CHECK: %[[SELECT_11:.*]] = arith.select %[[CMPI_5]], %[[CONSTANT_4]], %[[ADDI_4]] : i32 +! CHECK: %[[NEW_CLI_2:.*]] = omp.new_cli +! CHECK: omp.canonical_loop(%[[NEW_CLI_2]]) %[[VAL_2:.*]] : i32 in range(%[[SELECT_11]]) { +! CHECK: %[[MULI_2:.*]] = arith.muli %[[VAL_2]], %[[LOAD_10]] : i32 +! CHECK: %[[ADDI_5:.*]] = arith.addi %[[LOAD_8]], %[[MULI_2]] : i32 +! CHECK: hlfir.assign %[[ADDI_5]] to %[[DECLARE_4]]#0 : i32, !fir.ref +! CHECK: %[[LOAD_11:.*]] = fir.load %[[DECLARE_4]]#0 : !fir.ref +! CHECK: hlfir.assign %[[LOAD_11]] to %[[DECLARE_7]]#0 : i32, !fir.ref +! CHECK: omp.terminator +! CHECK: } +! CHECK: %[[NEW_CLI_3:.*]] = omp.new_cli +! CHECK: %[[NEW_CLI_4:.*]] = omp.new_cli +! CHECK: omp.fuse (%[[NEW_CLI_3]], %[[NEW_CLI_4]]) <- (%[[NEW_CLI_0]], %[[NEW_CLI_1]], %[[NEW_CLI_2]]) {count = 2 : i32, first = 2 : i32} +! CHECK: return +! CHECK: } + diff --git a/flang/test/Parser/OpenMP/fail-looprange.f90 b/flang/test/Parser/OpenMP/fail-looprange.f90 new file mode 100644 index 0000000000000..ebe3480b44f12 --- /dev/null +++ b/flang/test/Parser/OpenMP/fail-looprange.f90 @@ -0,0 +1,11 @@ +! RUN: not %flang_fc1 -fsyntax-only -fopenmp %s 2>&1 | FileCheck %s + +! CHECK: error: expected end of line +!$omp fuse looprange + +! CHECK: error: expected end of line +!$omp fuse looprange(1) + +! CHECK: error: expected end of line +!$omp fuse looprange(1,2,3) +end diff --git a/flang/test/Parser/OpenMP/fuse-looprange.f90 b/flang/test/Parser/OpenMP/fuse-looprange.f90 new file mode 100644 index 0000000000000..75ec15fddd65f --- /dev/null +++ b/flang/test/Parser/OpenMP/fuse-looprange.f90 @@ -0,0 +1,38 @@ +! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=60 %s | FileCheck --ignore-case %s +! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=60 %s | FileCheck --check-prefix="PARSE-TREE" %s + +subroutine openmp_fuse(x) + + integer, intent(inout)::x + +!CHECK: !$omp fuse looprange +!$omp fuse looprange(1,2) +!CHECK: do + do x = 1, 100 + call F1() +!CHECK: end do + end do +!CHECK: do + do x = 1, 100 + call F1() +!CHECK: end do + end do +!CHECK: do + do x = 1, 100 + call F1() +!CHECK: end do + end do +!CHECK: !$omp end fuse +!$omp end fuse + +!PARSE-TREE: OpenMPConstruct -> OpenMPLoopConstruct +!PARSE-TREE: OmpBeginLoopDirective +!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = fuse +!PARSE-TREE: OmpClauseList -> OmpClause -> Looprange -> OmpLoopRangeClause +!PARSE-TREE: Scalar -> Integer -> Constant -> Expr = '1_4' +!PARSE-TREE: LiteralConstant -> IntLiteralConstant = '1' +!PARSE-TREE: Scalar -> Integer -> Constant -> Expr = '2_4' +!PARSE-TREE: LiteralConstant -> IntLiteralConstant = '2' + +END subroutine openmp_fuse + diff --git a/flang/test/Parser/OpenMP/fuse01.f90 b/flang/test/Parser/OpenMP/fuse01.f90 new file mode 100644 index 0000000000000..98ce0e33797b5 --- /dev/null +++ b/flang/test/Parser/OpenMP/fuse01.f90 @@ -0,0 +1,28 @@ +! RUN: %flang_fc1 -fdebug-unparse -fopenmp %s | FileCheck --ignore-case %s +! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp %s | FileCheck --check-prefix="PARSE-TREE" %s + +subroutine openmp_fuse(x) + + integer, intent(inout)::x + +!CHECK: !$omp fuse +!$omp fuse +!CHECK: do + do x = 1, 100 + call F1() +!CHECK: end do + end do +!CHECK: do + do x = 1, 100 + call F1() +!CHECK: end do + end do +!CHECK: !$omp end fuse +!$omp end fuse + +!PARSE-TREE: OpenMPConstruct -> OpenMPLoopConstruct +!PARSE-TREE: OmpBeginLoopDirective +!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = fuse + +END subroutine openmp_fuse + diff --git a/flang/test/Parser/OpenMP/fuse02.f90 b/flang/test/Parser/OpenMP/fuse02.f90 new file mode 100644 index 0000000000000..cc3de48dd658a --- /dev/null +++ b/flang/test/Parser/OpenMP/fuse02.f90 @@ -0,0 +1,97 @@ +! Test the Parse Tree to ensure the OpenMP Loop Transformation Construct Fuse can be constructed on another Fuse + +! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=51 %s | FileCheck %s --check-prefix=CHECK-PARSE +! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=51 %s | FileCheck %s --check-prefix=CHECK-UNPARSE + +subroutine fuse_on_fuse + implicit none + integer :: I = 10 + integer :: j + + !$omp fuse + !$omp fuse + do i = 1, I + continue + end do + do j = 1, I + continue + end do + !$omp end fuse + do j = 1, I + continue + end do + !$omp end fuse +end subroutine + +!CHECK-PARSE: | ExecutionPart -> Block +!CHECK-PARSE-NEXT: | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct +!CHECK-PARSE-NEXT: | | | OmpBeginLoopDirective +!CHECK-PARSE-NEXT: | | | | OmpDirectiveName -> llvm::omp::Directive = fuse +!CHECK-PARSE-NEXT: | | | | OmpClauseList -> +!CHECK-PARSE-NEXT: | | | | Flags = None +!CHECK-PARSE-NEXT: | | | Block +!CHECK-PARSE-NEXT: | | | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct +!CHECK-PARSE-NEXT: | | | | | OmpBeginLoopDirective +!CHECK-PARSE-NEXT: | | | | | | OmpDirectiveName -> llvm::omp::Directive = fuse +!CHECK-PARSE-NEXT: | | | | | | OmpClauseList -> +!CHECK-PARSE-NEXT: | | | | | | Flags = None +!CHECK-PARSE-NEXT: | | | | | Block +!CHECK-PARSE-NEXT: | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct +!CHECK-PARSE-NEXT: | | | | | | | NonLabelDoStmt +!CHECK-PARSE-NEXT: | | | | | | | | LoopControl -> LoopBounds +!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Name = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Expr = '1_4' +!CHECK-PARSE-NEXT: | | | | | | | | | | LiteralConstant -> IntLiteralConstant = '1' +!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Expr = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | | | Designator -> DataRef -> Name = 'i' +!CHECK-PARSE-NEXT: | | | | | | | Block +!CHECK-PARSE-NEXT: | | | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> ContinueStmt +!CHECK-PARSE-NEXT: | | | | | | | EndDoStmt -> +!CHECK-PARSE-NEXT: | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct +!CHECK-PARSE-NEXT: | | | | | | | NonLabelDoStmt +!CHECK-PARSE-NEXT: | | | | | | | | LoopControl -> LoopBounds +!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Name = 'j' +!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Expr = '1_4' +!CHECK-PARSE-NEXT: | | | | | | | | | | LiteralConstant -> IntLiteralConstant = '1' +!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Expr = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | | | Designator -> DataRef -> Name = 'i' +!CHECK-PARSE-NEXT: | | | | | | | Block +!CHECK-PARSE-NEXT: | | | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> ContinueStmt +!CHECK-PARSE-NEXT: | | | | | | | EndDoStmt -> +!CHECK-PARSE-NEXT: | | | | | OmpEndLoopDirective +!CHECK-PARSE-NEXT: | | | | | | OmpDirectiveName -> llvm::omp::Directive = fuse +!CHECK-PARSE-NEXT: | | | | | | OmpClauseList -> +!CHECK-PARSE-NEXT: | | | | | | Flags = None +!CHECK-PARSE-NEXT: | | | | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct +!CHECK-PARSE-NEXT: | | | | | NonLabelDoStmt +!CHECK-PARSE-NEXT: | | | | | | LoopControl -> LoopBounds +!CHECK-PARSE-NEXT: | | | | | | | Scalar -> Name = 'j' +!CHECK-PARSE-NEXT: | | | | | | | Scalar -> Expr = '1_4' +!CHECK-PARSE-NEXT: | | | | | | | | LiteralConstant -> IntLiteralConstant = '1' +!CHECK-PARSE-NEXT: | | | | | | | Scalar -> Expr = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | Designator -> DataRef -> Name = 'i' +!CHECK-PARSE-NEXT: | | | | | Block +!CHECK-PARSE-NEXT: | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> ContinueStmt +!CHECK-PARSE-NEXT: | | | | | EndDoStmt -> +!CHECK-PARSE-NEXT: | | | OmpEndLoopDirective +!CHECK-PARSE-NEXT: | | | | OmpDirectiveName -> llvm::omp::Directive = fuse +!CHECK-PARSE-NEXT: | | | | OmpClauseList -> +!CHECK-PARSE-NEXT: | | | | Flags = None + +!CHECK-UNPARSE: SUBROUTINE fuse_on_fuse +!CHECK-UNPARSE-NEXT: IMPLICIT NONE +!CHECK-UNPARSE-NEXT: INTEGER :: i = 10_4 +!CHECK-UNPARSE-NEXT: INTEGER j +!CHECK-UNPARSE-NEXT: !$OMP FUSE +!CHECK-UNPARSE-NEXT: !$OMP FUSE +!CHECK-UNPARSE-NEXT: DO i=1_4,i +!CHECK-UNPARSE-NEXT: CONTINUE +!CHECK-UNPARSE-NEXT: END DO +!CHECK-UNPARSE-NEXT: DO j=1_4,i +!CHECK-UNPARSE-NEXT: CONTINUE +!CHECK-UNPARSE-NEXT: END DO +!CHECK-UNPARSE-NEXT: !$OMP END FUSE +!CHECK-UNPARSE-NEXT: DO j=1_4,i +!CHECK-UNPARSE-NEXT: CONTINUE +!CHECK-UNPARSE-NEXT: END DO +!CHECK-UNPARSE-NEXT: !$OMP END FUSE diff --git a/flang/test/Parser/OpenMP/loop-transformation-construct04.f90 b/flang/test/Parser/OpenMP/loop-transformation-construct04.f90 new file mode 100644 index 0000000000000..e37e2bbfe155b --- /dev/null +++ b/flang/test/Parser/OpenMP/loop-transformation-construct04.f90 @@ -0,0 +1,80 @@ +! Test the Parse Tree to ensure the OpenMP Loop Transformation Construct Fuse constructs a correct sequence. + +! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=51 %s | FileCheck %s --check-prefix=CHECK-PARSE +! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=51 %s | FileCheck %s --check-prefix=CHECK-UNPARSE + +subroutine loop_transformation_construct + implicit none + integer :: I = 10 + integer :: j + + !$omp do + !$omp fuse + do i = 1, I + continue + end do + do j = 1, I + continue + end do + !$omp end fuse + !$omp end do +end subroutine + +!CHECK-PARSE: | ExecutionPart -> Block +!CHECK-PARSE-NEXT: | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct +!CHECK-PARSE-NEXT: | | | OmpBeginLoopDirective +!CHECK-PARSE-NEXT: | | | | OmpDirectiveName -> llvm::omp::Directive = do +!CHECK-PARSE-NEXT: | | | | OmpClauseList -> +!CHECK-PARSE-NEXT: | | | | Flags = None +!CHECK-PARSE-NEXT: | | | Block +!CHECK-PARSE-NEXT: | | | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct +!CHECK-PARSE-NEXT: | | | | | OmpBeginLoopDirective +!CHECK-PARSE-NEXT: | | | | | | OmpDirectiveName -> llvm::omp::Directive = fuse +!CHECK-PARSE-NEXT: | | | | | | OmpClauseList -> +!CHECK-PARSE-NEXT: | | | | | | Flags = None +!CHECK-PARSE-NEXT: | | | | | Block +!CHECK-PARSE-NEXT: | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct +!CHECK-PARSE-NEXT: | | | | | | | NonLabelDoStmt +!CHECK-PARSE-NEXT: | | | | | | | | LoopControl -> LoopBounds +!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Name = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Expr = '1_4' +!CHECK-PARSE-NEXT: | | | | | | | | | | LiteralConstant -> IntLiteralConstant = '1' +!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Expr = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | | | Designator -> DataRef -> Name = 'i' +!CHECK-PARSE-NEXT: | | | | | | | Block +!CHECK-PARSE-NEXT: | | | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> ContinueStmt +!CHECK-PARSE-NEXT: | | | | | | | EndDoStmt -> +!CHECK-PARSE-NEXT: | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct +!CHECK-PARSE-NEXT: | | | | | | | NonLabelDoStmt +!CHECK-PARSE-NEXT: | | | | | | | | LoopControl -> LoopBounds +!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Name = 'j' +!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Expr = '1_4' +!CHECK-PARSE-NEXT: | | | | | | | | | | LiteralConstant -> IntLiteralConstant = '1' +!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Expr = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | | | Designator -> DataRef -> Name = 'i' +!CHECK-PARSE-NEXT: | | | | | | | Block +!CHECK-PARSE-NEXT: | | | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> ContinueStmt +!CHECK-PARSE-NEXT: | | | | | | | EndDoStmt -> +!CHECK-PARSE-NEXT: | | | | | OmpEndLoopDirective +!CHECK-PARSE-NEXT: | | | | | | OmpDirectiveName -> llvm::omp::Directive = fuse +!CHECK-PARSE-NEXT: | | | | | | OmpClauseList -> +!CHECK-PARSE-NEXT: | | | | | | Flags = None +!CHECK-PARSE-NEXT: | | | OmpEndLoopDirective +!CHECK-PARSE-NEXT: | | | | OmpDirectiveName -> llvm::omp::Directive = do +!CHECK-PARSE-NEXT: | | | | OmpClauseList -> +!CHECK-PARSE-NEXT: | | | | Flags = None + +!CHECK-UNPARSE: SUBROUTINE loop_transformation_construct +!CHECK-UNPARSE-NEXT: IMPLICIT NONE +!CHECK-UNPARSE-NEXT: INTEGER :: i = 10_4 +!CHECK-UNPARSE-NEXT: INTEGER j +!CHECK-UNPARSE-NEXT: !$OMP DO +!CHECK-UNPARSE-NEXT: !$OMP FUSE +!CHECK-UNPARSE-NEXT: DO i=1_4,i +!CHECK-UNPARSE-NEXT: CONTINUE +!CHECK-UNPARSE-NEXT: END DO +!CHECK-UNPARSE-NEXT: DO j=1_4,i +!CHECK-UNPARSE-NEXT: CONTINUE +!CHECK-UNPARSE-NEXT: END DO +!CHECK-UNPARSE-NEXT: !$OMP END FUSE +!CHECK-UNPARSE-NEXT: !$OMP END DO diff --git a/flang/test/Parser/OpenMP/loop-transformation-construct05.f90 b/flang/test/Parser/OpenMP/loop-transformation-construct05.f90 new file mode 100644 index 0000000000000..6d3303841d506 --- /dev/null +++ b/flang/test/Parser/OpenMP/loop-transformation-construct05.f90 @@ -0,0 +1,90 @@ +! Test the Parse Tree to ensure the OpenMP Loop Transformation Construct Fuse constructs a correct sequence +! and can correctly combine with loop nests + +! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=51 %s | FileCheck %s --check-prefix=CHECK-PARSE +! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=51 %s | FileCheck %s --check-prefix=CHECK-UNPARSE + +subroutine loop_transformation_construct + implicit none + integer :: I = 10 + integer :: j + + !$omp do + !$omp fuse + do i = 1, I + continue + end do + !$omp tile sizes(2) + do j = 1, I + continue + end do + !$omp end fuse + !$omp end do +end subroutine + +!CHECK-PARSE: | ExecutionPart -> Block +!CHECK-PARSE-NEXT: | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct +!CHECK-PARSE-NEXT: | | | OmpBeginLoopDirective +!CHECK-PARSE-NEXT: | | | | OmpDirectiveName -> llvm::omp::Directive = do +!CHECK-PARSE-NEXT: | | | | OmpClauseList -> +!CHECK-PARSE-NEXT: | | | | Flags = None +!CHECK-PARSE-NEXT: | | | Block +!CHECK-PARSE-NEXT: | | | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct +!CHECK-PARSE-NEXT: | | | | | OmpBeginLoopDirective +!CHECK-PARSE-NEXT: | | | | | | OmpDirectiveName -> llvm::omp::Directive = fuse +!CHECK-PARSE-NEXT: | | | | | | OmpClauseList -> +!CHECK-PARSE-NEXT: | | | | | | Flags = None +!CHECK-PARSE-NEXT: | | | | | Block +!CHECK-PARSE-NEXT: | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct +!CHECK-PARSE-NEXT: | | | | | | | NonLabelDoStmt +!CHECK-PARSE-NEXT: | | | | | | | | LoopControl -> LoopBounds +!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Name = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Expr = '1_4' +!CHECK-PARSE-NEXT: | | | | | | | | | | LiteralConstant -> IntLiteralConstant = '1' +!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Expr = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | | | Designator -> DataRef -> Name = 'i' +!CHECK-PARSE-NEXT: | | | | | | | Block +!CHECK-PARSE-NEXT: | | | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> ContinueStmt +!CHECK-PARSE-NEXT: | | | | | | | EndDoStmt -> +!CHECK-PARSE-NEXT: | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct +!CHECK-PARSE-NEXT: | | | | | | | OmpBeginLoopDirective +!CHECK-PARSE-NEXT: | | | | | | | | OmpDirectiveName -> llvm::omp::Directive = tile +!CHECK-PARSE-NEXT: | | | | | | | | OmpClauseList -> OmpClause -> Sizes -> Scalar -> Integer -> Expr = '2_4' +!CHECK-PARSE-NEXT: | | | | | | | | | LiteralConstant -> IntLiteralConstant = '2' +!CHECK-PARSE-NEXT: | | | | | | | | Flags = None +!CHECK-PARSE-NEXT: | | | | | | | Block +!CHECK-PARSE-NEXT: | | | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct +!CHECK-PARSE-NEXT: | | | | | | | | | NonLabelDoStmt +!CHECK-PARSE-NEXT: | | | | | | | | | | LoopControl -> LoopBounds +!CHECK-PARSE-NEXT: | | | | | | | | | | | Scalar -> Name = 'j' +!CHECK-PARSE-NEXT: | | | | | | | | | | | Scalar -> Expr = '1_4' +!CHECK-PARSE-NEXT: | | | | | | | | | | | | LiteralConstant -> IntLiteralConstant = '1' +!CHECK-PARSE-NEXT: | | | | | | | | | | | Scalar -> Expr = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | | | | | Designator -> DataRef -> Name = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | | Block +!CHECK-PARSE-NEXT: | | | | | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> ContinueStmt +!CHECK-PARSE-NEXT: | | | | | | | | | EndDoStmt -> +!CHECK-PARSE-NEXT: | | | | | OmpEndLoopDirective +!CHECK-PARSE-NEXT: | | | | | | OmpDirectiveName -> llvm::omp::Directive = fuse +!CHECK-PARSE-NEXT: | | | | | | OmpClauseList -> +!CHECK-PARSE-NEXT: | | | | | | Flags = None +!CHECK-PARSE-NEXT: | | | OmpEndLoopDirective +!CHECK-PARSE-NEXT: | | | | OmpDirectiveName -> llvm::omp::Directive = do +!CHECK-PARSE-NEXT: | | | | OmpClauseList -> +!CHECK-PARSE-NEXT: | | | | Flags = None + +!CHECK-UNPARSE: SUBROUTINE loop_transformation_construct +!CHECK-UNPARSE-NEXT: IMPLICIT NONE +!CHECK-UNPARSE-NEXT: INTEGER :: i = 10_4 +!CHECK-UNPARSE-NEXT: INTEGER j +!CHECK-UNPARSE-NEXT: !$OMP DO +!CHECK-UNPARSE-NEXT: !$OMP FUSE +!CHECK-UNPARSE-NEXT: DO i=1_4,i +!CHECK-UNPARSE-NEXT: CONTINUE +!CHECK-UNPARSE-NEXT: END DO +!CHECK-UNPARSE-NEXT: !$OMP TILE +!CHECK-UNPARSE-NEXT: DO j=1_4,i +!CHECK-UNPARSE-NEXT: CONTINUE +!CHECK-UNPARSE-NEXT: END DO +!CHECK-UNPARSE-NEXT: !$OMP END FUSE +!CHECK-UNPARSE-NEXT: !$OMP END DO diff --git a/flang/test/Semantics/OpenMP/loop-transformation-clauses01.f90 b/flang/test/Semantics/OpenMP/loop-transformation-clauses01.f90 new file mode 100644 index 0000000000000..9ca0e8cfc9af1 --- /dev/null +++ b/flang/test/Semantics/OpenMP/loop-transformation-clauses01.f90 @@ -0,0 +1,66 @@ +! Testing the Semantics of clauses on loop transformation directives + +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=60 + + +subroutine loop_transformation_construct1 + implicit none + integer, parameter:: i = 5 + integer :: x + integer :: a + integer :: v(i) + + !ERROR: At most one LOOPRANGE clause can appear on the FUSE directive + !$omp fuse looprange(1,2) looprange(1,2) + do x = 1, i + v(x) = x * 2 + end do + do x = 1, i + v(x) = x * 2 + end do + !$omp end fuse + + !ERROR: The loop range indicated in the LOOPRANGE(5,2) clause must not be out of the bounds of the Loop Sequence following the construct. + !$omp fuse looprange(5,2) + do x = 1, i + v(x) = x * 2 + end do + do x = 1, i + v(x) = x * 2 + end do + !$omp end fuse + + !ERROR: The parameter of the LOOPRANGE clause must be a constant positive integer expression + !$omp fuse looprange(0,1) + do x = 1, i + v(x) = x * 2 + end do + do x = 1, i + v(x) = x * 2 + end do + !$omp end fuse + + !ERROR: The parameter of the LOOPRANGE clause must be a constant positive integer expression + !$omp fuse looprange(1,-1) + do x = 1, i + v(x) = x * 2 + end do + do x = 1, i + v(x) = x * 2 + end do + !$omp end fuse + + !ERROR: Must be a constant value + !$omp fuse looprange(a,2) + do x = 1, i + v(x) = x * 2 + end do + !$omp end fuse + + !ERROR: Must be a constant value + !$omp fuse looprange(1,a) + do x = 1, i + v(x) = x * 2 + end do + !$omp end fuse +end subroutine diff --git a/flang/test/Semantics/OpenMP/loop-transformation-construct01.f90 b/flang/test/Semantics/OpenMP/loop-transformation-construct01.f90 index f718efc32aabf..927831a06d5fa 100644 --- a/flang/test/Semantics/OpenMP/loop-transformation-construct01.f90 +++ b/flang/test/Semantics/OpenMP/loop-transformation-construct01.f90 @@ -62,7 +62,7 @@ subroutine loop_transformation_construct4 integer :: v(i) !$omp do - !ERROR: If a loop construct has been fully unrolled, it cannot then be tiled + !ERROR: If a loop construct has been fully unrolled, it cannot then be further transformed !$omp tile !$omp unroll full do x = 1, i @@ -77,7 +77,7 @@ subroutine loop_transformation_construct5 integer :: v(i) !$omp do - !ERROR: If a loop construct has been fully unrolled, it cannot then be tiled + !ERROR: If a loop construct has been fully unrolled, it cannot then be further transformed !$omp tile !$omp unroll do x = 1, i diff --git a/flang/test/Semantics/OpenMP/loop-transformation-construct02.f90 b/flang/test/Semantics/OpenMP/loop-transformation-construct02.f90 new file mode 100644 index 0000000000000..d82fc3668198d --- /dev/null +++ b/flang/test/Semantics/OpenMP/loop-transformation-construct02.f90 @@ -0,0 +1,93 @@ +! Testing the Semantics of loop sequences combined with +! nested Loop Transformation Constructs + +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=60 + +subroutine loop_transformation_construct1 + implicit none + + !$omp do + !ERROR: The FUSE construct requires the END FUSE directive + !$omp fuse +end subroutine + +subroutine loop_transformation_construct2 + implicit none + + !$omp do + !ERROR: A DO loop must follow the FUSE directive + !$omp fuse + !$omp end fuse +end subroutine + +subroutine loop_transformation_construct3 + implicit none + integer :: i = 5 + integer :: y + integer :: v(i) + + !$omp do + !$omp fuse + do x = 1, i + v(x) = x(x) * 2 + end do + do x = 1, i + v(x) = x(x) * 2 + end do + !$omp end fuse + !$omp end do + !ERROR: The END FUSE directive must follow the DO loop associated with the loop construct + !$omp end fuse +end subroutine + +subroutine loop_transformation_construct4 + implicit none + integer :: i = 5 + integer :: y + integer :: v(i) + + !$omp do + do x = 1, i + v(x) = x(x) * 2 + end do + !ERROR: A DO loop must follow the FUSE directive + !$omp fuse + !$omp end fuse +end subroutine + +subroutine loop_transformation_construct5 + implicit none + integer :: i = 5 + integer :: y + integer :: v(i) + + !$omp do + !ERROR: If a loop construct has been fully unrolled, it cannot then be further transformed + !$omp fuse + !$omp unroll full + do x = 1, i + v(x) = x(x) * 2 + end do + do x = 1, i + v(x) = x(x) * 2 + end do + !$omp end fuse +end subroutine + +subroutine loop_transformation_construct6 + implicit none + integer :: i = 5 + integer :: y + integer :: v(i) + + !$omp do + !$omp fuse looprange(1,1) + !$omp unroll partial(2) + do x = 1, i + v(x) = x(x) * 2 + end do + do x = 1, i + v(x) = x(x) * 2 + end do + !$omp end fuse +end subroutine diff --git a/flang/test/Semantics/OpenMP/loop-transformation-construct03.f90 b/flang/test/Semantics/OpenMP/loop-transformation-construct03.f90 new file mode 100644 index 0000000000000..5e459c7985523 --- /dev/null +++ b/flang/test/Semantics/OpenMP/loop-transformation-construct03.f90 @@ -0,0 +1,39 @@ +! Testing the Semantic failure of forming loop sequences under regular OpenMP directives + +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=60 + +subroutine loop_transformation_construct1 + implicit none + integer :: i = 5 + integer :: y + integer :: v(i) + + ! Only 1 do loop is associated with the OMP DO directive so the END DO directive is unmatched + !$omp do + do x = 1, i + v(x) = x(x) * 2 + end do + do x = 1, i + v(x) = x(x) * 2 + end do + !ERROR: The END DO directive must follow the DO loop associated with the loop construct + !$omp end do +end subroutine + +subroutine loop_transformation_construct2 + implicit none + integer :: i = 5 + integer :: y + integer :: v(i) + + ! Only 1 do loop is associated with the OMP TILE directive so the END TILE directive is unmatched + !$omp tile sizes(2) + do x = 1, i + v(x) = x(x) * 2 + end do + do x = 1, i + v(x) = x(x) * 2 + end do + !ERROR: The END TILE directive must follow the DO loop associated with the loop construct + !$omp end tile +end subroutine diff --git a/flang/test/Semantics/OpenMP/loop-transformation-construct04.f90 b/flang/test/Semantics/OpenMP/loop-transformation-construct04.f90 new file mode 100644 index 0000000000000..2856247329f3b --- /dev/null +++ b/flang/test/Semantics/OpenMP/loop-transformation-construct04.f90 @@ -0,0 +1,47 @@ +! Testing the Semantic failure of forming loop sequences under regular OpenMP directives + +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=60 + +subroutine loop_transformation_construct3 + implicit none + integer, parameter :: i = 5 + integer :: x + integer :: v(i) + + !ERROR: The loop sequence following the DO construct must be fully fused first. + !$omp do + !$omp fuse looprange(1,2) + do x = 1, i + v(x) = x * 2 + end do + do x = 1, i + v(x) = x * 2 + end do + do x = 1, i + v(x) = x * 2 + end do + !$omp end fuse + !$omp end do +end subroutine + +subroutine loop_transformation_construct4 + implicit none + integer, parameter :: i = 5 + integer :: x + integer :: v(i) + + !ERROR: The loop sequence following the TILE construct must be fully fused first. + !$omp tile sizes(2) + !$omp fuse looprange(1,2) + do x = 1, i + v(x) = x * 2 + end do + do x = 1, i + v(x) = x * 2 + end do + do x = 1, i + v(x) = x * 2 + end do + !$omp end fuse + !$omp end tile +end subroutine diff --git a/flang/test/Semantics/OpenMP/tile02.f90 b/flang/test/Semantics/OpenMP/tile02.f90 index 676796375353f..096a0f349932e 100644 --- a/flang/test/Semantics/OpenMP/tile02.f90 +++ b/flang/test/Semantics/OpenMP/tile02.f90 @@ -6,7 +6,7 @@ subroutine on_unroll implicit none integer i - !ERROR: If a loop construct has been fully unrolled, it cannot then be tiled + !ERROR: If a loop construct has been fully unrolled, it cannot then be further transformed !$omp tile sizes(2) !$omp unroll do i = 1, 5 diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 9f77c24d0b27b..d8f45a4c69059 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -1272,6 +1272,59 @@ class OpenMPIRBuilder { tileLoops(DebugLoc DL, ArrayRef Loops, ArrayRef TileSizes); + /// Fuse a sequence of loops. + /// + /// Fuses the loops of \p Loops. + /// The merging of the loops is done in the following structure: + /// + /// Example: + /// \code + /// for (int i = lb0; i < ub0; i += st0) // trip count is calculated as: + /// body(i) // tc0 = (ub0 - lb0 + st0) / st0 + /// for (int j = lb1; j < ub1; j += st1) + /// body(j); + /// + /// ... + /// + /// for (int k = lbk; j < ubk; j += stk) + /// body(k); + /// \endcode + /// + /// After fusing the loops a single loop is left: + /// \code + /// for (fuse.index = 0; fuse.index < max(tc0, tc1, ... tck); ++fuse.index) { + /// if (fuse.index < tc0){ + /// iv0 = lb0 + st0 * fuse.index; + /// original.index0 = iv0 + /// body(0); + /// } + /// if (fuse.index < tc1){ + /// iv1 = lb1 + st1 * fuse.index; + /// original.index1 = iv1 + /// body(1); + /// } + /// + /// ... + /// + /// if (fuse.index < tck){ + /// ivk = lbk + stk * fuse.index; + /// original.indexk = ivk + /// body(k); + /// } + /// } + /// \endcode + /// + /// + /// @param DL Debug location for instructions added by fusion. + /// + /// @param Loops Loops to fuse. The CanonicalLoopInfo objects are + /// invalidated by this method, i.e. should not used after + /// fusion. + /// + /// \returns A single loop generated by the loop fusion + LLVM_ABI CanonicalLoopInfo *fuseLoops(DebugLoc DL, + ArrayRef Loops); + /// Fully unroll a loop. /// /// Instead of unrolling the loop immediately (and duplicating its body diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index ac86fa859967e..a3cb98456b249 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -5806,6 +5806,117 @@ static void addSimdMetadata(BasicBlock *Block, MDNode *AccessGroup, } } +CanonicalLoopInfo * +OpenMPIRBuilder::fuseLoops(DebugLoc DL, ArrayRef Loops) { + + CanonicalLoopInfo *firstLoop = Loops.front(); + CanonicalLoopInfo *lastLoop = Loops.back(); + Function *F = firstLoop->getPreheader()->getParent(); + + // Loop control blocks that will become orphaned later + SmallVector oldControlBBs; + for (CanonicalLoopInfo *Loop : Loops) + Loop->collectControlBlocks(oldControlBBs); + + // Collect original trip counts + SmallVector origTripCounts; + for (CanonicalLoopInfo *L : Loops) { + assert(L->isValid() && "All input loops must be valid canonical loops"); + origTripCounts.push_back(L->getTripCount()); + } + + Builder.SetCurrentDebugLocation(DL); + + // Compute max trip count. + // The fused loop will be from 0 to max(origTripCounts) + BasicBlock *TCBlock = BasicBlock::Create(F->getContext(), "omp.fuse.comp.tc", + F, firstLoop->getHeader()); + Builder.SetInsertPoint(TCBlock); + Value *fusedTripCount = nullptr; + for (CanonicalLoopInfo *L : Loops) { + assert(L->isValid() && "All loops to fuse must be valid canonical loops"); + Value *origTripCount = L->getTripCount(); + if (!fusedTripCount) { + fusedTripCount = origTripCount; + continue; + } + Value *condTP = Builder.CreateICmpSGT(fusedTripCount, origTripCount); + fusedTripCount = Builder.CreateSelect(condTP, fusedTripCount, origTripCount, + Twine(".omp.fuse.tc")); + } + + // Generate new loop + CanonicalLoopInfo *fused = + createLoopSkeleton(DL, fusedTripCount, F, firstLoop->getBody(), + lastLoop->getLatch(), "fused"); + + // Replace original loops with the fused loop + // Preheader and After are not considered inside the CLI. + // These are used to compute the individual TCs of the loops + // so they have to be put before the resulting fused loop. + // Moving them up for readability. + for (size_t i = 0; i < Loops.size() - 1; ++i) { + Loops[i]->getPreheader()->moveBefore(TCBlock); + Loops[i]->getAfter()->moveBefore(TCBlock); + } + lastLoop->getPreheader()->moveBefore(TCBlock); + + for (size_t i = 0; i < Loops.size() - 1; ++i) { + redirectTo(Loops[i]->getPreheader(), Loops[i]->getAfter(), DL); + redirectTo(Loops[i]->getAfter(), Loops[i + 1]->getPreheader(), DL); + } + redirectTo(lastLoop->getPreheader(), TCBlock, DL); + redirectTo(TCBlock, fused->getPreheader(), DL); + redirectTo(fused->getAfter(), lastLoop->getAfter(), DL); + + // Build the fused body + // Create new Blocks with conditions that jump to the original loop bodies + SmallVector condBBs; + SmallVector condValues; + for (size_t i = 0; i < Loops.size(); ++i) { + BasicBlock *condBlock = BasicBlock::Create( + F->getContext(), "omp.fused.inner.cond", F, Loops[i]->getBody()); + Builder.SetInsertPoint(condBlock); + Value *condValue = + Builder.CreateICmpSLT(fused->getIndVar(), origTripCounts[i]); + condBBs.push_back(condBlock); + condValues.push_back(condValue); + } + // Join the condition blocks with the bodies of the original loops + redirectTo(fused->getBody(), condBBs[0], DL); + for (size_t i = 0; i < Loops.size() - 1; ++i) { + Builder.SetInsertPoint(condBBs[i]); + Builder.CreateCondBr(condValues[i], Loops[i]->getBody(), condBBs[i + 1]); + redirectAllPredecessorsTo(Loops[i]->getLatch(), condBBs[i + 1], DL); + // Replace the IV with the fused IV + Loops[i]->getIndVar()->replaceAllUsesWith(fused->getIndVar()); + } + // Last body jumps to the created end body block + Builder.SetInsertPoint(condBBs.back()); + Builder.CreateCondBr(condValues.back(), lastLoop->getBody(), + fused->getLatch()); + redirectAllPredecessorsTo(lastLoop->getLatch(), fused->getLatch(), DL); + // Replace the IV with the fused IV + lastLoop->getIndVar()->replaceAllUsesWith(fused->getIndVar()); + + // The loop latch must have only one predecessor. Currently it is branched to + // from both the last condition block and the last loop body + fused->getLatch()->splitBasicBlock(fused->getLatch()->begin(), + "omp.fused.pre_latch", /*Before=*/true); + + // Remove unused parts + removeUnusedBlocksFromParent(oldControlBBs); + + // Invalidate old CLIs + for (CanonicalLoopInfo *L : Loops) + L->invalidate(); + +#ifndef NDEBUG + fused->assertOK(); +#endif + return fused; +} + void OpenMPIRBuilder::unrollLoopFull(DebugLoc, CanonicalLoopInfo *Loop) { LLVMContext &Ctx = Builder.getContext(); addLoopMetadata( diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 377f1febf6b8f..2752c2a806847 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -550,6 +550,40 @@ def TileOp : OpenMPTransformBase_Op<"tile", let hasVerifier = 1; } +//===----------------------------------------------------------------------===// +// OpenMP fuse operation +//===----------------------------------------------------------------------===// + +def FuseOp : OpenMPTransformBase_Op<"fuse"> { + let summary = "OpenMP fuse operation"; + let description = [{ + Represents the OpenMP fuse directive introduced in OpenMP 6.0. + + The construct takes a loop sequence and merges the loops specifed by the + first and count attributes and generates a loop sequence with the loops + before the first attribute untouched, the generated fused loop, and the loops + after the the first + count attribute untouched mantaining the orignal + order. If no attributes are specified all the loops in the sequence are + fused generating a single loop. + Each logical iteration of the fused loop executes a logical iteration of + each affected loop. The fused loop has the number of logical iterations + equal to the affected loop with most logical iterations. + + The first and count attributes are constant and known beforehand. + }]#clausesDescription; + + let extraClassDeclaration = [{ + IntegerAttr getFirst() { + return this->getOperation()->getAttrOfType("first"); + } + IntegerAttr getCount() { + return this->getOperation()->getAttrOfType("count"); + } + }]#clausesExtraClassDeclaration; + + let hasVerifier = 1; +} + //===----------------------------------------------------------------------===// // 2.8.3 Workshare Construct //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 1b069c62a8be9..8373a18df281a 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -3429,6 +3429,20 @@ void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) { .Case([&](UnrollHeuristicOp op) -> std::string { llvm_unreachable("heuristic unrolling does not generate a loop"); }) + .Case([&](FuseOp op) -> std::string { + unsigned int first = 0; + unsigned int count = 0; + if (op.getFirst() && op.getCount()) { + first = op.getFirst().getInt(); + count = op.getCount().getInt(); + } + unsigned opnum = generator->getOperandNumber(); + if ((first != 0 && opnum <= first - 1) || + (count != 0 && opnum >= first + 1)) + return "canonloop_fuse"; + else + return "fused"; + }) .Case([&](TileOp op) -> std::string { auto [generateesFirst, generateesCount] = op.getGenerateesODSOperandIndexAndLength(); @@ -3804,6 +3818,60 @@ std::pair TileOp::getGenerateesODSOperandIndexAndLength() { return getODSOperandIndexAndLength(odsIndex_generatees); } +//===----------------------------------------------------------------------===// +// FuseOp +//===----------------------------------------------------------------------===// + +static void printLoopTransformClis(OpAsmPrinter &p, FuseOp op, + OperandRange generatees, + OperandRange applyees) { + if (!generatees.empty()) + p << '(' << llvm::interleaved(generatees) << ')'; + + if (!applyees.empty()) + p << " <- (" << llvm::interleaved(applyees) << ')'; +} + +LogicalResult FuseOp::verify() { + if (getApplyees().size() < 2) + return emitOpError() << "must apply to at least two loops"; + + if (getFirst() && getCount()) { + unsigned int first = getFirst().getInt(); + unsigned int count = getCount().getInt(); + if (first + count - 1 > getApplyees().size()) + return emitOpError() << "the numbers of applyees must be at least first " + "minus one plus count attributes"; + if (!getGeneratees().empty() && + getGeneratees().size() != getApplyees().size() + 1 - count) + return emitOpError() << "the number of generatees must be the number of " + "aplyees plus one minus count"; + + } else { + if (!getGeneratees().empty() && getGeneratees().size() != 1) + return emitOpError() + << "in a complete fuse the number of generatees must be exactly 1"; + } + for (auto &&applyee : getApplyees()) { + auto [create, gen, cons] = decodeCli(applyee); + + if (!gen) + return emitOpError() << "applyee CLI has no generator"; + auto loop = dyn_cast_or_null(gen->getOwner()); + if (!loop) + return emitOpError() + << "currently only supports omp.canonical_loop as applyee"; + } + return success(); +} +std::pair FuseOp ::getApplyeesODSOperandIndexAndLength() { + return getODSOperandIndexAndLength(odsIndex_applyees); +} + +std::pair FuseOp::getGenerateesODSOperandIndexAndLength() { + return getODSOperandIndexAndLength(odsIndex_generatees); +} + //===----------------------------------------------------------------------===// // Critical construct (2.17.1) //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 8edec990eaaba..e6880ce33b061 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3207,6 +3207,57 @@ static LogicalResult applyTile(omp::TileOp op, llvm::IRBuilderBase &builder, return success(); } +/// Apply a `#pragma omp fuse` / `!$omp fuse` transformation using the +/// OpenMPIRBuilder. +static LogicalResult applyFuse(omp::FuseOp op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + llvm::OpenMPIRBuilder::LocationDescription loc(builder); + + unsigned int first = 0; + unsigned int count = 0; + if (op.getFirst() && op.getCount()) { + first = op.getFirst().getInt(); + count = op.getCount().getInt(); + } + + // Select what CLIs are going to be fused + SmallVector beforeFuse, toFuse, afterFuse; + for (size_t i = 0; i < op.getApplyees().size(); i++) { + Value applyee = op.getApplyees()[i]; + llvm::CanonicalLoopInfo *consBuilderCLI = + moduleTranslation.lookupOMPLoop(applyee); + assert(applyee && "Canonical loop must already been translated"); + if (first != 0 && i < first - 1) + beforeFuse.push_back(consBuilderCLI); + else if (count != 0 && i >= first + count - 1) + afterFuse.push_back(consBuilderCLI); + else + toFuse.push_back(consBuilderCLI); + } + assert( + (op.getGeneratees().empty() || + beforeFuse.size() + afterFuse.size() + 1 == op.getGeneratees().size()) && + "Wrong number of generatees"); + + // do the fuse + auto generatedLoop = ompBuilder->fuseLoops(loc.DL, toFuse); + if (!op.getGeneratees().empty()) { + size_t i = 0; + for (; i < beforeFuse.size(); i++) + moduleTranslation.mapOmpLoop(op.getGeneratees()[i], beforeFuse[i]); + moduleTranslation.mapOmpLoop(op.getGeneratees()[i++], generatedLoop); + for (; i < afterFuse.size(); i++) + moduleTranslation.mapOmpLoop(op.getGeneratees()[i], afterFuse[i]); + } + + // CLIs can only be consumed once + for (Value applyee : op.getApplyees()) + moduleTranslation.invalidateOmpLoop(applyee); + + return success(); +} + /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering. static llvm::AtomicOrdering convertAtomicOrdering(std::optional ao) { @@ -6288,6 +6339,9 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder, .Case([&](omp::TileOp op) { return applyTile(op, builder, moduleTranslation); }) + .Case([&](omp::FuseOp op) { + return applyFuse(op, builder, moduleTranslation); + }) .Case([&](omp::TargetAllocMemOp) { return convertTargetAllocMemOp(*op, builder, moduleTranslation); }) diff --git a/mlir/test/Dialect/OpenMP/cli-fuse.mlir b/mlir/test/Dialect/OpenMP/cli-fuse.mlir new file mode 100644 index 0000000000000..284b8c914ae1f --- /dev/null +++ b/mlir/test/Dialect/OpenMP/cli-fuse.mlir @@ -0,0 +1,114 @@ +// RUN: mlir-opt %s | FileCheck %s --enable-var-scope +// RUN: mlir-opt %s | mlir-opt | FileCheck %s --enable-var-scope + + +// Raw syntax check (MLIR output is always pretty-printed) +// CHECK-LABEL: @omp_fuse_raw( +// CHECK-SAME: %[[tc1:.+]]: i32, %[[tc2:.+]]: i32) { +func.func @omp_fuse_raw(%tc1 : i32, %tc2 : i32) -> () { + // CHECK-NEXT: %canonloop_s0 = omp.new_cli + %canonloop_s0 = "omp.new_cli" () : () -> (!omp.cli) + // CHECK-NEXT: %canonloop_s1 = omp.new_cli + %canonloop_s1 = "omp.new_cli" () : () -> (!omp.cli) + // CHECK-NEXT: %fused = omp.new_cli + %fused = "omp.new_cli" () : () -> (!omp.cli) + // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc1]]) { + "omp.canonical_loop" (%tc1, %canonloop_s0) ({ + ^bb0(%iv_s0: i32): + // CHECK: omp.terminator + omp.terminator + }) : (i32, !omp.cli) -> () + // CHECK: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc2]]) { + "omp.canonical_loop" (%tc2, %canonloop_s1) ({ + ^bb0(%iv_s1: i32): + // CHECK: omp.terminator + omp.terminator + }) : (i32, !omp.cli) -> () + // CHECK: omp.fuse (%fused) <- (%canonloop_s0, %canonloop_s1) + "omp.fuse"(%fused, %canonloop_s0, %canonloop_s1) <{operandSegmentSizes = array}> : (!omp.cli, !omp.cli, !omp.cli) -> () + return +} + +// Pretty syntax check +// CHECK-LABEL: @omp_fuse_pretty( +// CHECK-SAME: %[[tc1:.+]]: i32, %[[tc2:.+]]: i32) { +func.func @omp_fuse_pretty(%tc1 : i32, %tc2 : i32) -> () { + // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli + %canonloop_s0 = omp.new_cli + // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli + %canonloop_s1 = omp.new_cli + // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli + %fused = omp.new_cli + // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc1]]) { + omp.canonical_loop (%canonloop_s0) %iv_s0 : i32 in range(%tc1) { + // CHECK: omp.terminator + omp.terminator + } + // CHECK: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc2]]) { + omp.canonical_loop (%canonloop_s1) %iv_s1 : i32 in range(%tc2) { + // CHECK: omp.terminator + omp.terminator + } + // CHECK: omp.fuse (%fused) <- (%canonloop_s0, %canonloop_s1) + omp.fuse(%fused) <- (%canonloop_s0, %canonloop_s1) + return +} + +// Specifying the generatees for omp.fuse is optional +// CHECK-LABEL: @omp_fuse_optionalgen_pretty( +// CHECK-SAME: %[[tc1:.+]]: i32, %[[tc2:.+]]: i32) { +func.func @omp_fuse_optionalgen_pretty(%tc1 : i32, %tc2 : i32) -> () { + // CHECK-NEXT: %canonloop_s0 = omp.new_cli + %canonloop_s0 = omp.new_cli + // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc1]]) { + omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%tc1) { + // CHECK: omp.terminator + omp.terminator + } + // CHECK: %canonloop_s1 = omp.new_cli + %canonloop_s1 = omp.new_cli + // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc2]]) { + omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%tc2) { + // CHECK: omp.terminator + omp.terminator + } + // CHECK: omp.fuse <- (%canonloop_s0, %canonloop_s1) + omp.fuse <- (%canonloop_s0, %canonloop_s1) + return +} + +// Fuse with looprange attributes +// CHECK-LABEL: @omp_fuse_looprange( +// CHECK-SAME: %[[tc1:.+]]: i32, %[[tc2:.+]]: i32, %[[tc3:.+]]: i32) { +func.func @omp_fuse_looprange(%tc1 : i32, %tc2 : i32, %tc3 : i32) -> () { + // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli + %canonloop_s0 = omp.new_cli + // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli + %canonloop_s1 = omp.new_cli + // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli + %canonloop_s2 = omp.new_cli + // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli + %canonloop_fuse = omp.new_cli + // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli + %fused = omp.new_cli + // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc1]]) { + omp.canonical_loop (%canonloop_s0) %iv_s0 : i32 in range(%tc1) { + // CHECK: omp.terminator + omp.terminator + } + // CHECK: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc2]]) { + omp.canonical_loop (%canonloop_s1) %iv_s1 : i32 in range(%tc2) { + // CHECK: omp.terminator + omp.terminator + } + // CHECK: omp.canonical_loop(%canonloop_s2) %iv_s2 : i32 in range(%[[tc3]]) { + omp.canonical_loop (%canonloop_s2) %iv_s2 : i32 in range(%tc3) { + // CHECK: omp.terminator + omp.terminator + } + // CHECK: omp.fuse (%canonloop_fuse, %fused) <- (%canonloop_s0, + // %canonloop_s1, %canonloop_s2) {count = 2 : i32, first = 1 : i32} + omp.fuse(%fused, %canonloop_fuse) <- (%canonloop_s0, %canonloop_s1, %canonloop_s2) {count = 2 : i32, first = 1 : i32} + return +} + diff --git a/mlir/test/Dialect/OpenMP/invalid-fuse.mlir b/mlir/test/Dialect/OpenMP/invalid-fuse.mlir new file mode 100644 index 0000000000000..d763ffcea71a2 --- /dev/null +++ b/mlir/test/Dialect/OpenMP/invalid-fuse.mlir @@ -0,0 +1,100 @@ +// RUN: mlir-opt -split-input-file -verify-diagnostics %s + + +func.func @no_loops(%tc1 : i32, %tc2 : i32) { + // expected-error@+1 {{'omp.fuse' op must apply to at least two loops}} + omp.fuse <-() + + return +} + +// ----- + +func.func @one_loop(%tc1 : i32, %tc2 : i32) { + %canonloop = omp.new_cli + omp.canonical_loop(%canonloop) %iv : i32 in range(%tc1) { + omp.terminator + } + // expected-error@+1 {{'omp.fuse' op must apply to at least two loops}} + omp.fuse <-(%canonloop) + + return +} + +// ----- + +func.func @missing_generator(%tc1 : i32, %tc2 : i32) { + // expected-error@+1 {{'omp.new_cli' op CLI has no generator}} + %canonloop = omp.new_cli + + // expected-note@+1 {{see consumer here: "omp.fuse"(%0) <{operandSegmentSizes = array}> : (!omp.cli) -> ()}} + omp.fuse <-(%canonloop) + + return +} + +// ----- + +func.func @wrong_generatees1(%tc1 : i32, %tc2 : i32) { + %canonloop1 = omp.new_cli + %canonloop2 = omp.new_cli + omp.canonical_loop(%canonloop1) %iv : i32 in range(%tc1) { + omp.terminator + } + omp.canonical_loop(%canonloop2) %iv : i32 in range(%tc2) { + omp.terminator + } + + %fused1 = omp.new_cli + %fused2 = omp.new_cli + // expected-error@+1 {{'omp.fuse' op in a complete fuse the number of generatees must be exactly 1}} + omp.fuse (%fused1, %fused2) <-(%canonloop1, %canonloop2) + + llvm.return +} + +// ----- + +func.func @wrong_generatees2(%tc1 : i32, %tc2 : i32, %tc3 : i32) { + %canonloop1 = omp.new_cli + %canonloop2 = omp.new_cli + %canonloop3 = omp.new_cli + omp.canonical_loop(%canonloop1) %iv : i32 in range(%tc1) { + omp.terminator + } + omp.canonical_loop(%canonloop2) %iv : i32 in range(%tc2) { + omp.terminator + } + omp.canonical_loop(%canonloop3) %iv : i32 in range(%tc3) { + omp.terminator + } + + %fused = omp.new_cli + // expected-error@+1 {{'omp.fuse' op the number of generatees must be the number of aplyees plus one minus count}} + omp.fuse (%fused) <-(%canonloop1, %canonloop2, %canonloop3) {first = 1 : i32, count = 2 : i32} + + llvm.return +} + +func.func @wrong_applyees(%tc1 : i32, %tc2 : i32, %tc3 : i32) { + %canonloop1 = omp.new_cli + %canonloop2 = omp.new_cli + %canonloop3 = omp.new_cli + omp.canonical_loop(%canonloop1) %iv : i32 in range(%tc1) { + omp.terminator + } + omp.canonical_loop(%canonloop2) %iv : i32 in range(%tc2) { + omp.terminator + } + omp.canonical_loop(%canonloop3) %iv : i32 in range(%tc3) { + omp.terminator + } + + %fused = omp.new_cli + %canonloop_fuse = omp.new_cli + // expected-error@+1 {{'omp.fuse' op the numbers of applyees must be at least first minus one plus count attributes}} + omp.fuse (%fused, %canonloop_fuse) <-(%canonloop1, %canonloop2, %canonloop3) {first = 1 : i32, count = 5 : i32} + + llvm.return +} + diff --git a/mlir/test/Target/LLVMIR/openmp-cli-fuse01.mlir b/mlir/test/Target/LLVMIR/openmp-cli-fuse01.mlir new file mode 100644 index 0000000000000..0754572b24771 --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-cli-fuse01.mlir @@ -0,0 +1,100 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s --enable-var-scope + + +llvm.func @fuse_trivial_loops(%baseptr: !llvm.ptr, %tc1: i32, %tc2: i32) -> () { + %literal_cli1 = omp.new_cli + omp.canonical_loop(%literal_cli1) %iv1 : i32 in range(%tc1) { + %ptr = llvm.getelementptr inbounds %baseptr[%iv1] : (!llvm.ptr, i32) -> !llvm.ptr, f32 + %val = llvm.mlir.constant(42.0 : f32) : f32 + llvm.store %val, %ptr : f32, !llvm.ptr + omp.terminator + } + %literal_cli2 = omp.new_cli + omp.canonical_loop(%literal_cli2) %iv2 : i32 in range(%tc2) { + %ptr = llvm.getelementptr inbounds %baseptr[%iv2] : (!llvm.ptr, i32) -> !llvm.ptr, f32 + %val = llvm.mlir.constant(21.0 : f32) : f32 + llvm.store %val, %ptr : f32, !llvm.ptr + omp.terminator + } + omp.fuse <- (%literal_cli1, %literal_cli2) + llvm.return +} + +// CHECK-LABEL: define void @fuse_trivial_loops( +// CHECK-SAME: ptr %[[VAL_11:.+]], i32 %[[VAL_5:.+]], i32 %[[VAL_16:.+]]) { +// CHECK-NEXT: br label %[[OMP_OMP_LOOP_PREHEADER:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_OMP_LOOP_PREHEADER]]: +// CHECK-NEXT: br label %[[OMP_OMP_LOOP_AFTER:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_OMP_LOOP_AFTER]]: +// CHECK-NEXT: br label %[[OMP_OMP_LOOP_PREHEADER1:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_OMP_LOOP_PREHEADER1]]: +// CHECK-NEXT: br label %[[OMP_FUSE_COMP_TC:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_FUSE_COMP_TC]]: +// CHECK-NEXT: %[[VAL_15:.+]] = icmp sgt i32 %[[VAL_5:.+]], %[[VAL_16:.+]] +// CHECK-NEXT: %[[VAL_17:.+]] = select i1 %[[VAL_15:.+]], i32 %[[VAL_5:.+]], i32 %[[VAL_16:.+]] +// CHECK-NEXT: br label %[[OMP_FUSED_PREHEADER:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_FUSED_PREHEADER]]: +// CHECK-NEXT: br label %[[OMP_FUSED_HEADER:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_FUSED_HEADER]]: +// CHECK-NEXT: %[[VAL_4:.+]] = phi i32 [ 0, %[[VAL_18:.+]] ], [ %[[VAL_27:.+]], %[[VAL_26:.+]] ] +// CHECK-NEXT: br label %[[OMP_FUSED_COND:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_FUSED_COND]]: +// CHECK-NEXT: %[[VAL_29:.+]] = icmp ult i32 %[[VAL_4:.+]], %[[VAL_17:.+]] +// CHECK-NEXT: br i1 %[[VAL_29:.+]], label %[[OMP_FUSED_BODY:.+]], label %[[OMP_FUSED_EXIT:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_FUSED_BODY]]: +// CHECK-NEXT: br label %[[OMP_FUSED_INNER_COND:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_FUSED_INNER_COND]]: +// CHECK-NEXT: %[[VAL_3:.+]] = icmp slt i32 %[[VAL_4:.+]], %[[VAL_5:.+]] +// CHECK-NEXT: br i1 %[[VAL_3:.+]], label %[[OMP_OMP_LOOP_BODY:.+]], label %[[OMP_FUSED_INNER_COND13:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_OMP_LOOP_BODY]]: +// CHECK-NEXT: br label %[[OMP_LOOP_REGION:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_REGION]]: +// CHECK-NEXT: %[[VAL_10:.+]] = getelementptr inbounds float, ptr %[[VAL_11:.+]], i32 %[[VAL_4:.+]] +// CHECK-NEXT: store float 4.200000e+01, ptr %[[VAL_10:.+]], align 4 +// CHECK-NEXT: br label %[[OMP_REGION_CONT:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_REGION_CONT]]: +// CHECK-NEXT: br label %[[OMP_FUSED_INNER_COND13:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_FUSED_INNER_COND13]]: +// CHECK-NEXT: %[[VAL_19:.+]] = icmp slt i32 %[[VAL_4:.+]], %[[VAL_16:.+]] +// CHECK-NEXT: br i1 %[[VAL_19:.+]], label %[[OMP_OMP_LOOP_BODY4:.+]], label %[[OMP_FUSED_PRE_LATCH:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_OMP_LOOP_BODY4]]: +// CHECK-NEXT: br label %[[OMP_LOOP_REGION12:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_REGION12]]: +// CHECK-NEXT: %[[VAL_23:.+]] = getelementptr inbounds float, ptr %[[VAL_11:.+]], i32 %[[VAL_4:.+]] +// CHECK-NEXT: store float 2.100000e+01, ptr %[[VAL_23:.+]], align 4 +// CHECK-NEXT: br label %[[OMP_REGION_CONT11:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_REGION_CONT11]]: +// CHECK-NEXT: br label %[[OMP_FUSED_PRE_LATCH:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_FUSED_PRE_LATCH]]: +// CHECK-NEXT: br label %[[OMP_FUSED_INC:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_FUSED_INC]]: +// CHECK-NEXT: %[[VAL_27:.+]] = add nuw i32 %[[VAL_4:.+]], 1 +// CHECK-NEXT: br label %[[OMP_FUSED_HEADER:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_FUSED_EXIT]]: +// CHECK-NEXT: br label %[[OMP_FUSED_AFTER:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_FUSED_AFTER]]: +// CHECK-NEXT: br label %[[OMP_OMP_LOOP_AFTER7:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_OMP_LOOP_AFTER7]]: +// CHECK-NEXT: ret void + diff --git a/mlir/test/Target/LLVMIR/openmp-cli-fuse02.mlir b/mlir/test/Target/LLVMIR/openmp-cli-fuse02.mlir new file mode 100644 index 0000000000000..0032bd86501d0 --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-cli-fuse02.mlir @@ -0,0 +1,140 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s --enable-var-scope + + +llvm.func @fuse_looprange_loops(%baseptr: !llvm.ptr, %tc1: i32, %tc2: i32, %tc3: i32) -> () { + %literal_cli1 = omp.new_cli + omp.canonical_loop(%literal_cli1) %iv1 : i32 in range(%tc1) { + %ptr = llvm.getelementptr inbounds %baseptr[%iv1] : (!llvm.ptr, i32) -> !llvm.ptr, f32 + %val = llvm.mlir.constant(42.0 : f32) : f32 + llvm.store %val, %ptr : f32, !llvm.ptr + omp.terminator + } + %literal_cli2 = omp.new_cli + omp.canonical_loop(%literal_cli2) %iv2 : i32 in range(%tc2) { + %ptr = llvm.getelementptr inbounds %baseptr[%iv2] : (!llvm.ptr, i32) -> !llvm.ptr, f32 + %val = llvm.mlir.constant(21.0 : f32) : f32 + llvm.store %val, %ptr : f32, !llvm.ptr + omp.terminator + } + %literal_cli3 = omp.new_cli + omp.canonical_loop(%literal_cli3) %iv3 : i32 in range(%tc3) { + %ptr = llvm.getelementptr inbounds %baseptr[%iv3] : (!llvm.ptr, i32) -> !llvm.ptr, f32 + %val = llvm.mlir.constant(63.0 : f32) : f32 + llvm.store %val, %ptr : f32, !llvm.ptr + omp.terminator + } + omp.fuse <- (%literal_cli1, %literal_cli2, %literal_cli3) {first = 1 : i32, count = 2 : i32} + llvm.return +} + + +// CHECK-LABEL: define void @fuse_looprange_loops( +// CHECK-SAME: ptr %[[VAL_23:.+]], i32 %[[VAL_5:.+]], i32 %[[VAL_6:.+]], i32 %[[VAL_40:.+]]) { +// CHECK-NEXT: br label %[[OMP_OMP_LOOP_PREHEADER:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_OMP_LOOP_PREHEADER]]: +// CHECK-NEXT: br label %[[OMP_OMP_LOOP_AFTER:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_OMP_LOOP_AFTER]]: +// CHECK-NEXT: br label %[[OMP_OMP_LOOP_PREHEADER1:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_OMP_LOOP_PREHEADER1]]: +// CHECK-NEXT: br label %[[OMP_FUSE_COMP_TC:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_FUSE_COMP_TC]]: +// CHECK-NEXT: %[[VAL_4:.+]] = icmp sgt i32 %[[VAL_5:.+]], %[[VAL_6:.+]] +// CHECK-NEXT: %[[VAL_7:.+]] = select i1 %[[VAL_4:.+]], i32 %[[VAL_5:.+]], i32 %[[VAL_6:.+]] +// CHECK-NEXT: br label %[[OMP_FUSED_PREHEADER:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_FUSED_PREHEADER]]: +// CHECK-NEXT: br label %[[OMP_FUSED_HEADER:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_FUSED_HEADER]]: +// CHECK-NEXT: %[[VAL_11:.+]] = phi i32 [ 0, %[[VAL_8:.+]] ], [ %[[VAL_12:.+]], %[[VAL_10:.+]] ] +// CHECK-NEXT: br label %[[OMP_FUSED_COND:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_FUSED_COND]]: +// CHECK-NEXT: %[[VAL_14:.+]] = icmp ult i32 %[[VAL_11:.+]], %[[VAL_7:.+]] +// CHECK-NEXT: br i1 %[[VAL_14:.+]], label %[[OMP_FUSED_BODY:.+]], label %[[OMP_FUSED_EXIT:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_FUSED_BODY]]: +// CHECK-NEXT: br label %[[OMP_FUSED_INNER_COND:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_FUSED_INNER_COND]]: +// CHECK-NEXT: %[[VAL_18:.+]] = icmp slt i32 %[[VAL_11:.+]], %[[VAL_5:.+]] +// CHECK-NEXT: br i1 %[[VAL_18:.+]], label %[[OMP_OMP_LOOP_BODY:.+]], label %[[OMP_FUSED_INNER_COND25:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_OMP_LOOP_BODY]]: +// CHECK-NEXT: br label %[[OMP_LOOP_REGION:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_REGION]]: +// CHECK-NEXT: %[[VAL_22:.+]] = getelementptr inbounds float, ptr %[[VAL_23:.+]], i32 %[[VAL_11:.+]] +// CHECK-NEXT: store float 4.200000e+01, ptr %[[VAL_22:.+]], align 4 +// CHECK-NEXT: br label %[[OMP_REGION_CONT:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_REGION_CONT]]: +// CHECK-NEXT: br label %[[OMP_FUSED_INNER_COND25:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_FUSED_INNER_COND25]]: +// CHECK-NEXT: %[[VAL_25:.+]] = icmp slt i32 %[[VAL_11:.+]], %[[VAL_6:.+]] +// CHECK-NEXT: br i1 %[[VAL_25:.+]], label %[[OMP_OMP_LOOP_BODY4:.+]], label %[[OMP_FUSED_PRE_LATCH:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_OMP_LOOP_BODY4]]: +// CHECK-NEXT: br label %[[OMP_LOOP_REGION12:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_REGION12]]: +// CHECK-NEXT: %[[VAL_29:.+]] = getelementptr inbounds float, ptr %[[VAL_23:.+]], i32 %[[VAL_11:.+]] +// CHECK-NEXT: store float 2.100000e+01, ptr %[[VAL_29:.+]], align 4 +// CHECK-NEXT: br label %[[OMP_REGION_CONT11:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_REGION_CONT11]]: +// CHECK-NEXT: br label %[[OMP_FUSED_PRE_LATCH:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_FUSED_PRE_LATCH]]: +// CHECK-NEXT: br label %[[OMP_FUSED_INC:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_FUSED_INC]]: +// CHECK-NEXT: %[[VAL_12:.+]] = add nuw i32 %[[VAL_11:.+]], 1 +// CHECK-NEXT: br label %[[OMP_FUSED_HEADER:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_FUSED_EXIT]]: +// CHECK-NEXT: br label %[[OMP_FUSED_AFTER:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_FUSED_AFTER]]: +// CHECK-NEXT: br label %[[OMP_OMP_LOOP_AFTER7:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_OMP_LOOP_AFTER7]]: +// CHECK-NEXT: br label %[[OMP_OMP_LOOP_PREHEADER13:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_OMP_LOOP_PREHEADER13]]: +// CHECK-NEXT: br label %[[OMP_OMP_LOOP_HEADER14:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_OMP_LOOP_HEADER14]]: +// CHECK-NEXT: %[[VAL_36:.+]] = phi i32 [ 0, %[[VAL_33:.+]] ], [ %[[VAL_37:.+]], %[[VAL_35:.+]] ] +// CHECK-NEXT: br label %[[OMP_OMP_LOOP_COND15:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_OMP_LOOP_COND15]]: +// CHECK-NEXT: %[[VAL_39:.+]] = icmp ult i32 %[[VAL_36:.+]], %[[VAL_40:.+]] +// CHECK-NEXT: br i1 %[[VAL_39:.+]], label %[[OMP_OMP_LOOP_BODY16:.+]], label %[[OMP_OMP_LOOP_EXIT18:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_OMP_LOOP_BODY16]]: +// CHECK-NEXT: br label %[[OMP_LOOP_REGION24:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_REGION24]]: +// CHECK-NEXT: %[[VAL_44:.+]] = getelementptr inbounds float, ptr %[[VAL_23:.+]], i32 %[[VAL_36:.+]] +// CHECK-NEXT: store float 6.300000e+01, ptr %[[VAL_44:.+]], align 4 +// CHECK-NEXT: br label %[[OMP_REGION_CONT23:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_REGION_CONT23]]: +// CHECK-NEXT: br label %[[OMP_OMP_LOOP_INC17:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_OMP_LOOP_INC17]]: +// CHECK-NEXT: %[[VAL_37:.+]] = add nuw i32 %[[VAL_36:.+]], 1 +// CHECK-NEXT: br label %[[OMP_OMP_LOOP_HEADER14:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_OMP_LOOP_EXIT18]]: +// CHECK-NEXT: br label %[[OMP_OMP_LOOP_AFTER19:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_OMP_LOOP_AFTER19]]: +// CHECK-NEXT: ret void + diff --git a/openmp/runtime/test/transform/fuse/do-looprange.f90 b/openmp/runtime/test/transform/fuse/do-looprange.f90 new file mode 100644 index 0000000000000..8c62b24c4744f --- /dev/null +++ b/openmp/runtime/test/transform/fuse/do-looprange.f90 @@ -0,0 +1,60 @@ +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program fuse_full + implicit none + integer i, j, k, u + + print *, 'do' + + !$OMP FUSE LOOPRANGE(2,2) + do i=5, 25, 5 + print '("i=", I0)', i + end do + do j=10, 100, 10 + print '("j=", I0)', j + end do + do k=10, 0, -1 + print '("k=", I0)', k + end do + do u=5, 25, 5 + print '("u=", I0)', u + end do + !$OMP END FUSE + + print *, 'done' +end program + +! CHECK: do +! CHECK-NEXT: i=5 +! CHECK-NEXT: i=10 +! CHECK-NEXT: i=15 +! CHECK-NEXT: i=20 +! CHECK-NEXT: i=25 +! CHECK-NEXT: j=10 +! CHECK-NEXT: k=10 +! CHECK-NEXT: j=20 +! CHECK-NEXT: k=9 +! CHECK-NEXT: j=30 +! CHECK-NEXT: k=8 +! CHECK-NEXT: j=40 +! CHECK-NEXT: k=7 +! CHECK-NEXT: j=50 +! CHECK-NEXT: k=6 +! CHECK-NEXT: j=60 +! CHECK-NEXT: k=5 +! CHECK-NEXT: j=70 +! CHECK-NEXT: k=4 +! CHECK-NEXT: j=80 +! CHECK-NEXT: k=3 +! CHECK-NEXT: j=90 +! CHECK-NEXT: k=2 +! CHECK-NEXT: j=100 +! CHECK-NEXT: k=1 +! CHECK-NEXT: k=0 +! CHECK-NEXT: u=5 +! CHECK-NEXT: u=10 +! CHECK-NEXT: u=15 +! CHECK-NEXT: u=20 +! CHECK-NEXT: u=25 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/fuse/do.f90 b/openmp/runtime/test/transform/fuse/do.f90 new file mode 100644 index 0000000000000..d4496bce4d723 --- /dev/null +++ b/openmp/runtime/test/transform/fuse/do.f90 @@ -0,0 +1,52 @@ +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program fuse_full + implicit none + integer i, j, k + + print *, 'do' + + !$OMP FUSE + do i=5, 25, 5 + print '("i=", I0)', i + end do + do j=10, 100, 10 + print '("j=", I0)', j + end do + do k=10, 0, -1 + print '("k=", I0)', k + end do + !$OMP END FUSE + + print *, 'done' +end program + +! CHECK: do +! CHECK-NEXT: i=5 +! CHECK-NEXT: j=10 +! CHECK-NEXT: k=10 +! CHECK-NEXT: i=10 +! CHECK-NEXT: j=20 +! CHECK-NEXT: k=9 +! CHECK-NEXT: i=15 +! CHECK-NEXT: j=30 +! CHECK-NEXT: k=8 +! CHECK-NEXT: i=20 +! CHECK-NEXT: j=40 +! CHECK-NEXT: k=7 +! CHECK-NEXT: i=25 +! CHECK-NEXT: j=50 +! CHECK-NEXT: k=6 +! CHECK-NEXT: j=60 +! CHECK-NEXT: k=5 +! CHECK-NEXT: j=70 +! CHECK-NEXT: k=4 +! CHECK-NEXT: j=80 +! CHECK-NEXT: k=3 +! CHECK-NEXT: j=90 +! CHECK-NEXT: k=2 +! CHECK-NEXT: j=100 +! CHECK-NEXT: k=1 +! CHECK-NEXT: k=0 +! CHECK-NEXT: done