diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index 0e07e6fcaac9d..a6a19d41e99ce 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -258,7 +258,7 @@ def fir_FreeMemOp : fir_Op<"freemem", [MemoryEffects<[MemFree]>]> { let assemblyFormat = "$heapref attr-dict `:` qualified(type($heapref))"; } -def fir_LoadOp : fir_OneResultOp<"load", [MemoryEffects<[MemRead]>]> { +def fir_LoadOp : fir_OneResultOp<"load", []> { let summary = "load a value from a memory reference"; let description = [{ Load a value from a memory reference into an ssa-value (virtual register). @@ -285,7 +285,7 @@ def fir_LoadOp : fir_OneResultOp<"load", [MemoryEffects<[MemRead]>]> { }]; } -def fir_StoreOp : fir_Op<"store", [MemoryEffects<[MemWrite]>]> { +def fir_StoreOp : fir_Op<"store", []> { let summary = "store an SSA-value to a memory location"; let description = [{ @@ -693,7 +693,7 @@ def fir_UnreachableOp : fir_Op<"unreachable", [Terminator]> { } -def fir_FirEndOp : fir_Op<"end", [Terminator]> { +def fir_FirEndOp : fir_Op<"end", [Terminator, NoMemoryEffect]> { let summary = "the end instruction"; let description = [{ diff --git a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td index 924e868d32afd..0aed2778698d3 100644 --- a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td +++ b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td @@ -170,7 +170,7 @@ def fir_AssignOp : hlfir_Op<"assign", [MemoryEffects<[MemWrite]>]> { } def hlfir_DesignateOp : hlfir_Op<"designate", [AttrSizedOperandSegments, - DeclareOpInterfaceMethods]> { + DeclareOpInterfaceMethods, NoMemoryEffect]> { let summary = "Designate a Fortran variable"; let description = [{ @@ -593,7 +593,7 @@ def hlfir_NoReassocOp : hlfir_Op<"no_reassoc", [NoMemoryEffect, SameOperandsAndR let assemblyFormat = "$val attr-dict `:` type($val)"; } -def hlfir_ElementalOp : 
hlfir_Op<"elemental", []> { +def hlfir_ElementalOp : hlfir_Op<"elemental", [RecursiveMemoryEffects]> { let summary = "elemental expression"; let description = [{ Represent an elemental expression as a function of the indices. @@ -652,7 +652,7 @@ def hlfir_ElementalOp : hlfir_Op<"elemental", []> { } -def hlfir_YieldElementOp : hlfir_Op<"yield_element", [Terminator, HasParent<"ElementalOp">]> { +def hlfir_YieldElementOp : hlfir_Op<"yield_element", [Terminator, HasParent<"ElementalOp">, Pure]> { let summary = "Yield the elemental value in an ElementalOp"; let description = [{ Yield the element value of the current elemental expression iteration @@ -717,7 +717,7 @@ def hlfir_NullOp : hlfir_Op<"null", [NoMemoryEffect, fir_FortranVariableOpInterf }]; } -def hlfir_DestroyOp : hlfir_Op<"destroy", []> { +def hlfir_DestroyOp : hlfir_Op<"destroy", [MemoryEffects<[MemFree]>]> { let summary = "Mark the last use of an hlfir.expr"; let description = [{ Mark the last use of an hlfir.expr. This will be the point at which the @@ -913,6 +913,11 @@ def hlfir_OrderedAssignmentTreeOpInterface : OpInterface<"OrderedAssignmentTreeO let extraClassDeclaration = [{ /// Interface verifier imlementation. mlir::LogicalResult verifyImpl(); + + mlir::Block* getSubTreeBlock() { + mlir::Region* region = getSubTreeRegion(); + return region && !region->empty()? 
®ion->front() : nullptr; + } }]; let verify = [{ @@ -987,7 +992,8 @@ def hlfir_RegionAssignOp : hlfir_Op<"region_assign", [hlfir_OrderedAssignmentTre def hlfir_YieldOp : hlfir_Op<"yield", [Terminator, ParentOneOf<["RegionAssignOp", "ElementalAddrOp", "ForallOp", "ForallMaskOp", "WhereOp", "ElseWhereOp"]>, - SingleBlockImplicitTerminator<"fir::FirEndOp">]> { + SingleBlockImplicitTerminator<"fir::FirEndOp">, RecursivelySpeculatable, + RecursiveMemoryEffects]> { let summary = "Yield a value or variable inside a forall, where or region assignment"; @@ -1116,6 +1122,13 @@ def hlfir_ForallOp : hlfir_Op<"forall", [hlfir_OrderedAssignmentTreeOpInterface] A Fortran forall with several indices is represented as a nest of hlfir.forall. + All the regions contained in the hlfir.forall must only contain + code that is pure from a Fortran point of view, except for the + assignment effect of the hlfir.region_assign. + This matches Fortran constraint C1037, but requires the outer + controls to be evaluated outside of the hlfir.forall (these + controls may have side effects as per Fortran 2018 10.1.4 section). + Example: FORALL(I=1:10) X(I) = FOO(I) ``` hlfir.forall lb { diff --git a/flang/include/flang/Optimizer/HLFIR/Passes.td b/flang/include/flang/Optimizer/HLFIR/Passes.td index e2cf8447dff47..493240928869e 100644 --- a/flang/include/flang/Optimizer/HLFIR/Passes.td +++ b/flang/include/flang/Optimizer/HLFIR/Passes.td @@ -28,6 +28,14 @@ def LowerHLFIRIntrinsics : Pass<"lower-hlfir-intrinsics", "::mlir::ModuleOp"> { def LowerHLFIROrderedAssignments : Pass<"lower-hlfir-ordered-assignments", "::mlir::ModuleOp"> { let summary = "Lower HLFIR ordered assignments like forall and where operations"; let constructor = "hlfir::createLowerHLFIROrderedAssignmentsPass()"; + let options = [ + Option<"tryFusingAssignments", "fuse-assignments", + "bool", /*default=*/"false", + "Fuse Forall and Where assignments in the same loop nest when legal." + "It is not clear yet if this is always beneficial. 
It may be best to" + "leave this to later loop optimizations." + "Hence this is off by default."> + ]; } def SimplifyHLFIRIntrinsics : Pass<"simplify-hlfir-intrinsics", "::mlir::func::FuncOp"> { diff --git a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt index df2e2bd68f4ed..f7e51dce11e97 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt +++ b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt @@ -5,6 +5,7 @@ add_flang_library(HLFIRTransforms ConvertToFIR.cpp LowerHLFIRIntrinsics.cpp LowerHLFIROrderedAssignments.cpp + ScheduleOrderedAssignments.cpp SimplifyHLFIRIntrinsics.cpp DEPENDS @@ -13,6 +14,7 @@ add_flang_library(HLFIRTransforms ${dialect_libs} LINK_LIBS + FIRAnalysis FIRDialect FIRBuilder FIRDialectSupport diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp index 85e32ffa0ec2d..a0dbd46975cdc 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp @@ -12,37 +12,76 @@ // where. // The pass lowers these operations to regular hlfir.assign, loops and, if // needed, introduces temporary storage to fulfill Fortran semantics. +// +// For each rewrite, an analysis builds an evaluation schedule, and then the +// new code is generated by following the evaluation schedule. 
//===----------------------------------------------------------------------===// +#include "ScheduleOrderedAssignments.h" #include "flang/Optimizer/Builder/Todo.h" -#include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/HLFIR/Passes.h" #include "mlir/Transforms/DialectConversion.h" +#include "llvm/Support/Debug.h" namespace hlfir { #define GEN_PASS_DEF_LOWERHLFIRORDEREDASSIGNMENTS #include "flang/Optimizer/HLFIR/Passes.h.inc" } // namespace hlfir -using namespace mlir; +#define DEBUG_TYPE "flang-ordered-assignment" + +// Test option only to test the scheduling part only (operations are erased +// without codegen). The only goal is to allow printing and testing the debug +// info. +static llvm::cl::opt dbgScheduleOnly( + "flang-dbg-order-assignment-schedule-only", + llvm::cl::desc("Only run ordered assignment scheduling with no codegen"), + llvm::cl::init(false)); + +/// Shared rewrite entry point for all the ordered assignment tree root +/// operations. It calls the scheduler and then apply the schedule. +static mlir::LogicalResult +rewrite(hlfir::OrderedAssignmentTreeOpInterface &root, + bool tryFusingAssignments, mlir::PatternRewriter &rewriter) { + (void)hlfir::buildEvaluationSchedule(root, tryFusingAssignments); + + LLVM_DEBUG( + /// Debug option to print the scheduling debug info without doing + /// any code generation. The operations are simply erased to avoid + /// failing and calling the rewrite patterns on nested operations. + /// The only purpose of this is to help testing scheduling without + /// having to test generated code. + if (dbgScheduleOnly) { + rewriter.eraseOp(root); + return mlir::success(); + }); + // TODO: lower to loops according to schedule. 
+ return mlir::failure(); +} namespace { class ForallOpConversion : public mlir::OpRewritePattern { public: - explicit ForallOpConversion(mlir::MLIRContext *ctx) : OpRewritePattern{ctx} {} + explicit ForallOpConversion(mlir::MLIRContext *ctx, bool tryFusingAssignments) + : OpRewritePattern{ctx}, tryFusingAssignments{tryFusingAssignments} {} mlir::LogicalResult matchAndRewrite(hlfir::ForallOp forallOp, mlir::PatternRewriter &rewriter) const override { - TODO(forallOp.getLoc(), "FORALL construct or statement in HLFIR"); - return mlir::failure(); + auto root = mlir::cast( + forallOp.getOperation()); + if (mlir::failed(::rewrite(root, tryFusingAssignments, rewriter))) + TODO(forallOp.getLoc(), "FORALL construct or statement in HLFIR"); + return mlir::success(); } + const bool tryFusingAssignments; }; class WhereOpConversion : public mlir::OpRewritePattern { public: - explicit WhereOpConversion(mlir::MLIRContext *ctx) : OpRewritePattern{ctx} {} + explicit WhereOpConversion(mlir::MLIRContext *ctx, bool tryFusingAssignments) + : OpRewritePattern{ctx}, tryFusingAssignments{tryFusingAssignments} {} mlir::LogicalResult matchAndRewrite(hlfir::WhereOp whereOp, @@ -50,6 +89,7 @@ class WhereOpConversion : public mlir::OpRewritePattern { TODO(whereOp.getLoc(), "WHERE construct or statement in HLFIR"); return mlir::failure(); } + const bool tryFusingAssignments; }; class RegionAssignConversion @@ -84,9 +124,9 @@ class LowerHLFIROrderedAssignments // operations that can be the root of ordered assignments. The other // operations will be taken care of while rewriting these trees (they // cannot exist outside of these operations given their verifiers/traits). 
- patterns - .insert( - context); + patterns.insert( + context, this->tryFusingAssignments.getValue()); + patterns.insert(context); mlir::ConversionTarget target(*context); target.markUnknownOpDynamicallyLegal([](mlir::Operation *op) { return !mlir::isa(op); diff --git a/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp new file mode 100644 index 0000000000000..d8c909d8387b0 --- /dev/null +++ b/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp @@ -0,0 +1,664 @@ +//===- ScheduleOrderedAssignments.cpp -- Ordered Assignment Scheduling ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ScheduleOrderedAssignments.h" +#include "flang/Optimizer/Analysis/AliasAnalysis.h" +#include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Builder/Todo.h" +#include "flang/Optimizer/Dialect/Support/FIRContext.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "flang-ordered-assignment" + +//===----------------------------------------------------------------------===// +// Scheduling logging utilities for debug and test +//===----------------------------------------------------------------------===// + +/// Log RAW or WAW conflict. +static void LLVM_ATTRIBUTE_UNUSED logConflict(llvm::raw_ostream &os, + mlir::Value writtenOrReadVarA, + mlir::Value writtenVarB); +/// Log when an expression evaluation must be saved. +static void LLVM_ATTRIBUTE_UNUSED logSaveEvaluation(llvm::raw_ostream &os, + unsigned runid, + mlir::Region &yieldRegion, + bool anyWrite); +/// Log when an assignment is scheduled. 
+static void LLVM_ATTRIBUTE_UNUSED logAssignmentEvaluation( + llvm::raw_ostream &os, unsigned runid, hlfir::RegionAssignOp assign); +/// Log when starting to schedule an order assignment tree. +static void LLVM_ATTRIBUTE_UNUSED logStartScheduling( + llvm::raw_ostream &os, hlfir::OrderedAssignmentTreeOpInterface root); +/// Log op if effect value is not known. +static void LLVM_ATTRIBUTE_UNUSED logIfUnkownEffectValue( + llvm::raw_ostream &os, mlir::MemoryEffects::EffectInstance effect, + mlir::Operation &op); + +//===----------------------------------------------------------------------===// +// Scheduling Implementation +//===----------------------------------------------------------------------===// + +namespace { +/// Structure that is in charge of building the schedule. For each +/// hlfir.region_assign inside an ordered assignment tree, it is walked through +/// the parent operations and their "leaf" regions (that contain expression +/// evaluations). The Scheduler analyze the memory effects of these regions +/// against the effect of the current assignment, and if any conflict is found, +/// it will create an action to save the value computed by the region before the +/// assignment evaluation. +class Scheduler { +public: + Scheduler(bool tryFusingAssignments) + : tryFusingAssignments{tryFusingAssignments} {} + + /// Start scheduling an assignment. Gather the write side effect from the + /// assignment. + void startSchedulingAssignment(hlfir::RegionAssignOp assign, + bool leafRegionsMayOnlyRead); + + /// Start analysing a set of evaluation regions that can be evaluated in + /// any order between themselves according to Fortran rules (like the controls + /// of forall). The point of this is to avoid adding the side effects of + /// independent evaluations to a run that would save only one of the control. 
+ void startIndependentEvaluationGroup() { + assert(independentEvaluationEffects.empty() && + "previous group was not finished"); + }; + + /// Analyze the memory effects of a region containing an expression + /// evaluation. If any conflict is found with the current assignment, or if + /// the expression has write effects (which is possible outside of forall), + /// create an action in the schedule to save the value in the schedule before + /// evaluating the current assignment. For expression with write effect, + /// saving them ensures they are evaluated only once. A region whose value + /// was saved in a previous run is considered to have no side effects with the + /// current assignment: the saved value will be used. + void saveEvaluationIfConflict(mlir::Region &yieldRegion, + bool leafRegionsMayOnlyRead, + bool yieldIsImplicitRead = true); + + /// Finish evaluating a group of independent regions. The current independent + /// regions effects are added to the "parent" effect list since evaluating the + /// next analyzed region would require evaluating the current independent + /// regions. + void finishIndependentEvaluationGroup() { + parentEvaluationEffects.append(independentEvaluationEffects.begin(), + independentEvaluationEffects.end()); + independentEvaluationEffects.clear(); + } + + /// After all the dependent evaluation regions have been analyzed, create the + /// action to evaluate the assignment that was being analyzed. + void finishSchedulingAssignment(hlfir::RegionAssignOp assign); + + /// Once all the assignments have been analyzed and scheduled, return the + /// schedule. The scheduler object should not be used after this call. + hlfir::Schedule moveSchedule() { return std::move(schedule); } + +private: + /// Save a conflicting region that is evaluating an expression that is + /// controlling or masking the current assignment, or is evaluating the + /// RHS/LHS. 
+ void + saveEvaluation(mlir::Region &yieldRegion, + llvm::ArrayRef effects, + bool anyWrite); + + /// Can the current assignment be schedule with the previous run. This is + /// only possible if the assignment and all of its dependencies have no side + /// effects conflicting with the previous run. + bool canFuseAssignmentWithPreviousRun(); + + /// Memory effects of the assignments being lowered. + llvm::SmallVector assignEffects; + /// Memory effects of the unsaved evaluation region that are controlling or + /// masking the current assignments. + llvm::SmallVector + parentEvaluationEffects; + /// Same as parentEvaluationEffects, but for the current "leaf group" being + /// analyzed scheduled. + llvm::SmallVector + independentEvaluationEffects; + + /// Were any region saved for the current assignment? + bool savedAnyRegionForCurrentAssignment = false; + + // Schedule being built. + hlfir::Schedule schedule; + /// Leaf regions that have been saved so far. + llvm::SmallSet savedRegions; + /// Is schedule.back() a schedule that is only saving region with read + /// effects? + bool currentRunIsReadOnly = false; + + /// Option to tell if the scheduler should try fusing to assignments in the + /// same loops. + const bool tryFusingAssignments; +}; +} // namespace + +//===----------------------------------------------------------------------===// +// Scheduling Implementation : gathering memory effects of nodes. +//===----------------------------------------------------------------------===// + +/// Is \p var the result of a ForallIndexOp? +/// Read effects to forall index can be ignored since forall +/// indices cannot be assigned to. +static bool isForallIndex(mlir::Value var) { + return var && + mlir::isa_and_nonnull(var.getDefiningOp()); +} + +/// Gather the memory effects of the operations contained in a region. 
+/// \p mayOnlyRead can be given to exclude some potential write effects that +/// cannot affect the current scheduling problem because it is known that the +/// regions are evaluating pure expressions from a Fortran point of view. It is +/// useful because low level IR in the region may contain operation that lacks +/// side effect interface, or that are writing temporary variables that may be +/// hard to identify as such (one would have to prove the write is "local" to +/// the region even when the alloca may be outside of the region). +static void gatherMemoryEffects( + mlir::Region ®ion, bool mayOnlyRead, + llvm::SmallVectorImpl &effects) { + /// This analysis is a simple walk of all the operations of the region that is + /// evaluating and yielding a value. This is a lot simpler and safer than + /// trying to walk back the SSA DAG from the yielded value. But if desired, + /// this could be changed. + for (mlir::Operation &op : region.getOps()) { + if (op.hasTrait()) { + for (mlir::Region &subRegion : op.getRegions()) + gatherMemoryEffects(subRegion, mayOnlyRead, effects); + // In MLIR, RecursiveMemoryEffects can be combined with + // MemoryEffectOpInterface to describe extra effects on top of the + // effects of the nested operations. However, the presence of + // RecursiveMemoryEffects and the absence of MemoryEffectOpInterface + // implies the operation has no other memory effects than the one of its + // nested operations. + if (!mlir::isa(op)) + continue; + } + mlir::MemoryEffectOpInterface interface = + mlir::dyn_cast(op); + if (!interface) { + LLVM_DEBUG(llvm::dbgs() << "unknown effect: " << op << "\n";); + // There is no generic way to know what this operation is reading/writing + // to. Assume the worst. No need to continue analyzing the code any + // further. + effects.emplace_back(mlir::MemoryEffects::Read::get()); + if (!mayOnlyRead) + effects.emplace_back(mlir::MemoryEffects::Write::get()); + return; + } + // Collect read/write effects. 
Alloc/Free effects do not matter, they + // are either local to the evaluation region and can be repeated, or, if + // they are allocatable/pointer allocation/deallocation, they are conveyed + // via the write that is updating the descriptor/allocatable (and there + // cannot be any indirect allocatable/pointer allocation/deallocation if + // mayOnlyRead is set). When mayOnlyRead is set, local write effects are + // also ignored. + llvm::SmallVector opEffects; + interface.getEffects(opEffects); + for (auto &effect : opEffects) + if (!isForallIndex(effect.getValue())) { + if (mlir::isa(effect.getEffect())) { + LLVM_DEBUG(logIfUnkownEffectValue(llvm::dbgs(), effect, op);); + effects.push_back(effect); + } else if (!mayOnlyRead && + mlir::isa(effect.getEffect())) { + LLVM_DEBUG(logIfUnkownEffectValue(llvm::dbgs(), effect, op);); + effects.push_back(effect); + } + } + } +} + +/// Return the entity yielded by a region, or a null value if the region +/// is not terminated by a yield. +static mlir::Value getYieldedEntity(mlir::Region ®ion) { + if (region.empty() || region.back().empty()) + return nullptr; + if (auto yield = mlir::dyn_cast(region.back().back())) + return yield.getEntity(); + return nullptr; +} + +/// Gather the effect of an assignment. This is the implicit write to the LHS +/// of an assignment. This also includes the effects of the user defined +/// assignment, if any, but this does not include the effects of evaluating the +/// RHS and LHS, which occur before the assignment effects in Fortran. 
+static void gatherAssignEffects( + hlfir::RegionAssignOp regionAssign, + bool userDefAssignmentMayOnlyWriteToAssignedVariable, + llvm::SmallVectorImpl &assignEffects) { + mlir::Value assignedVar = getYieldedEntity(regionAssign.getLhsRegion()); + if (!assignedVar) + TODO(regionAssign.getLoc(), + "assignment to vector subscripted entity in HLFIR"); + assignEffects.emplace_back(mlir::MemoryEffects::Write::get(), assignedVar); + + // TODO: gather the read/write effects of user defined assignments. + if (!regionAssign.getUserDefinedAssignment().empty()) + TODO(regionAssign.getLoc(), "user defined assignments"); +} + +//===----------------------------------------------------------------------===// +// Scheduling Implementation : finding conflicting memory effects. +//===----------------------------------------------------------------------===// + +/// Follow addressing and declare like operation to the storage source. +/// This allows using FIR alias analysis that otherwise does not know +/// about those operations. This is correct, but ignoring the designate +/// and declare info may yield false positive regarding aliasing (e.g, +/// if it could be proved that the variable are different sub-part of +/// an array). +static mlir::Value getStorageSource(mlir::Value var) { + // TODO: define some kind of View interface for Fortran in FIR, + // and use it in the FIR alias analysis. + mlir::Value source = var; + while (auto *op = source.getDefiningOp()) { + if (auto designate = mlir::dyn_cast(op)) { + source = designate.getMemref(); + } else if (auto declare = mlir::dyn_cast(op)) { + source = declare.getMemref(); + } else { + break; + } + } + return source; +} + +/// Could there be any read or write in effectsA on a variable written to in +/// effectsB? 
+static bool +anyRAWorWAW(llvm::ArrayRef effectsA, + llvm::ArrayRef effectsB, + fir::AliasAnalysis &aliasAnalysis) { + for (const auto &effectB : effectsB) + if (mlir::isa(effectB.getEffect())) { + mlir::Value writtenVarB = effectB.getValue(); + if (writtenVarB) + writtenVarB = getStorageSource(writtenVarB); + for (const auto &effectA : effectsA) + if (mlir::isa( + effectA.getEffect())) { + mlir::Value writtenOrReadVarA = effectA.getValue(); + if (!writtenVarB || !writtenOrReadVarA) { + LLVM_DEBUG( + logConflict(llvm::dbgs(), writtenOrReadVarA, writtenVarB);); + return true; // unknown conflict. + } + writtenOrReadVarA = getStorageSource(writtenOrReadVarA); + if (!aliasAnalysis.alias(writtenOrReadVarA, writtenVarB).isNo()) { + LLVM_DEBUG( + logConflict(llvm::dbgs(), writtenOrReadVarA, writtenVarB);); + return true; + } + } + } + return false; +} + +/// Could there be any read or write in effectsA on a variable written to in +/// effectsB, or any read in effectsB on a variable written to in effectsA? +static bool +conflict(llvm::ArrayRef effectsA, + llvm::ArrayRef effectsB) { + fir::AliasAnalysis aliasAnalysis; + // (RAW || WAW) || (WAR || WAW). + return anyRAWorWAW(effectsA, effectsB, aliasAnalysis) || + anyRAWorWAW(effectsB, effectsA, aliasAnalysis); +} + +/// Could there be any write effects in "effects"? 
+static bool +anyWrite(llvm::ArrayRef effects) { + return llvm::any_of( + effects, [](const mlir::MemoryEffects::EffectInstance &effect) { + return mlir::isa(effect.getEffect()); + }); +} + +//===----------------------------------------------------------------------===// +// Scheduling Implementation : Scheduler class implementation +//===----------------------------------------------------------------------===// + +void Scheduler::startSchedulingAssignment(hlfir::RegionAssignOp assign, + bool leafRegionsMayOnlyRead) { + gatherAssignEffects(assign, leafRegionsMayOnlyRead, assignEffects); +} + +void Scheduler::saveEvaluationIfConflict(mlir::Region &yieldRegion, + bool leafRegionsMayOnlyRead, + bool yieldIsImplicitRead) { + // If the region evaluation was previously executed and saved, the saved + // value will be used when evaluating the current assignment and this has + // no effects in the current assignment evaluation. + if (savedRegions.contains(&yieldRegion)) + return; + llvm::SmallVector effects; + gatherMemoryEffects(yieldRegion, leafRegionsMayOnlyRead, effects); + // Yield has no effect as such, but in the context of order assignments. + // The order assignments will usually read the yielded entity (except for + // the yielded assignments LHS that is only read if this is an assignment + // with a finalizer, or a user defined assignment where the LHS is + // intent(inout)). + if (yieldIsImplicitRead) { + mlir::Value entity = getYieldedEntity(yieldRegion); + if (entity && hlfir::isFortranVariableType(entity.getType())) + effects.emplace_back(mlir::MemoryEffects::Read::get(), entity); + } + if (!leafRegionsMayOnlyRead && anyWrite(effects)) { + // Region with write effect must be executed only once: save it the first + // time it is encountered. 
+ saveEvaluation(yieldRegion, effects, /*anyWrite=*/true); + } else if (conflict(effects, assignEffects)) { + // Region that conflicts with the current assignments must be fully + // evaluated and saved before doing the assignment (Note that it may + // have already have been evaluated without saving it before, but this + // implies that it never conflicted with a prior assignment, so its value + // should be the same.) + saveEvaluation(yieldRegion, effects, /*anyWrite=*/false); + } else { + // Can be executed while doing the assignment. + independentEvaluationEffects.append(effects.begin(), effects.end()); + } +} + +void Scheduler::saveEvaluation( + mlir::Region &yieldRegion, + llvm::ArrayRef effects, + bool anyWrite) { + savedAnyRegionForCurrentAssignment = true; + if (anyWrite) { + // Create a new run just for regions with side effect. Further analysis + // could try to prove the effects do not conflict with the previous + // schedule. + schedule.emplace_back(hlfir::Run{}); + currentRunIsReadOnly = false; + } else if (!currentRunIsReadOnly) { + // For now, do not try to fuse an evaluation with a previous + // run that contains any write effects. One could try to prove + // that "effects" do not conflict with the current run assignments. + schedule.emplace_back(hlfir::Run{}); + currentRunIsReadOnly = true; + } + // Otherwise, save the yielded entity in the current run, that already + // saving other read only entities. + schedule.back().actions.emplace_back(hlfir::SaveEntity{&yieldRegion}); + // The run to save the yielded entity will need to evaluate all the unsaved + // parent control or masks. Note that these effects may already be in the + // current run memoryEffects, but it is just easier always add them, even if + // this may add them again. 
+ schedule.back().memoryEffects.append(parentEvaluationEffects.begin(), + parentEvaluationEffects.end()); + schedule.back().memoryEffects.append(effects.begin(), effects.end()); + savedRegions.insert(&yieldRegion); + LLVM_DEBUG( + logSaveEvaluation(llvm::dbgs(), schedule.size(), yieldRegion, anyWrite);); +} + +bool Scheduler::canFuseAssignmentWithPreviousRun() { + // If a region was saved for the current assignment, the previous + // run is already known to conflict. Skip the analysis. + if (savedAnyRegionForCurrentAssignment || schedule.empty()) + return false; + auto &previousRunEffects = schedule.back().memoryEffects; + return !conflict(previousRunEffects, assignEffects) && + !conflict(previousRunEffects, parentEvaluationEffects) && + !conflict(previousRunEffects, independentEvaluationEffects); +} + +void Scheduler::finishSchedulingAssignment(hlfir::RegionAssignOp assign) { + // For now, always schedule each assignment in its own run. They could + // be done as part of previous assignment runs if it is proven they have + // no conflicting effects. + currentRunIsReadOnly = false; + if (!tryFusingAssignments || !canFuseAssignmentWithPreviousRun()) + schedule.emplace_back(hlfir::Run{}); + schedule.back().actions.emplace_back(assign); + // TODO: when fusing, it would probably be best to filter the + // parentEvaluationEffects that already in the previous run effects (since + // assignments may share the same parents), otherwise, this can make the + // conflict() calls more and more expensive. 
+ schedule.back().memoryEffects.append(parentEvaluationEffects.begin(), + parentEvaluationEffects.end()); + schedule.back().memoryEffects.append(assignEffects.begin(), + assignEffects.end()); + assignEffects.clear(); + parentEvaluationEffects.clear(); + independentEvaluationEffects.clear(); + savedAnyRegionForCurrentAssignment = false; + LLVM_DEBUG(logAssignmentEvaluation(llvm::dbgs(), schedule.size(), assign)); +} + +//===----------------------------------------------------------------------===// +// Scheduling Implementation : driving the Scheduler in the assignment tree. +//===----------------------------------------------------------------------===// + +/// Gather the hlfir.region_assign nested directly and indirectly inside root in +/// execution order. +static void +gatherAssignments(hlfir::OrderedAssignmentTreeOpInterface root, + llvm::SmallVector &assignments) { + llvm::SmallVector nodeStack{root.getOperation()}; + while (!nodeStack.empty()) { + mlir::Operation *node = nodeStack.pop_back_val(); + if (auto regionAssign = mlir::dyn_cast(node)) { + assignments.push_back(regionAssign); + continue; + } + auto nodeIface = + mlir::dyn_cast(node); + if (nodeIface) + if (mlir::Block *block = nodeIface.getSubTreeBlock()) + for (mlir::Operation &op : llvm::reverse(block->getOperations())) + nodeStack.push_back(&op); + } +} + +/// Gather the parents of (not included) \p node in reverse execution order. +static void gatherParents( + hlfir::OrderedAssignmentTreeOpInterface node, + llvm::SmallVectorImpl &parents) { + while (node) { + auto parent = + mlir::dyn_cast_or_null( + node->getParentOp()); + if (parent && parent.getSubTreeRegion() == node->getParentRegion()) { + parents.push_back(parent); + node = parent; + } else { + break; + } + } +} + +// Build the list of the parent nodes for this assignment. The list is built +// from the closest parent until the ordered assignment tree root (this is the +// revere of their execution order). 
+static void gatherAssignmentParents( + hlfir::RegionAssignOp assign, + llvm::SmallVectorImpl &parents) { + gatherParents(mlir::cast( + assign.getOperation()), + parents); +} + +hlfir::Schedule +hlfir::buildEvaluationSchedule(hlfir::OrderedAssignmentTreeOpInterface root, + bool tryFusingAssignments) { + LLVM_DEBUG(logStartScheduling(llvm::dbgs(), root);); + // The expressions inside an hlfir.forall must be pure (with the Fortran + // definition of pure). This is not a commitment that there are no operation + // with write effect in the regions: entities local to the region may still + // be written to (e.g., a temporary accumulator implementing SUM). This is + // a commitment that no write effect will affect the scheduling problem, and + // that all write effect caught by MLIR analysis can be ignored for the + // current problem. + const bool leafRegionsMayOnlyRead = + mlir::isa(root.getOperation()); + + // Loop through the assignments and schedule them. + Scheduler scheduler(tryFusingAssignments); + llvm::SmallVector assignments; + gatherAssignments(root, assignments); + for (hlfir::RegionAssignOp assign : assignments) { + scheduler.startSchedulingAssignment(assign, leafRegionsMayOnlyRead); + // Go through the list of parents (not including the current + // hlfir.region_assign) in Fortran execution order so that any parent leaf + // region that must be saved is saved in order. + llvm::SmallVector parents; + gatherAssignmentParents(assign, parents); + for (hlfir::OrderedAssignmentTreeOpInterface parent : + llvm::reverse(parents)) { + scheduler.startIndependentEvaluationGroup(); + llvm::SmallVector yieldRegions; + parent.getLeafRegions(yieldRegions); + for (mlir::Region *yieldRegion : yieldRegions) + scheduler.saveEvaluationIfConflict(*yieldRegion, + leafRegionsMayOnlyRead); + scheduler.finishIndependentEvaluationGroup(); + } + // Look for conflicts between the RHS/LHS evaluation and the assignments. 
+ // The LHS yield has no implicit read effect on the produced variable (the + // variable is not read before the assignment). + scheduler.startIndependentEvaluationGroup(); + scheduler.saveEvaluationIfConflict(assign.getRhsRegion(), + leafRegionsMayOnlyRead); + scheduler.saveEvaluationIfConflict(assign.getLhsRegion(), + leafRegionsMayOnlyRead, + /*yieldIsImplicitRead=*/false); + scheduler.finishIndependentEvaluationGroup(); + scheduler.finishSchedulingAssignment(assign); + } + return scheduler.moveSchedule(); +} + +mlir::Value hlfir::SaveEntity::getSavedValue() { + mlir::Value saved = getYieldedEntity(*yieldRegion); + assert(saved && "SaveEntity must contain region terminated by YieldOp"); + return saved; +} + +//===----------------------------------------------------------------------===// +// Debug and test logging implementation +//===----------------------------------------------------------------------===// + +static llvm::raw_ostream &printRegionId(llvm::raw_ostream &os, + mlir::Region &yieldRegion) { + mlir::Operation *parent = yieldRegion.getParentOp(); + if (auto forall = mlir::dyn_cast(parent)) { + if (&forall.getLbRegion() == &yieldRegion) + os << "lb"; + else if (&forall.getUbRegion() == &yieldRegion) + os << "ub"; + else if (&forall.getStepRegion() == &yieldRegion) + os << "step"; + } else if (auto assign = mlir::dyn_cast(parent)) { + if (&assign.getMaskRegion() == &yieldRegion) + os << "mask"; + } else if (auto assign = mlir::dyn_cast(parent)) { + if (&assign.getRhsRegion() == &yieldRegion) + os << "rhs"; + else if (&assign.getLhsRegion() == &yieldRegion) + os << "lhs"; + } else { + os << "unknown"; + } + return os; +} + +static llvm::raw_ostream & +printNodeIndexInBody(llvm::raw_ostream &os, + hlfir::OrderedAssignmentTreeOpInterface node, + hlfir::OrderedAssignmentTreeOpInterface parent) { + if (!parent || !parent.getSubTreeRegion()) + return os; + mlir::Operation *nodeOp = node.getOperation(); + unsigned index = 1; + for (mlir::Operation &op : 
parent.getSubTreeRegion()->getOps()) + if (nodeOp == &op) { + return os << index; + } else if (nodeOp->getName() == op.getName()) { + ++index; + } + return os; +} + +static llvm::raw_ostream &printNodePath(llvm::raw_ostream &os, + mlir::Operation *op) { + auto node = + mlir::dyn_cast_or_null(op); + if (!node) { + os << "unknown node"; + return os; + } + llvm::SmallVector parents; + gatherParents(node, parents); + hlfir::OrderedAssignmentTreeOpInterface previousParent; + for (auto parent : llvm::reverse(parents)) { + os << parent->getName().stripDialect(); + printNodeIndexInBody(os, parent, previousParent) << "/"; + previousParent = parent; + } + os << node->getName().stripDialect(); + return printNodeIndexInBody(os, node, previousParent); +} + +static llvm::raw_ostream &printRegionPath(llvm::raw_ostream &os, + mlir::Region &yieldRegion) { + printNodePath(os, yieldRegion.getParentOp()) << "/"; + return printRegionId(os, yieldRegion); +} + +static void LLVM_ATTRIBUTE_UNUSED logSaveEvaluation(llvm::raw_ostream &os, + unsigned runid, + mlir::Region &yieldRegion, + bool anyWrite) { + os << "run " << runid << " save " << (anyWrite ? 
"(w)" : " ") << ": "; + printRegionPath(os, yieldRegion) << "\n"; +} + +static void LLVM_ATTRIBUTE_UNUSED logAssignmentEvaluation( + llvm::raw_ostream &os, unsigned runid, hlfir::RegionAssignOp assign) { + os << "run " << runid << " evaluate: "; + printNodePath(os, assign.getOperation()) << "\n"; +} + +static void LLVM_ATTRIBUTE_UNUSED logConflict(llvm::raw_ostream &os, + mlir::Value writtenOrReadVarA, + mlir::Value writtenVarB) { + auto printIfValue = [&](mlir::Value var) -> llvm::raw_ostream & { + if (!var) + return os << ""; + return os << var; + }; + os << "conflict: R/W: "; + printIfValue(writtenOrReadVarA) << " W:"; + printIfValue(writtenVarB) << "\n"; +} + +static void LLVM_ATTRIBUTE_UNUSED logStartScheduling( + llvm::raw_ostream &os, hlfir::OrderedAssignmentTreeOpInterface root) { + os << "------------ scheduling "; + printNodePath(os, root.getOperation()); + if (auto funcOp = root->getParentOfType()) + os << " in " << funcOp.getSymName() << " "; + os << "------------\n"; +} + +static void LLVM_ATTRIBUTE_UNUSED logIfUnkownEffectValue( + llvm::raw_ostream &os, mlir::MemoryEffects::EffectInstance effect, + mlir::Operation &op) { + if (effect.getValue() != nullptr) + return; + os << "unknown effected value ("; + os << (mlir::isa(effect.getEffect()) ? "R" : "W"); + os << "): " << op << "\n"; +} diff --git a/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.h b/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.h new file mode 100644 index 0000000000000..2ed242edc973a --- /dev/null +++ b/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.h @@ -0,0 +1,96 @@ +//===- ScheduleOrderedAssignments.h --- Assignment scheduling ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This file defines a utility to analyze and schedule the evaluation of +// hlfir::OrderedAssignmentTreeOpInterface trees that represent Fortran +// Forall, Where, user defined assignments and assignments to vector +// subscripted entities. +//===----------------------------------------------------------------------===// + +#ifndef OPTIMIZER_HLFIR_TRANSFORM_SCHEDULEORDEREDASSIGNMENTS_H +#define OPTIMIZER_HLFIR_TRANSFORM_SCHEDULEORDEREDASSIGNMENTS_H + +#include "flang/Optimizer/HLFIR/HLFIROps.h" + +namespace hlfir { + +/// Structure to represent that the value yielded by some region +/// must be fully evaluated and saved for all index values at +/// a given point of the ordered assignment tree evaluation. +/// All subsequent evaluation depending on the value yielded +/// by this region will use the value that was saved. +struct SaveEntity { + mlir::Region *yieldRegion; + /// Returns the hlfir.yield op argument. + mlir::Value getSavedValue(); +}; + +/// A run is a list of actions required to evaluate an ordered assignment tree +/// that can be done in the same loop nest. +/// The actions can evaluate and save element values into temporaries or evaluate +/// assignments. +/// The evaluation of an action in a run will cause the evaluation of all the +/// regions that yield entities required to implement the action, except if the +/// region was saved in a previous run, in which case it will use the previously +/// saved value. +struct Run { + /// An action is either saving the values yielded by a region, or evaluating + /// the assignment part of an hlfir::RegionAssignOp. + using Action = std::variant; + llvm::SmallVector actions; + llvm::SmallVector memoryEffects; +}; + +/// List of runs to be executed in order to evaluate an ordered assignment tree. 
+using Schedule = llvm::SmallVector; + +/// Example of schedules and run, and what they mean: +/// Fortran: forall (i=1:10) x(i) = y(i) +/// +/// hlfir.forall lb { hlfir.yield %c1} ub { hlfir.yield %c10} do { +/// ^bb1(%i: index) +/// hlfir.region_assign { +/// %yi_addr = hlfir.designate %y(%i) +/// %yi = fir.load %yi_addr +/// hlfir.yield %yi +/// } to { +/// %xi = hlfir.designate %x(%i) +/// hlfir.yield %xi +/// } +/// } +/// +/// If the scheduling analysis cannot prove that %x and %y do not overlap, it +/// will generate 2 runs for the schedule. The first containing +/// SaveEntity{rhs_region}, and the second one containing the +/// hlfir.region_assign. +/// +/// The lowering of that schedule will have to: +/// For the first run: +/// 1. create a temporary to contain all the %yi for all %i +/// 2. create a loop nest for the forall, evaluate the %yi and save them +/// inside the loop, but do not evaluate the LHS or assignment. +/// For the second run: +/// 3. create a loop nest again for the forall, evaluate the LHS, get the +/// saved %yi, and assign %yi to %xi. After all runs: +/// 4. clean the temporary for the %yi. +/// +/// If the scheduling analysis can prove %x and %y do not overlap, it will +/// generate only one run with the hlfir.region_assign, which will be +/// implemented as a single loop that evaluates %xi, %yi and does %xi = %yi in +/// the loop body. + +/// Core function that analyzes an ordered assignment tree and builds a +/// schedule for its evaluation. +/// The main goal of the scheduler is to avoid creating temporary storage +/// (required for SaveEntity). But it can optionally be asked to fuse Forall +/// and Where assignments in the same loop nests when possible since it has the +/// memory effects analysis at hand. 
+Schedule buildEvaluationSchedule(hlfir::OrderedAssignmentTreeOpInterface root, + bool tryFusingAssignments); + +} // namespace hlfir +#endif // OPTIMIZER_HLFIR_TRANSFORM_SCHEDULEORDEREDASSIGNMENTS_H diff --git a/flang/test/HLFIR/order_assignments/forall-fusing-scheduling.f90 b/flang/test/HLFIR/order_assignments/forall-fusing-scheduling.f90 new file mode 100644 index 0000000000000..2ebbebd235521 --- /dev/null +++ b/flang/test/HLFIR/order_assignments/forall-fusing-scheduling.f90 @@ -0,0 +1,103 @@ +! Test optional fusing of forall assignments in the scheduling analysis +! from lower-hlfir-ordered-assignments pass. Assignments are fused in the +! same loop nest if they are given the same run id. + +! RUN: bbc -hlfir -o - -pass-pipeline="builtin.module(lower-hlfir-ordered-assignments{fuse-assignments=false})" --debug-only=flang-ordered-assignment -flang-dbg-order-assignment-schedule-only %s 2>&1 | FileCheck %s --check-prefix NOFUSE + +! RUN: bbc -hlfir -o - -pass-pipeline="builtin.module(lower-hlfir-ordered-assignments{fuse-assignments=true})" --debug-only=flang-ordered-assignment -flang-dbg-order-assignment-schedule-only %s 2>&1 | FileCheck %s --check-prefix FUSE + +! 
REQUIRES: asserts + +subroutine fusable_assign_easy(x, y, z) + integer :: x(:), y(:), z(:) + forall(i=1:10) + x(i) = 42 + z(i) = 42 + end forall +end subroutine +!NOFUSE-LABEL: ------------ scheduling forall in _QPfusable_assign_easy ------------ +!NOFUSE-NEXT: run 1 evaluate: forall/region_assign1 +!NOFUSE-NEXT: run 2 evaluate: forall/region_assign2 + +!FUSE-LABEL: ------------ scheduling forall in _QPfusable_assign_easy ------------ +!FUSE-NEXT: run 1 evaluate: forall/region_assign1 +!FUSE-NEXT: run 1 evaluate: forall/region_assign2 + +subroutine fusable_assign(x, y, z) + integer :: x(:), y(:), z(:) + forall(i=1:10) + x(i) = y(i) + z(i) = y(11-i) + end forall +end subroutine +!NOFUSE-LABEL: ------------ scheduling forall in _QPfusable_assign ------------ +!NOFUSE-NEXT: run 1 evaluate: forall/region_assign1 +!NOFUSE-NEXT: run 2 evaluate: forall/region_assign2 + +!FUSE-LABEL: ------------ scheduling forall in _QPfusable_assign ------------ +!FUSE-NEXT: run 1 evaluate: forall/region_assign1 +!FUSE-NEXT: run 1 evaluate: forall/region_assign2 + +subroutine unfusable_assign_1(x, y, z) + integer :: x(:), y(:), z(:) + forall(i=1:10) + x(i) = y(i) + z(i) = x(11-i) + end forall +end subroutine +!NOFUSE-LABEL: ------------ scheduling forall in _QPunfusable_assign_1 ------------ +!NOFUSE-NEXT: run 1 evaluate: forall/region_assign1 +!NOFUSE-NEXT: run 2 evaluate: forall/region_assign2 + +!FUSE-LABEL: ------------ scheduling forall in _QPunfusable_assign_1 ------------ +!FUSE-NEXT: run 1 evaluate: forall/region_assign1 +!FUSE-NEXT: conflict: R/W: of type '!fir.box>' at index: 0 W: of type '!fir.box>' at index: 0 +!FUSE-NEXT: run 2 evaluate: forall/region_assign2 + +subroutine unfusable_assign_2(x, y) + integer :: x(:), y(:) + forall(i=1:10) + x(i) = y(i) + x(i+1) = y(i+1) + end forall +end subroutine +!NOFUSE-LABEL: ------------ scheduling forall in _QPunfusable_assign_2 ------------ +!NOFUSE-NEXT: run 1 evaluate: forall/region_assign1 +!NOFUSE-NEXT: run 2 evaluate: 
forall/region_assign2 + +!FUSE-LABEL: ------------ scheduling forall in _QPunfusable_assign_2 ------------ +!FUSE-NEXT: run 1 evaluate: forall/region_assign1 +!FUSE-NEXT: conflict: R/W: of type '!fir.box>' at index: 0 W: of type '!fir.box>' at index: 0 +!FUSE-NEXT: run 2 evaluate: forall/region_assign2 + +subroutine unfusable_assign_3(x, y, z) + integer :: x(:, :), y(:, :), z(:, :) + forall(i=1:10) + forall(j=1:z(i, i)) x(i, j) = y(i, j) + z(i, :) = y(i, :) + end forall +end subroutine +!NOFUSE-LABEL: ------------ scheduling forall in _QPunfusable_assign_3 ------------ +!NOFUSE-NEXT: run 1 evaluate: forall/forall1/region_assign1 +!NOFUSE-NEXT: run 2 evaluate: forall/region_assign1 + +!FUSE-LABEL: ------------ scheduling forall in _QPunfusable_assign_3 ------------ +!FUSE-NEXT: run 1 evaluate: forall/forall1/region_assign1 +!FUSE-NEXT: conflict: R/W: of type '!fir.box>' at index: 2 W: of type '!fir.box>' at index: 2 +!FUSE-NEXT: run 2 evaluate: forall/region_assign1 + +subroutine unfusable_assign_4(x, y, z) + integer :: x(:, :), y(:, :), z(:, :) + forall(i=1:10) + x(i, :) = y(i, :) + forall(j=1:x(i, i)) z(i, j) = y(i, j) + end forall +end subroutine +!NOFUSE-LABEL: ------------ scheduling forall in _QPunfusable_assign_4 ------------ +!NOFUSE-NEXT: run 1 evaluate: forall/region_assign1 +!NOFUSE-NEXT: run 2 evaluate: forall/forall1/region_assign1 + +!FUSE-LABEL: ------------ scheduling forall in _QPunfusable_assign_4 ------------ +!FUSE-NEXT: run 1 evaluate: forall/region_assign1 +!FUSE-NEXT: conflict: R/W: of type '!fir.box>' at index: 0 W: of type '!fir.box>' at index: 0 +!FUSE-NEXT: run 2 evaluate: forall/forall1/region_assign1 diff --git a/flang/test/HLFIR/order_assignments/forall-scheduling.f90 b/flang/test/HLFIR/order_assignments/forall-scheduling.f90 new file mode 100644 index 0000000000000..f5cb53bb8284f --- /dev/null +++ b/flang/test/HLFIR/order_assignments/forall-scheduling.f90 @@ -0,0 +1,168 @@ +! 
Test forall scheduling analysis from lower-hlfir-ordered-assignments pass. +! The printed output is done via LLVM_DEBUG, hence the "asserts" requirement. +! This test checks that conflicting actions are not scheduled to be evaluated +! in the same loops (same run id). + +! RUN: bbc -hlfir -o - -pass-pipeline="builtin.module(lower-hlfir-ordered-assignments)" --debug-only=flang-ordered-assignment -flang-dbg-order-assignment-schedule-only %s 2>&1 | FileCheck %s +! REQUIRES: asserts + +subroutine no_conflict(x) + real :: x(:) + forall(i=1:10) x(i) = i +end subroutine +!CHECK-LABEL: ------------ scheduling forall in _QPno_conflict ------------ +!CHECK-NEXT: run 1 evaluate: forall/region_assign1 + +subroutine rhs_lhs_overlap(x) + real :: x(:) + forall(i=1:10) x(i) = x(11-i) +end subroutine +!CHECK-LABEL: ------------ scheduling forall in _QPrhs_lhs_overlap ------------ +!CHECK-NEXT: conflict: R/W: of type '!fir.box>' at index: 0 W: of type '!fir.box>' at index: 0 +!CHECK-NEXT: run 1 save : forall/region_assign1/rhs +!CHECK-NEXT: run 2 evaluate: forall/region_assign1 + +subroutine no_rhs_lhs_overlap(x, y) + real :: x(:), y(:) + forall(i=1:10) x(i) = y(i) +end subroutine +!CHECK-LABEL: ------------ scheduling forall in _QPno_rhs_lhs_overlap ------------ +!CHECK-NEXT: run 1 evaluate: forall/region_assign1 + +subroutine no_rhs_lhs_overlap_2(x) + real :: x(:), y(10) + forall(i=1:10) x(i) = y(i) +end subroutine +!CHECK-LABEL: ------------ scheduling forall in _QPno_rhs_lhs_overlap_2 ------------ +!CHECK-NEXT: run 1 evaluate: forall/region_assign1 + +subroutine no_rhs_lhs_overlap_3() + real :: x(10), y(10) + forall(i=1:10) x(i) = y(i) +end subroutine +!CHECK-LABEL: ------------ scheduling forall in _QPno_rhs_lhs_overlap_3 ------------ +!CHECK-NEXT: run 1 evaluate: forall/region_assign1 + +subroutine array_expr_rhs_lhs_overlap(x) + real :: x(:, :) + forall(i=1:10) x(i, :) = x(:, i)*2 +end subroutine +!CHECK-LABEL: ------------ scheduling forall in _QParray_expr_rhs_lhs_overlap 
------------ +!CHECK-NEXT: conflict: R/W: of type '!fir.box>' at index: 0 W: of type '!fir.box>' at index: 0 +!CHECK-NEXT: run 1 save : forall/region_assign1/rhs +!CHECK-NEXT: run 2 evaluate: forall/region_assign1 + +subroutine array_expr_no_rhs_lhs_overlap(x, y, z) + real :: x(:, :), y(:, :), z(:, :) + forall(i=1:10) x(i, :) = y(:, i) + z(i, :) +end subroutine +!CHECK-LABEL: ------------ scheduling forall in _QParray_expr_no_rhs_lhs_overlap ------------ +!CHECK-NEXT: run 1 evaluate: forall/region_assign1 + +subroutine rhs_lhs_overlap_2(x, y) + real, target :: x(:), y(:) + forall(i=1:10) x(i) = y(i) +end subroutine +!CHECK-LABEL: ------------ scheduling forall in _QPrhs_lhs_overlap_2 ------------ +!CHECK-NEXT: conflict: R/W: of type '!fir.box>' at index: 1 W: of type '!fir.box>' at index: 0 +!CHECK-NEXT: run 1 save : forall/region_assign1/rhs +!CHECK-NEXT: run 2 evaluate: forall/region_assign1 + +subroutine lhs_lhs_overlap(x) + integer :: x(10) + forall(i=1:10) x(x(i)) = i +end subroutine +!CHECK-LABEL: ------------ scheduling forall in _QPlhs_lhs_overlap ------------ +!CHECK-NEXT: conflict: R/W: of type '!fir.ref>' at index: 0 W: of type '!fir.ref>' at index: 0 +!CHECK-NEXT: run 1 save : forall/region_assign1/lhs +!CHECK-NEXT: run 2 evaluate: forall/region_assign1 + +subroutine unknown_function_call(x) + interface + pure real function foo(x, i) + integer, intent(in) :: i + real, intent(in) :: x(10) + end function + end interface + real :: x(10) + forall(i=1:10) x(i) = foo(x, i) +end subroutine +!CHECK-LABEL: ------------ scheduling forall in _QPunknown_function_call ------------ +!CHECK-NEXT: unknown effect: {{.*}} fir.call @_QPfoo +!CHECK-NEXT: conflict: R/W: W: of type '!fir.ref>' at index: 0 +!CHECK-NEXT: run 1 save : forall/region_assign1/rhs +!CHECK-NEXT: run 2 evaluate: forall/region_assign1 + +subroutine unknown_function_call2(x) + interface + pure real function foo2(i) + integer, value :: i + end function + end interface + ! 
foo2 may read x since it is a target, even if it is pure, + ! if the actual argument of x is a module variable accessible + ! to foo via host association. + real, target :: x(:) + forall(i=1:10) x(i) = foo2(i) +end subroutine +!CHECK-LABEL: ------------ scheduling forall in _QPunknown_function_call2 ------------ +!CHECK-NEXT: unknown effect: {{.*}} fir.call @_QPfoo2( +!CHECK-NEXT: conflict: R/W: W: of type '!fir.box>' at index: 0 +!CHECK-NEXT: run 1 save : forall/region_assign1/rhs +!CHECK-NEXT: run 2 evaluate: forall/region_assign1 + +subroutine forall_mask_conflict(x) + integer :: x(:) + forall(i=1:10, x(11-i)>0) x(i) = 42 +end subroutine +!CHECK-LABEL: ------------ scheduling forall in _QPforall_mask_conflict ------------ +!CHECK-NEXT: conflict: R/W: of type '!fir.box>' at index: 0 W: of type '!fir.box>' at index: 0 +!CHECK-NEXT: run 1 save : forall/forall_mask1/mask +!CHECK-NEXT: run 2 evaluate: forall/forall_mask1/region_assign1 + +subroutine forall_ub_conflict(x, y) + integer :: x(:, :) + forall(i=1:10) + forall(j=1:x(i,i)) + x(i, j) = 42 + end forall + end forall +end subroutine +!CHECK-LABEL: ------------ scheduling forall in _QPforall_ub_conflict ------------ +!CHECK-NEXT: conflict: R/W: of type '!fir.box>' at index: 0 W: of type '!fir.box>' at index: 0 +!CHECK-NEXT: run 1 save : forall/forall1/ub +!CHECK-NEXT: run 2 evaluate: forall/forall1/region_assign1 + +subroutine sequential_assign(x, y) + integer :: x(:), y(:) + forall(i=1:10) + x(i) = y(i) + y(2*i) = x(i) + end forall +end subroutine +!CHECK-LABEL: ------------ scheduling forall in _QPsequential_assign ------------ +!CHECK-NEXT: run 1 evaluate: forall/region_assign1 +!CHECK-NEXT: run 2 evaluate: forall/region_assign2 + +subroutine loads_of_conlficts(x, y) + integer, target :: x(:, :), y(:, :) + forall(i=1:10) + forall (j=1:y(i,i)) x(x(i, j), j) = y(i, j) + forall (j=1:x(i,i), y(i,i)>0) y(x(i, j), j) = 0 + end forall +end subroutine +!CHECK-LABEL: ------------ scheduling forall in 
_QPloads_of_conlficts ------------ +!CHECK-NEXT: conflict: R/W: of type '!fir.box>' at index: 1 W: of type '!fir.box>' at index: 0 +!CHECK-NEXT: run 1 save : forall/forall1/ub +!CHECK-NEXT: conflict: R/W: of type '!fir.box>' at index: 1 W: of type '!fir.box>' at index: 0 +!CHECK-NEXT: run 1 save : forall/forall1/region_assign1/rhs +!CHECK-NEXT: conflict: R/W: of type '!fir.box>' at index: 0 W: of type '!fir.box>' at index: 0 +!CHECK-NEXT: run 1 save : forall/forall1/region_assign1/lhs +!CHECK-NEXT: run 2 evaluate: forall/forall1/region_assign1 +!CHECK-NEXT: conflict: R/W: of type '!fir.box>' at index: 0 W: of type '!fir.box>' at index: 1 +!CHECK-NEXT: run 3 save : forall/forall2/ub +!CHECK-NEXT: conflict: R/W: of type '!fir.box>' at index: 1 W: of type '!fir.box>' at index: 1 +!CHECK-NEXT: run 3 save : forall/forall2/forall_mask1/mask +!CHECK-NEXT: conflict: R/W: of type '!fir.box>' at index: 0 W: of type '!fir.box>' at index: 1 +!CHECK-NEXT: run 3 save : forall/forall2/forall_mask1/region_assign1/lhs +!CHECK-NEXT: run 4 evaluate: forall/forall2/forall_mask1/region_assign1 diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index dcb37043e6eb7..fcba73da2efe7 100644 --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -300,6 +300,7 @@ static mlir::LogicalResult convertFortranSourceToMLIR( (void)mlir::applyPassManagerCLOptions(pm); if (passPipeline.hasAnyOccurrences()) { // run the command-line specified pipeline + hlfir::registerHLFIRPasses(); (void)passPipeline.addToPipeline(pm, [&](const llvm::Twine &msg) { mlir::emitError(mlir::UnknownLoc::get(&ctx)) << msg; return mlir::failure();