diff --git a/flang/include/flang/Optimizer/Builder/TemporaryStorage.h b/flang/include/flang/Optimizer/Builder/TemporaryStorage.h index 4a96b11d44804..88bf4af382724 100644 --- a/flang/include/flang/Optimizer/Builder/TemporaryStorage.h +++ b/flang/include/flang/Optimizer/Builder/TemporaryStorage.h @@ -93,5 +93,52 @@ class HomogeneousScalarStack { /// Temporary storage. mlir::Value temp; }; + +/// Structure to hold the value of a single entity. +class SimpleCopy { +public: + SimpleCopy(mlir::Location loc, fir::FirOpBuilder &builder, + hlfir::Entity source, llvm::StringRef tempName); + + void pushValue(mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Value value) { + assert(false && "must not be called: value already set"); + } + void resetFetchPosition(mlir::Location loc, fir::FirOpBuilder &builder){}; + mlir::Value fetch(mlir::Location loc, fir::FirOpBuilder &builder) { + return copy.getBase(); + } + void destroy(mlir::Location loc, fir::FirOpBuilder &builder); + +public: + /// Temporary storage for the copy. + hlfir::AssociateOp copy; +}; + +/// Generic wrapper over the different sorts of temporary storages. 
+class TemporaryStorage { +public: + template + TemporaryStorage(T &&impl) : impl{std::forward(impl)} {} + + void pushValue(mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Value value) { + std::visit([&](auto &temp) { temp.pushValue(loc, builder, value); }, impl); + } + void resetFetchPosition(mlir::Location loc, fir::FirOpBuilder &builder) { + std::visit([&](auto &temp) { temp.resetFetchPosition(loc, builder); }, + impl); + } + mlir::Value fetch(mlir::Location loc, fir::FirOpBuilder &builder) { + return std::visit([&](auto &temp) { return temp.fetch(loc, builder); }, + impl); + } + void destroy(mlir::Location loc, fir::FirOpBuilder &builder) { + std::visit([&](auto &temp) { temp.destroy(loc, builder); }, impl); + } + +private: + std::variant impl; +}; } // namespace fir::factory #endif // FORTRAN_OPTIMIZER_BUILDER_TEMPORARYSTORAGE_H diff --git a/flang/lib/Optimizer/Builder/TemporaryStorage.cpp b/flang/lib/Optimizer/Builder/TemporaryStorage.cpp index d707d623bc9c8..b4e01556af086 100644 --- a/flang/lib/Optimizer/Builder/TemporaryStorage.cpp +++ b/flang/lib/Optimizer/Builder/TemporaryStorage.cpp @@ -10,8 +10,8 @@ //===----------------------------------------------------------------------===// #include "flang/Optimizer/Builder/TemporaryStorage.h" +#include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Builder/HLFIRTools.h" -#include "flang/Optimizer/Builder/Runtime/RTBuilder.h" #include "flang/Optimizer/Builder/Todo.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" @@ -133,3 +133,24 @@ hlfir::Entity fir::factory::HomogeneousScalarStack::moveStackAsArrayExpr( auto hlfirExpr = builder.create(loc, temp, mustFree); return hlfir::Entity{hlfirExpr}; } + +//===----------------------------------------------------------------------===// +// fir::factory::SimpleCopy implementation. 
+//===----------------------------------------------------------------------===// + +fir::factory::SimpleCopy::SimpleCopy(mlir::Location loc, + fir::FirOpBuilder &builder, + hlfir::Entity source, + llvm::StringRef tempName) { + // Use hlfir.as_expr and hlfir.associate to create a copy and let + // bufferization deal with how best to make the copy. + if (source.isVariable()) + source = hlfir::Entity{builder.create(loc, source)}; + copy = hlfir::genAssociateExpr(loc, builder, source, + source.getFortranElementType(), tempName); +} + +void fir::factory::SimpleCopy::destroy(mlir::Location loc, + fir::FirOpBuilder &builder) { + builder.create(loc, copy); +} diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp index 0317f83063f5e..1ec3aca640cb4 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp @@ -20,9 +20,11 @@ #include "ScheduleOrderedAssignments.h" #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Builder/HLFIRTools.h" +#include "flang/Optimizer/Builder/TemporaryStorage.h" #include "flang/Optimizer/Builder/Todo.h" #include "flang/Optimizer/Dialect/Support/FIRContext.h" #include "flang/Optimizer/HLFIR/Passes.h" +#include "mlir/IR/Dominance.h" #include "mlir/IR/IRMapping.h" #include "mlir/Transforms/DialectConversion.h" #include "llvm/ADT/SmallSet.h" @@ -106,8 +108,20 @@ class OrderedAssignmentRewriter { currentRun = nullptr; assert(constructStack.empty() && "must exit constructs after a run"); mapper.clear(); + savedInCurrentRunBeforeUse.clear(); } + /// After all runs have been lowered, clean up all the temporary + /// storage that was created (do not call final routines).
+ void cleanupSavedEntities() { + for (auto &temp : savedEntities) + temp.second.destroy(root.getLoc(), builder); + } + + /// Lowered value for an expression, and the original hlfir.yield if any + /// clean-up needs to be cloned after usage. + using ValueAndCleanUp = std::pair>; + private: /// Walk the part of an order assignment tree node that needs /// to be evaluated in the current run. @@ -126,11 +140,16 @@ class OrderedAssignmentRewriter { void post(hlfir::ForallMaskOp); void post(hlfir::WhereOp); void post(hlfir::ElseWhereOp); + /// Enter (and maybe create) the fir.if else block of an ElseWhereOp, + /// but do not generate the elsewhere mask or the new fir.if. + void enterElsewhere(hlfir::ElseWhereOp); /// Is this an assignment to a vector subscripted entity? static bool hasVectorSubscriptedLhs(hlfir::RegionAssignOp regionAssignOp); /// Are they any leaf region in node that must be saved in the current run? - bool mustSavedRegionIn(hlfir::OrderedAssignmentTreeOpInterface node) const; + bool mustSaveRegionIn( + hlfir::OrderedAssignmentTreeOpInterface node, + llvm::SmallVectorImpl &saveEntities) const; /// Should this node be evaluated in the current run? Saving a region in a /// node does not imply the node needs to be evaluated. bool @@ -154,7 +173,7 @@ class OrderedAssignmentRewriter { /// should be done after using the entity. Like, generateYieldedScalarValue, /// this will return the saved value if the region was saved in a previous /// run. - std::pair> + ValueAndCleanUp generateYieldedEntity(mlir::Region &region, std::optional castToType = std::nullopt); @@ -173,8 +192,43 @@ class OrderedAssignmentRewriter { mlir::Value generateMaskedEntity(MaskedArrayExpr &maskedExpr); /// Create a fir.if at the current position inside the where loop nest - /// given a mask expression. - void generateMaskIfOp(MaskedArrayExpr &mask); + /// given the element value of a mask. + void generateMaskIfOp(mlir::Value cdt); + + /// Save a value for subsequent runs.
+ void generateSaveEntity(hlfir::SaveEntity savedEntity, + bool willUseSavedEntityInSameRun); + + /// Get a value if it was saved in this run or a previous run. Returns + /// nullopt if it has not been saved. + std::optional getIfSaved(mlir::Region &region); + + /// Generate code before the loop nest for the current run, if any. + void doBeforeLoopNest(const std::function &callback) { + if (constructStack.empty()) { + callback(); + return; + } + auto insertionPoint = builder.saveInsertionPoint(); + builder.setInsertionPoint(constructStack[0]); + callback(); + builder.restoreInsertionPoint(insertionPoint); + } + + /// Can the current loop nest iteration number be computed? For simplicity, + /// this is true if and only if all the bounds and steps of the fir.do_loop + /// nest dominate the outer loop. The argument is filled with the current + /// loop nest on success. + bool currentLoopNestIterationNumberCanBeComputed( + llvm::SmallVectorImpl &loopNest); + + template + fir::factory::TemporaryStorage *insertSavedEntity(mlir::Region &region, + T &&temp) { + auto inserted = savedEntities.try_emplace(&region, std::forward(temp)); + assert(inserted.second && "temp must have been emplaced"); + return &inserted.first->second; + } fir::FirOpBuilder &builder; @@ -182,6 +236,10 @@ class OrderedAssignmentRewriter { /// operations and the operations that have been cloned in the current run. /// It is reset between two runs. mlir::IRMapping mapper; + /// Dominance info is used to determine if inner loop bounds are all computed + /// before outer loop for the current loop. It does not need to be reset + /// between runs. + mlir::DominanceInfo dominanceInfo; /// Construct stack in the current run. This allows setting back the insertion /// point correctly when leaving a node that requires a fir.do_loop or fir.if /// operation. @@ -189,20 +247,50 @@ class OrderedAssignmentRewriter { /// Current where loop nest, if any.
std::optional whereLoopNest; + /// Map of temporary storage to keep track of saved entities once the run + /// that saves them has been lowered. It is kept in-between runs. + llvm::DenseMap savedEntities; + /// Map holding the values that were saved in the current run and that also + /// need to be used (because their construct will be visited). It is reset + /// after each run. It avoids having to store and fetch in the temporary + /// during the same run, which would require the temporary to have different + /// fetching and storing counters. + llvm::DenseMap savedInCurrentRunBeforeUse; + /// Root of the order assignment tree being lowered. hlfir::OrderedAssignmentTreeOpInterface root; /// Pointer to the current run of the schedule being lowered. hlfir::Run *currentRun = nullptr; + + /// When allocating temporary storage inlined, indicate if the storage should + /// be heap or stack allocated. Temporaries allocated with the runtime are heap + /// allocated by the runtime. + bool allocateOnHeap = true; }; } // namespace void OrderedAssignmentRewriter::walk( hlfir::OrderedAssignmentTreeOpInterface node) { - if (mustSavedRegionIn(node)) - TODO(node.getLoc(), - "creating temporary storage in FORALL or WHERE constructs"); - if (isRequiredInCurrentRun(node) || mlir::isa(node)) { - llvm::TypeSwitch(node.getOperation()) + bool mustVisit = + isRequiredInCurrentRun(node) || mlir::isa(node); + llvm::SmallVector saveEntities; + mlir::Operation *nodeOp = node.getOperation(); + if (mustSaveRegionIn(node, saveEntities)) { + mlir::IRRewriter::InsertPoint insertionPoint; + if (auto elseWhereOp = mlir::dyn_cast(nodeOp)) { + // ElseWhere mask to save must be evaluated inside the fir.if else + // for the previous where/elsewhere (its evaluation must be + // masked by the "pending control mask").
+ insertionPoint = builder.saveInsertionPoint(); + enterElsewhere(elseWhereOp); + } + for (hlfir::SaveEntity saveEntity : saveEntities) + generateSaveEntity(saveEntity, mustVisit); + if (insertionPoint.isSet()) + builder.restoreInsertionPoint(insertionPoint); + } + if (mustVisit) { + llvm::TypeSwitch(nodeOp) .Case( [&](auto concreteOp) { pre(concreteOp); }) @@ -212,7 +300,7 @@ void OrderedAssignmentRewriter::walk( if (auto subNode = mlir::dyn_cast(op)) walk(subNode); - llvm::TypeSwitch(node.getOperation()) + llvm::TypeSwitch(nodeOp) .Case([&](auto concreteOp) { post(concreteOp); }) .Default([](auto) {}); @@ -292,14 +380,11 @@ void OrderedAssignmentRewriter::pre(hlfir::RegionAssignOp regionAssignOp) { generateCleanupIfAny(oldLhsYield); } -void OrderedAssignmentRewriter::generateMaskIfOp(MaskedArrayExpr &mask) { - assert(whereLoopNest.has_value() && "must be inside a WHERE"); - mlir::Location loc = mask.loc; - hlfir::Entity maskVal{generateMaskedEntity(mask)}; - maskVal = hlfir::loadTrivialScalar(loc, builder, maskVal); - mlir::Value cdt = builder.createConvert(loc, builder.getI1Type(), maskVal); - // Else region is added when visiting nested hlfir.elseWhereOp, if any. 
- auto ifOp = builder.create(loc, std::nullopt, cdt, +void OrderedAssignmentRewriter::generateMaskIfOp(mlir::Value cdt) { + mlir::Location loc = cdt.getLoc(); + cdt = hlfir::loadTrivialScalar(loc, builder, hlfir::Entity{cdt}); + cdt = builder.createConvert(loc, builder.getI1Type(), cdt); + auto ifOp = builder.create(cdt.getLoc(), std::nullopt, cdt, /*withElseRegion=*/false); constructStack.push_back(ifOp.getOperation()); builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); @@ -307,18 +392,46 @@ void OrderedAssignmentRewriter::generateMaskIfOp(MaskedArrayExpr &mask) { void OrderedAssignmentRewriter::pre(hlfir::WhereOp whereOp) { mlir::Location loc = whereOp.getLoc(); - MaskedArrayExpr mask(loc, whereOp.getMaskRegion()); if (!whereLoopNest) { - // Start a loop nest iterating on the shape of the where mask. + // This is the top-level WHERE. Start a loop nest iterating on the shape of + // the where mask. + if (auto maybeSaved = getIfSaved(whereOp.getMaskRegion())) { + // Use the saved value to get the shape and condition element. + hlfir::Entity savedMask{maybeSaved->first}; + mlir::Value shape = hlfir::genShape(loc, builder, savedMask); + whereLoopNest = hlfir::genLoopNest(loc, builder, shape); + constructStack.push_back(whereLoopNest->outerLoop.getOperation()); + builder.setInsertionPointToStart(whereLoopNest->innerLoop.getBody()); + mlir::Value cdt = hlfir::getElementAt(loc, builder, savedMask, + whereLoopNest->oneBasedIndices); + generateMaskIfOp(cdt); + if (maybeSaved->second) { + // If this is the same run as the one that saved the value, the clean-up + // was left-over to be done now. + auto insertionPoint = builder.saveInsertionPoint(); + builder.setInsertionPointAfter(whereLoopNest->outerLoop); + generateCleanupIfAny(maybeSaved->second); + builder.restoreInsertionPoint(insertionPoint); + } + return; + } + // The mask was not evaluated yet or can be safely re-evaluated. 
+ MaskedArrayExpr mask(loc, whereOp.getMaskRegion()); mask.generateNoneElementalPart(builder, mapper); mlir::Value shape = mask.generateShape(builder, mapper); whereLoopNest = hlfir::genLoopNest(loc, builder, shape); constructStack.push_back(whereLoopNest->outerLoop.getOperation()); builder.setInsertionPointToStart(whereLoopNest->innerLoop.getBody()); + mlir::Value cdt = generateMaskedEntity(mask); + generateMaskIfOp(cdt); + return; } + // Where Loops have been already created by a parent WHERE. // Generate a fir.if with the value of the current element of the mask - // inside the loops. - generateMaskIfOp(mask); + // inside the loops. The case where the mask was saved is handled in the + // generateYieldedScalarValue call. + mlir::Value cdt = generateYieldedScalarValue(whereOp.getMaskRegion()); + generateMaskIfOp(cdt); } void OrderedAssignmentRewriter::post(hlfir::WhereOp whereOp) { @@ -333,20 +446,27 @@ void OrderedAssignmentRewriter::post(hlfir::WhereOp whereOp) { } } -void OrderedAssignmentRewriter::pre(hlfir::ElseWhereOp elseWhereOp) { - assert(!constructStack.empty() && "cannot be empty inside a where"); - mlir::Location loc = elseWhereOp.getLoc(); +void OrderedAssignmentRewriter::enterElsewhere(hlfir::ElseWhereOp elseWhereOp) { // Create an "else" region for the current where/elsewhere fir.if. 
auto ifOp = mlir::dyn_cast(constructStack.back()); - assert(ifOp && ifOp.getElseRegion().empty() && "must be an if without else"); - builder.createBlock(&ifOp.getElseRegion()); - auto end = builder.create(loc); - builder.setInsertionPoint(end); + assert(ifOp && "must be an if"); + if (ifOp.getElseRegion().empty()) { + mlir::Location loc = elseWhereOp.getLoc(); + builder.createBlock(&ifOp.getElseRegion()); + auto end = builder.create(loc); + builder.setInsertionPoint(end); + } else { + builder.setInsertionPoint(&ifOp.getElseRegion().back().back()); + } +} + +void OrderedAssignmentRewriter::pre(hlfir::ElseWhereOp elseWhereOp) { + enterElsewhere(elseWhereOp); if (elseWhereOp.getMaskRegion().empty()) return; // Create new nested fir.if with elsewhere mask if any. - MaskedArrayExpr mask(loc, elseWhereOp.getMaskRegion()); - generateMaskIfOp(mask); + mlir::Value cdt = generateYieldedScalarValue(elseWhereOp.getMaskRegion()); + generateMaskIfOp(cdt); } void OrderedAssignmentRewriter::post(hlfir::ElseWhereOp elseWhereOp) { @@ -370,14 +490,51 @@ static bool isForallIndex(mlir::Value value) { return value.getDefiningOp(); } -std::pair> +static OrderedAssignmentRewriter::ValueAndCleanUp +castIfNeeded(mlir::Location loc, fir::FirOpBuilder &builder, + OrderedAssignmentRewriter::ValueAndCleanUp valueAndCleanUp, + std::optional castToType) { + if (!castToType.has_value()) + return valueAndCleanUp; + mlir::Value cast = + builder.createConvert(loc, *castToType, valueAndCleanUp.first); + return {cast, valueAndCleanUp.second}; +} + +std::optional +OrderedAssignmentRewriter::getIfSaved(mlir::Region &region) { + mlir::Location loc = region.getParentOp()->getLoc(); + // If the region was saved in the same run, use the value that was evaluated + // instead of fetching the temp, and do clean-up, if any, that were delayed. + // This is done to avoid requiring the temporary stack to have different + // fetching and storing counters, and also because it produces slightly better + // code.
+ if (auto savedInSameRun = savedInCurrentRunBeforeUse.find(&region); + savedInSameRun != savedInCurrentRunBeforeUse.end()) + return savedInSameRun->second; + // If the region was saved in a previous run, fetch the saved value. + if (auto temp = savedEntities.find(&region); temp != savedEntities.end()) { + doBeforeLoopNest([&]() { temp->second.resetFetchPosition(loc, builder); }); + return ValueAndCleanUp{temp->second.fetch(loc, builder), std::nullopt}; + } + return std::nullopt; +} + +OrderedAssignmentRewriter::ValueAndCleanUp OrderedAssignmentRewriter::generateYieldedEntity( mlir::Region &region, std::optional castToType) { - // TODO: if the region was saved, use that instead of generating code again. + mlir::Location loc = region.getParentOp()->getLoc(); + if (auto maybeValueAndCleanUp = getIfSaved(region)) + return castIfNeeded(loc, builder, *maybeValueAndCleanUp, castToType); + // Otherwise, evaluate the region now. + + // Masked expression must not evaluate the elemental parts that are masked, + // they have custom code generation.
if (whereLoopNest.has_value()) { - mlir::Location loc = region.getParentOp()->getLoc(); - return {generateMaskedEntity(loc, region), std::nullopt}; + mlir::Value maskedValue = generateMaskedEntity(loc, region); + return castIfNeeded(loc, builder, {maskedValue, std::nullopt}, castToType); } + assert(region.hasOneBlock() && "region must contain one block"); auto oldYield = mlir::dyn_cast_or_null( region.back().getOperations().back()); @@ -434,7 +591,9 @@ OrderedAssignmentRewriter::generateYieldedEntity( mlir::Value OrderedAssignmentRewriter::generateYieldedScalarValue( mlir::Region &region, std::optional castToType) { + mlir::Location loc = region.getParentOp()->getLoc(); auto [value, maybeYield] = generateYieldedEntity(region, castToType); + value = hlfir::loadTrivialScalar(loc, builder, hlfir::Entity{value}); assert(fir::isa_trivial(value.getType()) && "not a trivial scalar value"); generateCleanupIfAny(maybeYield); return value; @@ -468,7 +627,8 @@ void OrderedAssignmentRewriter::generateCleanupIfAny( assert(maybeYield->getCleanup().hasOneBlock() && "region must contain one block"); for (auto &op : maybeYield->getCleanup().back().getOperations()) - builder.clone(op, mapper); + if (!mlir::isa(op)) + builder.clone(op, mapper); } } @@ -478,14 +638,15 @@ bool OrderedAssignmentRewriter::hasVectorSubscriptedLhs( regionAssignOp.getLhsRegion().back().back()); } -bool OrderedAssignmentRewriter::mustSavedRegionIn( - hlfir::OrderedAssignmentTreeOpInterface node) const { +bool OrderedAssignmentRewriter::mustSaveRegionIn( + hlfir::OrderedAssignmentTreeOpInterface node, + llvm::SmallVectorImpl &saveEntities) const { for (auto &action : currentRun->actions) if (hlfir::SaveEntity *savedEntity = std::get_if(&action)) if (node.getOperation() == savedEntity->yieldRegion->getParentOp()) - return true; - return false; + saveEntities.push_back(*savedEntity); + return !saveEntities.empty(); } bool OrderedAssignmentRewriter::isRequiredInCurrentRun( @@ -634,6 +795,125 @@ void
MaskedArrayExpr::generateNoneElementalCleanupIfAny( } } +static bool isLeftHandSide(mlir::Region &region) { + auto assign = mlir::dyn_cast(region.getParentOp()); + return assign && (&assign.getLhsRegion() == &region); +} + +bool OrderedAssignmentRewriter::currentLoopNestIterationNumberCanBeComputed( + llvm::SmallVectorImpl &loopNest) { + if (constructStack.empty()) + return true; + mlir::Operation *outerLoop = constructStack[0]; + mlir::Operation *currentConstruct = constructStack.back(); + // Loop through the loops until the outer construct is met, and test if the + // loop operands dominate the outer construct. + while (currentConstruct) { + if (auto doLoop = mlir::dyn_cast(currentConstruct)) { + if (llvm::any_of(doLoop->getOperands(), [&](mlir::Value value) { + return !dominanceInfo.properlyDominates(value, outerLoop); + })) { + return false; + } + loopNest.push_back(doLoop); + } + if (currentConstruct == outerLoop) + currentConstruct = nullptr; + else + currentConstruct = currentConstruct->getParentOp(); + } + return true; +} + +static mlir::Value +computeLoopNestIterationNumber(mlir::Location loc, fir::FirOpBuilder &builder, + llvm::ArrayRef loopNest) { + mlir::Value loopExtent; + for (fir::DoLoopOp doLoop : loopNest) { + mlir::Value extent = builder.genExtentFromTriplet( + loc, doLoop.getLowerBound(), doLoop.getUpperBound(), doLoop.getStep(), + builder.getIndexType()); + if (!loopExtent) + loopExtent = extent; + else + loopExtent = builder.create(loc, loopExtent, extent); + } + assert(loopExtent && "loopNest must not be empty"); + return loopExtent; +} + +void OrderedAssignmentRewriter::generateSaveEntity( + hlfir::SaveEntity savedEntity, bool willUseSavedEntityInSameRun) { + mlir::Region &region = *savedEntity.yieldRegion; + mlir::Location loc = region.getParentOp()->getLoc(); + + if (!mlir::isa(region.back().back())) + TODO(loc, "creating temporary storage for vector subscripted LHS"); + + // Evaluate the region inside the loop nest (if any).
+ auto [clonedValue, oldYield] = generateYieldedEntity(region); + hlfir::Entity entity{clonedValue}; + if (isLeftHandSide(region)) // Need to save the address, not the values. + TODO(loc, "creating temporary storage for LHS"); + else + entity = hlfir::loadTrivialScalar(loc, builder, entity); + mlir::Type entityType = entity.getType(); + + static constexpr char tempName[] = ".tmp.forall"; + if (constructStack.empty()) { + // Value evaluated outside of any loops (this may be the first MASK of a + // WHERE construct, or an LHS/RHS temp of hlfir.region_assign outside of + // WHERE/FORALL). + insertSavedEntity(region, + fir::factory::SimpleCopy(loc, builder, entity, tempName)); + } else { + // Need to create a temporary for values computed inside loops. + // Create temporary storage outside of the loop nest given the entity + // type (and the loop context). + fir::factory::TemporaryStorage *temp; + llvm::SmallVector loopNest; + bool loopShapeCanBePreComputed = + currentLoopNestIterationNumberCanBeComputed(loopNest); + doBeforeLoopNest([&] { + /// For simple scalars inside loops whose total iteration number can be + /// pre-computed, create a rank-1 array outside of the loops. It will be + /// assigned/fetched inside the loops like a normal Fortran array given + /// the iteration count. + if (loopShapeCanBePreComputed && fir::isa_trivial(entityType)) { + mlir::Value loopExtent = + computeLoopNestIterationNumber(loc, builder, loopNest); + auto sequenceType = + builder.getVarLenSeqTy(entityType).cast(); + temp = insertSavedEntity(region, + fir::factory::HomogeneousScalarStack{ + loc, builder, sequenceType, loopExtent, + /*lenParams=*/{}, allocateOnHeap, + /*stackThroughLoops=*/true, tempName}); + + } else { + // If the number of iteration is not known, or if the values at each + // iterations are values that may have different shape, type parameters + // or dynamic type, use the runtime to create and manage a stack-like + // temporary. 
+ TODO(loc, "use runtime to create temporary storage in FORALL or WHERE"); + } + }); + // Inside the loop nest (and any fir.if if there are active masks), copy + // the value to the temp and do clean-ups for the value if any. + temp->pushValue(loc, builder, entity); + } + + // Delay the clean-up if the entity will be used in the same run (i.e., the + // parent construct will be visited and needs to be lowered). + if (willUseSavedEntityInSameRun) { + auto inserted = + savedInCurrentRunBeforeUse.try_emplace(&region, entity, oldYield); + assert(inserted.second && "entity must have been emplaced"); + } else { + generateCleanupIfAny(oldYield); + } +} + /// Lower an ordered assignment tree to fir.do_loop and hlfir.assign given /// a schedule. static void lower(hlfir::OrderedAssignmentTreeOpInterface root, @@ -643,6 +923,7 @@ static void lower(hlfir::OrderedAssignmentTreeOpInterface root, OrderedAssignmentRewriter assignmentRewriter(builder, root); for (auto &run : schedule) assignmentRewriter.lowerRun(run); + assignmentRewriter.cleanupSavedEntities(); } /// Shared rewrite entry point for all the ordered assignment tree root diff --git a/flang/test/HLFIR/order_assignments/impure-where.fir b/flang/test/HLFIR/order_assignments/impure-where.fir new file mode 100644 index 0000000000000..537fd48282cf8 --- /dev/null +++ b/flang/test/HLFIR/order_assignments/impure-where.fir @@ -0,0 +1,73 @@ +// Test code generation of hlfir.where/hlfir.elsewhere when an +// "impure" mask is used and several runs are needed. The mask +// must be saved so that the impure function is only evaluated once.
+// RUN: fir-opt %s --lower-hlfir-ordered-assignments | FileCheck %s + +func.func private @impure() -> !fir.heap>> +func.func @test_elsewhere_impure_mask(%x: !fir.ref>, %y: !fir.ref>, %z: !fir.ref>, %mask: !fir.ref>>) { + %c-1 = arith.constant -1 : index + %c1 = arith.constant 1 : index + %c10 = arith.constant 10 : index + %1 = fir.shape %c10 : (index) -> !fir.shape<1> + hlfir.where { + hlfir.yield %mask : !fir.ref>> + } do { + hlfir.elsewhere mask { + %mask2 = fir.call @impure() : () -> !fir.heap>> + hlfir.yield %mask2 : !fir.heap>> cleanup { + fir.freemem %mask2 : !fir.heap>> + } + } do { + hlfir.region_assign { + hlfir.yield %y : !fir.ref> + } to { + hlfir.yield %x : !fir.ref> + } + hlfir.region_assign { + hlfir.yield %x : !fir.ref> + } to { + hlfir.yield %z : !fir.ref> + } + } + } + return +} +// CHECK-LABEL: func.func @test_elsewhere_impure_mask( +// CHECK: %[[VAL_12:.*]] = fir.call @impure() : () -> !fir.heap>> +// CHECK: %[[VAL_21:.*]] = fir.allocmem !fir.array> +// CHECK: %[[VAL_23:.*]]:2 = hlfir.declare %[[VAL_21]](%{{.*}}) {uniq_name = ".tmp.forall"} +// CHECK: fir.do_loop +// CHECK: fir.if {{.*}} { +// CHECK: } else { +// CHECK: %[[VAL_28:.*]] = hlfir.designate %[[VAL_12]] (%{{.*}}) +// CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_28]] : !fir.ref> +// CHECK: %[[VAL_32:.*]] = hlfir.designate %[[VAL_23]]#0 (%{{.*}}) : (!fir.box>>, index) -> !fir.ref> +// CHECK: hlfir.assign %[[VAL_29]] to %[[VAL_32]] : !fir.logical<4>, !fir.ref> +// CHECK: } +// CHECK: } +// CHECK-NOT: fir.call @impure +// CHECK: fir.do_loop +// CHECK: fir.if {{.*}} { +// CHECK: } else { +// CHECK: %[[VAL_42:.*]] = hlfir.designate %[[VAL_23]]#0 (%{{.*}}) : (!fir.box>>, index) -> !fir.ref> +// CHECK: %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref> +// CHECK: %[[VAL_44:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1 +// CHECK: fir.if %[[VAL_44]] { +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK-NOT: fir.call @impure +// CHECK: fir.do_loop +// CHECK: fir.if {{.*}} { +// CHECK: } else { 
+// CHECK: %[[VAL_52:.*]] = hlfir.designate %[[VAL_23]]#0 (%{{.*}}) : (!fir.box>>, index) -> !fir.ref> +// CHECK: %[[VAL_53:.*]] = fir.load %[[VAL_52]] : !fir.ref> +// CHECK: %[[VAL_54:.*]] = fir.convert %[[VAL_53]] : (!fir.logical<4>) -> i1 +// CHECK: fir.if %[[VAL_54]] { +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: fir.freemem %[[VAL_21]] : !fir.heap>> +// CHECK: fir.freemem %[[VAL_12]] : !fir.heap>> +// CHECK: return +// CHECK: } diff --git a/flang/test/HLFIR/order_assignments/inlined-stack-temp.fir b/flang/test/HLFIR/order_assignments/inlined-stack-temp.fir new file mode 100644 index 0000000000000..6566620a51bfc --- /dev/null +++ b/flang/test/HLFIR/order_assignments/inlined-stack-temp.fir @@ -0,0 +1,332 @@ +// Test code generation of hlfir.forall and hlfir.where when temporary +// storage is needed and can be allocated inline. +// RUN: fir-opt %s --lower-hlfir-ordered-assignments | FileCheck %s + +func.func @test_scalar_save(%arg0: !fir.box>) { + %c10_i32 = arith.constant 10 : i32 + %c1_i32 = arith.constant 1 : i32 + %0:2 = hlfir.declare %arg0 {uniq_name = "x"} : (!fir.box>) -> (!fir.box>, !fir.box>) + hlfir.forall lb { + hlfir.yield %c1_i32 : i32 + } ub { + hlfir.yield %c10_i32 : i32 + } (%arg1: i32) { + hlfir.region_assign { + %1 = fir.convert %arg1 : (i32) -> i64 + %2 = hlfir.designate %0#0 (%1) : (!fir.box>, i64) -> !fir.ref + %3 = fir.load %2 : !fir.ref + hlfir.yield %3 : i32 + } to { + %1 = arith.addi %arg1, %c1_i32 : i32 + %2 = fir.convert %1 : (i32) -> i64 + %3 = hlfir.designate %0#0 (%2) : (!fir.box>, i64) -> !fir.ref + hlfir.yield %3 : !fir.ref + } + } + return +} +// CHECK-LABEL: func.func @test_scalar_save( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>) { +// CHECK: %[[VAL_1:.*]] = fir.alloca index +// CHECK: %[[VAL_2:.*]] = arith.constant 10 : i32 +// CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32 +// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box>) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_5:.*]] = fir.convert 
%[[VAL_3]] : (i32) -> index +// CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (i32) -> index +// CHECK: %[[VAL_7:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_8:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_9:.*]] = arith.subi %[[VAL_6]], %[[VAL_5]] : index +// CHECK: %[[VAL_10:.*]] = arith.addi %[[VAL_9]], %[[VAL_7]] : index +// CHECK: %[[VAL_11:.*]] = arith.divsi %[[VAL_10]], %[[VAL_7]] : index +// CHECK: %[[VAL_12:.*]] = arith.cmpi sgt, %[[VAL_11]], %[[VAL_8]] : index +// CHECK: %[[VAL_13:.*]] = arith.select %[[VAL_12]], %[[VAL_11]], %[[VAL_8]] : index +// CHECK: %[[VAL_14:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_15:.*]] = arith.constant 1 : index +// CHECK: fir.store %[[VAL_14]] to %[[VAL_1]] : !fir.ref +// CHECK: %[[VAL_16:.*]] = fir.allocmem !fir.array, %[[VAL_13]] {bindc_name = ".tmp.forall", uniq_name = ""} +// CHECK: %[[VAL_17:.*]] = fir.shape %[[VAL_13]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_18:.*]]:2 = hlfir.declare %[[VAL_16]](%[[VAL_17]]) {uniq_name = ".tmp.forall"} : (!fir.heap>, !fir.shape<1>) -> (!fir.box>, !fir.heap>) +// CHECK: fir.do_loop %[[VAL_19:.*]] = %[[VAL_5]] to %[[VAL_6]] step %[[VAL_7]] { +// CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (index) -> i32 +// CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> i64 +// CHECK: %[[VAL_22:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_21]]) : (!fir.box>, i64) -> !fir.ref +// CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_22]] : !fir.ref +// CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_1]] : !fir.ref +// CHECK: %[[VAL_25:.*]] = arith.addi %[[VAL_24]], %[[VAL_15]] : index +// CHECK: fir.store %[[VAL_25]] to %[[VAL_1]] : !fir.ref +// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_18]]#0 (%[[VAL_24]]) : (!fir.box>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_23]] to %[[VAL_26]] : i32, !fir.ref +// CHECK: } +// CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_3]] : (i32) -> index +// CHECK: %[[VAL_28:.*]] = fir.convert %[[VAL_2]] : (i32) -> index +// CHECK: %[[VAL_29:.*]] = 
arith.constant 1 : index +// CHECK: fir.store %[[VAL_14]] to %[[VAL_1]] : !fir.ref +// CHECK: fir.do_loop %[[VAL_30:.*]] = %[[VAL_27]] to %[[VAL_28]] step %[[VAL_29]] { +// CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (index) -> i32 +// CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_1]] : !fir.ref +// CHECK: %[[VAL_33:.*]] = arith.addi %[[VAL_32]], %[[VAL_15]] : index +// CHECK: fir.store %[[VAL_33]] to %[[VAL_1]] : !fir.ref +// CHECK: %[[VAL_34:.*]] = hlfir.designate %[[VAL_18]]#0 (%[[VAL_32]]) : (!fir.box>, index) -> !fir.ref +// CHECK: %[[VAL_35:.*]] = fir.load %[[VAL_34]] : !fir.ref +// CHECK: %[[VAL_36:.*]] = arith.addi %[[VAL_31]], %[[VAL_3]] : i32 +// CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (i32) -> i64 +// CHECK: %[[VAL_38:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_37]]) : (!fir.box>, i64) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_35]] to %[[VAL_38]] : i32, !fir.ref +// CHECK: } +// CHECK: fir.freemem %[[VAL_16]] : !fir.heap> +// CHECK: return +// CHECK: } + +func.func @mask_and_rhs_conflict(%arg0: !fir.box>) { + %c42_i32 = arith.constant 42 : i32 + %c10_i32 = arith.constant 10 : i32 + %c1_i32 = arith.constant 1 : i32 + %0:2 = hlfir.declare %arg0 {uniq_name = "x"} : (!fir.box>) -> (!fir.box>, !fir.box>) + hlfir.forall lb { + hlfir.yield %c1_i32 : i32 + } ub { + hlfir.yield %c10_i32 : i32 + } (%arg1: i32) { + hlfir.forall_mask { + %1 = fir.convert %arg1 : (i32) -> i64 + %2 = hlfir.designate %0#0 (%1) : (!fir.box>, i64) -> !fir.ref + %3 = fir.load %2 : !fir.ref + %4 = arith.cmpi sgt, %3, %c42_i32 : i32 + hlfir.yield %4 : i1 + } do { + hlfir.region_assign { + %1 = fir.convert %arg1 : (i32) -> i64 + %2 = hlfir.designate %0#0 (%1) : (!fir.box>, i64) -> !fir.ref + %3 = fir.load %2 : !fir.ref + hlfir.yield %3 : i32 + } to { + %1 = arith.addi %arg1, %c1_i32 : i32 + %2 = fir.convert %1 : (i32) -> i64 + %3 = hlfir.designate %0#0 (%2) : (!fir.box>, i64) -> !fir.ref + hlfir.yield %3 : !fir.ref + } + } + } + return +} +// CHECK-LABEL: func.func 
@mask_and_rhs_conflict( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>) { +// CHECK: %[[VAL_1:.*]] = fir.alloca index +// CHECK: %[[VAL_2:.*]] = fir.alloca index +// CHECK: %[[VAL_3:.*]] = arith.constant 42 : i32 +// CHECK: %[[VAL_4:.*]] = arith.constant 10 : i32 +// CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box>) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_5]] : (i32) -> index +// CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_4]] : (i32) -> index +// CHECK: %[[VAL_9:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_10:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_11:.*]] = arith.subi %[[VAL_8]], %[[VAL_7]] : index +// CHECK: %[[VAL_12:.*]] = arith.addi %[[VAL_11]], %[[VAL_9]] : index +// CHECK: %[[VAL_13:.*]] = arith.divsi %[[VAL_12]], %[[VAL_9]] : index +// CHECK: %[[VAL_14:.*]] = arith.cmpi sgt, %[[VAL_13]], %[[VAL_10]] : index +// CHECK: %[[VAL_15:.*]] = arith.select %[[VAL_14]], %[[VAL_13]], %[[VAL_10]] : index +// CHECK: %[[VAL_16:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_17:.*]] = arith.constant 1 : index +// CHECK: fir.store %[[VAL_16]] to %[[VAL_2]] : !fir.ref +// CHECK: %[[VAL_18:.*]] = fir.allocmem !fir.array, %[[VAL_15]] {bindc_name = ".tmp.forall", uniq_name = ""} +// CHECK: %[[VAL_19:.*]] = fir.shape %[[VAL_15]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_18]](%[[VAL_19]]) {uniq_name = ".tmp.forall"} : (!fir.heap>, !fir.shape<1>) -> (!fir.box>, !fir.heap>) +// CHECK: %[[VAL_21:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_22:.*]] = arith.subi %[[VAL_8]], %[[VAL_7]] : index +// CHECK: %[[VAL_23:.*]] = arith.addi %[[VAL_22]], %[[VAL_9]] : index +// CHECK: %[[VAL_24:.*]] = arith.divsi %[[VAL_23]], %[[VAL_9]] : index +// CHECK: %[[VAL_25:.*]] = arith.cmpi sgt, %[[VAL_24]], %[[VAL_21]] : index +// CHECK: %[[VAL_26:.*]] = arith.select %[[VAL_25]], %[[VAL_24]], %[[VAL_21]] : index +// CHECK: %[[VAL_27:.*]] = 
arith.constant 1 : index +// CHECK: %[[VAL_28:.*]] = arith.constant 1 : index +// CHECK: fir.store %[[VAL_27]] to %[[VAL_1]] : !fir.ref +// CHECK: %[[VAL_29:.*]] = fir.allocmem !fir.array, %[[VAL_26]] {bindc_name = ".tmp.forall", uniq_name = ""} +// CHECK: %[[VAL_30:.*]] = fir.shape %[[VAL_26]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_31:.*]]:2 = hlfir.declare %[[VAL_29]](%[[VAL_30]]) {uniq_name = ".tmp.forall"} : (!fir.heap>, !fir.shape<1>) -> (!fir.box>, !fir.heap>) +// CHECK: fir.do_loop %[[VAL_32:.*]] = %[[VAL_7]] to %[[VAL_8]] step %[[VAL_9]] { +// CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (index) -> i32 +// CHECK: %[[VAL_34:.*]] = fir.convert %[[VAL_33]] : (i32) -> i64 +// CHECK: %[[VAL_35:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_34]]) : (!fir.box>, i64) -> !fir.ref +// CHECK: %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref +// CHECK: %[[VAL_37:.*]] = arith.cmpi sgt, %[[VAL_36]], %[[VAL_3]] : i32 +// CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_2]] : !fir.ref +// CHECK: %[[VAL_39:.*]] = arith.addi %[[VAL_38]], %[[VAL_17]] : index +// CHECK: fir.store %[[VAL_39]] to %[[VAL_2]] : !fir.ref +// CHECK: %[[VAL_40:.*]] = hlfir.designate %[[VAL_20]]#0 (%[[VAL_38]]) : (!fir.box>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_37]] to %[[VAL_40]] : i1, !fir.ref +// CHECK: fir.if %[[VAL_37]] { +// CHECK: %[[VAL_41:.*]] = fir.convert %[[VAL_33]] : (i32) -> i64 +// CHECK: %[[VAL_42:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_41]]) : (!fir.box>, i64) -> !fir.ref +// CHECK: %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref +// CHECK: %[[VAL_44:.*]] = fir.load %[[VAL_1]] : !fir.ref +// CHECK: %[[VAL_45:.*]] = arith.addi %[[VAL_44]], %[[VAL_28]] : index +// CHECK: fir.store %[[VAL_45]] to %[[VAL_1]] : !fir.ref +// CHECK: %[[VAL_46:.*]] = hlfir.designate %[[VAL_31]]#0 (%[[VAL_44]]) : (!fir.box>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_43]] to %[[VAL_46]] : i32, !fir.ref +// CHECK: } +// CHECK: } +// CHECK: %[[VAL_47:.*]] = fir.convert %[[VAL_5]] : (i32) 
-> index +// CHECK: %[[VAL_48:.*]] = fir.convert %[[VAL_4]] : (i32) -> index +// CHECK: %[[VAL_49:.*]] = arith.constant 1 : index +// CHECK: fir.store %[[VAL_16]] to %[[VAL_2]] : !fir.ref +// CHECK: fir.store %[[VAL_27]] to %[[VAL_1]] : !fir.ref +// CHECK: fir.do_loop %[[VAL_50:.*]] = %[[VAL_47]] to %[[VAL_48]] step %[[VAL_49]] { +// CHECK: %[[VAL_51:.*]] = fir.convert %[[VAL_50]] : (index) -> i32 +// CHECK: %[[VAL_52:.*]] = fir.load %[[VAL_2]] : !fir.ref +// CHECK: %[[VAL_53:.*]] = arith.addi %[[VAL_52]], %[[VAL_17]] : index +// CHECK: fir.store %[[VAL_53]] to %[[VAL_2]] : !fir.ref +// CHECK: %[[VAL_54:.*]] = hlfir.designate %[[VAL_20]]#0 (%[[VAL_52]]) : (!fir.box>, index) -> !fir.ref +// CHECK: %[[VAL_55:.*]] = fir.load %[[VAL_54]] : !fir.ref +// CHECK: fir.if %[[VAL_55]] { +// CHECK: %[[VAL_56:.*]] = fir.load %[[VAL_1]] : !fir.ref +// CHECK: %[[VAL_57:.*]] = arith.addi %[[VAL_56]], %[[VAL_28]] : index +// CHECK: fir.store %[[VAL_57]] to %[[VAL_1]] : !fir.ref +// CHECK: %[[VAL_58:.*]] = hlfir.designate %[[VAL_31]]#0 (%[[VAL_56]]) : (!fir.box>, index) -> !fir.ref +// CHECK: %[[VAL_59:.*]] = fir.load %[[VAL_58]] : !fir.ref +// CHECK: %[[VAL_60:.*]] = arith.addi %[[VAL_51]], %[[VAL_5]] : i32 +// CHECK: %[[VAL_61:.*]] = fir.convert %[[VAL_60]] : (i32) -> i64 +// CHECK: %[[VAL_62:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_61]]) : (!fir.box>, i64) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_59]] to %[[VAL_62]] : i32, !fir.ref +// CHECK: } +// CHECK: } +// CHECK-DAG: fir.freemem %[[VAL_18]] : !fir.heap> +// CHECK-DAG: fir.freemem %[[VAL_29]] : !fir.heap> +// CHECK: return +// CHECK: } + +func.func @test_where_mask_save(%arg0: !fir.box>) { + %c0 = arith.constant 0 : index + %c42_i32 = arith.constant 42 : i32 + %0:2 = hlfir.declare %arg0 {uniq_name = "x"} : (!fir.box>) -> (!fir.box>, !fir.box>) + hlfir.where { + %1:3 = fir.box_dims %0#0, %c0 : (!fir.box>, index) -> (index, index, index) + %2 = fir.shape %1#1 : (index) -> !fir.shape<1> + %3 = hlfir.elemental %2 : 
(!fir.shape<1>) -> !hlfir.expr> { + ^bb0(%arg1: index): + %4 = hlfir.designate %0#0 (%arg1) : (!fir.box>, index) -> !fir.ref + %5 = fir.load %4 : !fir.ref + %6 = arith.cmpi sgt, %5, %c42_i32 : i32 + %7 = fir.convert %6 : (i1) -> !fir.logical<4> + hlfir.yield_element %7 : !fir.logical<4> + } + hlfir.yield %3 : !hlfir.expr> cleanup { + hlfir.destroy %3 : !hlfir.expr> + } + } do { + hlfir.region_assign { + hlfir.yield %c42_i32 : i32 + } to { + hlfir.yield %0#0 : !fir.box> + } + } + return +} +// CHECK-LABEL: func.func @test_where_mask_save( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>) { +// CHECK: %[[VAL_1:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_2:.*]] = arith.constant 42 : i32 +// CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box>) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_4:.*]]:3 = fir.box_dims %[[VAL_3]]#0, %[[VAL_1]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]]#1 : (index) -> !fir.shape<1> +// CHECK: %[[VAL_6:.*]] = hlfir.elemental %[[VAL_5]] : (!fir.shape<1>) -> !hlfir.expr> { +// CHECK: ^bb0(%[[VAL_7:.*]]: index): +// CHECK: %[[VAL_8:.*]] = hlfir.designate %[[VAL_3]]#0 (%[[VAL_7]]) : (!fir.box>, index) -> !fir.ref +// CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_8]] : !fir.ref +// CHECK: %[[VAL_10:.*]] = arith.cmpi sgt, %[[VAL_9]], %[[VAL_2]] : i32 +// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (i1) -> !fir.logical<4> +// CHECK: hlfir.yield_element %[[VAL_11]] : !fir.logical<4> +// CHECK: } +// CHECK: %[[VAL_12:.*]]:3 = hlfir.associate %[[VAL_13:.*]](%[[VAL_5]]) {uniq_name = ".tmp.forall"} : (!hlfir.expr>, !fir.shape<1>) -> (!fir.box>>, !fir.ref>>, i1) +// CHECK: hlfir.destroy %[[VAL_13]] : !hlfir.expr> +// CHECK: %[[VAL_14:.*]] = arith.constant 1 : index +// CHECK: fir.do_loop %[[VAL_15:.*]] = %[[VAL_14]] to %[[VAL_4]]#1 step %[[VAL_14]] { +// CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_12]]#0 (%[[VAL_15]]) : (!fir.box>>, index) -> !fir.ref> +// CHECK: %[[VAL_17:.*]] = 
fir.load %[[VAL_16]] : !fir.ref> +// CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1 +// CHECK: fir.if %[[VAL_18]] { +// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_3]]#0 (%[[VAL_15]]) : (!fir.box>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_2]] to %[[VAL_19]] : i32, !fir.ref +// CHECK: } +// CHECK: } +// CHECK: hlfir.end_associate %[[VAL_12]]#1, %[[VAL_12]]#2 : !fir.ref>>, i1 +// CHECK: return +// CHECK: } + +func.func @test_where_rhs_save(%x: !fir.ref>, %mask: !fir.ref>>) { + %c-1 = arith.constant -1 : index + %c1 = arith.constant 1 : index + %c10 = arith.constant 10 : index + %1 = fir.shape %c10 : (index) -> !fir.shape<1> + hlfir.where { + hlfir.yield %mask : !fir.ref>> + } do { + hlfir.region_assign { + %2 = hlfir.designate %x (%c10:%c1:%c-1) shape %1 : +(!fir.ref>, index, index, index, !fir.shape<1>) -> !fir.ref> + hlfir.yield %2 : !fir.ref> + } to { + hlfir.yield %x : !fir.ref> + } + } + return +} +// CHECK-LABEL: func.func @test_where_rhs_save( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>, +// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref>>) { +// CHECK: %[[VAL_2:.*]] = fir.alloca index +// CHECK: %[[VAL_3:.*]] = arith.constant -1 : index +// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_5:.*]] = arith.constant 10 : index +// CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_7:.*]] = arith.constant 10 : index +// CHECK: %[[VAL_8:.*]] = fir.shape %[[VAL_7]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_9:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_5]]:%[[VAL_4]]:%[[VAL_3]]) shape %[[VAL_6]] : (!fir.ref>, index, index, index, !fir.shape<1>) -> !fir.ref> +// CHECK: %[[VAL_11:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_12:.*]] = arith.subi %[[VAL_7]], %[[VAL_9]] : index +// CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_12]], %[[VAL_9]] : index +// CHECK: %[[VAL_14:.*]] = arith.divsi %[[VAL_13]], %[[VAL_9]] : index +// CHECK: 
%[[VAL_15:.*]] = arith.cmpi sgt, %[[VAL_14]], %[[VAL_11]] : index +// CHECK: %[[VAL_16:.*]] = arith.select %[[VAL_15]], %[[VAL_14]], %[[VAL_11]] : index +// CHECK: %[[VAL_17:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_18:.*]] = arith.constant 1 : index +// CHECK: fir.store %[[VAL_17]] to %[[VAL_2]] : !fir.ref +// CHECK: %[[VAL_19:.*]] = fir.allocmem !fir.array, %[[VAL_16]] {bindc_name = ".tmp.forall", uniq_name = ""} +// CHECK: %[[VAL_20:.*]] = fir.shape %[[VAL_16]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_19]](%[[VAL_20]]) {uniq_name = ".tmp.forall"} : (!fir.heap>, !fir.shape<1>) -> (!fir.box>, !fir.heap>) +// CHECK: fir.do_loop %[[VAL_22:.*]] = %[[VAL_9]] to %[[VAL_7]] step %[[VAL_9]] { +// CHECK: %[[VAL_23:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_22]]) : (!fir.ref>>, index) -> !fir.ref> +// CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref> +// CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (!fir.logical<4>) -> i1 +// CHECK: fir.if %[[VAL_25]] { +// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_10]] (%[[VAL_22]]) : (!fir.ref>, index) -> !fir.ref +// CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_26]] : !fir.ref +// CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_2]] : !fir.ref +// CHECK: %[[VAL_29:.*]] = arith.addi %[[VAL_28]], %[[VAL_18]] : index +// CHECK: fir.store %[[VAL_29]] to %[[VAL_2]] : !fir.ref +// CHECK: %[[VAL_30:.*]] = hlfir.designate %[[VAL_21]]#0 (%[[VAL_28]]) : (!fir.box>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_27]] to %[[VAL_30]] : i32, !fir.ref +// CHECK: } +// CHECK: } +// CHECK: %[[VAL_31:.*]] = arith.constant 10 : index +// CHECK: %[[VAL_32:.*]] = fir.shape %[[VAL_31]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_33:.*]] = arith.constant 1 : index +// CHECK: fir.store %[[VAL_17]] to %[[VAL_2]] : !fir.ref +// CHECK: fir.do_loop %[[VAL_34:.*]] = %[[VAL_33]] to %[[VAL_31]] step %[[VAL_33]] { +// CHECK: %[[VAL_35:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_34]]) : (!fir.ref>>, index) -> !fir.ref> 
+// CHECK: %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref> +// CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (!fir.logical<4>) -> i1 +// CHECK: fir.if %[[VAL_37]] { +// CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_2]] : !fir.ref +// CHECK: %[[VAL_39:.*]] = arith.addi %[[VAL_38]], %[[VAL_18]] : index +// CHECK: fir.store %[[VAL_39]] to %[[VAL_2]] : !fir.ref +// CHECK: %[[VAL_40:.*]] = hlfir.designate %[[VAL_21]]#0 (%[[VAL_38]]) : (!fir.box>, index) -> !fir.ref +// CHECK: %[[VAL_41:.*]] = fir.load %[[VAL_40]] : !fir.ref +// CHECK: %[[VAL_42:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_34]]) : (!fir.ref>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_41]] to %[[VAL_42]] : i32, !fir.ref +// CHECK: } +// CHECK: } +// CHECK: fir.freemem %[[VAL_19]] : !fir.heap> +// CHECK: return +// CHECK: } diff --git a/flang/test/HLFIR/ordered-assignments-codegen-todo.fir b/flang/test/HLFIR/ordered-assignments-codegen-todo.fir deleted file mode 100644 index 6557a03219fb3..0000000000000 --- a/flang/test/HLFIR/ordered-assignments-codegen-todo.fir +++ /dev/null @@ -1,24 +0,0 @@ -// Just test that Ordered assignment pass TODOs are properly reported. -// RUN: %not_todo_cmd fir-opt --lower-hlfir-ordered-assignments %s 2>&1 | FileCheck %s - - -// CHECK: not yet implemented: creating temporary storage in FORALL or WHERE constructs - -func.func @forall_todo(%arg0: !fir.ref>) { - %c1 = arith.constant 1 : index - %c10 = arith.constant 10 : index - hlfir.forall lb { - hlfir.yield %c1 : index - } ub { - hlfir.yield %c10 : index - } (%arg2: i64) { - hlfir.region_assign { - %1 = hlfir.designate %arg0 (%arg2) : (!fir.ref>, i64) -> !fir.ref - hlfir.yield %1 : !fir.ref - } to { - %1 = hlfir.designate %arg0 (%arg2) : (!fir.ref>, i64) -> !fir.ref - hlfir.yield %1 : !fir.ref - } - } - return -}