diff --git a/flang/include/flang/Optimizer/Builder/Runtime/TemporaryStack.h b/flang/include/flang/Optimizer/Builder/Runtime/TemporaryStack.h index b35d6d6d66ec4..085d2ad2e6f62 100644 --- a/flang/include/flang/Optimizer/Builder/Runtime/TemporaryStack.h +++ b/flang/include/flang/Optimizer/Builder/Runtime/TemporaryStack.h @@ -30,5 +30,16 @@ void genValueAt(mlir::Location loc, fir::FirOpBuilder &builder, void genDestroyValueStack(mlir::Location loc, fir::FirOpBuilder &builder, mlir::Value opaquePtr); +mlir::Value genCreateDescriptorStack(mlir::Location loc, + fir::FirOpBuilder &builder); + +void genPushDescriptor(mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Value opaquePtr, mlir::Value boxValue); +void genDescriptorAt(mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Value opaquePtr, mlir::Value i, + mlir::Value retValueBox); + +void genDestroyDescriptorStack(mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Value opaquePtr); } // namespace fir::runtime #endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_TEMPORARYSTACK_H diff --git a/flang/include/flang/Optimizer/Builder/TemporaryStorage.h b/flang/include/flang/Optimizer/Builder/TemporaryStorage.h index 51b84050a87a3..5f2e1c4b510b0 100644 --- a/flang/include/flang/Optimizer/Builder/TemporaryStorage.h +++ b/flang/include/flang/Optimizer/Builder/TemporaryStorage.h @@ -120,6 +120,30 @@ class SimpleCopy { hlfir::AssociateOp copy; }; +/// Structure to keep track of a simple mlir::Value. This is useful +/// when a value does not need an in memory copy because it is +/// already saved in an SSA value that will be accessible at the fetching +/// point. 
+class SSARegister { +public: + SSARegister(){}; + + void pushValue(mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Value value) { + ssaRegister = value; + } + void resetFetchPosition(mlir::Location loc, fir::FirOpBuilder &builder){}; + mlir::Value fetch(mlir::Location loc, fir::FirOpBuilder &builder) { + return ssaRegister; + } + void destroy(mlir::Location loc, fir::FirOpBuilder &builder) {} + bool canBeFetchedAfterPush() const { return true; } + +public: + /// Temporary storage for the copy. + mlir::Value ssaRegister; +}; + /// Data structure to stack any kind of values with the same static type and /// rank. Each value may have different type parameters, bounds, and dynamic /// type. Fetching value N will return a value with the same dynamic type, @@ -150,6 +174,61 @@ class AnyValueStack { mlir::Value retValueBox; }; +/// Data structure to stack any kind of variables with the same static type and +/// rank. Each variable may have different type parameters, bounds, and dynamic +/// type. Fetching variable N will return a variable with the same address, +/// dynamic type, bounds, and type parameters as the Nth variable that was +/// pushed. It is implemented using runtime. +class AnyVariableStack { +public: + AnyVariableStack(mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Type valueStaticType); + + void pushValue(mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Value value); + void resetFetchPosition(mlir::Location loc, fir::FirOpBuilder &builder); + mlir::Value fetch(mlir::Location loc, fir::FirOpBuilder &builder); + void destroy(mlir::Location loc, fir::FirOpBuilder &builder); + bool canBeFetchedAfterPush() const { return true; } + +private: + /// Keep the original variable type. + mlir::Type variableStaticType; + /// Runtime cookie created by the runtime. It is a pointer to an opaque + /// runtime data structure that manages the stack. + mlir::Value opaquePtr; + /// Counter to keep track of the fetching position. 
+ Counter counter; + /// Pointer box passed to the runtime when fetching the values. + mlir::Value retValueBox; +}; + +class TemporaryStorage; + +/// Data structure to stack vector subscripted entity shape and +/// element addresses. AnyVariableStack allows saving vector subscripted +/// entities element addresses, but when saving several vector subscripted +/// entities on a stack, and if the context does not allow retrieving the +/// vector subscript entities shapes, these shapes must be saved too. +class AnyVectorSubscriptStack : public AnyVariableStack { +public: + AnyVectorSubscriptStack(mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Type valueStaticType, + bool shapeCanBeSavedAsRegister, int rank); + void pushShape(mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Value shape); + void resetFetchPosition(mlir::Location loc, fir::FirOpBuilder &builder); + mlir::Value fetchShape(mlir::Location loc, fir::FirOpBuilder &builder); + void destroy(mlir::Location loc, fir::FirOpBuilder &builder); + bool canBeFetchedAfterPush() const { return true; } + +private: + std::unique_ptr shapeTemp; + // If the shape is saved inside a descriptor (as extents), + // keep track of the descriptor type. + std::optional boxType; +}; + /// Generic wrapper over the different sorts of temporary storages. 
class TemporaryStorage { public: @@ -178,8 +257,15 @@ class TemporaryStorage { impl); } + template + T &cast() { + return std::get(impl); + } + private: - std::variant impl; + std::variant + impl; }; } // namespace fir::factory #endif // FORTRAN_OPTIMIZER_BUILDER_TEMPORARYSTORAGE_H diff --git a/flang/lib/Optimizer/Builder/Runtime/TemporaryStack.cpp b/flang/lib/Optimizer/Builder/Runtime/TemporaryStack.cpp index f184e4040b1d3..732152c823a06 100644 --- a/flang/lib/Optimizer/Builder/Runtime/TemporaryStack.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/TemporaryStack.cpp @@ -56,3 +56,52 @@ void fir::runtime::genDestroyValueStack(mlir::Location loc, auto args = fir::runtime::createArguments(builder, loc, funcType, opaquePtr); builder.create(loc, func, args); } + +mlir::Value fir::runtime::genCreateDescriptorStack(mlir::Location loc, + fir::FirOpBuilder &builder) { + mlir::func::FuncOp func = + fir::runtime::getRuntimeFunc(loc, + builder); + mlir::FunctionType funcType = func.getFunctionType(); + mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc); + mlir::Value sourceLine = + fir::factory::locationToLineNo(builder, loc, funcType.getInput(1)); + auto args = fir::runtime::createArguments(builder, loc, funcType, sourceFile, + sourceLine); + return builder.create(loc, func, args).getResult(0); +} + +void fir::runtime::genPushDescriptor(mlir::Location loc, + fir::FirOpBuilder &builder, + mlir::Value opaquePtr, + mlir::Value boxDescriptor) { + mlir::func::FuncOp func = + fir::runtime::getRuntimeFunc(loc, builder); + mlir::FunctionType funcType = func.getFunctionType(); + auto args = fir::runtime::createArguments(builder, loc, funcType, opaquePtr, + boxDescriptor); + builder.create(loc, func, args); +} + +void fir::runtime::genDescriptorAt(mlir::Location loc, + fir::FirOpBuilder &builder, + mlir::Value opaquePtr, mlir::Value i, + mlir::Value retDescriptorBox) { + mlir::func::FuncOp func = + fir::runtime::getRuntimeFunc(loc, builder); + mlir::FunctionType 
funcType = func.getFunctionType(); + auto args = fir::runtime::createArguments(builder, loc, funcType, opaquePtr, + i, retDescriptorBox); + builder.create(loc, func, args); +} + +void fir::runtime::genDestroyDescriptorStack(mlir::Location loc, + fir::FirOpBuilder &builder, + mlir::Value opaquePtr) { + mlir::func::FuncOp func = + fir::runtime::getRuntimeFunc(loc, + builder); + mlir::FunctionType funcType = func.getFunctionType(); + auto args = fir::runtime::createArguments(builder, loc, funcType, opaquePtr); + builder.create(loc, func, args); +} diff --git a/flang/lib/Optimizer/Builder/TemporaryStorage.cpp b/flang/lib/Optimizer/Builder/TemporaryStorage.cpp index a108e06351a60..dbc285ce9e22d 100644 --- a/flang/lib/Optimizer/Builder/TemporaryStorage.cpp +++ b/flang/lib/Optimizer/Builder/TemporaryStorage.cpp @@ -231,3 +231,127 @@ void fir::factory::AnyValueStack::destroy(mlir::Location loc, fir::FirOpBuilder &builder) { fir::runtime::genDestroyValueStack(loc, builder, opaquePtr); } + +//===----------------------------------------------------------------------===// +// fir::factory::AnyVariableStack implementation. 
+//===----------------------------------------------------------------------===// + +fir::factory::AnyVariableStack::AnyVariableStack(mlir::Location loc, + fir::FirOpBuilder &builder, + mlir::Type variableStaticType) + : variableStaticType{variableStaticType}, + counter{loc, builder, + builder.createIntegerConstant(loc, builder.getI64Type(), 0), + /*stackThroughLoops=*/true} { + opaquePtr = fir::runtime::genCreateDescriptorStack(loc, builder); + mlir::Type storageType = + hlfir::getFortranElementOrSequenceType(variableStaticType); + mlir::Type ptrType = fir::PointerType::get(storageType); + mlir::Type boxType; + if (hlfir::isPolymorphicType(variableStaticType)) + boxType = fir::ClassType::get(ptrType); + else + boxType = fir::BoxType::get(ptrType); + retValueBox = builder.createTemporary(loc, boxType); +} + +void fir::factory::AnyVariableStack::pushValue(mlir::Location loc, + fir::FirOpBuilder &builder, + mlir::Value variable) { + hlfir::Entity entity{variable}; + mlir::Type storageElementType = + hlfir::getFortranElementType(retValueBox.getType()); + auto [box, maybeCleanUp] = + hlfir::convertToBox(loc, builder, entity, storageElementType); + fir::runtime::genPushDescriptor(loc, builder, opaquePtr, fir::getBase(box)); + if (maybeCleanUp) + (*maybeCleanUp)(); +} + +void fir::factory::AnyVariableStack::resetFetchPosition( + mlir::Location loc, fir::FirOpBuilder &builder) { + counter.reset(loc, builder); +} + +mlir::Value fir::factory::AnyVariableStack::fetch(mlir::Location loc, + fir::FirOpBuilder &builder) { + mlir::Value indexValue = counter.getAndIncrementIndex(loc, builder); + fir::runtime::genDescriptorAt(loc, builder, opaquePtr, indexValue, + retValueBox); + hlfir::Entity retBox{builder.create(loc, retValueBox)}; + // The runtime always tracks variable as address, but the form of the variable + // that was saved may be different (raw address, fir.boxchar), ensure + // the returned variable has the same form of the one that was saved. 
+ if (mlir::isa(variableStaticType)) + return builder.createConvert(loc, variableStaticType, retBox); + if (mlir::isa(variableStaticType)) + return hlfir::genVariableBoxChar(loc, builder, retBox); + mlir::Value rawAddr = genVariableRawAddress(loc, builder, retBox); + return builder.createConvert(loc, variableStaticType, rawAddr); +} + +void fir::factory::AnyVariableStack::destroy(mlir::Location loc, + fir::FirOpBuilder &builder) { + fir::runtime::genDestroyDescriptorStack(loc, builder, opaquePtr); +} + +//===----------------------------------------------------------------------===// +// fir::factory::AnyVectorSubscriptStack implementation. +//===----------------------------------------------------------------------===// + +fir::factory::AnyVectorSubscriptStack::AnyVectorSubscriptStack( + mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Type variableStaticType, bool shapeCanBeSavedAsRegister, int rank) + : AnyVariableStack{loc, builder, variableStaticType} { + if (shapeCanBeSavedAsRegister) { + shapeTemp = + std::unique_ptr(new TemporaryStorage{SSARegister{}}); + return; + } + // The shape will be tracked as the dimensions inside a descriptor because + // that is the easiest from a lowering point of view, and this is an + // edge case situation that will probably not be exercised very often. + mlir::Type type = + fir::BoxType::get(builder.getVarLenSeqTy(builder.getI32Type(), rank)); + boxType = type; + shapeTemp = std::unique_ptr( + new TemporaryStorage{AnyVariableStack{loc, builder, type}}); +} + +void fir::factory::AnyVectorSubscriptStack::pushShape( + mlir::Location loc, fir::FirOpBuilder &builder, mlir::Value shape) { + if (boxType) { + // The shape is saved as dimensions inside a descriptor.
+ mlir::Type refType = fir::ReferenceType::get( + hlfir::getFortranElementOrSequenceType(*boxType)); + mlir::Value null = builder.createNullConstant(loc, refType); + mlir::Value descriptor = + builder.create(loc, *boxType, null, shape); + shapeTemp->pushValue(loc, builder, descriptor); + return; + } + // Otherwise, simply keep track of the fir.shape itself, it is invariant. + shapeTemp->cast().pushValue(loc, builder, shape); +} + +void fir::factory::AnyVectorSubscriptStack::resetFetchPosition( + mlir::Location loc, fir::FirOpBuilder &builder) { + static_cast(this)->resetFetchPosition(loc, builder); + shapeTemp->resetFetchPosition(loc, builder); +} + +mlir::Value +fir::factory::AnyVectorSubscriptStack::fetchShape(mlir::Location loc, + fir::FirOpBuilder &builder) { + if (boxType) { + hlfir::Entity descriptor{shapeTemp->fetch(loc, builder)}; + return hlfir::genShape(loc, builder, descriptor); + } + return shapeTemp->cast().fetch(loc, builder); +} + +void fir::factory::AnyVectorSubscriptStack::destroy( + mlir::Location loc, fir::FirOpBuilder &builder) { + static_cast(this)->destroy(loc, builder); + shapeTemp->destroy(loc, builder); +} diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp index 96628282cc860..1fecbc0b8deb7 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp @@ -181,14 +181,16 @@ class OrderedAssignmentRewriter { std::optional elementalCleanup; mlir::Region *nonElementalCleanup = nullptr; std::optional vectorSubscriptLoopNest; + std::optional vectorSubscriptShape; }; /// Generate the left-hand side. If the left-hand side is vector /// subscripted (hlfir.elemental_addr), this will create a loop nest /// (unless it was already created by a WHERE mask) and return the /// element address. 
- LhsValueAndCleanUp generateYieldedLHS(mlir::Location loc, - mlir::Region &lhsRegion); + LhsValueAndCleanUp + generateYieldedLHS(mlir::Location loc, mlir::Region &lhsRegion, + std::optional loweredRhs = std::nullopt); /// If \p maybeYield is present and has a clean-up, generate the clean-up /// at the current insertion point (by cloning). @@ -212,6 +214,8 @@ class OrderedAssignmentRewriter { /// Save a value for subsequent runs. void generateSaveEntity(hlfir::SaveEntity savedEntity, bool willUseSavedEntityInSameRun); + void saveLeftHandSide(hlfir::SaveEntity savedEntity, + hlfir::RegionAssignOp regionAssignOp); /// Get a value if it was saved in this run or a previous run. Returns /// nullopt if it has not been saved. @@ -421,9 +425,9 @@ void OrderedAssignmentRewriter::pre(hlfir::RegionAssignOp regionAssignOp) { std::optional elementalLoopNest; auto [rhsValue, oldRhsYield] = generateYieldedEntity(regionAssignOp.getRhsRegion()); - LhsValueAndCleanUp loweredLhs = - generateYieldedLHS(loc, regionAssignOp.getLhsRegion()); hlfir::Entity rhsEntity{rhsValue}; + LhsValueAndCleanUp loweredLhs = + generateYieldedLHS(loc, regionAssignOp.getLhsRegion(), rhsEntity); hlfir::Entity lhsEntity{loweredLhs.lhs}; if (loweredLhs.vectorSubscriptLoopNest) rhsEntity = hlfir::getElementAt( @@ -692,17 +696,53 @@ mlir::Value OrderedAssignmentRewriter::generateYieldedScalarValue( } OrderedAssignmentRewriter::LhsValueAndCleanUp -OrderedAssignmentRewriter::generateYieldedLHS(mlir::Location loc, - mlir::Region &lhsRegion) { +OrderedAssignmentRewriter::generateYieldedLHS( + mlir::Location loc, mlir::Region &lhsRegion, + std::optional loweredRhs) { LhsValueAndCleanUp loweredLhs; hlfir::ElementalAddrOp elementalAddrLhs = mlir::dyn_cast(lhsRegion.back().back()); + if (auto temp = savedEntities.find(&lhsRegion); temp != savedEntities.end()) { + // The LHS address was computed and saved in a previous run. Fetch it. 
+ doBeforeLoopNest([&]() { temp->second.resetFetchPosition(loc, builder); }); + if (elementalAddrLhs && !whereLoopNest) { + // Vector subscripted designator addresses are saved element by element. + // If no "elemental" loops have been created yet, the shape of the + // RHS, if it is an array, can be used, or the shape of the vector + // subscripted designator must be retrieved to generate the "elemental" + // loop nest. + if (loweredRhs && loweredRhs->isArray()) { + // The RHS shape can be used to create the elemental loops and avoid + // saving the LHS shape. + loweredLhs.vectorSubscriptShape = + hlfir::genShape(loc, builder, *loweredRhs); + } else { + // If the shape cannot be retrieved from the RHS, it must have been + // saved. Get it from the temporary. + auto &vectorTmp = + temp->second.cast(); + loweredLhs.vectorSubscriptShape = vectorTmp.fetchShape(loc, builder); + } + loweredLhs.vectorSubscriptLoopNest = hlfir::genLoopNest( + loc, builder, loweredLhs.vectorSubscriptShape.value()); + builder.setInsertionPointToStart( + loweredLhs.vectorSubscriptLoopNest->innerLoop.getBody()); + } + loweredLhs.lhs = temp->second.fetch(loc, builder); + return loweredLhs; + } + // The LHS has not yet been evaluated and saved. Evaluate it now. if (elementalAddrLhs && !whereLoopNest) { + // This is a vector subscripted entity. The address of elements must + // be returned. If no "elemental" loops have been created for a WHERE, + // create them now based on the vector subscripted designator shape.
for (auto &op : lhsRegion.front().without_terminator()) (void)builder.clone(op, mapper); - mlir::Value newShape = mapper.lookupOrDefault(elementalAddrLhs.getShape()); - loweredLhs.vectorSubscriptLoopNest = hlfir::genLoopNest( - loc, builder, newShape, !elementalAddrLhs.isOrdered()); + loweredLhs.vectorSubscriptShape = + mapper.lookupOrDefault(elementalAddrLhs.getShape()); + loweredLhs.vectorSubscriptLoopNest = + hlfir::genLoopNest(loc, builder, *loweredLhs.vectorSubscriptShape, + !elementalAddrLhs.isOrdered()); builder.setInsertionPointToStart( loweredLhs.vectorSubscriptLoopNest->innerLoop.getBody()); mapper.map(elementalAddrLhs.getIndices(), @@ -713,6 +753,8 @@ OrderedAssignmentRewriter::generateYieldedLHS(mlir::Location loc, loweredLhs.lhs = mapper.lookupOrDefault(loweredLhs.elementalCleanup->getEntity()); } else { + // This is a designator without vector subscripts. Generate it as + // it is done for other entities. auto [lhs, yield] = generateYieldedEntity(lhsRegion); loweredLhs.lhs = lhs; if (yield && !yield->getCleanup().empty()) @@ -932,9 +974,12 @@ void MaskedArrayExpr::generateNoneElementalCleanupIfAny( } } -static bool isLeftHandSide(mlir::Region &region) { +static hlfir::RegionAssignOp +getAssignIfLeftHandSideRegion(mlir::Region &region) { auto assign = mlir::dyn_cast(region.getParentOp()); - return assign && (&assign.getLhsRegion() == &region); + if (assign && (&assign.getLhsRegion() == &region)) + return assign; + return nullptr; } bool OrderedAssignmentRewriter::currentLoopNestIterationNumberCanBeComputed( @@ -993,18 +1038,20 @@ getTempName(hlfir::OrderedAssignmentTreeOpInterface root) { void OrderedAssignmentRewriter::generateSaveEntity( hlfir::SaveEntity savedEntity, bool willUseSavedEntityInSameRun) { mlir::Region &region = *savedEntity.yieldRegion; - mlir::Location loc = region.getParentOp()->getLoc(); - if (!mlir::isa(region.back().back())) - TODO(loc, "creating temporary storage for vector subscripted LHS"); + if (hlfir::RegionAssignOp regionAssignOp = +
getAssignIfLeftHandSideRegion(region)) { + // Need to save the address, not the values. + assert(!willUseSavedEntityInSameRun && + "lhs cannot be used in the loop nest where it is saved"); + return saveLeftHandSide(savedEntity, regionAssignOp); + } + mlir::Location loc = region.getParentOp()->getLoc(); // Evaluate the region inside the loop nest (if any). auto [clonedValue, oldYield] = generateYieldedEntity(region); hlfir::Entity entity{clonedValue}; - if (isLeftHandSide(region)) // Need to save the address, not the values. - TODO(loc, "creating temporary storage for LHS"); - else - entity = hlfir::loadTrivialScalar(loc, builder, entity); + entity = hlfir::loadTrivialScalar(loc, builder, entity); mlir::Type entityType = entity.getType(); llvm::StringRef tempName = getTempName(root); @@ -1069,6 +1116,72 @@ void OrderedAssignmentRewriter::generateSaveEntity( } } +static bool rhsIsArray(hlfir::RegionAssignOp regionAssignOp) { + auto yieldOp = mlir::dyn_cast( + regionAssignOp.getRhsRegion().back().back()); + return yieldOp && hlfir::Entity{yieldOp.getEntity()}.isArray(); +} + +void OrderedAssignmentRewriter::saveLeftHandSide( + hlfir::SaveEntity savedEntity, hlfir::RegionAssignOp regionAssignOp) { + mlir::Region &region = *savedEntity.yieldRegion; + mlir::Location loc = region.getParentOp()->getLoc(); + LhsValueAndCleanUp loweredLhs = generateYieldedLHS(loc, region); + fir::factory::TemporaryStorage *temp = nullptr; + if (loweredLhs.vectorSubscriptLoopNest) + constructStack.push_back(loweredLhs.vectorSubscriptLoopNest->outerLoop); + if (loweredLhs.vectorSubscriptLoopNest && !rhsIsArray(regionAssignOp)) { + // Vector subscripted entity for which the shape must also be saved on top + // of the element addresses (e.g. the shape may change in each forall + // iteration and is needed to create the elemental loops).
+ mlir::Value shape = loweredLhs.vectorSubscriptShape.value(); + int rank = mlir::cast(shape.getType()).getRank(); + const bool shapeIsInvariant = + constructStack.empty() || + dominanceInfo.properlyDominates(shape, constructStack[0]); + doBeforeLoopNest([&] { + // Outside of any forall/where/elemental loops, create a temporary that + // will both be able to save the vector subscripted designator shape(s) + // and element addresses. + temp = + insertSavedEntity(region, fir::factory::AnyVectorSubscriptStack{ + loc, builder, loweredLhs.lhs.getType(), + shapeIsInvariant, rank}); + }); + // Save shape before the elemental loop nest created by the vector + // subscripted LHS. + auto &vectorTmp = temp->cast(); + auto insertionPoint = builder.saveInsertionPoint(); + builder.setInsertionPoint(loweredLhs.vectorSubscriptLoopNest->outerLoop); + vectorTmp.pushShape(loc, builder, shape); + builder.restoreInsertionPoint(insertionPoint); + } else { + // Otherwise, only save the LHS address. + // If the LHS address dominates the constructs, its SSA value can + // simply be tracked and there is no need to save the address in memory. + // Otherwise, the addresses are stored at each iteration in memory with + // a descriptor stack. + if (constructStack.empty() || + dominanceInfo.properlyDominates(loweredLhs.lhs, constructStack[0])) + doBeforeLoopNest([&] { + temp = insertSavedEntity(region, fir::factory::SSARegister{}); + }); + else + doBeforeLoopNest([&] { + temp = insertSavedEntity( + region, fir::factory::AnyVariableStack{loc, builder, + loweredLhs.lhs.getType()}); + }); + } + temp->pushValue(loc, builder, loweredLhs.lhs); + generateCleanupIfAny(loweredLhs.elementalCleanup); + if (loweredLhs.vectorSubscriptLoopNest) { + constructStack.pop_back(); + builder.setInsertionPointAfter( + loweredLhs.vectorSubscriptLoopNest->outerLoop); + } +} + /// Lower an ordered assignment tree to fir.do_loop and hlfir.assign given /// a schedule. 
static void lower(hlfir::OrderedAssignmentTreeOpInterface root, diff --git a/flang/test/HLFIR/order_assignments/lhs-conflicts-codegen.fir b/flang/test/HLFIR/order_assignments/lhs-conflicts-codegen.fir new file mode 100644 index 0000000000000..45ceb516a6863 --- /dev/null +++ b/flang/test/HLFIR/order_assignments/lhs-conflicts-codegen.fir @@ -0,0 +1,216 @@ +// Test code generation of hlfir.region_assign when the LHS computed +// address must be saved before the assignment is evaluated. Because +// the assignment would modify the LHS evaluation. +// RUN: fir-opt %s --lower-hlfir-ordered-assignments | FileCheck %s + +// Test simplified IR for: +// +// x(x(1):x(2)) = l +// +// Verify that, although a conflict is detected, the LHS is not saved +// on a descriptor stack: it is already in a register that can be used +// since there is no forall. + +func.func @save_box_in_ssa_register(%arg0: !fir.box>, %arg1: !fir.box>>) { + %c2 = arith.constant 2 : index + %c1 = arith.constant 1 : index + %0:2 = hlfir.declare %arg1 {uniq_name = "l"} : (!fir.box>>) -> (!fir.box>>, !fir.box>>) + %1:2 = hlfir.declare %arg0 {uniq_name = "x"} : (!fir.box>) -> (!fir.box>, !fir.box>) + hlfir.region_assign { + hlfir.yield %0#0 : !fir.box>> + } to { + %2 = hlfir.designate %1#0 (%c1) : (!fir.box>, index) -> !fir.ref + %3 = fir.load %2 : !fir.ref + %4 = hlfir.designate %1#0 (%c2) : (!fir.box>, index) -> !fir.ref + %5 = fir.load %4 : !fir.ref + %6 = arith.subi %5, %3 : i64 + %7 = fir.convert %6 : (i64) -> index + %8 = fir.shape %7 : (index) -> !fir.shape<1> + %9 = hlfir.designate %1#0 (%3:%5:%c1) shape %8 : (!fir.box>, i64, i64, index, !fir.shape<1>) -> !fir.box> + hlfir.yield %9 : !fir.box> + } user_defined_assign (%arg2: !fir.ref>) to (%arg3: !fir.ref) { + %2 = fir.load %arg2 : !fir.ref> + fir.call @logical_to_real(%arg3, %2) : (!fir.ref, !fir.logical<4>) -> () + } + return +} +// CHECK-LABEL: func.func @save_box_in_ssa_register( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>, +// CHECK-SAME: 
%[[VAL_1:.*]]: !fir.box>>) { +// CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box>) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_18:.*]] = hlfir.designate %[[VAL_5]]#0 (%{{.*}}:%{{.*}}:%{{.*}}) shape %{{.*}} : (!fir.box>, i64, i64, index, !fir.shape<1>) -> !fir.box> +// CHECK: fir.do_loop %[[VAL_20:.*]] = {{.*}} { +// CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_18]] (%[[VAL_20]]) : (!fir.box>, index) -> !fir.ref +// CHECK: fir.call @logical_to_real(%[[VAL_21]], %{{.*}}) : (!fir.ref, !fir.logical<4>) -> () +// CHECK: } +// CHECK: return +// CHECK: } + +// Test simplified IR for: +// +// ! x = [0,1,2,4] -> [4,2,1,1] +// forall (i=1:3) x(x(i)+1:x(i+1)) = x(4-i) +// +// Verify that the LHS are all computed and saved on a stack before +// any assignment is made. +// +func.func @save_box_in_stack(%arg0: !fir.box>) { + %c1 = arith.constant 1 : index + %c1_i32 = arith.constant 1 : i32 + %c4_i64 = arith.constant 4 : i64 + %c3_i64 = arith.constant 3 : i64 + %c1_i64 = arith.constant 1 : i64 + %0:2 = hlfir.declare %arg0 {uniq_name = "x"} : (!fir.box>) -> (!fir.box>, !fir.box>) + hlfir.forall lb { + hlfir.yield %c1_i64 : i64 + } ub { + hlfir.yield %c3_i64 : i64 + } (%arg1: i64) { + hlfir.region_assign { + %1 = arith.subi %c4_i64, %arg1 : i64 + %2 = hlfir.designate %0#0 (%1) : (!fir.box>, i64) -> !fir.ref + %3 = fir.load %2 : !fir.ref + hlfir.yield %3 : i32 + } to { + %1 = hlfir.designate %0#0 (%arg1) : (!fir.box>, i64) -> !fir.ref + %2 = fir.load %1 : !fir.ref + %3 = arith.addi %2, %c1_i32 : i32 + %4 = arith.addi %arg1, %c1_i64 : i64 + %5 = hlfir.designate %0#0 (%4) : (!fir.box>, i64) -> !fir.ref + %6 = fir.load %5 : !fir.ref + %7 = arith.subi %6, %3 : i32 + %8 = fir.convert %7 : (i32) -> index + %9 = fir.shape %8 : (index) -> !fir.shape<1> + %10 = hlfir.designate %0#0 (%3:%6:%c1) shape %9 : (!fir.box>, i32, i32, index, !fir.shape<1>) -> !fir.box> + hlfir.yield %10 : !fir.box> + } + } + return +} +// CHECK-LABEL: func.func @save_box_in_stack( +//
CHECK-SAME: %[[VAL_0:.*]]: !fir.box>) { +// CHECK: %[[VAL_1:.*]] = fir.alloca !fir.box>> +// CHECK: %[[VAL_2:.*]] = fir.alloca i64 +// CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box>) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_30:.*]] = fir.call @_FortranACreateDescriptorStack(%{{.*}}, %{{.*}}) : (!fir.ref, i32) -> !fir.llvm_ptr +// CHECK: fir.do_loop {{.*}} { +// CHECK: %[[VAL_48:.*]] = hlfir.designate %[[VAL_9]]#0 {{.*}} : (!fir.box>, i32, i32, index, !fir.shape<1>) -> !fir.box> +// CHECK: %[[VAL_49:.*]] = fir.convert %[[VAL_48]] : (!fir.box>) -> !fir.box +// CHECK: %[[VAL_50:.*]] = fir.call @_FortranAPushDescriptor(%[[VAL_30]], %[[VAL_49]]) : (!fir.llvm_ptr, !fir.box) -> none +// CHECK: } +// CHECK: fir.store %{{.*}} to %[[VAL_2]] : !fir.ref +// CHECK: fir.do_loop {{.*}} { +// CHECK: %[[VAL_60:.*]] = fir.load %[[VAL_2]] : !fir.ref +// CHECK: %[[VAL_61:.*]] = arith.addi %[[VAL_60]], %{{.*}} : i64 +// CHECK: fir.store %[[VAL_61]] to %[[VAL_2]] : !fir.ref +// CHECK: %[[VAL_62:.*]] = fir.convert %[[VAL_1]] : (!fir.ref>>>) -> !fir.ref> +// CHECK: %[[VAL_63:.*]] = fir.call @_FortranADescriptorAt(%[[VAL_30]], %[[VAL_60]], %[[VAL_62]]) : (!fir.llvm_ptr, i64, !fir.ref>) -> none +// CHECK: %[[VAL_64:.*]] = fir.load %[[VAL_1]] : !fir.ref>>> +// CHECK: %[[VAL_65:.*]] = fir.convert %[[VAL_64]] : (!fir.box>>) -> !fir.box> +// CHECK: hlfir.assign %{{.*}} to %[[VAL_65]] : i32, !fir.box> +// CHECK: } +// CHECK: fir.call @_FortranADestroyDescriptorStack(%[[VAL_30]]) : (!fir.llvm_ptr) -> none + +// Test simplified IR for: +// +// integer(8) :: x(*) +// forall (integer::i=1:10) x(x(foo(x, i):bar(x, i))) = x(11-i) +// +// The shape of the vector subscripted designator must be saved at each +// iteration. 
+// +func.func @test_vector_subscript_overlap(%arg0: !fir.ref>) { + %c1 = arith.constant 1 : index + %c10 = arith.constant 10 : index + %c11 = arith.constant 11 : index + %0 = fir.undefined index + %1 = fir.shape %0 : (index) -> !fir.shape<1> + %2:2 = hlfir.declare %arg0(%1) {uniq_name = "x"} : (!fir.ref>, !fir.shape<1>) -> (!fir.box>, !fir.ref>) + hlfir.forall lb { + hlfir.yield %c1 : index + } ub { + hlfir.yield %c10 : index + } (%arg1: index) { + hlfir.region_assign { + %3 = arith.subi %c11, %arg1 : index + %4 = hlfir.designate %2#0 (%3) : (!fir.box>, index) -> !fir.ref + %5 = fir.load %4 : !fir.ref + hlfir.yield %5 : i64 + } to { + %3 = fir.call @foo(%2#1, %arg1) : (!fir.ref>, index) -> index + %4 = fir.call @bar(%2#1, %arg1) : (!fir.ref>, index) -> index + %5 = arith.subi %4, %3 : index + %6 = fir.shape %5 : (index) -> !fir.shape<1> + %7 = hlfir.designate %2#0 (%3:%4:%c1) shape %6 : (!fir.box>, index, index, index, !fir.shape<1>) -> !fir.box> + hlfir.elemental_addr %6 : !fir.shape<1> { + ^bb0(%arg2: index): + %8 = hlfir.designate %7 (%arg2) : (!fir.box>, index) -> !fir.ref + %9 = fir.load %8 : !fir.ref + %10 = hlfir.designate %2#0 (%9) : (!fir.box>, i64) -> !fir.ref + hlfir.yield %10 : !fir.ref + } + } + } + return +} +// CHECK-LABEL: func.func @test_vector_subscript_overlap( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>) { +// CHECK: %[[VAL_1:.*]] = fir.alloca !fir.box>> +// CHECK: %[[VAL_2:.*]] = fir.alloca i64 +// CHECK: %[[VAL_3:.*]] = fir.alloca !fir.box> +// CHECK: %[[VAL_4:.*]] = fir.alloca i64 +// CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_0]](%{{.*}}) {uniq_name = "x"} : (!fir.ref>, !fir.shape<1>) -> (!fir.box>, !fir.ref>) +// CHECK: %[[VAL_30:.*]] = fir.call @_FortranACreateDescriptorStack(%{{.*}}, %{{.*}}) : (!fir.ref, i32) -> !fir.llvm_ptr +// CHECK: %[[VAL_37:.*]] = fir.call @_FortranACreateDescriptorStack(%{{.*}}, %{{.*}}) : (!fir.ref, i32) -> !fir.llvm_ptr +// CHECK: fir.do_loop {{.*}} { +// CHECK: %[[VAL_45:.*]] = fir.call @foo +// CHECK: 
%[[VAL_46:.*]] = fir.call @bar +// CHECK: %[[VAL_47:.*]] = arith.subi %[[VAL_46]], %[[VAL_45]] : index +// CHECK: %[[VAL_48:.*]] = fir.shape %[[VAL_47]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_51:.*]] = fir.zero_bits !fir.ref> +// CHECK: %[[VAL_52:.*]] = fir.embox %[[VAL_51]](%[[VAL_48]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> +// CHECK: %[[VAL_55:.*]] = fir.convert %[[VAL_52]] : (!fir.box>) -> !fir.box +// Save the vector subscripted designator shape. +// CHECK: %[[VAL_56:.*]] = fir.call @_FortranAPushDescriptor({{.*}}, {{.*}}) : (!fir.llvm_ptr, !fir.box) -> none +// CHECK: fir.do_loop {{.*}} { +// CHECK: %[[VAL_60:.*]] = hlfir.designate %[[VAL_11]]#0 (%{{.*}}) : (!fir.box>, i64) -> !fir.ref +// CHECK: %[[VAL_61:.*]] = fir.embox %[[VAL_60]] : (!fir.ref) -> !fir.box +// CHECK: %[[VAL_62:.*]] = fir.convert %[[VAL_61]] : (!fir.box) -> !fir.box +// Save the vector subscripted designator element address. +// CHECK: %[[VAL_63:.*]] = fir.call @_FortranAPushDescriptor(%[[VAL_30]], %[[VAL_62]]) : (!fir.llvm_ptr, !fir.box) -> none +// CHECK: } +// CHECK: } +// CHECK: fir.store %{{.*}} to %[[VAL_4]] : !fir.ref +// CHECK: fir.store %{{.*}} to %[[VAL_2]] : !fir.ref +// CHECK: fir.do_loop {{.*}} { +// CHECK: %[[VAL_69:.*]] = fir.load %{{.*}} : !fir.ref +// CHECK: %[[VAL_70:.*]] = fir.load %[[VAL_2]] : !fir.ref +// CHECK: %[[VAL_71:.*]] = arith.addi %[[VAL_70]], %{{.*}} : i64 +// CHECK: fir.store %[[VAL_71]] to %[[VAL_2]] : !fir.ref +// CHECK: %[[VAL_72:.*]] = fir.convert %[[VAL_1]] : (!fir.ref>>>) -> !fir.ref> +// Fetch the vector subscripted designator shape to create the elemental loop. 
+// CHECK: %[[VAL_73:.*]] = fir.call @_FortranADescriptorAt(%[[VAL_37]], %[[VAL_70]], %[[VAL_72]]) : (!fir.llvm_ptr, i64, !fir.ref>) -> none +// CHECK: %[[VAL_74:.*]] = fir.load %[[VAL_1]] : !fir.ref>>> +// CHECK: %[[VAL_75:.*]] = fir.convert %[[VAL_74]] : (!fir.box>>) -> !fir.box> +// CHECK: %[[VAL_76:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_77:.*]]:3 = fir.box_dims %[[VAL_75]], %[[VAL_76]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_79:.*]] = arith.constant 1 : index +// CHECK: fir.do_loop %[[VAL_80:.*]] = %[[VAL_79]] to %[[VAL_77]]#1 step %[[VAL_79]] { +// CHECK: %[[VAL_81:.*]] = fir.load %[[VAL_4]] : !fir.ref +// CHECK: %[[VAL_82:.*]] = arith.addi %[[VAL_81]], %{{.*}} : i64 +// CHECK: fir.store %[[VAL_82]] to %[[VAL_4]] : !fir.ref +// CHECK: %[[VAL_83:.*]] = fir.convert %[[VAL_3]] : (!fir.ref>>) -> !fir.ref> +// Fetch the vector subscripted designator element address. +// CHECK: %[[VAL_84:.*]] = fir.call @_FortranADescriptorAt(%[[VAL_30]], %[[VAL_81]], %[[VAL_83]]) : (!fir.llvm_ptr, i64, !fir.ref>) -> none +// CHECK: %[[VAL_85:.*]] = fir.load %[[VAL_3]] : !fir.ref>> +// CHECK: %[[VAL_86:.*]] = fir.box_addr %[[VAL_85]] : (!fir.box>) -> !fir.ptr +// CHECK: %[[VAL_87:.*]] = fir.convert %[[VAL_86]] : (!fir.ptr) -> !fir.ref +// CHECK: hlfir.assign %{{.*}} to %[[VAL_87]] : i64, !fir.ref +// CHECK: } +// CHECK: } +// CHECK: %[[VAL_88:.*]] = fir.call @_FortranADestroyDescriptorStack(%[[VAL_30]]) : (!fir.llvm_ptr) -> none +// CHECK: %[[VAL_89:.*]] = fir.call @_FortranADestroyDescriptorStack(%[[VAL_37]]) : (!fir.llvm_ptr) -> none + +func.func private @integer_to_real(!fir.ref, !fir.logical<4>) +func.func private @foo(!fir.ref>, index) -> index +func.func private @bar(!fir.ref>, index) -> index