diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index 8b1235b50cc6f..951050b135985 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -249,8 +249,11 @@ mlir::Value genVariableBoxChar(mlir::Location loc, fir::FirOpBuilder &builder, hlfir::Entity var); /// Get or create a fir.box or fir.class from a variable. +/// A fir.box with different attributes that \p var can be created +/// using \p forceBoxType. hlfir::Entity genVariableBox(mlir::Location loc, fir::FirOpBuilder &builder, - hlfir::Entity var); + hlfir::Entity var, + fir::BaseBoxType forceBoxType = {}); /// If the entity is a variable, load its value (dereference pointers and /// allocatables if needed). Do nothing if the entity is already a value, and diff --git a/flang/include/flang/Optimizer/Builder/TemporaryStorage.h b/flang/include/flang/Optimizer/Builder/TemporaryStorage.h index 5f2e1c4b510b0..b17a75354e7d1 100644 --- a/flang/include/flang/Optimizer/Builder/TemporaryStorage.h +++ b/flang/include/flang/Optimizer/Builder/TemporaryStorage.h @@ -179,6 +179,8 @@ class AnyValueStack { /// type. Fetching variable N will return a variable with the same address, /// dynamic type, bounds, and type parameters as the Nth variable that was /// pushed. It is implemented using runtime. +/// Note that this is not meant to save POINTER or ALLOCATABLE descriptor +/// addresses, use AnyDescriptorAddressStack instead. class AnyVariableStack { public: AnyVariableStack(mlir::Location loc, fir::FirOpBuilder &builder, @@ -203,6 +205,21 @@ class AnyVariableStack { mlir::Value retValueBox; }; +/// Data structure to stack descriptor addresses. It stores the descriptor +/// addresses as int_ptr values under the hood. +class AnyDescriptorAddressStack : public AnyValueStack { +public: + AnyDescriptorAddressStack(mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Type descriptorAddressType); + + void pushValue(mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Value value); + mlir::Value fetch(mlir::Location loc, fir::FirOpBuilder &builder); + +private: + mlir::Type descriptorAddressType; +}; + class TemporaryStorage; /// Data structure to stack vector subscripted entity shape and @@ -264,7 +281,8 @@ class TemporaryStorage { private: std::variant + AnyVariableStack, AnyVectorSubscriptStack, + AnyDescriptorAddressStack> impl; }; } // namespace fir::factory diff --git a/flang/include/flang/Optimizer/Dialect/FIRType.h b/flang/include/flang/Optimizer/Dialect/FIRType.h index 1e637895d8e99..3d30f4e673682 100644 --- a/flang/include/flang/Optimizer/Dialect/FIRType.h +++ b/flang/include/flang/Optimizer/Dialect/FIRType.h @@ -53,6 +53,9 @@ class BaseBoxType : public mlir::Type { /// Is this the box for an assumed rank? bool isAssumedRank() const; + /// Is this a box for a pointer? + bool isPointer() const; + /// Return the same type, except for the shape, that is taken the shape /// of shapeMold. BaseBoxType getBoxTypeWithNewShape(mlir::Type shapeMold) const; diff --git a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td index f4102538efc3c..c12066b1346f6 100644 --- a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td +++ b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td @@ -1377,7 +1377,7 @@ def hlfir_RegionAssignOp : hlfir_Op<"region_assign", [hlfir_OrderedAssignmentTre regions.push_back(&getUserDefinedAssignment()); } mlir::Region* getSubTreeRegion() { return nullptr; } - + bool isPointerAssignment(); }]; let hasCustomAssemblyFormat = 1; diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index cc19f335cd017..4c6e47d250329 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -4355,6 +4355,62 @@ class FirConverter : public Fortran::lower::AbstractConverter { stmtCtx); } + void genForallPointerAssignment( + mlir::Location loc, const Fortran::evaluate::Assignment &assign, + const Fortran::evaluate::Assignment::BoundsSpec &lbExprs) { + if (Fortran::evaluate::IsProcedureDesignator(assign.rhs)) + TODO(loc, "procedure pointer assignment inside FORALL"); + std::optional lhsType = + assign.lhs.GetType(); + // Polymorphic pointer assignment is delegated to the runtime, and + // PointerAssociateLowerBounds needs the lower bounds as arguments, so they + // must be preserved. + if (lhsType && lhsType->IsPolymorphic()) + TODO(loc, "polymorphic pointer assignment in FORALL"); + // Nullification is special, there is no RHS that can be prepared, + // need to encode it in HLFIR. + if (Fortran::evaluate::UnwrapExpr( + assign.rhs)) + TODO(loc, "NULL pointer assignment in FORALL"); + // Lower bounds could be "applied" when preparing RHS, but in order + // to deal with the polymorphic case and to reuse existing pointer + // assignment helpers in HLFIR codegen, it is better to keep them + // separate. + if (!lbExprs.empty()) + TODO(loc, "Pointer assignment with new lower bounds inside FORALL"); + // Otherwise, this is a "dumb" pointer assignment that can be represented + // with hlfir.region_assign with descriptor address/value and later + // implemented with a store. + auto regionAssignOp = builder->create(loc); + + // Lower LHS in its own region. + builder->createBlock(®ionAssignOp.getLhsRegion()); + Fortran::lower::StatementContext lhsContext; + hlfir::Entity lhs = Fortran::lower::convertExprToHLFIR( + loc, *this, assign.lhs, localSymbols, lhsContext); + + auto lhsYieldOp = builder->create(loc, lhs); + Fortran::lower::genCleanUpInRegionIfAny( + loc, *builder, lhsYieldOp.getCleanup(), lhsContext); + + // Lower RHS in its own region. + builder->createBlock(®ionAssignOp.getRhsRegion()); + Fortran::lower::StatementContext rhsContext; + hlfir::Entity rhs = Fortran::lower::convertExprToHLFIR( + loc, *this, assign.rhs, localSymbols, rhsContext); + // Create pointer descriptor value from the RHS. + if (rhs.isMutableBox()) + rhs = hlfir::Entity{builder->create(loc, rhs)}; + auto lhsBoxType = + llvm::cast(fir::unwrapRefType(lhs.getType())); + mlir::Value newBox = hlfir::genVariableBox(loc, *builder, rhs, lhsBoxType); + auto rhsYieldOp = builder->create(loc, newBox); + Fortran::lower::genCleanUpInRegionIfAny( + loc, *builder, rhsYieldOp.getCleanup(), rhsContext); + + builder->setInsertionPointAfter(regionAssignOp); + } + // Create the 2 x newRank array with the bounds to be passed to the runtime as // a descriptor. mlir::Value createBoundArray(llvm::ArrayRef lbounds, @@ -4793,13 +4849,16 @@ class FirConverter : public Fortran::lower::AbstractConverter { }, [&](const Fortran::evaluate::Assignment::BoundsSpec &lbExprs) { if (isInsideHlfirForallOrWhere()) - TODO(loc, "pointer assignment inside FORALL"); - genPointerAssignment(loc, assign, lbExprs); + genForallPointerAssignment(loc, assign, lbExprs); + else + genPointerAssignment(loc, assign, lbExprs); }, [&](const Fortran::evaluate::Assignment::BoundsRemapping &boundExprs) { if (isInsideHlfirForallOrWhere()) - TODO(loc, "pointer assignment inside FORALL"); + TODO( + loc, + "pointer assignment with bounds remapping inside FORALL"); genPointerAssignment(loc, assign, boundExprs); }, }, diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index 8993065c2bb64..c5a7205afb796 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -349,26 +349,54 @@ mlir::Value hlfir::genVariableBoxChar(mlir::Location loc, lengths[0]); } +static hlfir::Entity changeBoxAttributes(mlir::Location loc, + fir::FirOpBuilder &builder, + hlfir::Entity var, + fir::BaseBoxType forceBoxType) { + assert(llvm::isa(var.getType()) && "expect box type"); + // Propagate lower bounds. + mlir::Value shift; + llvm::SmallVector lbounds = + getNonDefaultLowerBounds(loc, builder, var); + if (!lbounds.empty()) + shift = builder.genShift(loc, lbounds); + auto rebox = builder.create(loc, forceBoxType, var, shift, + /*slice=*/nullptr); + return hlfir::Entity{rebox}; +} + hlfir::Entity hlfir::genVariableBox(mlir::Location loc, fir::FirOpBuilder &builder, - hlfir::Entity var) { + hlfir::Entity var, + fir::BaseBoxType forceBoxType) { assert(var.isVariable() && "must be a variable"); var = hlfir::derefPointersAndAllocatables(loc, builder, var); - if (mlir::isa(var.getType())) - return var; + if (mlir::isa(var.getType())) { + if (!forceBoxType || forceBoxType == var.getType()) + return var; + return changeBoxAttributes(loc, builder, var, forceBoxType); + } // Note: if the var is not a fir.box/fir.class at that point, it has default // lower bounds and is not polymorphic. mlir::Value shape = var.isArray() ? hlfir::genShape(loc, builder, var) : mlir::Value{}; llvm::SmallVector typeParams; - auto maybeCharType = - mlir::dyn_cast(var.getFortranElementType()); + mlir::Type elementType = + forceBoxType ? fir::getFortranElementType(forceBoxType.getEleTy()) + : var.getFortranElementType(); + auto maybeCharType = mlir::dyn_cast(elementType); if (!maybeCharType || maybeCharType.hasDynamicLen()) hlfir::genLengthParameters(loc, builder, var, typeParams); mlir::Value addr = var.getBase(); if (mlir::isa(var.getType())) addr = genVariableRawAddress(loc, builder, var); mlir::Type boxType = fir::BoxType::get(var.getElementOrSequenceType()); + if (forceBoxType) { + boxType = forceBoxType; + mlir::Type baseType = + fir::ReferenceType::get(fir::unwrapRefType(forceBoxType.getEleTy())); + addr = builder.createConvert(loc, baseType, addr); + } auto embox = builder.create(loc, boxType, addr, shape, /*slice=*/mlir::Value{}, typeParams); diff --git a/flang/lib/Optimizer/Builder/TemporaryStorage.cpp b/flang/lib/Optimizer/Builder/TemporaryStorage.cpp index 4c59574dd433a..48c2cb2181a0b 100644 --- a/flang/lib/Optimizer/Builder/TemporaryStorage.cpp +++ b/flang/lib/Optimizer/Builder/TemporaryStorage.cpp @@ -355,3 +355,27 @@ void fir::factory::AnyVectorSubscriptStack::destroy( static_cast(this)->destroy(loc, builder); shapeTemp->destroy(loc, builder); } + +//===----------------------------------------------------------------------===// +// fir::factory::AnyDescriptorAddressStack implementation. +//===----------------------------------------------------------------------===// + +fir::factory::AnyDescriptorAddressStack::AnyDescriptorAddressStack( + mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Type descriptorAddressType) + : AnyValueStack(loc, builder, builder.getIntPtrType()), + descriptorAddressType{descriptorAddressType} {} + +void fir::factory::AnyDescriptorAddressStack::pushValue( + mlir::Location loc, fir::FirOpBuilder &builder, mlir::Value variable) { + mlir::Value cast = + builder.createConvert(loc, builder.getIntPtrType(), variable); + static_cast(this)->pushValue(loc, builder, cast); +} + +mlir::Value +fir::factory::AnyDescriptorAddressStack::fetch(mlir::Location loc, + fir::FirOpBuilder &builder) { + mlir::Value addr = static_cast(this)->fetch(loc, builder); + return builder.createConvert(loc, descriptorAddressType, addr); +} diff --git a/flang/lib/Optimizer/Dialect/FIRType.cpp b/flang/lib/Optimizer/Dialect/FIRType.cpp index 719cb1b9d75aa..1277b50fa3b29 100644 --- a/flang/lib/Optimizer/Dialect/FIRType.cpp +++ b/flang/lib/Optimizer/Dialect/FIRType.cpp @@ -1358,6 +1358,10 @@ bool fir::BaseBoxType::isAssumedRank() const { return false; } +bool fir::BaseBoxType::isPointer() const { + return llvm::isa(getEleTy()); +} + //===----------------------------------------------------------------------===// // FIROpsDialect //===----------------------------------------------------------------------===// diff --git a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp index 2fcfa1353f86b..383e6a2630537 100644 --- a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp +++ b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp @@ -1891,6 +1891,20 @@ llvm::LogicalResult hlfir::RegionAssignOp::verify() { return mlir::success(); } +bool hlfir::RegionAssignOp::isPointerAssignment() { + if (!getUserDefinedAssignment().empty()) + return false; + hlfir::YieldOp yieldOp = + mlir::dyn_cast_or_null(getTerminator(getLhsRegion())); + if (!yieldOp) + return false; + mlir::Type lhsType = yieldOp.getEntity().getType(); + if (!hlfir::isBoxAddressType(lhsType)) + return false; + auto baseBoxType = llvm::cast(fir::unwrapRefType(lhsType)); + return baseBoxType.isPointer(); +} + //===----------------------------------------------------------------------===// // YieldOp //===----------------------------------------------------------------------===// diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp index cba1bfc74e922..7561daefa3b83 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp @@ -224,6 +224,10 @@ class OrderedAssignmentRewriter { /// Save a value for subsequent runs. void generateSaveEntity(hlfir::SaveEntity savedEntity, bool willUseSavedEntityInSameRun); + /// Save a variable address instead of its value. + void saveNonVectorSubscriptedAddress(hlfir::SaveEntity savedEntity); + /// Save a LHS variable address instead of its value, handling the cases + /// where the LHS is vector subscripted. void saveLeftHandSide(hlfir::SaveEntity savedEntity, hlfir::RegionAssignOp regionAssignOp); @@ -444,7 +448,16 @@ convertToMoldType(mlir::Location loc, fir::FirOpBuilder &builder, void OrderedAssignmentRewriter::pre(hlfir::RegionAssignOp regionAssignOp) { mlir::Location loc = regionAssignOp.getLoc(); - std::optional elementalLoopNest; + if (regionAssignOp.isPointerAssignment()) { + auto [lhsValue, oldLhsYield] = + generateYieldedEntity(regionAssignOp.getLhsRegion()); + auto [rhsValue, oldRhsYield] = + generateYieldedEntity(regionAssignOp.getRhsRegion()); + builder.createStoreWithConvert(loc, rhsValue, lhsValue); + generateCleanupIfAny(oldLhsYield); + generateCleanupIfAny(oldRhsYield); + return; + } auto [rhsValue, oldRhsYield] = generateYieldedEntity(regionAssignOp.getRhsRegion()); hlfir::Entity rhsEntity{rhsValue}; @@ -1075,6 +1088,12 @@ getAssignIfLeftHandSideRegion(mlir::Region ®ion) { return nullptr; } +static bool isPointerAssignmentRHS(mlir::Region ®ion) { + auto assign = mlir::dyn_cast(region.getParentOp()); + return assign && assign.isPointerAssignment() && + (&assign.getRhsRegion() == ®ion); +} + bool OrderedAssignmentRewriter::currentLoopNestIterationNumberCanBeComputed( llvm::SmallVectorImpl &loopNest) { if (constructStack.empty()) @@ -1139,6 +1158,11 @@ void OrderedAssignmentRewriter::generateSaveEntity( "lhs cannot be used in the loop nest where it is saved"); return saveLeftHandSide(savedEntity, regionAssignOp); } + if (isPointerAssignmentRHS(region)) { + assert(!willUseSavedEntityInSameRun && + "rhs cannot be used in the loop nest where it is saved"); + return saveNonVectorSubscriptedAddress(savedEntity); + } mlir::Location loc = region.getParentOp()->getLoc(); // Evaluate the region inside the loop nest (if any). @@ -1230,14 +1254,56 @@ static bool rhsIsArray(hlfir::RegionAssignOp regionAssignOp) { return yieldOp && hlfir::Entity{yieldOp.getEntity()}.isArray(); } +static bool isVectorSubscripted(mlir::Region ®ion) { + return llvm::isa(region.back().back()); +} + +void OrderedAssignmentRewriter::saveNonVectorSubscriptedAddress( + hlfir::SaveEntity savedEntity) { + mlir::Region ®ion = *savedEntity.yieldRegion; + mlir::Location loc = region.getParentOp()->getLoc(); + assert(!isVectorSubscripted(region) && + "expected variable without vector subscripts"); + ValueAndCleanUp varAndCleanup = generateYieldedEntity(region); + hlfir::Entity var{varAndCleanup.first}; + fir::factory::TemporaryStorage *temp = nullptr; + // If the address dominates the constructs, its SSA value can simply be + // tracked and there is no need to save the address in memory. Otherwise, + // the addresses are stored at each iteration in memory with a descriptor + // stack. + if (constructStack.empty() || + dominanceInfo.properlyDominates(var, constructStack[0])) + doBeforeLoopNest( + [&] { temp = insertSavedEntity(region, fir::factory::SSARegister{}); }); + else + doBeforeLoopNest([&] { + if (var.isMutableBox()) + temp = + insertSavedEntity(region, fir::factory::AnyDescriptorAddressStack{ + loc, builder, var.getType()}); + else + temp = insertSavedEntity(region, fir::factory::AnyVariableStack{ + loc, builder, var.getType()}); + }); + temp->pushValue(loc, builder, var); + generateCleanupIfAny(varAndCleanup.second); +} + void OrderedAssignmentRewriter::saveLeftHandSide( hlfir::SaveEntity savedEntity, hlfir::RegionAssignOp regionAssignOp) { mlir::Region ®ion = *savedEntity.yieldRegion; + if (!isVectorSubscripted(region)) { + saveNonVectorSubscriptedAddress(savedEntity); + return; + } + // Save vector subscripted LHS address. mlir::Location loc = region.getParentOp()->getLoc(); LhsValueAndCleanUp loweredLhs = generateYieldedLHS(loc, region); - fir::factory::TemporaryStorage *temp = nullptr; + // loweredLhs.vectorSubscriptLoopNest is empty inside a WHERE because the + // WHERE loops are already indexing the vector subscripted designator. if (loweredLhs.vectorSubscriptLoopNest) constructStack.push_back(loweredLhs.vectorSubscriptLoopNest->outerOp); + fir::factory::TemporaryStorage *temp = nullptr; if (loweredLhs.vectorSubscriptLoopNest && !rhsIsArray(regionAssignOp)) { // Vector subscripted entity for which the shape must also be saved on top // of the element addresses (e.g. the shape may change in each forall @@ -1264,22 +1330,15 @@ void OrderedAssignmentRewriter::saveLeftHandSide( vectorTmp.pushShape(loc, builder, shape); builder.restoreInsertionPoint(insertionPoint); } else { - // Otherwise, only save the LHS address. - // If the LHS address dominates the constructs, its SSA value can - // simply be tracked and there is no need to save the address in memory. - // Otherwise, the addresses are stored at each iteration in memory with - // a descriptor stack. - if (constructStack.empty() || - dominanceInfo.properlyDominates(loweredLhs.lhs, constructStack[0])) - doBeforeLoopNest([&] { - temp = insertSavedEntity(region, fir::factory::SSARegister{}); - }); - else - doBeforeLoopNest([&] { - temp = insertSavedEntity( - region, fir::factory::AnyVariableStack{loc, builder, - loweredLhs.lhs.getType()}); - }); + // Only saving the scalar elements addresses. These addresses computation + // depend on the inner loop indices generated for the vector subscripts + // (no need to wast time checking dominance) and can only be save in a + // variable stack so far. + doBeforeLoopNest([&] { + temp = insertSavedEntity( + region, fir::factory::AnyVariableStack{loc, builder, + loweredLhs.lhs.getType()}); + }); } temp->pushValue(loc, builder, loweredLhs.lhs); generateCleanupIfAny(loweredLhs.elementalCleanup); diff --git a/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp index 5971b5b9d76a0..722cd8a4488b1 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp @@ -377,7 +377,7 @@ void Scheduler::startSchedulingAssignment(hlfir::RegionAssignOp assign, // Unconditionally collect effects of the evaluations of LHS and RHS // in case they need to be analyzed for any parent that might be // affected by conflicts of these evaluations. - // This collection migth be skipped, if there are no such parents, + // This collection might be skipped, if there are no such parents, // but for the time being we run it always. gatherAssignEvaluationEffects(assign, leafRegionsMayOnlyRead, assignEvaluateEffects); @@ -597,9 +597,12 @@ hlfir::buildEvaluationSchedule(hlfir::OrderedAssignmentTreeOpInterface root, // Look for conflicts between the RHS/LHS evaluation and the assignments. // The LHS yield has no implicit read effect on the produced variable (the // variable is not read before the assignment). + // During pointer assignments, the RHS data is not read, only the address + // is taken. scheduler.startIndependentEvaluationGroup(); - scheduler.saveEvaluationIfConflict(assign.getRhsRegion(), - leafRegionsMayOnlyRead); + scheduler.saveEvaluationIfConflict( + assign.getRhsRegion(), leafRegionsMayOnlyRead, + /*yieldIsImplicitRead=*/!assign.isPointerAssignment()); // There is no point to save the LHS outside of Forall and assignment to a // vector subscripted LHS because the LHS is already fully evaluated and // saved in the resulting SSA address value (that may be a descriptor or diff --git a/flang/test/HLFIR/order_assignments/forall-pointer-assignment-codegen.fir b/flang/test/HLFIR/order_assignments/forall-pointer-assignment-codegen.fir new file mode 100644 index 0000000000000..1d198765aff9e --- /dev/null +++ b/flang/test/HLFIR/order_assignments/forall-pointer-assignment-codegen.fir @@ -0,0 +1,200 @@ +// Test code generation of hlfir.region_assign representing pointer +// assignments inside FORALL. + +// RUN: fir-opt %s --lower-hlfir-ordered-assignments | FileCheck %s + +!t = !fir.type +!ptr_wrapper = !fir.type>>}> + +func.func @test_no_conflict(%n: i64, %arg1: !fir.box>, %arg2: !fir.ref ) { + %c1 = arith.constant 1 : i64 + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "a"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %3:2 = hlfir.declare %arg2 dummy_scope %0 {fortran_attrs = #fir.var_attrs, uniq_name = "somet"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + hlfir.forall lb { + hlfir.yield %c1 : i64 + } ub { + hlfir.yield %n : i64 + } (%arg3: i64) { + hlfir.region_assign { + %5 = fir.embox %3#0 : (!fir.ref) -> !fir.box> + hlfir.yield %5 : !fir.box> + } to { + %6 = hlfir.designate %1#0 (%arg3) : (!fir.box>, i64) -> !fir.ref + %7 = hlfir.designate %6{"p"} {fortran_attrs = #fir.var_attrs} : (!fir.ref) -> !fir.ref>> + hlfir.yield %7 : !fir.ref>> + } + } + return +} +// CHECK-LABEL: func.func @test_no_conflict( +// CHECK-SAME: %[[VAL_0:.*]]: i64, +// CHECK: %[[VAL_3:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_5:.*]]:2 = hlfir.declare{{.*}}"a" +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare{{.*}}"somet" +// CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_3]] : (i64) -> index +// CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_0]] : (i64) -> index +// CHECK: %[[VAL_9:.*]] = arith.constant 1 : index +// CHECK: fir.do_loop %[[VAL_10:.*]] = %[[VAL_7]] to %[[VAL_8]] step %[[VAL_9]] { +// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (index) -> i64 +// CHECK: %[[VAL_12:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_11]]) : (!fir.box>>}>>>, i64) -> !fir.ref>>}>> +// CHECK: %[[VAL_13:.*]] = hlfir.designate %[[VAL_12]]{"p"} {fortran_attrs = #fir.var_attrs} : (!fir.ref>>}>>) -> !fir.ref>>> +// CHECK: %[[VAL_14:.*]] = fir.embox %[[VAL_6]]#0 : (!fir.ref>) -> !fir.box>> +// CHECK: fir.store %[[VAL_14]] to %[[VAL_13]] : !fir.ref>>> +// CHECK: } +// CHECK: return +// CHECK: } + +func.func @test_need_to_save_rhs(%n: i64, %arg1: !fir.box> ) { + %c1 = arith.constant 1 : i64 + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "a"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + hlfir.forall lb { + hlfir.yield %c1 : i64 + } ub { + hlfir.yield %n : i64 + } (%arg2: i64) { + hlfir.region_assign { + %5 = arith.addi %n, %c1: i64 + %6 = arith.subi %5, %arg2 : i64 + %8 = hlfir.designate %1#0 (%6) : (!fir.box>, i64) -> !fir.ref + %9 = hlfir.designate %8{"p"} {fortran_attrs = #fir.var_attrs} : (!fir.ref) -> !fir.ref>> + %10 = fir.load %9 : !fir.ref>> + hlfir.yield %10 : !fir.box> + } to { + %5 = hlfir.designate %1#0 (%arg2) : (!fir.box>, i64) -> !fir.ref + %6 = hlfir.designate %5{"p"} {fortran_attrs = #fir.var_attrs} : (!fir.ref) -> !fir.ref>> + hlfir.yield %6 : !fir.ref>> + } + } + return +} +// CHECK-LABEL: func.func @test_need_to_save_rhs( +// CHECK-SAME: %[[VAL_0:.*]]: i64, +// CHECK: %[[VAL_2:.*]] = fir.alloca !fir.box>> +// CHECK: %[[VAL_3:.*]] = fir.alloca i64 +// CHECK: %[[VAL_4:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_5:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare{{.*}}"a" +// CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_4]] : (i64) -> index +// CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_0]] : (i64) -> index +// CHECK: %[[VAL_9:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_10:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_11:.*]] = arith.constant 1 : i64 +// CHECK: fir.store %[[VAL_10]] to %[[VAL_3]] : !fir.ref +// CHECK: %[[VAL_16:.*]] = fir.call @_FortranACreateDescriptorStack( +// CHECK: fir.do_loop %[[VAL_17:.*]] = %[[VAL_7]] to %[[VAL_8]] step %[[VAL_9]] { +// CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_17]] : (index) -> i64 +// CHECK: %[[VAL_19:.*]] = arith.addi %[[VAL_0]], %[[VAL_4]] : i64 +// CHECK: %[[VAL_20:.*]] = arith.subi %[[VAL_19]], %[[VAL_18]] : i64 +// CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_20]]) : (!fir.box>>}>>>, i64) -> !fir.ref>>}>> +// CHECK: %[[VAL_22:.*]] = hlfir.designate %[[VAL_21]]{"p"} {fortran_attrs = #fir.var_attrs} : (!fir.ref>>}>>) -> !fir.ref>>> +// CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_22]] : !fir.ref>>> +// CHECK: %[[VAL_24:.*]] = fir.box_addr %[[VAL_23]] : (!fir.box>>) -> !fir.ptr> +// CHECK: %[[VAL_25:.*]] = fir.embox %[[VAL_24]] : (!fir.ptr>) -> !fir.box> +// CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (!fir.box>) -> !fir.box +// CHECK: fir.call @_FortranAPushDescriptor(%[[VAL_16]], %[[VAL_26]]) : (!fir.llvm_ptr, !fir.box) -> () +// CHECK: } +// CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_4]] : (i64) -> index +// CHECK: %[[VAL_28:.*]] = fir.convert %[[VAL_0]] : (i64) -> index +// CHECK: %[[VAL_29:.*]] = arith.constant 1 : index +// CHECK: fir.store %[[VAL_10]] to %[[VAL_3]] : !fir.ref +// CHECK: fir.do_loop %[[VAL_30:.*]] = %[[VAL_27]] to %[[VAL_28]] step %[[VAL_29]] { +// CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (index) -> i64 +// CHECK: %[[VAL_32:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_31]]) : (!fir.box>>}>>>, i64) -> !fir.ref>>}>> +// CHECK: %[[VAL_33:.*]] = hlfir.designate %[[VAL_32]]{"p"} {fortran_attrs = #fir.var_attrs} : (!fir.ref>>}>>) -> !fir.ref>>> +// CHECK: %[[VAL_34:.*]] = fir.load %[[VAL_3]] : !fir.ref +// CHECK: %[[VAL_35:.*]] = arith.addi %[[VAL_34]], %[[VAL_11]] : i64 +// CHECK: fir.store %[[VAL_35]] to %[[VAL_3]] : !fir.ref +// CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_2]] : (!fir.ref>>>) -> !fir.ref> +// CHECK: fir.call @_FortranADescriptorAt(%[[VAL_16]], %[[VAL_34]], %[[VAL_36]]) : (!fir.llvm_ptr, i64, !fir.ref>) -> () +// CHECK: %[[VAL_37:.*]] = fir.load %[[VAL_2]] : !fir.ref>>> +// CHECK: fir.store %[[VAL_37]] to %[[VAL_33]] : !fir.ref>>> +// CHECK: } +// CHECK: fir.call @_FortranADestroyDescriptorStack(%[[VAL_16]]) : (!fir.llvm_ptr) -> () +// CHECK: return +// CHECK: } + +func.func @test_need_to_save_lhs(%n: i64, %arg1: !fir.box>, %arg2: !fir.ref ) { + %c1 = arith.constant 1 : i64 + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "a"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %3:2 = hlfir.declare %arg2 dummy_scope %0 {fortran_attrs = #fir.var_attrs, uniq_name = "somet"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + hlfir.forall lb { + hlfir.yield %c1 : i64 + } ub { + hlfir.yield %n : i64 + } (%arg3: i64) { + hlfir.region_assign { + %5 = fir.embox %3#0 : (!fir.ref) -> !fir.box> + hlfir.yield %5 : !fir.box> + } to { + %6 = arith.addi %n, %c1 : i64 + %7 = arith.subi %6, %arg3 : i64 + %9 = hlfir.designate %1#0 (%7) : (!fir.box>, i64) -> !fir.ref + %10 = hlfir.designate %9{"p"} {fortran_attrs = #fir.var_attrs} : (!fir.ref) -> !fir.ref>> + %11 = fir.load %10 : !fir.ref>> + %13 = hlfir.designate %11{"i"} : (!fir.box>) -> !fir.ref + %14 = fir.load %13 : !fir.ref + %16 = hlfir.designate %1#0 (%14) : (!fir.box>, i64) -> !fir.ref + %17 = hlfir.designate %16{"p"} {fortran_attrs = #fir.var_attrs} : (!fir.ref) -> !fir.ref>> + hlfir.yield %17 : !fir.ref>> + } + } + return +} +// CHECK-LABEL: func.func @test_need_to_save_lhs( +// CHECK-SAME: %[[VAL_0:.*]]: i64, +// CHECK: %[[VAL_3:.*]] = fir.alloca i64 +// CHECK: %[[VAL_4:.*]] = fir.alloca !fir.box> +// CHECK: %[[VAL_5:.*]] = fir.alloca i64 +// CHECK: %[[VAL_6:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_7:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare{{.*}}"a" +// CHECK: %[[VAL_9:.*]]:2 = hlfir.declare{{.*}}"somet" +// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_6]] : (i64) -> index +// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_0]] : (i64) -> index +// CHECK: %[[VAL_12:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_13:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_14:.*]] = arith.constant 1 : i64 +// CHECK: fir.store %[[VAL_13]] to %[[VAL_5]] : !fir.ref +// CHECK: %[[VAL_19:.*]] = fir.call @_FortranACreateValueStack( +// CHECK: fir.do_loop %[[VAL_20:.*]] = %[[VAL_10]] to %[[VAL_11]] step %[[VAL_12]] { +// CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (index) -> i64 +// CHECK: %[[VAL_22:.*]] = arith.addi %[[VAL_0]], %[[VAL_6]] : i64 +// CHECK: %[[VAL_23:.*]] = arith.subi %[[VAL_22]], %[[VAL_21]] : i64 +// CHECK: %[[VAL_24:.*]] = hlfir.designate %[[VAL_8]]#0 (%[[VAL_23]]) : (!fir.box>>}>>>, i64) -> !fir.ref>>}>> +// CHECK: %[[VAL_25:.*]] = hlfir.designate %[[VAL_24]]{"p"} {fortran_attrs = #fir.var_attrs} : (!fir.ref>>}>>) -> !fir.ref>>> +// CHECK: %[[VAL_26:.*]] = fir.load %[[VAL_25]] : !fir.ref>>> +// CHECK: %[[VAL_27:.*]] = hlfir.designate %[[VAL_26]]{"i"} : (!fir.box>>) -> !fir.ref +// CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_27]] : !fir.ref +// CHECK: %[[VAL_29:.*]] = hlfir.designate %[[VAL_8]]#0 (%[[VAL_28]]) : (!fir.box>>}>>>, i64) -> !fir.ref>>}>> +// CHECK: %[[VAL_30:.*]] = hlfir.designate %[[VAL_29]]{"p"} {fortran_attrs = #fir.var_attrs} : (!fir.ref>>}>>) -> !fir.ref>>> +// CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (!fir.ref>>>) -> i64 +// CHECK: fir.store %[[VAL_31]] to %[[VAL_3]] : !fir.ref +// CHECK: %[[VAL_32:.*]] = fir.embox %[[VAL_3]] : (!fir.ref) -> !fir.box +// CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (!fir.box) -> !fir.box +// CHECK: fir.call @_FortranAPushValue(%[[VAL_19]], %[[VAL_33]]) : (!fir.llvm_ptr, !fir.box) -> () +// CHECK: } +// CHECK: %[[VAL_34:.*]] = fir.convert %[[VAL_6]] : (i64) -> index +// CHECK: %[[VAL_35:.*]] = fir.convert %[[VAL_0]] : (i64) -> index +// CHECK: %[[VAL_36:.*]] = arith.constant 1 : index +// CHECK: fir.store %[[VAL_13]] to %[[VAL_5]] : !fir.ref +// CHECK: fir.do_loop %[[VAL_37:.*]] = %[[VAL_34]] to %[[VAL_35]] step %[[VAL_36]] { +// CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (index) -> i64 +// CHECK: %[[VAL_39:.*]] = fir.load %[[VAL_5]] : !fir.ref +// CHECK: %[[VAL_40:.*]] = arith.addi %[[VAL_39]], %[[VAL_14]] : i64 +// CHECK: fir.store %[[VAL_40]] to %[[VAL_5]] : !fir.ref +// CHECK: %[[VAL_41:.*]] = fir.convert %[[VAL_4]] : (!fir.ref>>) -> !fir.ref> +// CHECK: fir.call @_FortranAValueAt(%[[VAL_19]], %[[VAL_39]], %[[VAL_41]]) : (!fir.llvm_ptr, i64, !fir.ref>) -> () +// CHECK: %[[VAL_42:.*]] = fir.load %[[VAL_4]] : !fir.ref>> +// CHECK: %[[VAL_43:.*]] = fir.box_addr %[[VAL_42]] : (!fir.box>) -> !fir.heap +// CHECK: %[[VAL_44:.*]] = fir.load %[[VAL_43]] : !fir.heap +// CHECK: %[[VAL_45:.*]] = fir.convert %[[VAL_44]] : (i64) -> !fir.ref>>> +// CHECK: %[[VAL_46:.*]] = fir.embox %[[VAL_9]]#0 : (!fir.ref>) -> !fir.box>> +// CHECK: fir.store %[[VAL_46]] to %[[VAL_45]] : !fir.ref>>> +// CHECK: } +// CHECK: fir.call @_FortranADestroyValueStack(%[[VAL_19]]) : (!fir.llvm_ptr) -> () +// CHECK: return +// CHECK: } diff --git a/flang/test/HLFIR/order_assignments/forall-pointer-assignment-scheduling.f90 b/flang/test/HLFIR/order_assignments/forall-pointer-assignment-scheduling.f90 new file mode 100644 index 0000000000000..52a0105ce2b6a --- /dev/null +++ b/flang/test/HLFIR/order_assignments/forall-pointer-assignment-scheduling.f90 @@ -0,0 +1,111 @@ +! Test analysis of pointer assignment inside FORALL. +! The analysis must detect if the evaluation of the LHS or RHS may be impacted +! by the pointer assignments, or if the forall can be lowered into a single +! loop without any temporary copy. + +! RUN: bbc -hlfir -o /dev/null -pass-pipeline="builtin.module(lower-hlfir-ordered-assignments)" \ +! RUN: --debug-only=flang-ordered-assignment -flang-dbg-order-assignment-schedule-only %s 2>&1 | FileCheck %s +! REQUIRES: asserts +module forall_pointers + type t + integer :: i + end type + type ptr_wrapper + type(t), pointer :: p + end type +contains + +! Simple case that can be lowered into a single loop. +subroutine test_no_conflict(n, a, somet) + integer :: n + type(ptr_wrapper), allocatable :: a(:) + type(t), target :: somet + forall(i=1:n) a(i)%p => somet +end subroutine +! CHECK: ------------ scheduling forall in _QMforall_pointersPtest_no_conflict ------------ +! CHECK-NEXT: run 1 evaluate: forall/region_assign1 + +! Case where the pointer target evaluations are impacted by the pointer +! assignments and should be evaluated for each iteration before doing +! any pointer assignment. +! The test is transposing an array of (wrapped) pointers. +subroutine test_need_to_save_rhs(n, a) + integer :: n + type(ptr_wrapper) :: a(:) + forall(i=1:n) a(i)%p => a(n+1-i)%p +end subroutine +! CHECK: ------------ scheduling forall in _QMforall_pointersPtest_need_to_save_rhs ------------ +! CHECK-NEXT: conflict: R/W +! CHECK-NEXT: run 1 save : forall/region_assign1/rhs +! CHECK-NEXT: run 2 evaluate: forall/region_assign1 + +! Case where the pointer descriptor address evaluations are impacted by the +! assignments and should be evaluated for each iteration before doing +! any pointer assignment. +subroutine test_need_to_save_lhs(n, a, somet) + integer :: n + type(ptr_wrapper) :: a(:) + type(t), target :: somet + forall(i=1:n) a(a(n+1-i)%p%i)%p => somet +end subroutine +! CHECK: ------------ scheduling forall in _QMforall_pointersPtest_need_to_save_lhs ------------ +! CHECK-NEXT: conflict: R/W +! CHECK-NEXT: run 1 save : forall/region_assign1/lhs +! CHECK-NEXT: run 2 evaluate: forall/region_assign1 + +! Case where both the computation of the target and descriptor addresses are +! impacted by the assignment and need to be all evaluated before doing any +! assignment. +subroutine test_need_to_save_lhs_and_rhs(n, a) + integer :: n + type(ptr_wrapper) :: a(:) + forall(i=1:n) a(a(n+1-i)%p%i)%p => a(modulo(-2*i, n+1))%p +end subroutine +! CHECK: ------------ scheduling forall in _QMforall_pointersPtest_need_to_save_lhs_and_rhs ------------ +! CHECK-NEXT: conflict: R/W +! CHECK-NEXT: run 1 save : forall/region_assign1/rhs +! CHECK-NEXT: conflict: R/W +! CHECK-NEXT: run 1 save : forall/region_assign1/lhs +! CHECK-NEXT: run 2 evaluate: forall/region_assign1 +end module + +! End to end test provided for debugging purpose (not run by lit). +program end_to_end + use forall_pointers + integer, parameter :: n = 10 + type(t), target, save :: data(n) = [(t(i), i=1,n)] + type(ptr_wrapper) :: pointers(n) + ! Print pointer/target mapping baseline. + ! Expect: 10 9 8 7 6 5 4 3 2 1 + call reset_pointers(pointers) + call print_pointers(pointers) + + ! Test case where RHS target addresses must be saved in FORALL. + ! Expect: 1 2 3 4 5 6 7 8 9 10 + call test_need_to_save_rhs(n, pointers) + call print_pointers(pointers) + + ! Test case where LHS pointer addresses must be saved in FORALL. + ! Expect: 1 1 1 1 1 1 1 1 1 1 + call reset_pointers(pointers) + call test_need_to_save_lhs(n, pointers, data(1)) + call print_pointers(pointers) + + ! Test case where bot RHS target addresses and LHS pointer addresses must be + ! saved in FORALL. + ! Expect: 2 4 6 8 10 1 3 5 7 9 + call reset_pointers(pointers) + call test_need_to_save_lhs_and_rhs(n, pointers) + call print_pointers(pointers) +contains +subroutine reset_pointers(a) + type(ptr_wrapper) :: a(:) + do i=1,n + a(i)%p => data(n+1-i) + end do +end subroutine +subroutine print_pointers(a) + type(ptr_wrapper) :: a(:) + print *, [(a(i)%p%i, i=lbound(a,1), ubound(a,1))] +end subroutine +end diff --git a/flang/test/HLFIR/order_assignments/vector-subscripts-codegen.fir b/flang/test/HLFIR/order_assignments/vector-subscripts-codegen.fir index c75daf4f69cff..da356b39251d1 100644 --- a/flang/test/HLFIR/order_assignments/vector-subscripts-codegen.fir +++ b/flang/test/HLFIR/order_assignments/vector-subscripts-codegen.fir @@ -212,3 +212,50 @@ func.func @unordered(%arg0: !fir.ref> , %arg1: !fir.ref}> +func.func @_QPwhere_vec_subscripts(%arg0: !fir.ref>>, %arg1: !fir.box>) { + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "_QFwhere_vec_subscriptsEa"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %c4 = arith.constant 4 : index + %2 = fir.shape %c4 : (index) -> !fir.shape<1> + %3:2 = hlfir.declare %arg0(%2) dummy_scope %0 {uniq_name = "_QFwhere_vec_subscriptsEmask"} : (!fir.ref>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref>>, !fir.ref>>) + hlfir.where { + hlfir.yield %3#0 : !fir.ref>> + } do { + hlfir.region_assign { + %cst = arith.constant 0.000000e+00 : f32 + hlfir.yield %cst : f32 + } to { + %c1 = arith.constant 1 : index + %4 = hlfir.designate %1#0 (%c1) : (!fir.box>, index) -> !fir.ref + %6 = hlfir.designate %4{"vec"} shape %2 : (!fir.ref, !fir.shape<1>) -> !fir.ref> + hlfir.elemental_addr %2 unordered : !fir.shape<1> { + ^bb0(%arg2: index): + %8 = hlfir.designate %6 (%arg2) : (!fir.ref>, index) -> !fir.ref + %9 = fir.load %8 : !fir.ref + %10 = hlfir.designate %1#0 (%9) : (!fir.box>, i64) -> !fir.ref + %11 = hlfir.designate %10{"x"} : (!fir.ref) -> !fir.ref + hlfir.yield %11 : !fir.ref + } + } + } + return +} +// CHECK-LABEL: func.func @_QPwhere_vec_subscripts( +// CHECK: %[[VAL_16:.*]] = fir.call @_FortranACreateDescriptorStack( +// CHECK: fir.do_loop {{.*}} +// CHECK: fir.if %{{.*}} { +// CHECK: fir.call @_FortranAPushDescriptor( +// CHECK: } +// CHECK: } +// CHECK: fir.do_loop {{.*}} +// CHECK: fir.if %{{.*}} { +// CHECK: fir.call @_FortranADescriptorAt( +// CHECK: hlfir.assign +// CHECK: } +// CHECK: } +// CHECK: fir.call @_FortranADestroyDescriptorStack( +// CHECK: return +// CHECK: }