diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index 393e70f772e5e..8cf838ea1926f 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -418,6 +418,10 @@ hlfir::ElementalOp cloneToElementalOp(mlir::Location loc, /// would be incorrect. bool elementalOpMustProduceTemp(hlfir::ElementalOp elemental); +std::pair +createTempFromMold(mlir::Location loc, fir::FirOpBuilder &builder, + hlfir::Entity mold); + } // namespace hlfir #endif // FORTRAN_OPTIMIZER_BUILDER_HLFIRTOOLS_H diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index 93b06e77e458e..e4cbd7c27d3ea 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -22,6 +22,7 @@ #include "flang/Optimizer/Builder/BoxValue.h" #include "flang/Optimizer/Builder/Complex.h" #include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Builder/HLFIRTools.h" #include "flang/Optimizer/Builder/IntrinsicCall.h" #include "flang/Optimizer/Builder/Todo.h" #include "flang/Parser/parse-tree.h" @@ -704,6 +705,9 @@ static mlir::Value getReductionInitValue(fir::FirOpBuilder &builder, if (auto seqTy = mlir::dyn_cast(ty)) return getReductionInitValue(builder, loc, seqTy.getEleTy(), op); + if (auto boxTy = mlir::dyn_cast(ty)) + return getReductionInitValue(builder, loc, boxTy.getEleTy(), op); + llvm::report_fatal_error("Unsupported OpenACC reduction type"); } @@ -749,6 +753,14 @@ static mlir::Value genReductionInitRegion(fir::FirOpBuilder &builder, builder.setInsertionPointAfter(loops[0]); return declareOp.getBase(); } + } else if (auto boxTy = mlir::dyn_cast_or_null(ty)) { + if (!mlir::isa(boxTy.getEleTy())) + TODO(loc, "Unsupported boxed type for reduction"); + // Create the private copy from the initial fir.box. 
+ hlfir::Entity source = hlfir::Entity{builder.getBlock()->getArgument(0)}; + auto [temp, cleanup] = hlfir::createTempFromMold(loc, builder, source); + builder.create(loc, initValue, temp); + return temp; } llvm::report_fatal_error("Unsupported OpenACC reduction type"); } @@ -850,8 +862,8 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc, ty = fir::unwrapRefType(ty); if (auto seqTy = mlir::dyn_cast(ty)) { - if (seqTy.hasDynamicExtents()) - TODO(loc, "OpenACC reduction on array with dynamic extents"); + assert(!seqTy.hasDynamicExtents() && + "Assumed shaped array should be boxed for reduction"); mlir::Type idxTy = builder.getIndexType(); mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); @@ -875,6 +887,29 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc, genScalarCombiner(builder, loc, op, seqTy.getEleTy(), load1, load2); builder.create(loc, res, addr1); builder.setInsertionPointAfter(loops[0]); + } else if (auto boxTy = mlir::dyn_cast(ty)) { + fir::SequenceType seqTy = + mlir::dyn_cast_or_null(boxTy.getEleTy()); + if (!seqTy) + TODO(loc, "Unsupported boxed type in OpenACC reduction"); + hlfir::Entity left = hlfir::Entity{value1}; + hlfir::Entity right = hlfir::Entity{value2}; + auto shape = hlfir::genShape(loc, builder, left); + llvm::SmallVector typeParams; + auto genKernel = [&builder, &loc, op, seqTy, &left, &right]( + mlir::Location l, fir::FirOpBuilder &b, + mlir::ValueRange oneBasedIndices) -> hlfir::Entity { + auto leftElement = hlfir::getElementAt(l, b, left, oneBasedIndices); + auto rightElement = hlfir::getElementAt(l, b, right, oneBasedIndices); + auto leftVal = hlfir::loadTrivialScalar(l, b, leftElement); + auto rightVal = hlfir::loadTrivialScalar(l, b, rightElement); + return hlfir::Entity{genScalarCombiner(builder, loc, op, seqTy.getEleTy(), + leftVal, rightVal)}; + }; + mlir::Value elemental = hlfir::genElementalOp( + loc, builder, seqTy.getEleTy(), shape, typeParams, genKernel, + 
/*isUnordered=*/true); + builder.create(loc, elemental, value1); } else { mlir::Value res = genScalarCombiner(builder, loc, op, ty, value1, value2); builder.create(loc, res, value1); @@ -932,9 +967,6 @@ genReductions(const Fortran::parser::AccObjectListWithReduction &objectList, mlir::acc::DataBoundsOp>(converter, builder, semanticsContext, stmtCtx, accObject, operandLocation, asFortran, bounds); - if (hasDynamicShape(bounds)) - TODO(operandLocation, "OpenACC reductions with dynamic shaped array"); - mlir::Type reductionTy = fir::unwrapRefType(baseAddr.getType()); if (auto seqTy = mlir::dyn_cast(reductionTy)) reductionTy = seqTy.getEleTy(); @@ -953,6 +985,8 @@ genReductions(const Fortran::parser::AccObjectListWithReduction &objectList, std::string recipeName = fir::getTypeAsString( ty, converter.getKindMap(), ("reduction_" + stringifyReductionOperator(mlirOp)).str()); + if (hasDynamicShape(bounds)) + ty = baseAddr.getType(); mlir::acc::ReductionRecipeOp recipe = Fortran::lower::createOrGetReductionRecipe( builder, recipeName, operandLocation, ty, mlirOp, bounds); diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index 7034d6e893e7e..cc4bdf356ae9b 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -14,6 +14,7 @@ #include "flang/Optimizer/Builder/Character.h" #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Builder/MutableBox.h" +#include "flang/Optimizer/Builder/Runtime/Allocatable.h" #include "flang/Optimizer/Builder/Todo.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "mlir/IR/IRMapping.h" @@ -1030,3 +1031,64 @@ bool hlfir::elementalOpMustProduceTemp(hlfir::ElementalOp elemental) { return false; } + +std::pair +hlfir::createTempFromMold(mlir::Location loc, fir::FirOpBuilder &builder, + hlfir::Entity mold) { + llvm::SmallVector lenParams; + hlfir::genLengthParameters(loc, builder, mold, lenParams); + llvm::StringRef 
tmpName{".tmp"}; + mlir::Value alloc; + mlir::Value isHeapAlloc; + mlir::Value shape{}; + fir::FortranVariableFlagsAttr declAttrs; + + if (mold.isPolymorphic()) { + // Create unallocated polymorphic temporary using the dynamic type + // of the mold. The static type of the temporary matches + // the static type of the mold, but then the dynamic type + // of the mold is applied to the temporary's descriptor. + + if (mold.isArray()) + hlfir::genShape(loc, builder, mold); + + // Create polymorphic allocatable box on the stack. + mlir::Type boxHeapType = fir::HeapType::get(fir::unwrapRefType( + mlir::cast(mold.getType()).getEleTy())); + // The box must be initialized, because AllocatableApplyMold + // may read its contents (e.g. for checking whether it is allocated). + alloc = fir::factory::genNullBoxStorage(builder, loc, + fir::ClassType::get(boxHeapType)); + // The temporary is unallocated even after AllocatableApplyMold below. + // If the temporary is used as assignment LHS it will be automatically + // allocated on the heap, as long as we use Assign family + // runtime functions. So set MustFree to true. 
+ isHeapAlloc = builder.createBool(loc, true); + declAttrs = fir::FortranVariableFlagsAttr::get( + builder.getContext(), fir::FortranVariableFlagsEnum::allocatable); + } else if (mold.isArray()) { + mlir::Type sequenceType = + hlfir::getFortranElementOrSequenceType(mold.getType()); + shape = hlfir::genShape(loc, builder, mold); + auto extents = hlfir::getIndexExtents(loc, builder, shape); + alloc = builder.createHeapTemporary(loc, sequenceType, tmpName, extents, + lenParams); + isHeapAlloc = builder.createBool(loc, true); + } else { + alloc = builder.createTemporary(loc, mold.getFortranElementType(), tmpName, + /*shape=*/std::nullopt, lenParams); + isHeapAlloc = builder.createBool(loc, false); + } + auto declareOp = builder.create(loc, alloc, tmpName, shape, + lenParams, declAttrs); + if (mold.isPolymorphic()) { + int rank = mold.getRank(); + // TODO: should probably read rank from the mold. + if (rank < 0) + TODO(loc, "create temporary for assumed rank polymorphic"); + fir::runtime::genAllocatableApplyMold(builder, loc, alloc, + mold.getFirBase(), rank); + } + + return {hlfir::Entity{declareOp.getBase()}, isHeapAlloc}; +} diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp index e852a1887c8bf..3ddaf1f2af8fd 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp @@ -101,67 +101,6 @@ static mlir::Value getBufferizedExprMustFreeFlag(mlir::Value bufferizedExpr) { TODO(bufferizedExpr.getLoc(), "general extract storage case"); } -static std::pair -createTempFromMold(mlir::Location loc, fir::FirOpBuilder &builder, - hlfir::Entity mold) { - llvm::SmallVector lenParams; - hlfir::genLengthParameters(loc, builder, mold, lenParams); - llvm::StringRef tmpName{".tmp"}; - mlir::Value alloc; - mlir::Value isHeapAlloc; - mlir::Value shape{}; - fir::FortranVariableFlagsAttr declAttrs; - - if (mold.isPolymorphic()) { - // Create 
unallocated polymorphic temporary using the dynamic type - // of the mold. The static type of the temporary matches - // the static type of the mold, but then the dynamic type - // of the mold is applied to the temporary's descriptor. - - if (mold.isArray()) - hlfir::genShape(loc, builder, mold); - - // Create polymorphic allocatable box on the stack. - mlir::Type boxHeapType = fir::HeapType::get(fir::unwrapRefType( - mlir::cast(mold.getType()).getEleTy())); - // The box must be initialized, because AllocatableApplyMold - // may read its contents (e.g. for checking whether it is allocated). - alloc = fir::factory::genNullBoxStorage(builder, loc, - fir::ClassType::get(boxHeapType)); - // The temporary is unallocated even after AllocatableApplyMold below. - // If the temporary is used as assignment LHS it will be automatically - // allocated on the heap, as long as we use Assign family - // runtime functions. So set MustFree to true. - isHeapAlloc = builder.createBool(loc, true); - declAttrs = fir::FortranVariableFlagsAttr::get( - builder.getContext(), fir::FortranVariableFlagsEnum::allocatable); - } else if (mold.isArray()) { - mlir::Type sequenceType = - hlfir::getFortranElementOrSequenceType(mold.getType()); - shape = hlfir::genShape(loc, builder, mold); - auto extents = hlfir::getIndexExtents(loc, builder, shape); - alloc = builder.createHeapTemporary(loc, sequenceType, tmpName, extents, - lenParams); - isHeapAlloc = builder.createBool(loc, true); - } else { - alloc = builder.createTemporary(loc, mold.getFortranElementType(), tmpName, - /*shape=*/std::nullopt, lenParams); - isHeapAlloc = builder.createBool(loc, false); - } - auto declareOp = builder.create(loc, alloc, tmpName, shape, - lenParams, declAttrs); - if (mold.isPolymorphic()) { - int rank = mold.getRank(); - // TODO: should probably read rank from the mold. 
- if (rank < 0) - TODO(loc, "create temporary for assumed rank polymorphic"); - fir::runtime::genAllocatableApplyMold(builder, loc, alloc, - mold.getFirBase(), rank); - } - - return {hlfir::Entity{declareOp.getBase()}, isHeapAlloc}; -} - static std::pair createArrayTemp(mlir::Location loc, fir::FirOpBuilder &builder, mlir::Type exprType, mlir::Value shape, @@ -239,7 +178,7 @@ struct AsExprOpConversion : public mlir::OpConversionPattern { } // Otherwise, create a copy in a new buffer. hlfir::Entity source = hlfir::Entity{adaptor.getVar()}; - auto [temp, cleanup] = createTempFromMold(loc, builder, source); + auto [temp, cleanup] = hlfir::createTempFromMold(loc, builder, source); builder.create(loc, source, temp, temp.isAllocatable(), /*keep_lhs_length_if_realloc=*/false, /*temporary_lhs=*/true); @@ -596,7 +535,7 @@ struct AssociateOpConversion // non-trivial value with more than one use. We will have to make a copy and // use that hlfir::Entity source = hlfir::Entity{bufferizedExpr}; - auto [temp, cleanup] = createTempFromMold(loc, builder, source); + auto [temp, cleanup] = hlfir::createTempFromMold(loc, builder, source); builder.create(loc, source, temp, temp.isAllocatable(), /*keep_lhs_length_if_realloc=*/false, /*temporary_lhs=*/true); diff --git a/flang/test/Lower/OpenACC/acc-reduction.f90 b/flang/test/Lower/OpenACC/acc-reduction.f90 index 86e99a48c2e61..3cd152de6232a 100644 --- a/flang/test/Lower/OpenACC/acc-reduction.f90 +++ b/flang/test/Lower/OpenACC/acc-reduction.f90 @@ -3,6 +3,61 @@ ! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s --check-prefixes=CHECK,FIR ! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s --check-prefixes=CHECK,HLFIR +! CHECK-LABEL: acc.reduction.recipe @"reduction_max_ref_?xf32" : !fir.box> reduction_operator init { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box>): +! CHECK: %[[INIT_VALUE:.*]] = arith.constant -1.401300e-45 : f32 +! HLFIR: %[[C0:.*]] = arith.constant 0 : index +! 
HLFIR: %[[BOX_DIMS:.*]]:3 = fir.box_dims %[[ARG0]], %[[C0]] : (!fir.box>, index) -> (index, index, index) +! HLFIR: %[[SHAPE:.*]] = fir.shape %[[BOX_DIMS]]#1 : (index) -> !fir.shape<1> +! HLFIR: %[[TEMP:.*]] = fir.allocmem !fir.array, %[[BOX_DIMS]]#1 {bindc_name = ".tmp", uniq_name = ""} +! HLFIR: %[[DECLARE:.*]]:2 = hlfir.declare %[[TEMP]](%[[SHAPE]]) {uniq_name = ".tmp"} : (!fir.heap>, !fir.shape<1>) -> (!fir.box>, !fir.heap>) +! HLFIR: hlfir.assign %[[INIT_VALUE]] to %[[DECLARE]]#0 : f32, !fir.box> +! HLFIR: acc.yield %[[DECLARE]]#0 : !fir.box> +! CHECK: } combiner { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box>, %[[ARG1:.*]]: !fir.box>): +! HLFIR: %[[BOX_DIMS:.*]]:3 = fir.box_dims %[[ARG0]], %{{.*}} : (!fir.box>, index) -> (index, index, index) +! HLFIR: %[[SHAPE:.*]] = fir.shape %[[BOX_DIMS]]#1 : (index) -> !fir.shape<1> +! HLFIR: %[[ELEMENTAL:.*]] = hlfir.elemental %[[SHAPE]] unordered : (!fir.shape<1>) -> !hlfir.expr { +! HLFIR: ^bb0(%{{.*}}: index): +! HLFIR: %[[DES_V1:.*]] = hlfir.designate %[[ARG0]] (%{{.*}}) : (!fir.box>, index) -> !fir.ref +! HLFIR: %[[DES_V2:.*]] = hlfir.designate %[[ARG1]] (%{{.*}}) : (!fir.box>, index) -> !fir.ref +! HLFIR: %[[LOAD_V1:.*]] = fir.load %[[DES_V1]] : !fir.ref +! HLFIR: %[[LOAD_V2:.*]] = fir.load %[[DES_V2]] : !fir.ref +! HLFIR: %[[CMPF:.*]] = arith.cmpf ogt, %[[LOAD_V1]], %[[LOAD_V2]] : f32 +! HLFIR: %[[SELECT:.*]] = arith.select %[[CMPF]], %[[LOAD_V1]], %[[LOAD_V2]] : f32 +! HLFIR: hlfir.yield_element %[[SELECT]] : f32 +! HLFIR: } +! HLFIR: hlfir.assign %[[ELEMENTAL]] to %[[ARG0]] : !hlfir.expr, !fir.box> +! CHECK: acc.yield %[[ARG0]] : !fir.box> +! CHECK: } + +! CHECK-LABEL: acc.reduction.recipe @"reduction_add_ref_?xi32" : !fir.box> reduction_operator init { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box>): +! HLFIR: %[[INIT_VALUE:.*]] = arith.constant 0 : i32 +! HLFIR: %[[C0:.*]] = arith.constant 0 : index +! HLFIR: %[[BOX_DIMS:.*]]:3 = fir.box_dims %[[ARG0]], %[[C0]] : (!fir.box>, index) -> (index, index, index) +! 
HLFIR: %[[SHAPE:.*]] = fir.shape %[[BOX_DIMS]]#1 : (index) -> !fir.shape<1> +! HLFIR: %[[TEMP:.*]] = fir.allocmem !fir.array, %[[BOX_DIMS]]#1 {bindc_name = ".tmp", uniq_name = ""} +! HLFIR: %[[DECLARE:.*]]:2 = hlfir.declare %[[TEMP]](%[[SHAPE]]) {uniq_name = ".tmp"} : (!fir.heap>, !fir.shape<1>) -> (!fir.box>, !fir.heap>) +! HLFIR: hlfir.assign %[[INIT_VALUE]] to %[[DECLARE]]#0 : i32, !fir.box> +! HLFIR: acc.yield %[[DECLARE]]#0 : !fir.box> +! CHECK: } combiner { +! CHECK: ^bb0(%[[V1:.*]]: !fir.box>, %[[V2:.*]]: !fir.box>): +! HLFIR: %[[BOX_DIMS:.*]]:3 = fir.box_dims %[[V1]], %{{.*}} : (!fir.box>, index) -> (index, index, index) +! HLFIR: %[[SHAPE:.*]] = fir.shape %[[BOX_DIMS]]#1 : (index) -> !fir.shape<1> +! HLFIR: %[[ELEMENTAL:.*]] = hlfir.elemental %[[SHAPE]] unordered : (!fir.shape<1>) -> !hlfir.expr { +! HLFIR: ^bb0(%{{.*}}: index): +! HLFIR: %[[DES_V1:.*]] = hlfir.designate %[[V1]] (%{{.*}}) : (!fir.box>, index) -> !fir.ref +! HLFIR: %[[DES_V2:.*]] = hlfir.designate %[[V2]] (%{{.*}}) : (!fir.box>, index) -> !fir.ref +! HLFIR: %[[LOAD_V1:.*]] = fir.load %[[DES_V1]] : !fir.ref +! HLFIR: %[[LOAD_V2:.*]] = fir.load %[[DES_V2]] : !fir.ref +! HLFIR: %[[COMBINED:.*]] = arith.addi %[[LOAD_V1]], %[[LOAD_V2]] : i32 +! HLFIR: hlfir.yield_element %[[COMBINED]] : i32 +! HLFIR: } +! HLFIR: hlfir.assign %[[ELEMENTAL]] to %[[V1]] : !hlfir.expr, !fir.box> +! CHECK: acc.yield %[[V1]] : !fir.box> +! CHECK: } + ! CHECK-LABEL: acc.reduction.recipe @reduction_mul_ref_z32 : !fir.ref> reduction_operator init { ! CHECK: ^bb0(%{{.*}}: !fir.ref>): ! CHECK: %[[REAL:.*]] = arith.constant 1.000000e+00 : f32 @@ -1005,3 +1060,27 @@ subroutine acc_reduction_add_static_slice(a) ! FIR: %[[RED:.*]] = acc.reduction varPtr(%[[ARG0]] : !fir.ref>) bounds(%[[BOUND]]) -> !fir.ref> {name = "a(11:20)"} ! HLFIR: %[[RED:.*]] = acc.reduction varPtr(%[[DECLARG0]]#1 : !fir.ref>) bounds(%[[BOUND]]) -> !fir.ref> {name = "a(11:20)"} ! 
CHECK: acc.parallel reduction(@reduction_add_ref_100xi32 -> %[[RED]] : !fir.ref>) + +subroutine acc_reduction_add_dynamic_extent_add(a) + integer :: a(:) + !$acc parallel reduction(+:a) + !$acc end parallel +end subroutine + +! CHECK-LABEL: func.func @_QPacc_reduction_add_dynamic_extent_add( +! CHECK-SAME: %[[ARG0:.*]]: !fir.box> {fir.bindc_name = "a"}) +! HLFIR: %[[DECLARG0:.*]]:2 = hlfir.declare %[[ARG0]] +! HLFIR: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref>) bounds(%{{.*}}) -> !fir.ref> {name = "a"} +! HLFIR: acc.parallel reduction(@"reduction_add_ref_?xi32" -> %[[RED]] : !fir.ref>) + +subroutine acc_reduction_add_dynamic_extent_max(a) + real :: a(:) + !$acc parallel reduction(max:a) + !$acc end parallel +end subroutine + +! CHECK-LABEL: func.func @_QPacc_reduction_add_dynamic_extent_max( +! CHECK-SAME: %[[ARG0:.*]]: !fir.box> {fir.bindc_name = "a"}) +! HLFIR: %[[DECLARG0:.*]]:2 = hlfir.declare %[[ARG0]] +! HLFIR: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref>) bounds(%{{.*}}) -> !fir.ref> {name = "a"} +! HLFIR: acc.parallel reduction(@"reduction_max_ref_?xf32" -> %[[RED]] : !fir.ref>) {