diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp index 3ddaf1f2af8fd..3da8666d7c53f 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp @@ -791,26 +791,35 @@ struct ElementalOpConversion // Assign the element value to the temp element for this iteration. auto tempElement = hlfir::getElementAt(loc, builder, temp, loopNest.oneBasedIndices); - // FIXME: if the elemental result is a function result temporary - // of a derived type, we have to make sure that we are either - // deallocate any allocatable/automatic components after the assignment - // or that we do not do the deep copy with the AssignOp. The latter - // seems to be preferrable, because the deep copy is more expensive. - // The shallow copy may be done with a load/store of the RecordType scalar. - builder.create(loc, elementValue, tempElement, - /*realloc=*/false, - /*keep_lhs_length_if_realloc=*/false, - /*temporary_lhs=*/true); - // hlfir.yield_element implicitly marks the end-of-life its operand if - // it is an expression created in the hlfir.elemental (since it is its - // last use and an hlfir.destroy could not be created afterwards) - // Now that this node has been removed and the expression has been used in - // the assign, insert an hlfir.destroy to mark the expression end-of-life. - // If the expression creation allocated a buffer on the heap inside the - // loop, this will ensure the buffer properly deallocated. - if (elementValue.getType().isa() && - wasCreatedInCurrentBlock(elementValue, builder)) - builder.create(loc, elementValue); + // If the elemental result is a temporary of a derived type, + // we can avoid the deep copy implied by the AssignOp and just + // do the shallow copy with load/store. 
This helps avoid the overhead + // of deallocating allocatable components of the temporary (if any) + // on each iteration of the elemental operation. + auto asExpr = elementValue.getDefiningOp(); + auto elemType = hlfir::getFortranElementType(elementValue.getType()); + if (asExpr && asExpr.isMove() && mlir::isa(elemType) && + hlfir::mayHaveAllocatableComponent(elemType) && + wasCreatedInCurrentBlock(elementValue, builder)) { + auto load = builder.create(loc, asExpr.getVar()); + builder.create(loc, load, tempElement); + } else { + builder.create(loc, elementValue, tempElement, + /*realloc=*/false, + /*keep_lhs_length_if_realloc=*/false, + /*temporary_lhs=*/true); + + // hlfir.yield_element implicitly marks the end-of-life of its operand if + // it is an expression created in the hlfir.elemental (since it is its + // last use and an hlfir.destroy could not be created afterwards) + // Now that this node has been removed and the expression has been used in + // the assign, insert an hlfir.destroy to mark the expression end-of-life. + // If the expression creation allocated a buffer on the heap inside the + // loop, this will ensure the buffer is properly deallocated. + if (elementValue.getType().isa() && + wasCreatedInCurrentBlock(elementValue, builder)) + builder.create(loc, elementValue); + } builder.restoreInsertionPoint(insPt); mlir::Value bufferizedExpr = diff --git a/flang/test/HLFIR/elemental-shallow-copy.fir b/flang/test/HLFIR/elemental-shallow-copy.fir new file mode 100644 index 0000000000000..c57a2766e318d --- /dev/null +++ b/flang/test/HLFIR/elemental-shallow-copy.fir @@ -0,0 +1,31 @@ +// Check that an elemental result of a derived type with an allocatable +// component is shallow-copied into the array result. 
+// RUN: fir-opt %s --bufferize-hlfir | FileCheck %s + +func.func @_QMtypesPtest() { + %false = arith.constant false + %c1 = arith.constant 1 : index + %0 = fir.alloca !fir.type<_QMtypesTt{x:!fir.box>}> {bindc_name = ".result"} + %11 = fir.shape %c1 : (index) -> !fir.shape<1> + %18 = fir.alloca !fir.array<1x!fir.type<_QMtypesTt{x:!fir.box>}>> {bindc_name = "y", uniq_name = "_QMtypesFtestEy"} + %19:2 = hlfir.declare %18(%11) {uniq_name = "_QMtypesFtestEy"} : (!fir.ref>}>>>, !fir.shape<1>) -> (!fir.ref>}>>>, !fir.ref>}>>>) + %23 = hlfir.elemental %11 : (!fir.shape<1>) -> !hlfir.expr<1x!fir.type<_QMtypesTt{x:!fir.box>}>> { + ^bb0(%arg0: index): + %26:2 = hlfir.declare %0 {uniq_name = ".tmp.func_result"} : (!fir.ref>}>>) -> (!fir.ref>}>>, !fir.ref>}>>) + %27 = hlfir.as_expr %26#0 move %false : (!fir.ref>}>>, i1) -> !hlfir.expr>}>> + hlfir.yield_element %27 : !hlfir.expr>}>> + } + hlfir.assign %23 to %19#0 : !hlfir.expr<1x!fir.type<_QMtypesTt{x:!fir.box>}>>, !fir.ref>}>>> + hlfir.destroy %23 : !hlfir.expr<1x!fir.type<_QMtypesTt{x:!fir.box>}>> + return +} +// CHECK-LABEL: func.func @_QMtypesPtest() { +// CHECK: %[[VAL_2:.*]] = fir.alloca !fir.type<_QMtypesTt{x:!fir.box>}> {bindc_name = ".result"} +// CHECK: %[[VAL_6:.*]] = fir.allocmem !fir.array<1x!fir.type<_QMtypesTt{x:!fir.box>}>> {bindc_name = ".tmp.array", uniq_name = ""} +// CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]](%{{.*}}) {uniq_name = ".tmp.array"} : (!fir.heap>}>>>, !fir.shape<1>) -> (!fir.heap>}>>>, !fir.heap>}>>>) +// CHECK: fir.do_loop %[[VAL_10:.*]] = %{{.*}} to %{{.*}} step %{{.*}} { +// CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = ".tmp.func_result"} : (!fir.ref>}>>) -> (!fir.ref>}>>, !fir.ref>}>>) +// CHECK: %[[VAL_15:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_10]]) : (!fir.heap>}>>>, index) -> !fir.ref>}>> +// CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref>}>> +// CHECK: fir.store %[[VAL_16]] to %[[VAL_15]] : !fir.ref>}>> +// CHECK: }