-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[flang][hlfir] Shallow copy elemental results with allocatable components. #68040
Conversation
…ents. To avoid the overhead of deallocating allocatable components of the elemental temporary result on every iteration of the elemental operation, we can use a shallow copy instead of deep-copy assign.
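@llvm/pr-subscribers-flang-fir-hlfir Changes: To avoid the overhead of deallocating allocatable components of the elemental temporary result on every iteration of the elemental operation, we can use a shallow copy instead of deep-copy assign. Full diff: https://github.com/llvm/llvm-project/pull/68040.diff 2 Files Affected: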
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
index 3ddaf1f2af8fddb..3da8666d7c53f70 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
@@ -791,26 +791,35 @@ struct ElementalOpConversion
// Assign the element value to the temp element for this iteration.
auto tempElement =
hlfir::getElementAt(loc, builder, temp, loopNest.oneBasedIndices);
- // FIXME: if the elemental result is a function result temporary
- // of a derived type, we have to make sure that we are either
- // deallocate any allocatable/automatic components after the assignment
- // or that we do not do the deep copy with the AssignOp. The latter
- // seems to be preferrable, because the deep copy is more expensive.
- // The shallow copy may be done with a load/store of the RecordType scalar.
- builder.create<hlfir::AssignOp>(loc, elementValue, tempElement,
- /*realloc=*/false,
- /*keep_lhs_length_if_realloc=*/false,
- /*temporary_lhs=*/true);
- // hlfir.yield_element implicitly marks the end-of-life its operand if
- // it is an expression created in the hlfir.elemental (since it is its
- // last use and an hlfir.destroy could not be created afterwards)
- // Now that this node has been removed and the expression has been used in
- // the assign, insert an hlfir.destroy to mark the expression end-of-life.
- // If the expression creation allocated a buffer on the heap inside the
- // loop, this will ensure the buffer properly deallocated.
- if (elementValue.getType().isa<hlfir::ExprType>() &&
- wasCreatedInCurrentBlock(elementValue, builder))
- builder.create<hlfir::DestroyOp>(loc, elementValue);
+ // If the elemental result is a temporary of a derived type,
+ // we can avoid the deep copy implied by the AssignOp and just
+ // do the shallow copy with load/store. This helps avoiding the overhead
+ // of deallocating allocatable components of the temporary (if any)
+ // on each iteration of the elemental operation.
+ auto asExpr = elementValue.getDefiningOp<hlfir::AsExprOp>();
+ auto elemType = hlfir::getFortranElementType(elementValue.getType());
+ if (asExpr && asExpr.isMove() && mlir::isa<fir::RecordType>(elemType) &&
+ hlfir::mayHaveAllocatableComponent(elemType) &&
+ wasCreatedInCurrentBlock(elementValue, builder)) {
+ auto load = builder.create<fir::LoadOp>(loc, asExpr.getVar());
+ builder.create<fir::StoreOp>(loc, load, tempElement);
+ } else {
+ builder.create<hlfir::AssignOp>(loc, elementValue, tempElement,
+ /*realloc=*/false,
+ /*keep_lhs_length_if_realloc=*/false,
+ /*temporary_lhs=*/true);
+
+ // hlfir.yield_element implicitly marks the end-of-life its operand if
+ // it is an expression created in the hlfir.elemental (since it is its
+ // last use and an hlfir.destroy could not be created afterwards)
+ // Now that this node has been removed and the expression has been used in
+ // the assign, insert an hlfir.destroy to mark the expression end-of-life.
+ // If the expression creation allocated a buffer on the heap inside the
+ // loop, this will ensure the buffer properly deallocated.
+ if (elementValue.getType().isa<hlfir::ExprType>() &&
+ wasCreatedInCurrentBlock(elementValue, builder))
+ builder.create<hlfir::DestroyOp>(loc, elementValue);
+ }
builder.restoreInsertionPoint(insPt);
mlir::Value bufferizedExpr =
diff --git a/flang/test/HLFIR/elemental-shallow-copy.fir b/flang/test/HLFIR/elemental-shallow-copy.fir
new file mode 100644
index 000000000000000..c57a2766e318dee
--- /dev/null
+++ b/flang/test/HLFIR/elemental-shallow-copy.fir
@@ -0,0 +1,31 @@
+// Check that an elemental result of a derived type with an allocatable
+// component is shallow-copied into the array result.
+// RUN: fir-opt %s --bufferize-hlfir | FileCheck %s
+
+func.func @_QMtypesPtest() {
+ %false = arith.constant false
+ %c1 = arith.constant 1 : index
+ %0 = fir.alloca !fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}> {bindc_name = ".result"}
+ %11 = fir.shape %c1 : (index) -> !fir.shape<1>
+ %18 = fir.alloca !fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>> {bindc_name = "y", uniq_name = "_QMtypesFtestEy"}
+ %19:2 = hlfir.declare %18(%11) {uniq_name = "_QMtypesFtestEy"} : (!fir.ref<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>, !fir.ref<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>)
+ %23 = hlfir.elemental %11 : (!fir.shape<1>) -> !hlfir.expr<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>> {
+ ^bb0(%arg0: index):
+ %26:2 = hlfir.declare %0 {uniq_name = ".tmp.func_result"} : (!fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>) -> (!fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>, !fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>)
+ %27 = hlfir.as_expr %26#0 move %false : (!fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>, i1) -> !hlfir.expr<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
+ hlfir.yield_element %27 : !hlfir.expr<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
+ }
+ hlfir.assign %23 to %19#0 : !hlfir.expr<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>, !fir.ref<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>
+ hlfir.destroy %23 : !hlfir.expr<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
+ return
+}
+// CHECK-LABEL: func.func @_QMtypesPtest() {
+// CHECK: %[[VAL_2:.*]] = fir.alloca !fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}> {bindc_name = ".result"}
+// CHECK: %[[VAL_6:.*]] = fir.allocmem !fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>> {bindc_name = ".tmp.array", uniq_name = ""}
+// CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]](%{{.*}}) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>, !fir.shape<1>) -> (!fir.heap<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>, !fir.heap<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>)
+// CHECK: fir.do_loop %[[VAL_10:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = ".tmp.func_result"} : (!fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>) -> (!fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>, !fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>)
+// CHECK: %[[VAL_15:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_10]]) : (!fir.heap<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>, index) -> !fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
+// CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
+// CHECK: fir.store %[[VAL_16]] to %[[VAL_15]] : !fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
+// CHECK: }
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, looks great
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks good to me, thanks!
To avoid the overhead of deallocating allocatable components of the elemental
temporary result on every iteration of the elemental operation, we can use
a shallow copy instead of deep-copy assign.