diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
index 1fa4090a0e133..b8c502d12e1f6 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
@@ -327,6 +327,12 @@ def TensorCopyInsertion : Pass<"tensor-copy-insertion"> {
     were decided to bufferize out-of-place. After running this pass, a
     bufferization can write to buffers directly (without making copies) and no
     longer has to care about potential read-after-write conflicts.
+
+    Note: By default, all newly inserted tensor copies/allocs (i.e., newly
+    created `bufferization.alloc_tensor` ops) that do not escape a block are
+    annotated with `escape = false`. If `create-deallocs` is unset, all newly
+    inserted tensor copies/allocs are annotated with `escape = true`. In that
+    case, they will not be deallocated when bufferizing the IR.
   }];
   let options = [
     Option<"allowReturnAllocs", "allow-return-allocs", "bool",
@@ -335,6 +341,8 @@ def TensorCopyInsertion : Pass<"tensor-copy-insertion"> {
     Option<"bufferizeFunctionBoundaries", "bufferize-function-boundaries",
            "bool", /*default=*/"0",
            "Bufferize function boundaries (experimental).">,
+    Option<"createDeallocs", "create-deallocs", "bool", /*default=*/"true",
+           "Specify if new allocations should be deallocated.">,
   ];
   let constructor = "mlir::bufferization::createTensorCopyInsertionPass()";
 }
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
index 975b97086f907..467c0188783fe 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
@@ -75,8 +75,10 @@ LogicalResult BufferizableOpInterface::resolveTensorOpOperandConflicts(
   Operation *op = getOperation();
   SmallVector<OpOperand *> outOfPlaceOpOperands;
   DenseSet<OpOperand *> copiedOpOperands;
+  DenseSet<OpOperand *> escapingOpOperandCopies;
   SmallVector<OpResult> outOfPlaceOpResults;
   DenseSet<OpResult> copiedOpResults;
+  DenseSet<OpResult> escapingOpResultCopies;
 
   // Find all out-of-place OpOperands.
   for (OpOperand &opOperand : op->getOpOperands()) {
@@ -90,6 +92,14 @@ LogicalResult BufferizableOpInterface::resolveTensorOpOperandConflicts(
     SmallVector<OpResult> aliasingOpResults =
         state.getAliasingOpResult(opOperand);
 
+    // Is the result yielded from a block? Or are deallocations turned off
+    // entirely? In either case, mark the allocation as "escaping", so that it
+    // will not be deallocated.
+    bool escape = !state.getOptions().createDeallocs ||
+                  llvm::any_of(aliasingOpResults, [&](Value v) {
+                    return state.isTensorYielded(v);
+                  });
+
     if (aliasingOpResults.size() == 1 &&
         !state.bufferizesToMemoryWrite(opOperand) &&
         state.getAliasingOpOperand(aliasingOpResults.front()).size() == 1) {
@@ -100,23 +110,24 @@ LogicalResult BufferizableOpInterface::resolveTensorOpOperandConflicts(
       outOfPlaceOpResults.push_back(aliasingOpResults.front());
       if (!state.canOmitTensorCopy(opOperand))
         copiedOpResults.insert(aliasingOpResults.front());
+      if (escape)
+        escapingOpResultCopies.insert(aliasingOpResults.front());
     } else {
       // In all other cases, make a copy of the OpOperand.
       outOfPlaceOpOperands.push_back(&opOperand);
       if (!state.canOmitTensorCopy(opOperand))
         copiedOpOperands.insert(&opOperand);
+      if (escape)
+        escapingOpOperandCopies.insert(&opOperand);
     }
   }
 
   // Insert copies of OpOperands.
   rewriter.setInsertionPoint(op);
   for (OpOperand *opOperand : outOfPlaceOpOperands) {
-    SmallVector<OpResult> aliasingOpResults =
-        state.getAliasingOpResult(*opOperand);
-    bool escape = llvm::any_of(
-        aliasingOpResults, [&](Value v) { return state.isTensorYielded(v); });
     Value copy = allocateTensorForShapedValue(
-        rewriter, op->getLoc(), opOperand->get(), escape,
+        rewriter, op->getLoc(), opOperand->get(),
+        escapingOpOperandCopies.contains(opOperand),
         copiedOpOperands.contains(opOperand));
     rewriter.updateRootInPlace(op, [&]() { opOperand->set(copy); });
   }
@@ -124,9 +135,9 @@ LogicalResult BufferizableOpInterface::resolveTensorOpOperandConflicts(
   // Insert copies of OpResults.
   rewriter.setInsertionPointAfter(op);
   for (OpResult opResult : outOfPlaceOpResults) {
-    bool escape = state.isTensorYielded(opResult);
     Value copy =
-        allocateTensorForShapedValue(rewriter, op->getLoc(), opResult, escape,
+        allocateTensorForShapedValue(rewriter, op->getLoc(), opResult,
+                                     escapingOpResultCopies.contains(opResult),
                                      copiedOpResults.count(opResult));
     SmallVector<OpOperand *> uses = llvm::to_vector(llvm::map_range(
         opResult.getUses(), [](OpOperand &use) { return &use; }));
@@ -392,7 +403,45 @@ bool AnalysisState::hasUndefinedContents(OpOperand *opOperand) const {
 
 bool AnalysisState::isTensorYielded(Value tensor) const {
   // In the absence of analysis information, the conservative answer is "true".
-  return true;
+  if (!tensor.getDefiningOp<AllocTensorOp>())
+    return true;
+
+  // For AllocTensorOp results, we can do better: They do not alias with any
+  // preceding value, so we can follow SSA use-def chains and do a simple
+  // analysis.
+  SmallVector<OpOperand *> worklist;
+  for (OpOperand &use : tensor.getUses())
+    worklist.push_back(&use);
+
+  while (!worklist.empty()) {
+    OpOperand *operand = worklist.pop_back_val();
+    Operation *op = operand->getOwner();
+
+    // If the op is not bufferizable, we can safely assume that the value is not
+    // yielded. (When bufferizing that op, it must handle such cases.)
+    if (!options.dynCastBufferizableOp(op))
+      continue;
+
+    // We cannot analyze through ToMemrefOps, so we have to conservatively
+    // assume that the value is yielded.
+    if (isa<ToMemrefOp>(op))
+      return true;
+
+    // Check if the op is returning/yielding.
+    if (isRegionReturnLike(op))
+      return true;
+
+    // Add all aliasing OpResults to the worklist.
+    // Note: In the absence of detailed analysis information (e.g., there may be
+    // no function call analysis information), `getAliasingOpResult` is
+    // conservative and may report additional OpResults as potentially aliasing.
+    for (OpResult opResult : getAliasingOpResult(*operand))
+      for (OpOperand &use : opResult.getUses())
+        worklist.push_back(&use);
+  }
+
+  // No ReturnLike op found: The value is not yielded.
+  return false;
 }
 
 // bufferization.to_memref is not allowed to change the rank.
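The new `AnalysisState::isTensorYielded` above is a worklist-based reachability check over SSA use-def chains: starting from the uses of the allocation, it follows aliasing results until it hits a return-like op (yielded) or runs out of users (not yielded). The standalone sketch below (plain C++, no MLIR dependency; `ToyOp`, `isYielded`, and all fields are invented stand-ins, and the "non-bufferizable op" skip is omitted for brevity) illustrates the same traversal pattern:

```cpp
// Standalone sketch of the worklist traversal in isTensorYielded() above.
// ToyOp is an invented stand-in for an MLIR Operation; `aliasingUsers`
// plays the role of getAliasingOpResult() followed by getUses().
#include <cassert>
#include <vector>

struct ToyOp {
  bool returnLike = false;            // stands in for isRegionReturnLike(op)
  bool opaque = false;                // stands in for bufferization.to_memref
  std::vector<ToyOp *> aliasingUsers; // users of results aliasing the operand
};

// Conservative check: is a return-like (or non-analyzable) op reachable from
// the initial users? SSA use-def chains are acyclic, so no visited set is
// needed.
bool isYielded(const std::vector<ToyOp *> &initialUsers) {
  std::vector<ToyOp *> worklist(initialUsers);
  while (!worklist.empty()) {
    ToyOp *op = worklist.back();
    worklist.pop_back();
    if (op->opaque)
      return true; // cannot analyze through this op: assume "yielded"
    if (op->returnLike)
      return true; // the value escapes via a return/yield
    for (ToyOp *user : op->aliasingUsers)
      worklist.push_back(user);
  }
  return false; // no return-like op reachable: not yielded
}

int main() {
  // alloc -> insert (result aliases the operand) -> return: escapes.
  ToyOp ret{/*returnLike=*/true};
  ToyOp insert;
  insert.aliasingUsers = {&ret};
  assert(isYielded({&insert}));

  // alloc -> op whose result is never returned: does not escape.
  ToyOp sink;
  assert(!isYielded({&sink}));
  return 0;
}
```

Because the analysis only starts at `bufferization.alloc_tensor` results, which cannot alias any earlier value, this simple forward traversal is sound.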
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp b/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp
index e04f1e386ee91..21d93dec3b0d9 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp
@@ -54,7 +54,8 @@ mlir::bufferization::insertTensorCopies(Operation *op,
     if (auto allocTensorOp = dyn_cast<AllocTensorOp>(op)) {
       if (allocTensorOp.escape())
         return WalkResult::advance();
-      bool escape = state.isTensorYielded(allocTensorOp.result());
+      bool escape = !state.getOptions().createDeallocs ||
+                    state.isTensorYielded(allocTensorOp.result());
       allocTensorOp.escapeAttr(rewriter.getBoolAttr(escape));
       return WalkResult::advance();
     }
@@ -92,6 +93,7 @@ struct TensorCopyInsertionPass
     OneShotBufferizationOptions options;
     options.allowReturnAllocs = allowReturnAllocs;
     options.bufferizeFunctionBoundaries = bufferizeFunctionBoundaries;
+    options.createDeallocs = createDeallocs;
     if (failed(insertTensorCopies(getOperation(), options)))
       signalPassFailure();
   }
diff --git a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir
index cb6977c013a4f..c36a0a69ca65a 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir
@@ -1,14 +1,17 @@
 // RUN: mlir-opt %s -tensor-copy-insertion -split-input-file | FileCheck %s
 // RUN: mlir-opt %s -tensor-copy-insertion="bufferize-function-boundaries allow-return-allocs" -split-input-file | FileCheck %s --check-prefix=CHECK-FUNC
+// RUN: mlir-opt %s -tensor-copy-insertion="create-deallocs=0" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-DEALLOC
 
 // CHECK-LABEL: func @read_after_write_conflict(
 // CHECK-SAME: %[[t:.*]]: tensor<?xf32>
 // CHECK-FUNC-LABEL: func @read_after_write_conflict(
+// CHECK-NO-DEALLOC-LABEL: func @read_after_write_conflict(
 func.func @read_after_write_conflict(%t: tensor<?xf32>, %idx: index, %f: f32)
   -> (tensor<?xf32>, tensor<?xf32>)
 {
   // CHECK: %[[copy:.*]] = bufferization.alloc_tensor() copy(%[[t]]) {escape = false} : tensor<?xf32>
   // CHECK-FUNC: bufferization.alloc_tensor() copy(%{{.*}}) {escape = true} : tensor<?xf32>
+  // CHECK-NO-DEALLOC: bufferization.alloc_tensor() copy(%{{.*}}) {escape = true} : tensor<?xf32>
   // CHECK: %[[insert:.*]] = tensor.insert %{{.*}} into %[[copy]]
   %0 = tensor.insert %f into %t[%idx] : tensor<?xf32>
   // CHECK: return %[[insert]], %[[t]]
@@ -19,9 +22,11 @@ func.func @read_after_write_conflict(%t: tensor<?xf32>, %idx: index, %f: f32)
 
 // CHECK-LABEL: func @return_alloc_tensor
 // CHECK-FUNC-LABEL: func @return_alloc_tensor
+// CHECK-NO-DEALLOC-LABEL: func @return_alloc_tensor
 func.func @return_alloc_tensor() -> (tensor<5xf32>) {
   // CHECK: bufferization.alloc_tensor() {escape = false} : tensor<5xf32>
   // CHECK-FUNC: bufferization.alloc_tensor() {escape = true} : tensor<5xf32>
+  // CHECK-NO-DEALLOC: bufferization.alloc_tensor() {escape = true} : tensor<5xf32>
   %0 = bufferization.alloc_tensor() : tensor<5xf32>
   return %0 : tensor<5xf32>
 }
@@ -29,12 +34,16 @@ func.func @return_alloc_tensor() -> (tensor<5xf32>) {
 // -----
 
 // CHECK-LABEL: func @do_not_copy_undefined_tensor
+// CHECK-NO-DEALLOC-LABEL: func @do_not_copy_undefined_tensor
 func.func @do_not_copy_undefined_tensor(%f: f32, %idx: index)
   -> (tensor<5xf32>, tensor<5xf32>)
 {
   // CHECK: bufferization.alloc_tensor() {escape = false} : tensor<5xf32>
   // The second alloc_tensor should not have a copy operand.
   // CHECK: bufferization.alloc_tensor() {escape = false} : tensor<5xf32>
+
+  // CHECK-NO-DEALLOC: bufferization.alloc_tensor() {escape = true} : tensor<5xf32>
+  // CHECK-NO-DEALLOC: bufferization.alloc_tensor() {escape = true} : tensor<5xf32>
   %0 = bufferization.alloc_tensor() : tensor<5xf32>
   %1 = tensor.insert %f into %0[%idx] : tensor<5xf32>
   return %0, %1 : tensor<5xf32>, tensor<5xf32>
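As a usage note, the behavior of `-tensor-copy-insertion="create-deallocs=0"` can presumably also be obtained programmatically by setting the option before calling `insertTensorCopies`, mirroring `TensorCopyInsertionPass::runOnOperation()` above. A minimal sketch, assuming the header path (which is not shown in this diff and may differ across MLIR revisions):

```cpp
// Sketch only: the include path is an assumption; insertTensorCopies and
// OneShotBufferizationOptions are taken from the diff above.
#include "mlir/Dialect/Bufferization/Transforms/TensorCopyInsertion.h"

using namespace mlir;
using namespace mlir::bufferization;

// Equivalent to -tensor-copy-insertion="create-deallocs=0": every newly
// created bufferization.alloc_tensor op is annotated with `escape = true`,
// so bufferization will not emit deallocations for it.
LogicalResult insertCopiesWithoutDeallocs(Operation *op) {
  OneShotBufferizationOptions options;
  options.createDeallocs = false;
  return insertTensorCopies(op, options);
}
```

Per the Passes.td note above, such allocations are not deallocated when bufferizing the IR, so freeing them becomes the caller's responsibility.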