diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
index 1fa4090a0e133..b8c502d12e1f6 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
@@ -327,6 +327,12 @@ def TensorCopyInsertion : Pass<"tensor-copy-insertion"> {
     were decided to bufferize out-of-place. After running this pass, a
     bufferization can write to buffers directly (without making copies) and no
     longer has to care about potential read-after-write conflicts.
+
+    Note: By default, all newly inserted tensor copies/allocs (i.e., newly
+    created `bufferization.alloc_tensor` ops) that do not escape a block are
+    annotated with `escape = false`. If `create-deallocs` is unset, all newly
+    inserted tensor copies/allocs are annotated with `escape = true`. In that
+    case, they will not be deallocated when bufferizing the IR.
   }];
   let options = [
     Option<"allowReturnAllocs", "allow-return-allocs", "bool",
@@ -335,6 +341,8 @@ def TensorCopyInsertion : Pass<"tensor-copy-insertion"> {
     Option<"bufferizeFunctionBoundaries", "bufferize-function-boundaries",
            "bool", /*default=*/"0",
            "Bufferize function boundaries (experimental).">,
+    Option<"createDeallocs", "create-deallocs", "bool", /*default=*/"true",
+           "Specify if new allocations should be deallocated.">,
   ];
   let constructor = "mlir::bufferization::createTensorCopyInsertionPass()";
 }
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
index 975b97086f907..467c0188783fe 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
@@ -75,8 +75,10 @@ LogicalResult BufferizableOpInterface::resolveTensorOpOperandConflicts(
   Operation *op = getOperation();
   SmallVector<OpOperand *> outOfPlaceOpOperands;
   DenseSet<OpOperand *> copiedOpOperands;
+  DenseSet<OpOperand *> escapingOpOperandCopies;
   SmallVector<OpResult> outOfPlaceOpResults;
   DenseSet<OpResult> copiedOpResults;
+  DenseSet<OpResult> escapingOpResultCopies;
 
   // Find all out-of-place OpOperands.
   for (OpOperand &opOperand : op->getOpOperands()) {
@@ -90,6 +92,14 @@ LogicalResult BufferizableOpInterface::resolveTensorOpOperandConflicts(
     SmallVector<OpResult> aliasingOpResults =
         state.getAliasingOpResult(opOperand);
 
+    // Is the result yielded from a block? Or are deallocations turned off
+    // entirely? In either case, mark the allocation as "escaping", so that it
+    // will not be deallocated.
+    bool escape = !state.getOptions().createDeallocs ||
+                  llvm::any_of(aliasingOpResults, [&](Value v) {
+                    return state.isTensorYielded(v);
+                  });
+
     if (aliasingOpResults.size() == 1 &&
         !state.bufferizesToMemoryWrite(opOperand) &&
         state.getAliasingOpOperand(aliasingOpResults.front()).size() == 1) {
@@ -100,23 +110,24 @@ LogicalResult BufferizableOpInterface::resolveTensorOpOperandConflicts(
       outOfPlaceOpResults.push_back(aliasingOpResults.front());
       if (!state.canOmitTensorCopy(opOperand))
         copiedOpResults.insert(aliasingOpResults.front());
+      if (escape)
+        escapingOpResultCopies.insert(aliasingOpResults.front());
     } else {
       // In all other cases, make a copy of the OpOperand.
       outOfPlaceOpOperands.push_back(&opOperand);
       if (!state.canOmitTensorCopy(opOperand))
         copiedOpOperands.insert(&opOperand);
+      if (escape)
+        escapingOpOperandCopies.insert(&opOperand);
     }
   }
 
   // Insert copies of OpOperands.
   rewriter.setInsertionPoint(op);
   for (OpOperand *opOperand : outOfPlaceOpOperands) {
-    SmallVector<OpResult> aliasingOpResults =
-        state.getAliasingOpResult(*opOperand);
-    bool escape = llvm::any_of(
-        aliasingOpResults, [&](Value v) { return state.isTensorYielded(v); });
     Value copy = allocateTensorForShapedValue(
-        rewriter, op->getLoc(), opOperand->get(), escape,
+        rewriter, op->getLoc(), opOperand->get(),
+        escapingOpOperandCopies.contains(opOperand),
         copiedOpOperands.contains(opOperand));
     rewriter.updateRootInPlace(op, [&]() { opOperand->set(copy); });
   }
@@ -124,9 +135,9 @@ LogicalResult BufferizableOpInterface::resolveTensorOpOperandConflicts(
   // Insert copies of OpResults.
   rewriter.setInsertionPointAfter(op);
   for (OpResult opResult : outOfPlaceOpResults) {
-    bool escape = state.isTensorYielded(opResult);
     Value copy =
-        allocateTensorForShapedValue(rewriter, op->getLoc(), opResult, escape,
+        allocateTensorForShapedValue(rewriter, op->getLoc(), opResult,
+                                     escapingOpResultCopies.contains(opResult),
                                      copiedOpResults.count(opResult));
     SmallVector<OpOperand *> uses = llvm::to_vector(llvm::map_range(
         opResult.getUses(), [](OpOperand &use) { return &use; }));
@@ -392,7 +403,45 @@ bool AnalysisState::hasUndefinedContents(OpOperand *opOperand) const {
 
 bool AnalysisState::isTensorYielded(Value tensor) const {
   // In the absence of analysis information, the conservative answer is "true".
-  return true;
+  if (!tensor.getDefiningOp<AllocTensorOp>())
+    return true;
+
+  // For AllocTensorOp results, we can do better: They do not alias with any
+  // preceding value, so we can follow SSA use-def chains and do a simple
+  // analysis.
+  SmallVector<OpOperand *> worklist;
+  for (OpOperand &use : tensor.getUses())
+    worklist.push_back(&use);
+
+  while (!worklist.empty()) {
+    OpOperand *operand = worklist.pop_back_val();
+    Operation *op = operand->getOwner();
+
+    // If the op is not bufferizable, we can safely assume that the value is not
+    // yielded. (When bufferizing that op, it must handle such cases.)
+    if (!options.dynCastBufferizableOp(op))
+      continue;
+
+    // We cannot analyze through ToMemrefOps, so we have to conservatively
+    // assume that the value is yielded.
+    if (isa<ToMemrefOp>(op))
+      return true;
+
+    // Check if the op is returning/yielding.
+    if (isRegionReturnLike(op))
+      return true;
+
+    // Add all aliasing OpResults to the worklist.
+    // Note: In the absence of detailed analysis information (e.g., there may be
+    // no function call analysis information), `getAliasingOpResult` is
+    // conservative and may report additional OpResults as potentially aliasing.
+    for (OpResult opResult : getAliasingOpResult(*operand))
+      for (OpOperand &use : opResult.getUses())
+        worklist.push_back(&use);
+  }
+
+  // No ReturnLike op found: The value is not yielded.
+  return false;
 }
 
 // bufferization.to_memref is not allowed to change the rank.
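The new `AnalysisState::isTensorYielded` above is a worklist-based reachability check over SSA use-def chains: starting from the uses of the allocation, it follows aliasing results until it hits a return-like op (yielded) or runs out of users (not yielded). The standalone sketch below (plain C++, no MLIR dependency; `ToyOp`, `isYielded`, and all fields are invented stand-ins, and the "non-bufferizable op" skip is omitted for brevity) illustrates the same traversal pattern:

```cpp
// Standalone sketch of the worklist traversal in isTensorYielded() above.
// ToyOp is an invented stand-in for an MLIR Operation; `aliasingUsers`
// plays the role of getAliasingOpResult() followed by getUses().
#include <cassert>
#include <vector>

struct ToyOp {
  bool returnLike = false;            // stands in for isRegionReturnLike(op)
  bool opaque = false;                // stands in for bufferization.to_memref
  std::vector<ToyOp *> aliasingUsers; // users of results aliasing the operand
};

// Conservative check: is a return-like (or non-analyzable) op reachable from
// the initial users? SSA use-def chains are acyclic, so no visited set is
// needed.
bool isYielded(const std::vector<ToyOp *> &initialUsers) {
  std::vector<ToyOp *> worklist(initialUsers);
  while (!worklist.empty()) {
    ToyOp *op = worklist.back();
    worklist.pop_back();
    if (op->opaque)
      return true; // cannot analyze through this op: assume "yielded"
    if (op->returnLike)
      return true; // the value escapes via a return/yield
    for (ToyOp *user : op->aliasingUsers)
      worklist.push_back(user);
  }
  return false; // no return-like op reachable: not yielded
}

int main() {
  // alloc -> insert (result aliases the operand) -> return: escapes.
  ToyOp ret{/*returnLike=*/true};
  ToyOp insert;
  insert.aliasingUsers = {&ret};
  assert(isYielded({&insert}));

  // alloc -> op whose result is never returned: does not escape.
  ToyOp sink;
  assert(!isYielded({&sink}));
  return 0;
}
```

Because the analysis only starts at `bufferization.alloc_tensor` results, which cannot alias any earlier value, this simple forward traversal is sound.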
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp b/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp
index e04f1e386ee91..21d93dec3b0d9 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp
@@ -54,7 +54,8 @@ mlir::bufferization::insertTensorCopies(Operation *op,
     if (auto allocTensorOp = dyn_cast<AllocTensorOp>(op)) {
       if (allocTensorOp.escape())
         return WalkResult::advance();
-      bool escape = state.isTensorYielded(allocTensorOp.result());
+      bool escape = !state.getOptions().createDeallocs ||
+                    state.isTensorYielded(allocTensorOp.result());
       allocTensorOp.escapeAttr(rewriter.getBoolAttr(escape));
       return WalkResult::advance();
     }
@@ -92,6 +93,7 @@ struct TensorCopyInsertionPass
     OneShotBufferizationOptions options;
     options.allowReturnAllocs = allowReturnAllocs;
     options.bufferizeFunctionBoundaries = bufferizeFunctionBoundaries;
+    options.createDeallocs = createDeallocs;
     if (failed(insertTensorCopies(getOperation(), options)))
       signalPassFailure();
   }
diff --git a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir
index cb6977c013a4f..c36a0a69ca65a 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir
@@ -1,14 +1,17 @@
 // RUN: mlir-opt %s -tensor-copy-insertion -split-input-file | FileCheck %s
 // RUN: mlir-opt %s -tensor-copy-insertion="bufferize-function-boundaries allow-return-allocs" -split-input-file | FileCheck %s --check-prefix=CHECK-FUNC
+// RUN: mlir-opt %s -tensor-copy-insertion="create-deallocs=0" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-DEALLOC
 
 // CHECK-LABEL: func @read_after_write_conflict(
 // CHECK-SAME: %[[t:.*]]: tensor<?xf32>
 // CHECK-FUNC-LABEL: func @read_after_write_conflict(
+// CHECK-NO-DEALLOC-LABEL: func @read_after_write_conflict(
 func.func @read_after_write_conflict(%t: tensor<?xf32>, %idx: index, %f: f32)
   -> (tensor<?xf32>, tensor<?xf32>)
 {
   // CHECK: %[[copy:.*]] = bufferization.alloc_tensor() copy(%[[t]]) {escape = false} : tensor<?xf32>
   // CHECK-FUNC: bufferization.alloc_tensor() copy(%{{.*}}) {escape = true} : tensor<?xf32>
+  // CHECK-NO-DEALLOC: bufferization.alloc_tensor() copy(%{{.*}}) {escape = true} : tensor<?xf32>
   // CHECK: %[[insert:.*]] = tensor.insert %{{.*}} into %[[copy]]
   %0 = tensor.insert %f into %t[%idx] : tensor<?xf32>
   // CHECK: return %[[insert]], %[[t]]
@@ -19,9 +22,11 @@ func.func @read_after_write_conflict(%t: tensor<?xf32>, %idx: index, %f: f32)
 
 // CHECK-LABEL: func @return_alloc_tensor
 // CHECK-FUNC-LABEL: func @return_alloc_tensor
+// CHECK-NO-DEALLOC-LABEL: func @return_alloc_tensor
 func.func @return_alloc_tensor() -> (tensor<5xf32>) {
   // CHECK: bufferization.alloc_tensor() {escape = false} : tensor<5xf32>
   // CHECK-FUNC: bufferization.alloc_tensor() {escape = true} : tensor<5xf32>
+  // CHECK-NO-DEALLOC: bufferization.alloc_tensor() {escape = true} : tensor<5xf32>
   %0 = bufferization.alloc_tensor() : tensor<5xf32>
   return %0 : tensor<5xf32>
 }
@@ -29,12 +34,16 @@ func.func @return_alloc_tensor() -> (tensor<5xf32>) {
 // -----
 
 // CHECK-LABEL: func @do_not_copy_undefined_tensor
+// CHECK-NO-DEALLOC-LABEL: func @do_not_copy_undefined_tensor
 func.func @do_not_copy_undefined_tensor(%f: f32, %idx: index)
   -> (tensor<5xf32>, tensor<5xf32>)
 {
   // CHECK: bufferization.alloc_tensor() {escape = false} : tensor<5xf32>
   // The second alloc_tensor should not have a copy operand.
   // CHECK: bufferization.alloc_tensor() {escape = false} : tensor<5xf32>
+
+  // CHECK-NO-DEALLOC: bufferization.alloc_tensor() {escape = true} : tensor<5xf32>
+  // CHECK-NO-DEALLOC: bufferization.alloc_tensor() {escape = true} : tensor<5xf32>
   %0 = bufferization.alloc_tensor() : tensor<5xf32>
   %1 = tensor.insert %f into %0[%idx] : tensor<5xf32>
   return %0, %1 : tensor<5xf32>, tensor<5xf32>
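As a usage note, the behavior of `-tensor-copy-insertion="create-deallocs=0"` can presumably also be obtained programmatically by setting the option before calling `insertTensorCopies`, mirroring `TensorCopyInsertionPass::runOnOperation()` above. A minimal sketch, assuming the header path (which is not shown in this diff and may differ across MLIR revisions):

```cpp
// Sketch only: the include path is an assumption; insertTensorCopies and
// OneShotBufferizationOptions are taken from the diff above.
#include "mlir/Dialect/Bufferization/Transforms/TensorCopyInsertion.h"

using namespace mlir;
using namespace mlir::bufferization;

// Equivalent to -tensor-copy-insertion="create-deallocs=0": every newly
// created bufferization.alloc_tensor op is annotated with `escape = true`,
// so bufferization will not emit deallocations for it.
LogicalResult insertCopiesWithoutDeallocs(Operation *op) {
  OneShotBufferizationOptions options;
  options.createDeallocs = false;
  return insertTensorCopies(op, options);
}
```

Per the Passes.td note above, such allocations are not deallocated when bufferizing the IR, so freeing them becomes the caller's responsibility.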