From 9467dd65f53e13affebdffe7ab27da04bd0ef7e2 Mon Sep 17 00:00:00 2001 From: dubov diana Date: Sun, 19 Oct 2025 16:20:49 +0300 Subject: [PATCH 01/10] Adding to execute_region_op some missing support --- mlir/include/mlir/Dialect/SCF/IR/SCFOps.td | 4 +- mlir/lib/Dialect/SCF/IR/SCF.cpp | 87 +++++++++++++++++++++- mlir/test/Dialect/SCF/canonicalize.mlir | 22 ++++++ 3 files changed, 111 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td index fadd3fc10bfc4..70b94695f4a09 100644 --- a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td +++ b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td @@ -77,7 +77,9 @@ def ConditionOp : SCF_Op<"condition", [ //===----------------------------------------------------------------------===// def ExecuteRegionOp : SCF_Op<"execute_region", [ - DeclareOpInterfaceMethods]> { + DeclareOpInterfaceMethods, + RecursiveMemoryEffects, + DeclareOpInterfaceMethods]> {//, SingleBlockImplicitTerminator<"scf::YieldOp">]> { //, RecursiveMemoryEffects]> { let summary = "operation that executes its region exactly once"; let description = [{ The `scf.execute_region` operation is used to allow multiple blocks within SCF diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp index 9bd13f3236cfc..cbaed7cd68e95 100644 --- a/mlir/lib/Dialect/SCF/IR/SCF.cpp +++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp @@ -291,9 +291,94 @@ struct MultiBlockExecuteInliner : public OpRewritePattern { } }; +// Pattern to eliminate ExecuteRegionOp results when it only forwards external +// values. It operates only on execute regions with single terminator yield +// operation. +struct ExecuteRegionForwardingEliminator + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(ExecuteRegionOp op, + PatternRewriter &rewriter) const override { + if (op.getNumResults() == 0) + return failure(); + + SmallVector yieldOps; + for (Block &block : op.getRegion()) { + if (auto yield = dyn_cast(block.getTerminator())) { + if (yield.getResults().empty()) + continue; + yieldOps.push_back(yield.getOperation()); + } + } + + if (yieldOps.size() != 1) + return failure(); + + auto yieldOp = dyn_cast(yieldOps.front()); + auto yieldedValues = yieldOp.getOperands(); + // Check if all yielded values are from outside the region + bool allExternal = true; + for (Value yieldedValue : yieldedValues) { + if (isValueFromInsideRegion(yieldedValue, op)) { + allExternal = false; + break; + } + } + + if (!allExternal) + return failure(); + + // All yielded values are external - create a new execute_region with no + // results. + auto newOp = rewriter.create(op.getLoc(), TypeRange{}); + newOp->setAttrs(op->getAttrs()); + + // Move the region content to the new operation + newOp.getRegion().takeBody(op.getRegion()); + + // Replace the yield operation with a new yield operation with no results. + rewriter.setInsertionPoint(yieldOp); + rewriter.eraseOp(yieldOp); + rewriter.create(yieldOp.getLoc()); + + // Replace the old operation with the external values directly. + rewriter.replaceOp(op, yieldedValues); + return success(); + } + +private: + bool isValueFromInsideRegion(Value value, + ExecuteRegionOp executeRegionOp) const { + // Check if the value is defined within the execute_region + if (Operation *defOp = value.getDefiningOp()) { + return executeRegionOp.getRegion().isAncestor(defOp->getParentRegion()); + } + + // If it's a block argument, check if it's from within the region + if (BlockArgument blockArg = dyn_cast(value)) { + return executeRegionOp.getRegion().isAncestor(blockArg.getParentRegion()); + } + + return false; // Value is from outside the region + } +}; + void ExecuteRegionOp::getCanonicalizationPatterns(RewritePatternSet &results, MLIRContext *context) { - results.add(context); + results.add(context); +} + +void ExecuteRegionOp::getEffects( + SmallVectorImpl> + &effects) { + if (!getNoInline()) + return; + // In case there is attribute no_inline we want the region not to be inlined + // into the parent operation. + effects.emplace_back(MemoryEffects::Write::get(), + SideEffects::DefaultResource::get()); } void ExecuteRegionOp::getSuccessorRegions( diff --git a/mlir/test/Dialect/SCF/canonicalize.mlir b/mlir/test/Dialect/SCF/canonicalize.mlir index 2bec63672e783..4529cdb4a298c 100644 --- a/mlir/test/Dialect/SCF/canonicalize.mlir +++ b/mlir/test/Dialect/SCF/canonicalize.mlir @@ -1604,6 +1604,28 @@ func.func @func_execute_region_inline_multi_yield() { // ----- +// CHECK-LABEL: func.func private @canonicalize_execute_region_yeilding_external_value( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<1x120xui8>) -> tensor<1x60xui8> { +// CHECK: %[[VAL_1:.*]] = memref.alloc() {alignment = 64 : i64} : memref<1x60xui8> +// CHECK: scf.execute_region no_inline { +// CHECK: scf.yield +// CHECK: } +// CHECK: %[[VAL_2:.*]] = bufferization.to_tensor %[[VAL_1]] : memref<1x60xui8> to tensor<1x60xui8> +// CHECK: return %[[VAL_2]] : tensor<1x60xui8> +// CHECK: } + +func.func private @canonicalize_execute_region_yeilding_external_value(%arg0: tensor<1x120xui8>) -> tensor<1x60xui8> { + %alloc = memref.alloc() {alignment = 64 : i64} : memref<1x60xui8> + %0 = bufferization.to_tensor %alloc : memref<1x60xui8> to tensor<1x60xui8> + %1 = scf.execute_region -> memref<1x60xui8> no_inline { + scf.yield %alloc : memref<1x60xui8> + } + %2 = bufferization.to_tensor %1 : memref<1x60xui8> to tensor<1x60xui8> + return %2 : tensor<1x60xui8> +} + +// ----- + // CHECK-LABEL: func @canonicalize_parallel_insert_slice_indices( // CHECK-SAME: %[[arg0:.*]]: tensor<1x5xf32>, %[[arg1:.*]]: tensor func.func @canonicalize_parallel_insert_slice_indices( From 4f94ba0fa1aee6b2a9e9fe3bb1aa6a297bc1f540 Mon Sep 17 00:00:00 2001 From: ddubov100 <155631080+ddubov100@users.noreply.github.com> Date: Mon, 20 Oct 2025 10:33:34 +0300 Subject: [PATCH 02/10] Update mlir/lib/Dialect/SCF/IR/SCF.cpp Co-authored-by: Mehdi Amini --- mlir/lib/Dialect/SCF/IR/SCF.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp index cbaed7cd68e95..8ee8d54d8c6d9 100644 --- a/mlir/lib/Dialect/SCF/IR/SCF.cpp +++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp @@ -315,7 +315,7 @@ struct ExecuteRegionForwardingEliminator if (yieldOps.size() != 1) return failure(); - auto yieldOp = dyn_cast(yieldOps.front()); + auto yieldOp = cast(yieldOps.front()); auto yieldedValues = yieldOp.getOperands(); // Check if all yielded values are from outside the region bool allExternal = true; From 86776d303aa3228a25215f1d5e3f8964c1939b2f Mon Sep 17 00:00:00 2001 From: ddubov100 <155631080+ddubov100@users.noreply.github.com> Date: Mon, 20 Oct 2025 10:33:50 +0300 Subject: [PATCH 03/10] Update mlir/lib/Dialect/SCF/IR/SCF.cpp Co-authored-by: Mehdi Amini --- mlir/lib/Dialect/SCF/IR/SCF.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp index 8ee8d54d8c6d9..0ca71663bf795 100644 --- a/mlir/lib/Dialect/SCF/IR/SCF.cpp +++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp @@ -351,9 +351,8 @@ struct ExecuteRegionForwardingEliminator bool isValueFromInsideRegion(Value value, ExecuteRegionOp executeRegionOp) const { // Check if the value is defined within the execute_region - if (Operation *defOp = value.getDefiningOp()) { - return executeRegionOp.getRegion().isAncestor(defOp->getParentRegion()); - } + if (Operation *defOp = value.getDefiningOp()) + return executeRegionOp.getRegion() = defOp->getParentRegion(); // If it's a block argument, check if it's from within the region if (BlockArgument blockArg = dyn_cast(value)) { From fc3308941e680cc3692ff92a67285c16baec163b Mon Sep 17 00:00:00 2001 From: ddubov100 <155631080+ddubov100@users.noreply.github.com> Date: Mon, 20 Oct 2025 10:34:27 +0300 Subject: [PATCH 04/10] Update mlir/lib/Dialect/SCF/IR/SCF.cpp Co-authored-by: Mehdi Amini --- mlir/lib/Dialect/SCF/IR/SCF.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp index 0ca71663bf795..aba9734eb8756 100644 --- a/mlir/lib/Dialect/SCF/IR/SCF.cpp +++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp @@ -356,7 +356,7 @@ struct ExecuteRegionForwardingEliminator // If it's a block argument, check if it's from within the region if (BlockArgument blockArg = dyn_cast(value)) { - return executeRegionOp.getRegion().isAncestor(blockArg.getParentRegion()); + return executeRegionOp.getRegion() == blockArg.getParentRegion()); } return false; // Value is from outside the region From 4784d37f7a3e38fd7f925d23265f795668169ef8 Mon Sep 17 00:00:00 2001 From: dubov diana Date: Mon, 20 Oct 2025 17:31:59 +0300 Subject: [PATCH 05/10] Code review comments --- mlir/include/mlir/Dialect/SCF/IR/SCFOps.td | 3 +- mlir/lib/Dialect/SCF/IR/SCF.cpp | 47 +++--- .../Transforms/one-shot-module-bufferize.mlir | 15 -- mlir/test/Dialect/SCF/canonicalize.mlir | 143 ++++++++++++++++-- 4 files changed, 153 insertions(+), 55 deletions(-) diff --git a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td index 70b94695f4a09..2b5020a5ebd01 100644 --- a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td +++ b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td @@ -78,8 +78,7 @@ def ConditionOp : SCF_Op<"condition", [ def ExecuteRegionOp : SCF_Op<"execute_region", [ DeclareOpInterfaceMethods, - RecursiveMemoryEffects, - DeclareOpInterfaceMethods]> {//, SingleBlockImplicitTerminator<"scf::YieldOp">]> { //, RecursiveMemoryEffects]> { + RecursiveMemoryEffects]> { let summary = "operation that executes its region exactly once"; let description = [{ The `scf.execute_region` operation is used to allow multiple blocks within SCF diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp index aba9734eb8756..3d1cf3666957a 100644 --- a/mlir/lib/Dialect/SCF/IR/SCF.cpp +++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp @@ -27,6 +27,7 @@ #include "mlir/Interfaces/ValueBoundsOpInterface.h" #include "mlir/Transforms/InliningUtils.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/Casting.h" #include "llvm/Support/DebugLog.h" @@ -306,20 +307,25 @@ struct ExecuteRegionForwardingEliminator SmallVector yieldOps; for (Block &block : op.getRegion()) { if (auto yield = dyn_cast(block.getTerminator())) { - if (yield.getResults().empty()) + if (yield.getOperands().empty()) continue; yieldOps.push_back(yield.getOperation()); } } - if (yieldOps.size() != 1) + if (yieldOps.empty()) return failure(); - auto yieldOp = cast(yieldOps.front()); - auto yieldedValues = yieldOp.getOperands(); + // Check if all yield operations have the same operands. + auto yieldOpsOperands = yieldOps[0]->getOperands(); + for (auto *yieldOp : yieldOps) { + if (yieldOp->getOperands() != yieldOpsOperands) + return failure(); + } + // Check if all yielded values are from outside the region bool allExternal = true; - for (Value yieldedValue : yieldedValues) { + for (Value yieldedValue : yieldOpsOperands) { if (isValueFromInsideRegion(yieldedValue, op)) { allExternal = false; break; @@ -337,13 +343,16 @@ struct ExecuteRegionForwardingEliminator // Move the region content to the new operation newOp.getRegion().takeBody(op.getRegion()); - // Replace the yield operation with a new yield operation with no results. - rewriter.setInsertionPoint(yieldOp); - rewriter.eraseOp(yieldOp); - rewriter.create(yieldOp.getLoc()); + // Replace all yield operations with a new yield operation with no results. + // scf.execute_region must have at least one yield operation. + for (auto *yieldOp : yieldOps) { + rewriter.setInsertionPoint(yieldOp); + rewriter.eraseOp(yieldOp); + rewriter.create(yieldOp->getLoc()); + } // Replace the old operation with the external values directly. - rewriter.replaceOp(op, yieldedValues); + rewriter.replaceOp(op, yieldOpsOperands); return success(); } @@ -352,12 +361,11 @@ struct ExecuteRegionForwardingEliminator ExecuteRegionOp executeRegionOp) const { // Check if the value is defined within the execute_region if (Operation *defOp = value.getDefiningOp()) - return executeRegionOp.getRegion() = defOp->getParentRegion(); + return &executeRegionOp.getRegion() == defOp->getParentRegion(); // If it's a block argument, check if it's from within the region - if (BlockArgument blockArg = dyn_cast(value)) { - return executeRegionOp.getRegion() == blockArg.getParentRegion()); - } + if (BlockArgument blockArg = dyn_cast(value)) + return &executeRegionOp.getRegion() == blockArg.getParentRegion(); return false; // Value is from outside the region } @@ -369,17 +377,6 @@ void ExecuteRegionOp::getCanonicalizationPatterns(RewritePatternSet &results, ExecuteRegionForwardingEliminator>(context); } -void ExecuteRegionOp::getEffects( - SmallVectorImpl> - &effects) { - if (!getNoInline()) - return; - // In case there is attribute no_inline we want the region not to be inlined - // into the parent operation. - effects.emplace_back(MemoryEffects::Write::get(), - SideEffects::DefaultResource::get()); -} - void ExecuteRegionOp::getSuccessorRegions( RegionBranchPoint point, SmallVectorImpl ®ions) { // If the predecessor is the ExecuteRegionOp, branch into the body. diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir index d5f834bce9b83..eaafdeefc6de1 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir @@ -377,21 +377,6 @@ func.func private @execute_region_test(%t1 : tensor) // CHECK: return %{{.*}}, %{{.*}} : f32, f32 return %0, %1, %2 : f32, tensor, f32 } - -// ----- - -// CHECK-LABEL: func @no_inline_execute_region_not_canonicalized -func.func @no_inline_execute_region_not_canonicalized() { - %c = arith.constant 42 : i32 - // CHECK: scf.execute_region - // CHECK-SAME: no_inline - %v = scf.execute_region -> i32 no_inline { - scf.yield %c : i32 - } - // CHECK: return - return -} - // ----- // CHECK: func private @some_external_func(memref>) diff --git a/mlir/test/Dialect/SCF/canonicalize.mlir b/mlir/test/Dialect/SCF/canonicalize.mlir index 4529cdb4a298c..377f5b18dccae 100644 --- a/mlir/test/Dialect/SCF/canonicalize.mlir +++ b/mlir/test/Dialect/SCF/canonicalize.mlir @@ -1604,24 +1604,141 @@ func.func @func_execute_region_inline_multi_yield() { // ----- -// CHECK-LABEL: func.func private @canonicalize_execute_region_yeilding_external_value( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<1x120xui8>) -> tensor<1x60xui8> { -// CHECK: %[[VAL_1:.*]] = memref.alloc() {alignment = 64 : i64} : memref<1x60xui8> +// Test case with single scf.yield op inside execute_region and its operand is defined outside the execute_region op. +// Make scf.execute_region not to return anything. + // CHECK: scf.execute_region no_inline { // CHECK: scf.yield // CHECK: } -// CHECK: %[[VAL_2:.*]] = bufferization.to_tensor %[[VAL_1]] : memref<1x60xui8> to tensor<1x60xui8> -// CHECK: return %[[VAL_2]] : tensor<1x60xui8> -// CHECK: } -func.func private @canonicalize_execute_region_yeilding_external_value(%arg0: tensor<1x120xui8>) -> tensor<1x60xui8> { - %alloc = memref.alloc() {alignment = 64 : i64} : memref<1x60xui8> - %0 = bufferization.to_tensor %alloc : memref<1x60xui8> to tensor<1x60xui8> - %1 = scf.execute_region -> memref<1x60xui8> no_inline { - scf.yield %alloc : memref<1x60xui8> +module { +func.func private @foo()->() +func.func private @execute_region_yeilding_external_value() -> memref<1x60xui8> { + %alloc = memref.alloc() {alignment = 64 : i64} : memref<1x60xui8> + %1 = scf.execute_region -> memref<1x60xui8> no_inline { + func.call @foo():()->() + scf.yield %alloc: memref<1x60xui8> + } + return %1 : memref<1x60xui8> +} +} + +// ----- + +// Test case with scf.yield op inside execute_region with multiple operands. +// One of operands is defined outside the execute_region op. +// In this case scf.execute_region doesn't change. + +// CHECK: %[[VAL_1:.*]]:2 = scf.execute_region -> (memref<1x60xui8>, memref<1x120xui8>) no_inline { +// CHECK: scf.yield %{{.*}}, %{{.*}} : memref<1x60xui8>, memref<1x120xui8> + +module { +func.func private @foo()->() +func.func private @execute_region_yeilding_external_and_local_values() -> (memref<1x60xui8>, memref<1x120xui8>) { + %alloc = memref.alloc() {alignment = 64 : i64} : memref<1x60xui8> + %1, %2 = scf.execute_region -> (memref<1x60xui8>, memref<1x120xui8>) no_inline { + %alloc_1 = memref.alloc() {alignment = 64 : i64} : memref<1x120xui8> + func.call @foo():()->() + scf.yield %alloc, %alloc_1: memref<1x60xui8>, memref<1x120xui8> + } + return %1, %2 : memref<1x60xui8>, memref<1x120xui8> +} +} + +// ----- + +// Test case with multiple scf.yield ops inside execute_region with same operands and those operands are defined outside the execute_region op.. +// Make scf.execute_region not to return anything. +// scf.yield must remain, cause scf.execute_region can't be empty. + +// CHECK: scf.execute_region no_inline { +// CHECK: %[[VAL_3:.*]] = "test.cmp"() : () -> i1 +// CHECK: cf.cond_br %[[VAL_3]], ^bb1, ^bb2 +// CHECK: ^bb1: +// CHECK: scf.yield +// CHECK: ^bb2: +// CHECK: scf.yield +// CHECK: } + +module { + func.func private @foo()->() + func.func private @execute_region_multiple_yields_same_operands() -> (memref<1x60xui8>, memref<1x120xui8>) { + %alloc = memref.alloc() {alignment = 64 : i64} : memref<1x60xui8> + %alloc_1 = memref.alloc() {alignment = 64 : i64} : memref<1x120xui8> + %1, %2 = scf.execute_region -> (memref<1x60xui8>, memref<1x120xui8>) no_inline { + %c = "test.cmp"() : () -> i1 + cf.cond_br %c, ^bb2, ^bb3 + ^bb2: + func.call @foo():()->() + scf.yield %alloc, %alloc_1 : memref<1x60xui8>, memref<1x120xui8> + ^bb3: + func.call @foo():()->() + scf.yield %alloc, %alloc_1 : memref<1x60xui8>, memref<1x120xui8> + } + return %1, %2 : memref<1x60xui8>, memref<1x120xui8> } - %2 = bufferization.to_tensor %1 : memref<1x60xui8> to tensor<1x60xui8> - return %2 : tensor<1x60xui8> +} + +// ----- + +// Test case with multiple scf.yield ops with at least one different operand, then no change. + +// CHECK: %[[VAL_3:.*]]:2 = scf.execute_region -> (memref<1x60xui8>, memref<1x120xui8>) no_inline { +// CHECK: ^bb1: +// CHECK: scf.yield %{{.*}}, %{{.*}} : memref<1x60xui8>, memref<1x120xui8> +// CHECK: ^bb2: +// CHECK: scf.yield %{{.*}}, %{{.*}} : memref<1x60xui8>, memref<1x120xui8> +// CHECK: } + +module { + func.func private @foo()->() + func.func private @execute_region_multiple_yields_different_operands() -> (memref<1x60xui8>, memref<1x120xui8>) { + %alloc = memref.alloc() {alignment = 64 : i64} : memref<1x60xui8> + %alloc_1 = memref.alloc() {alignment = 64 : i64} : memref<1x120xui8> + %alloc_2 = memref.alloc() {alignment = 64 : i64} : memref<1x120xui8> + %1, %2 = scf.execute_region -> (memref<1x60xui8>, memref<1x120xui8>) no_inline { + %c = "test.cmp"() : () -> i1 + cf.cond_br %c, ^bb2, ^bb3 + ^bb2: + func.call @foo():()->() + scf.yield %alloc, %alloc_1 : memref<1x60xui8>, memref<1x120xui8> + ^bb3: + func.call @foo():()->() + scf.yield %alloc, %alloc_2 : memref<1x60xui8>, memref<1x120xui8> + } + return %1, %2 : memref<1x60xui8>, memref<1x120xui8> + } +} + +// ----- + +// Test case with multiple scf.yield ops each has different operand. +// In this case scf.execute_region isn't changed. + +// CHECK: %[[VAL_2:.*]] = scf.execute_region -> memref<1x60xui8> no_inline { +// CHECK: ^bb1: +// CHECK: scf.yield %{{.*}} : memref<1x60xui8> +// CHECK: ^bb2: +// CHECK: scf.yield %{{.*}} : memref<1x60xui8> +// CHECK: } + +module { +func.func private @foo()->() +func.func private @execute_region_multiple_yields_different_operands() -> (memref<1x60xui8>) { + %alloc = memref.alloc() {alignment = 64 : i64} : memref<1x60xui8> + %alloc_1 = memref.alloc() {alignment = 64 : i64} : memref<1x60xui8> + %1 = scf.execute_region -> (memref<1x60xui8>) no_inline { + %c = "test.cmp"() : () -> i1 + cf.cond_br %c, ^bb2, ^bb3 + ^bb2: + func.call @foo():()->() + scf.yield %alloc : memref<1x60xui8> + ^bb3: + func.call @foo():()->() + scf.yield %alloc_1 : memref<1x60xui8> + } + return %1 : memref<1x60xui8> +} } // ----- From 0492acb966c566f2cd74b385f61e5a0af52df77a Mon Sep 17 00:00:00 2001 From: dubov diana Date: Tue, 21 Oct 2025 11:06:23 +0300 Subject: [PATCH 06/10] CR 2 --- mlir/include/mlir/Dialect/SCF/IR/SCFOps.td | 3 +- mlir/lib/Dialect/SCF/IR/SCF.cpp | 60 ++++++++++++------- .../Transforms/one-shot-module-bufferize.mlir | 17 ++++++ mlir/test/Dialect/SCF/canonicalize.mlir | 11 ++-- 4 files changed, 62 insertions(+), 29 deletions(-) diff --git a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td index 2b5020a5ebd01..fadd3fc10bfc4 100644 --- a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td +++ b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td @@ -77,8 +77,7 @@ def ConditionOp : SCF_Op<"condition", [ //===----------------------------------------------------------------------===// def ExecuteRegionOp : SCF_Op<"execute_region", [ - DeclareOpInterfaceMethods, - RecursiveMemoryEffects]> { + DeclareOpInterfaceMethods]> { let summary = "operation that executes its region exactly once"; let description = [{ The `scf.execute_region` operation is used to allow multiple blocks within SCF diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp index 3d1cf3666957a..c3371e4d69387 100644 --- a/mlir/lib/Dialect/SCF/IR/SCF.cpp +++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp @@ -292,9 +292,10 @@ struct MultiBlockExecuteInliner : public OpRewritePattern { } }; -// Pattern to eliminate ExecuteRegionOp results when it only forwards external -// values. It operates only on execute regions with single terminator yield -// operation. +// Pattern to eliminate ExecuteRegionOp results which forward external +// values from the region. In case there are multiple yield operations, +// all of them must have the same operands iin order for the pattern to be +// applicable. struct ExecuteRegionForwardingEliminator : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -306,11 +307,8 @@ struct ExecuteRegionForwardingEliminator SmallVector yieldOps; for (Block &block : op.getRegion()) { - if (auto yield = dyn_cast(block.getTerminator())) { - if (yield.getOperands().empty()) - continue; + if (auto yield = dyn_cast(block.getTerminator())) yieldOps.push_back(yield.getOperation()); - } } if (yieldOps.empty()) @@ -323,36 +321,52 @@ struct ExecuteRegionForwardingEliminator return failure(); } - // Check if all yielded values are from outside the region - bool allExternal = true; - for (Value yieldedValue : yieldOpsOperands) { + SmallVector externalValues; + SmallVector internalValues; + SmallVector opResultsToReplaceWithExternalValues; + SmallVector opResultsToKeep; + for (auto [index, yieldedValue] : llvm::enumerate(yieldOpsOperands)) { if (isValueFromInsideRegion(yieldedValue, op)) { - allExternal = false; - break; + internalValues.push_back(yieldedValue); + opResultsToKeep.push_back(op.getResult(index)); + } else { + externalValues.push_back(yieldedValue); + opResultsToReplaceWithExternalValues.push_back(op.getResult(index)); } } - - if (!allExternal) + // No yeilded external values - nothing to do. + if (externalValues.empty()) return failure(); - // All yielded values are external - create a new execute_region with no - // results. - auto newOp = rewriter.create(op.getLoc(), TypeRange{}); + // There are yielded external values - create a new execute_region returning + // just the internal values. + SmallVector resultTypes; + for (Value value : internalValues) + resultTypes.push_back(value.getType()); + auto newOp = + rewriter.create(op.getLoc(), TypeRange(resultTypes)); newOp->setAttrs(op->getAttrs()); - // Move the region content to the new operation - newOp.getRegion().takeBody(op.getRegion()); + // Move old op's region to the new operation. + rewriter.inlineRegionBefore(op.getRegion(), newOp.getRegion(), + newOp.getRegion().end()); - // Replace all yield operations with a new yield operation with no results. - // scf.execute_region must have at least one yield operation. + // Replace all yield operations with a new yield operation with updated + // results. scf.execute_region must have at least one yield operation. for (auto *yieldOp : yieldOps) { rewriter.setInsertionPoint(yieldOp); rewriter.eraseOp(yieldOp); - rewriter.create(yieldOp->getLoc()); + rewriter.create(yieldOp->getLoc(), + ValueRange(internalValues)); } // Replace the old operation with the external values directly. - rewriter.replaceOp(op, yieldOpsOperands); + rewriter.replaceAllUsesWith(opResultsToReplaceWithExternalValues, + externalValues); + // Replace the old operation's remaining results with the new operation's + // results. + rewriter.replaceAllUsesWith(opResultsToKeep, newOp.getResults()); + rewriter.eraseOp(op); return success(); } diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir index eaafdeefc6de1..95846491f039c 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir @@ -379,6 +379,23 @@ func.func private @execute_region_test(%t1 : tensor) } // ----- +// CHECK-LABEL: func @no_inline_execute_region_not_canonicalized +module { + func.func private @foo()->() + func.func @no_inline_execute_region_not_canonicalized() { + %c = arith.constant 42 : i32 + // CHECK: scf.execute_region + // CHECK-SAME: no_inline + %v = scf.execute_region -> i32 no_inline { + func.call @foo():()->() + scf.yield %c : i32 + } + // CHECK: return + return + } +} +// ----- + // CHECK: func private @some_external_func(memref>) func.func private @some_external_func(tensor) diff --git a/mlir/test/Dialect/SCF/canonicalize.mlir b/mlir/test/Dialect/SCF/canonicalize.mlir index 377f5b18dccae..084c3fc065de3 100644 --- a/mlir/test/Dialect/SCF/canonicalize.mlir +++ b/mlir/test/Dialect/SCF/canonicalize.mlir @@ -1608,6 +1608,7 @@ func.func @func_execute_region_inline_multi_yield() { // Make scf.execute_region not to return anything. // CHECK: scf.execute_region no_inline { +// CHECK: func.call @foo() : () -> () // CHECK: scf.yield // CHECK: } @@ -1627,11 +1628,13 @@ func.func private @execute_region_yeilding_external_value() -> memref<1x60xui8> // Test case with scf.yield op inside execute_region with multiple operands. // One of operands is defined outside the execute_region op. -// In this case scf.execute_region doesn't change. - -// CHECK: %[[VAL_1:.*]]:2 = scf.execute_region -> (memref<1x60xui8>, memref<1x120xui8>) no_inline { -// CHECK: scf.yield %{{.*}}, %{{.*}} : memref<1x60xui8>, memref<1x120xui8> +// Remove just this operand from the op results. +// CHECK: %[[VAL_1:.*]] = scf.execute_region -> memref<1x120xui8> no_inline { +// CHECK: %[[VAL_2:.*]] = memref.alloc() {alignment = 64 : i64} : memref<1x120xui8> +// CHECK: func.call @foo() : () -> () +// CHECK: scf.yield %[[VAL_2]] : memref<1x120xui8> +// CHECK: } module { func.func private @foo()->() func.func private @execute_region_yeilding_external_and_local_values() -> (memref<1x60xui8>, memref<1x120xui8>) { From 4185a879ec799179c3504d23dae31ebe74ce8aee Mon Sep 17 00:00:00 2001 From: dubov diana Date: Tue, 21 Oct 2025 14:36:16 +0300 Subject: [PATCH 07/10] CR 3 and removed update of a test, cause moved to another PR --- mlir/lib/Dialect/SCF/IR/SCF.cpp | 2 +- .../Transforms/one-shot-module-bufferize.mlir | 22 +++++++++---------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp index c3371e4d69387..762728ad0f8f9 100644 --- a/mlir/lib/Dialect/SCF/IR/SCF.cpp +++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp @@ -334,7 +334,7 @@ struct ExecuteRegionForwardingEliminator opResultsToReplaceWithExternalValues.push_back(op.getResult(index)); } } - // No yeilded external values - nothing to do. + // No yielded external values - nothing to do. if (externalValues.empty()) return failure(); diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir index 95846491f039c..d5f834bce9b83 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir @@ -377,23 +377,21 @@ func.func private @execute_region_test(%t1 : tensor) // CHECK: return %{{.*}}, %{{.*}} : f32, f32 return %0, %1, %2 : f32, tensor, f32 } + // ----- // CHECK-LABEL: func @no_inline_execute_region_not_canonicalized -module { - func.func private @foo()->() - func.func @no_inline_execute_region_not_canonicalized() { - %c = arith.constant 42 : i32 - // CHECK: scf.execute_region - // CHECK-SAME: no_inline - %v = scf.execute_region -> i32 no_inline { - func.call @foo():()->() - scf.yield %c : i32 - } - // CHECK: return - return +func.func @no_inline_execute_region_not_canonicalized() { + %c = arith.constant 42 : i32 + // CHECK: scf.execute_region + // CHECK-SAME: no_inline + %v = scf.execute_region -> i32 no_inline { + scf.yield %c : i32 } + // CHECK: return + return } + // ----- // CHECK: func private @some_external_func(memref>) From f608cf4103dd647ccd94b8242882f14c1f321fd6 Mon Sep 17 00:00:00 2001 From: dubov diana Date: Wed, 22 Oct 2025 12:30:58 +0300 Subject: [PATCH 08/10] Changed yield op replacment code --- mlir/lib/Dialect/SCF/IR/SCF.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp index 762728ad0f8f9..56dacb2a6db7b 100644 --- a/mlir/lib/Dialect/SCF/IR/SCF.cpp +++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp @@ -355,9 +355,8 @@ struct ExecuteRegionForwardingEliminator // results. scf.execute_region must have at least one yield operation. for (auto *yieldOp : yieldOps) { rewriter.setInsertionPoint(yieldOp); - rewriter.eraseOp(yieldOp); - rewriter.create(yieldOp->getLoc(), - ValueRange(internalValues)); + rewriter.replaceOpWithNewOp(yieldOp, + ValueRange(internalValues)); } // Replace the old operation with the external values directly. From d8a451f0b2926876142f7471ebd27d332206e57c Mon Sep 17 00:00:00 2001 From: dubov diana Date: Thu, 23 Oct 2025 11:46:22 +0300 Subject: [PATCH 09/10] Fixed deperecated create op. --- mlir/lib/Dialect/SCF/IR/SCF.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp index 56dacb2a6db7b..9d4db7aa0b1cd 100644 --- a/mlir/lib/Dialect/SCF/IR/SCF.cpp +++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp @@ -344,7 +344,7 @@ struct ExecuteRegionForwardingEliminator for (Value value : internalValues) resultTypes.push_back(value.getType()); auto newOp = - rewriter.create(op.getLoc(), TypeRange(resultTypes)); + ExecuteRegionOp::create(rewriter, op.getLoc(), TypeRange(resultTypes)); newOp->setAttrs(op->getAttrs()); // Move old op's region to the new operation. From 8a32f6ca0bcf071b4b957d50959ee831c5574dbf Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Thu, 23 Oct 2025 04:36:19 -0700 Subject: [PATCH 10/10] Apply suggestion from @joker-eph --- mlir/lib/Dialect/SCF/IR/SCF.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp index 9d4db7aa0b1cd..744a5951330a3 100644 --- a/mlir/lib/Dialect/SCF/IR/SCF.cpp +++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp @@ -294,7 +294,7 @@ struct MultiBlockExecuteInliner : public OpRewritePattern { // Pattern to eliminate ExecuteRegionOp results which forward external // values from the region. In case there are multiple yield operations, -// all of them must have the same operands iin order for the pattern to be +// all of them must have the same operands in order for the pattern to be // applicable. struct ExecuteRegionForwardingEliminator : public OpRewritePattern {