Adding some missing support to execute_region_op #164159
Conversation
@llvm/pr-subscribers-mlir-bufferization @llvm/pr-subscribers-mlir-scf
Author: None (ddubov100)
Changes
Added the following to execute_region_op.
Full diff: https://github.com/llvm/llvm-project/pull/164159.diff
3 Files Affected:
diff --git a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td
index fadd3fc10bfc4..70b94695f4a09 100644
--- a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td
+++ b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td
@@ -77,7 +77,9 @@ def ConditionOp : SCF_Op<"condition", [
//===----------------------------------------------------------------------===//
def ExecuteRegionOp : SCF_Op<"execute_region", [
- DeclareOpInterfaceMethods<RegionBranchOpInterface>]> {
+ DeclareOpInterfaceMethods<RegionBranchOpInterface>,
+ RecursiveMemoryEffects,
+ DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> {
let summary = "operation that executes its region exactly once";
let description = [{
The `scf.execute_region` operation is used to allow multiple blocks within SCF
diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp
index a9da6c2c8320a..727c2e3ff469f 100644
--- a/mlir/lib/Dialect/SCF/IR/SCF.cpp
+++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp
@@ -291,9 +291,89 @@ struct MultiBlockExecuteInliner : public OpRewritePattern<ExecuteRegionOp> {
}
};
+// Pattern to eliminate ExecuteRegionOp results when the op only forwards
+// values defined outside of its region. It applies only to execute regions
+// containing a single yield op with operands.
+struct ExecuteRegionForwardingEliminator
+    : public OpRewritePattern<ExecuteRegionOp> {
+ using OpRewritePattern<ExecuteRegionOp>::OpRewritePattern;
+
+ LogicalResult matchAndRewrite(ExecuteRegionOp op,
+ PatternRewriter &rewriter) const override {
+ if (op.getNumResults() == 0)
+ return failure();
+
+ SmallVector<Operation*> yieldOps;
+ for (Block &block : op.getRegion()) {
+ if (auto yield = dyn_cast<scf::YieldOp>(block.getTerminator())) {
+ if (yield.getResults().empty())
+ continue;
+ yieldOps.push_back(yield.getOperation());
+ }
+ }
+
+ if (yieldOps.size() != 1)
+ return failure();
+
+ auto yieldOp = cast<scf::YieldOp>(yieldOps.front());
+ auto yieldedValues = yieldOp.getOperands();
+ // Check if all yielded values are from outside the region
+ bool allExternal = true;
+ for (Value yieldedValue : yieldedValues) {
+ if (isValueFromInsideRegion(yieldedValue, op)) {
+ allExternal = false;
+ break;
+ }
+ }
+
+ if (!allExternal)
+ return failure();
+
+ // All yielded values are external - create a new execute_region with no results.
+ auto newOp = rewriter.create<ExecuteRegionOp>(op.getLoc(), TypeRange{});
+ newOp->setAttrs(op->getAttrs());
+
+ // Move the region content to the new operation
+ newOp.getRegion().takeBody(op.getRegion());
+
+ // Replace the yield operation with a new yield operation with no results.
+ rewriter.setInsertionPoint(yieldOp);
+ rewriter.create<scf::YieldOp>(yieldOp.getLoc());
+ rewriter.eraseOp(yieldOp);
+
+ // Replace the old operation with the external values directly.
+ rewriter.replaceOp(op, yieldedValues);
+ return success();
+ }
+
+private:
+ bool isValueFromInsideRegion(Value value, ExecuteRegionOp executeRegionOp) const {
+ // Check if the value is defined within the execute_region
+ if (Operation *defOp = value.getDefiningOp()) {
+ return executeRegionOp.getRegion().isAncestor(defOp->getParentRegion());
+ }
+
+ // If it's a block argument, check if it's from within the region
+ if (BlockArgument blockArg = dyn_cast<BlockArgument>(value)) {
+ return executeRegionOp.getRegion().isAncestor(blockArg.getParentRegion());
+ }
+
+ return false; // Value is from outside the region
+ }
+};
+
void ExecuteRegionOp::getCanonicalizationPatterns(RewritePatternSet &results,
- MLIRContext *context) {
- results.add<SingleBlockExecuteInliner, MultiBlockExecuteInliner>(context);
+ MLIRContext *context) {
+ results.add<SingleBlockExecuteInliner, MultiBlockExecuteInliner,
+ ExecuteRegionForwardingEliminator>(context);
+}
+
+void ExecuteRegionOp::getEffects(
+ SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
+ &effects) {
+ if (!getNoInline())
+ return;
+ // When the no_inline attribute is present, the region must not be inlined
+ // into the parent operation, so report a conservative write effect.
+ effects.emplace_back(MemoryEffects::Write::get(),
+ SideEffects::DefaultResource::get());
}
void ExecuteRegionOp::getSuccessorRegions(
diff --git a/mlir/test/Dialect/SCF/canonicalize.mlir b/mlir/test/Dialect/SCF/canonicalize.mlir
index 2bec63672e783..4529cdb4a298c 100644
--- a/mlir/test/Dialect/SCF/canonicalize.mlir
+++ b/mlir/test/Dialect/SCF/canonicalize.mlir
@@ -1604,6 +1604,28 @@ func.func @func_execute_region_inline_multi_yield() {
// -----
+// CHECK-LABEL: func.func private @canonicalize_execute_region_yielding_external_value(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<1x120xui8>) -> tensor<1x60xui8> {
+// CHECK: %[[VAL_1:.*]] = memref.alloc() {alignment = 64 : i64} : memref<1x60xui8>
+// CHECK: scf.execute_region no_inline {
+// CHECK: scf.yield
+// CHECK: }
+// CHECK: %[[VAL_2:.*]] = bufferization.to_tensor %[[VAL_1]] : memref<1x60xui8> to tensor<1x60xui8>
+// CHECK: return %[[VAL_2]] : tensor<1x60xui8>
+// CHECK: }
+
+func.func private @canonicalize_execute_region_yielding_external_value(%arg0: tensor<1x120xui8>) -> tensor<1x60xui8> {
+ %alloc = memref.alloc() {alignment = 64 : i64} : memref<1x60xui8>
+ %0 = bufferization.to_tensor %alloc : memref<1x60xui8> to tensor<1x60xui8>
+ %1 = scf.execute_region -> memref<1x60xui8> no_inline {
+ scf.yield %alloc : memref<1x60xui8>
+ }
+ %2 = bufferization.to_tensor %1 : memref<1x60xui8> to tensor<1x60xui8>
+ return %2 : tensor<1x60xui8>
+}
+
+// -----
+
// CHECK-LABEL: func @canonicalize_parallel_insert_slice_indices(
// CHECK-SAME: %[[arg0:.*]]: tensor<1x5xf32>, %[[arg1:.*]]: tensor<?x?xf32>
func.func @canonicalize_parallel_insert_slice_indices(
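As a rough illustration of the getEffects change in SCF.cpp above: when the no_inline attribute is present, the op reports a write effect on the default resource, which, per the in-code comment, is meant to keep the region from being inlined into its parent operation. A minimal sketch of such an op, reusing the no_inline form from the test above (the function name is illustrative, not from the patch):

func.func @keep_no_inline_region() {
  // Yields nothing and has no body side effects, but still reports a write
  // effect because of no_inline, so it is treated as effectful.
  scf.execute_region no_inline {
    scf.yield
  }
  return
}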
✅ With the latest revision this PR passed the C/C++ code formatter.
Force-pushed from a33cc2f to e793c80.
@matthias-springer @jungpark-mlir @wsmoses please take a look
I suspect we should have a separate PR for each of the 3 changes mentioned in the description; can you split it please?
Let's keep this one for the added canonicalization pattern.
Force-pushed from d3c2c21 to 5b80374.
Nice, thanks!
Force-pushed from cbd48e0 to 5bddd42.
Co-authored-by: Mehdi Amini <joker.eph@gmail.com>
Head branch was pushed to by a user without write access
Force-pushed from 992d27b to d8a451f.
Adding a canonicalization pattern: when an execute_region op's yield operands all come from outside the execute_region, the op is simplified so that its results are replaced by those external values directly and the region yields nothing. The pattern is applied only when the execute_region contains a single yield op with operands.
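In other words, once every yielded value is defined outside the region, users of the op's results can read those values directly and the region is rewritten to yield nothing. A rough before/after sketch of the rewrite (the types and the "some.use" op are illustrative placeholders, not from the patch; the test added in canonicalize.mlir exercises the same rewrite):

// Before: %r only forwards %alloc, which is defined outside the region.
%alloc = memref.alloc() : memref<4xf32>
%r = scf.execute_region -> memref<4xf32> no_inline {
  scf.yield %alloc : memref<4xf32>
}
"some.use"(%r) : (memref<4xf32>) -> ()

// After: the execute_region result is gone and users read %alloc directly.
%alloc = memref.alloc() : memref<4xf32>
scf.execute_region no_inline {
  scf.yield
}
"some.use"(%alloc) : (memref<4xf32>) -> ()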