[mlir][RemoveDeadValues] Simplify branch op handling using ub.poison by felichita · Pull Request #182711 · llvm/llvm-project

felichita · 2026-02-21T23:09:56Z

Previously, the canonicalization step only covered RegionBranchOpInterface
ops. This change extends it to also include BranchOpInterface ops, which
is necessary to clean up block arguments that were replaced with ub.poison
by the dead value removal phase.

Also add a new canonicalization pattern
SimplifyCondBranchBlockArgWithUniformIncomingValues to the cf dialect,
which replaces block argument uses with a uniform incoming value
ub.poison when all predecessors pass the same value. This allows other
canonicalization patterns to remove the block arguments entirely.

Fixes #182263

llvmbot · 2026-02-21T23:10:25Z

@llvm/pr-subscribers-mlir-spirv
@llvm/pr-subscribers-mlir-cf
@llvm/pr-subscribers-mlir
@llvm/pr-subscribers-mlir-gpu

@llvm/pr-subscribers-mlir-core

Author: Fedor Nikolaev (felichita)

Changes

The RemoveDeadValues pass was crashing with an assertion failure when processing IR containing gpu.launch operations. The root cause was that gpu.launch was missing an explicit Write memory effect, causing wouldOpBeTriviallyDead to incorrectly consider it dead via recursive analysis of its region.

Fixes #182263

Full diff: https://github.com/llvm/llvm-project/pull/182711.diff

4 Files Affected:

(modified) mlir/include/mlir/Dialect/GPU/IR/GPUOps.td (+1)
(modified) mlir/lib/Analysis/DataFlow/LivenessAnalysis.cpp (+7)
(modified) mlir/lib/Dialect/GPU/IR/GPUDialect.cpp (+6)
(modified) mlir/lib/Transforms/RemoveDeadValues.cpp (+41-10)

diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 48de1a8bf118e..3515da360e129 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -808,6 +808,7 @@ def GPU_LaunchFuncOp :GPU_Op<"launch_func", [
 def GPU_LaunchOp : GPU_Op<"launch", [
       AffineScope, AutomaticAllocationScope, AttrSizedOperandSegments,
       DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>,
+      DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
       GPU_AsyncOpInterface, RecursiveMemoryEffects]>,
     Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,
                Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
diff --git a/mlir/lib/Analysis/DataFlow/LivenessAnalysis.cpp b/mlir/lib/Analysis/DataFlow/LivenessAnalysis.cpp
index 4afc35d23fafa..ec9827b38be39 100644
--- a/mlir/lib/Analysis/DataFlow/LivenessAnalysis.cpp
+++ b/mlir/lib/Analysis/DataFlow/LivenessAnalysis.cpp
@@ -17,6 +17,7 @@
 #include <mlir/IR/Operation.h>
 #include <mlir/IR/Value.h>
 #include <mlir/Interfaces/CallInterfaces.h>
+#include <mlir/Interfaces/FunctionInterfaces.h>
 #include <mlir/Interfaces/SideEffectInterfaces.h>
 #include <mlir/Support/LLVM.h>
 
@@ -237,6 +238,12 @@ RunLivenessAnalysis::RunLivenessAnalysis(Operation *op) {
         for (auto blockArg : llvm::enumerate(block.getArguments())) {
           if (getLiveness(blockArg.value()))
             continue;
+          // Skip block args of ops with regions that are not
+          // RegionBranchOpInterface or FunctionOpInterface
+          // (e.g. gpu.launch) - solver doesn't analyze their regions
+          if (!isa<RegionBranchOpInterface>(op) &&
+              !isa<FunctionOpInterface>(op))
+            continue;
           LDBG() << "Block argument: " << blockArg.index() << " of "
                  << OpWithFlags(op, OpPrintingFlags().skipRegions())
                  << " has no liveness info, mark dead";
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index a66a83b7e3ca1..9822fa29ffe8a 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -942,6 +942,12 @@ static void printSizeAssignment(OpAsmPrinter &p, KernelDim3 size,
   p << size.z << " = " << operands.z << ')';
 }
 
+void LaunchOp::getEffects(
+    SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
+        &effects) {
+  effects.emplace_back(MemoryEffects::Write::get());
+}
+
 void LaunchOp::print(OpAsmPrinter &p) {
   if (getAsyncToken()) {
     p << " async";
diff --git a/mlir/lib/Transforms/RemoveDeadValues.cpp b/mlir/lib/Transforms/RemoveDeadValues.cpp
index 12a47ba2fb65a..7874a92012ac1 100644
--- a/mlir/lib/Transforms/RemoveDeadValues.cpp
+++ b/mlir/lib/Transforms/RemoveDeadValues.cpp
@@ -235,6 +235,15 @@ static void processSimpleOp(Operation *op, RunLivenessAnalysis &la,
   // "dead" if it had a side-effecting user that is reachable.
   bool hasDeadOperand =
       markLives(op->getOperands(), nonLiveSet, la).flip().any();
+
+  if (!isMemoryEffectFree(op) || hasLive(op->getResults(), nonLiveSet, la)) {
+    LDBG() << "Simple op is not memory effect free or has live results, "
+              "preserving it: "
+           << OpWithFlags(op,
+                          OpPrintingFlags().skipRegions().printGenericOpForm());
+    return;
+  }
+
   if (hasDeadOperand) {
     LDBG() << "Simple op has dead operands, so the op must be dead: "
            << OpWithFlags(op,
@@ -247,14 +256,6 @@ static void processSimpleOp(Operation *op, RunLivenessAnalysis &la,
     return;
   }
 
-  if (!isMemoryEffectFree(op) || hasLive(op->getResults(), nonLiveSet, la)) {
-    LDBG() << "Simple op is not memory effect free or has live results, "
-              "preserving it: "
-           << OpWithFlags(op,
-                          OpPrintingFlags().skipRegions().printGenericOpForm());
-    return;
-  }
-
   LDBG()
       << "Simple op has all dead results and is memory effect free, scheduling "
          "for removal: "
@@ -511,8 +512,10 @@ static void processBranchOp(BranchOpInterface branchOp, RunLivenessAnalysis &la,
     // Do (2)
     BitVector successorNonLive =
         markLives(operandValues, nonLiveSet, la).flip();
-    collectNonLiveValues(nonLiveSet, successorBlock->getArguments(),
-                         successorNonLive);
+    if (std::distance(successorBlock->pred_begin(),
+                      successorBlock->pred_end()) <= 1)
+      collectNonLiveValues(nonLiveSet, successorBlock->getArguments(),
+                           successorNonLive);
 
     // Do (3)
     cl.blocks.push_back({successorBlock, successorNonLive});
@@ -561,6 +564,7 @@ static void cleanUpDeadVals(MLIRContext *ctx, RDVFinalCleanupList &list) {
   // 1. Blocks, We must remove the block arguments and successor operands before
   // deleting the operation, as they may reside in the region operation.
   LDBG() << "Cleaning up " << list.blocks.size() << " block argument lists";
+  DenseSet<Block *> processedBlocks;
   for (auto &b : list.blocks) {
     // blocks that are accessed via multiple codepaths processed once
     if (b.b->getNumArguments() != b.nonLiveArgs.size())
@@ -573,6 +577,20 @@ static void cleanUpDeadVals(MLIRContext *ctx, RDVFinalCleanupList &list) {
          << OpWithFlags(b.b->getParent()->getParentOp(),
                         OpPrintingFlags().skipRegions().printGenericOpForm());
     });
+    // Skip if already processed
+    if (processedBlocks.count(b.b))
+      continue;
+    // Check if any entry has this block with live args
+    bool hasLiveFromAnyPred = false;
+    for (auto &other : list.blocks) {
+      if (other.b == b.b && other.nonLiveArgs.none()) {
+        hasLiveFromAnyPred = true;
+        break;
+      }
+    }
+    if (hasLiveFromAnyPred)
+      continue;
+    processedBlocks.insert(b.b);
     // Note: Iterate from the end to make sure that that indices of not yet
     // processes arguments do not change.
     for (int i = b.nonLiveArgs.size() - 1; i >= 0; --i) {
@@ -599,6 +617,19 @@ static void cleanUpDeadVals(MLIRContext *ctx, RDVFinalCleanupList &list) {
          << OpWithFlags(op.branch.getOperation(),
                         OpPrintingFlags().skipRegions().printGenericOpForm());
     });
+
+    // Check if any other branch to same block has live operands
+    Block *succBlock = op.branch->getSuccessor(op.successorIndex);
+    bool otherLivePred = false;
+    for (auto &other : list.successorOperands) {
+      Block *otherSucc = other.branch->getSuccessor(other.successorIndex);
+      if (otherSucc == succBlock && other.nonLiveOperands.none()) {
+        otherLivePred = true;
+        break;
+      }
+    }
+    if (otherLivePred)
+      continue;
     // it iterates backwards because erase invalidates all successor indexes
     for (int i = successorOperands.size() - 1; i >= 0; --i) {
       if (!op.nonLiveOperands[i])

felichita · 2026-02-21T23:15:55Z

Still has some underlying issue with poison on bb2 . Any thoughts?

./build/bin/mlir-opt build/rm-dead-values.mlir --remove-dead-values
module {
  func.func @main() -> i64 {
    %c0_i64 = arith.constant 0 : i64
    %c1_i64 = arith.constant 1 : i64
    %0 = arith.addi %c0_i64, %c1_i64 : i64
    %c2_i64 = arith.constant 2 : i64
    %1 = arith.muli %0, %c2_i64 : i64
    %c3_i64 = arith.constant 3 : i64
    %2 = arith.divsi %1, %c3_i64 : i64
    %c4_i64 = arith.constant 4 : i64
    %3 = arith.subi %2, %c4_i64 : i64
    %c5_i64 = arith.constant 5 : i64
    %4 = arith.cmpi slt, %3, %c5_i64 : i64
    cf.cond_br %4, ^bb1, ^bb2
  ^bb1:  // pred: ^bb0
    %c10_i64 = arith.constant 10 : i64
    cf.br ^bb3(%c10_i64 : i64)
  ^bb2:  // pred: ^bb0
    %5 = ub.poison : i64
    cf.br ^bb3(%5 : i64)
  ^bb3(%6: i64):  // 2 preds: ^bb1, ^bb2
    %7 = arith.addi %3, %6 : i64
    return %7 : i64
  }
}

github-actions · 2026-02-21T23:18:40Z

🐧 Linux x64 Test Results

7594 tests passed
603 tests skipped

✅ The build succeeded and all tests passed.

github-actions · 2026-02-21T23:18:40Z

🪟 Windows x64 Test Results

3519 tests passed
416 tests skipped

✅ The build succeeded and all tests passed.

mlir/lib/Dialect/GPU/IR/GPUDialect.cpp

matthias-springer

The commit message is outdated. Can you describe in detail what this PR is changing and the reason for the implementation choice?

mlir/lib/Analysis/DataFlow/LivenessAnalysis.cpp

mlir/lib/Transforms/RemoveDeadValues.cpp

mlir/test/Transforms/remove-dead-values.mlir

matthias-springer

I took a brief look at this bug. I believe the general design of the branch op handling is flawed. (Or a the very least overly complex.) I would suggest to adopt a design similar #173505. I.e., do only the bare minimum during remove-dead-values: replace the operands of branch ops with ub.poison. Then rely on the canonicalizer to actually remove the block arguments.

felichita · 2026-02-25T23:31:05Z

@matthias-springer I've done the same design as #173505 - dead successor operands are now replaced with ub.poison instead of being removed directly, and block arguments are left for the canonicalizer to clean up.

The BlockArgsToCleanup and SuccessorOperandsToCleanup structures have been removed, along with the corresponding cleanup logic in cleanUpDeadVals. Existing tests have been updated to reflect the new behavior.

matthias-springer · 2026-02-26T07:18:02Z

mlir/test/Transforms/remove-dead-values.mlir

-  // CHECK: cf.br ^[[BB3:bb[0-9]+]]
-  // CHECK-NOT: i32
+  // CHECK: cf.br ^[[BB3:bb[0-9]+]](%{{.*}}, %{{.*}} : i32, i32)
+  // CHECK-CANONICALIZE: cf.br ^[[BB3:bb[0-9]+]](%{{.*}}, %{{.*}} : i32, i32)


This is more or less what I had in mind, but now we have a regression here.

I think this will some design exploration: It looks like canonicalizer is not able to further simplify the IR. Are we missing some canonicalization patterns? What does IR look before the canonicalizer is run?

E.g., we may be missing pattern for:

^bb0(%arg0: ...) use(%arg0)

If all predecessor values of %arg0 are the same constant (such as ub.poison), materialize a replacement constant as a replacement for %arg0. The block arg can then fold away. (I'm just guessing, I haven't actually looked at the IR before the canonicalizer run.)

The canonicalizer doesn't simplify the IR at all in this case. Unfortunately, the output before and after canonicalization is identical.

func.func @acceptable_ir_has_cleanable_loop_of_conditional_and_branch_op(%arg0: i1) { %0 = ub.poison : i32 cf.br ^bb1(%0 : i32) ^bb1(%1: i32): // 2 preds: ^bb0, ^bb2 %2 = ub.poison : i32 %3 = ub.poison : i32 cf.br ^bb2(%2, %3 : i32, i32) ^bb2(%4: i32, %5: i32): // pred: ^bb1 %6 = ub.poison : i32 %7 = ub.poison : i32 cf.cond_br %arg0, ^bb1(%6 : i32), ^bb3(%7 : i32) ^bb3(%8: i32): // pred: ^bb2 return } }

The existing simplifyBrToBlockWithSinglePred pattern only handles blocks with a single predecessor. There is no pattern that handles the case you described above, when all predecessor values for a block argument are the same ub.poison.

To fix the regression, I would need to add a new canonicalization pattern to ControlFlowOps.cpp that replaces uses of a block argument with the common incoming value...

Exactly, that's what I meant. We do the same for region-branch control flow: the MakeRegionBranchOpSuccessorInputsDead performs that kind of canonicalization for region-based ops.

You picked a kind of tricky issue from the bug tracker here. It took me about a month to implement + merge all this for region-based ops. But on the bright side, you can learn a lot from this issue.

To give some background on RemoveDeadValues: this pass has broken for a long time and we have fixed numerous bugs over the last year. In the beginning, by patching up issues, as you tried in the beginning. But we always ran into more problems.

It turned out that a better design is to keep this pass as minimal as possible. Ideally, the canonicalizer pass would already perform all of these optimizations, but it has only local knowledge about the IR. That's because it cannot perform a dataflow analysis, which would give additional insight about the liveness of values.

The new, improved design is to use RemoveDeadValues to just "inject" the required information into the IR. E.g., by swapping out uses with ub.poison. Afterwards, the canonicalizer pass can take over. (In fact, we apply some canonicalization patterns at the end of this pass.)

Hmm, actually the canonicalizer successfully simplifies the above IR, even without -cse:

func.func @acceptable_ir_has_cleanable_loop_of_conditional_and_branch_op(%arg0: i1) { cf.br ^bb1 ^bb1: // 2 preds: ^bb0, ^bb1 cf.cond_br %arg0, ^bb1, ^bb2 ^bb2: // pred: ^bb1 return }

A few patterns may still be missing. E.g., it does nothing on this IR. (edit: remove-dead-values doesn't do anything with this either.)

func.func @foo(%val: f32, %c:i1) -> f32{ cf.cond_br %c, ^bb0(%val : f32), ^bb1(%val : f32) ^bb0(%arg0: f32): return %arg0: f32 ^bb1(%arg1: f32): return %arg1: f32 }

@joker-eph Running CSE inside the pass would be redundant users who already run CSE in their pipeline would pay the cost twice

Looks like CSE is not necessary here. I ran -canonicalize on the IR that you posted above and it simplified.

You likely just have to update this to also include branching ops.

// Canonicalize all region branch ops. SmallVector<Operation *> opsToCanonicalize; module->walk([&](RegionBranchOpInterface regionBranchOp) { opsToCanonicalize.push_back(regionBranchOp.getOperation()); });

I ended up using applyPatternsGreedily over the module to apply canonicalization patterns for RegionBranchOpInterface and BranchOpInterface ops. I'm not sure this is the best solution, but it works.

I also had to lower the benefit of SimplifyCondBranchBlockArgWithUniformIncomingValues to 0 so it runs last - otherwise it was breaking other canonicalization tests by interfering with SimplifyCondBranchIdenticalSuccessors.

When using applyOpPatternsGreedily on a collected list of ops, the poison block args are not fully simplified in a single pass - multiple canonicalization rounds are needed to remove all unnecessary block args and branches. Switching to applyPatternsGreedily(module, *argv) fixes this and fully simplifies the IR, but I'm hitting a failure in the SPIRV tests that I need to investigate. Apparently, there are plenty func with the same symptoms besides SPIRV

OK, I added two canonicalization in SPIRV test to according to remove-dead-values. Should work then

matthias-springer

The new test cases for canonicalize.mlir already canonicalize without your changes to ControlFlowOps.cpp. Can you provide test cases that show what's new?

Also, let's split the change to ControlFlowOps.cpp into a new PR. Any improvements to the canonicalization are useful even without -remove-dead-values.

mlir/test/Dialect/ControlFlow/canonicalize.mlir

matthias-springer · 2026-02-27T09:36:29Z

mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp

+    }
+
+    if (allSame && commonValue && commonValue != arg) {
+      if (!commonValue.getDefiningOp<ub::PoisonOp>())


We shouldn't hard-code any ops here. If that's necessary, it would indicate a problem with the design to me.

(I'd recommend to start with a pattern that checks for same SSA values. That would already be useful. And then we can iterate from there.)

might be worth adding a matcher for poison constants

I would handle this by extracting a constant as an Attribute. If both Attributes are the same, the SSA values are equal.

@matthias-springer For the constant attribute equality follow-up: when all incoming values match the same constant attribute, what is the best way to materialize the replacement value clone one of the existing constant ops from the IR or use rewriter.createOrFold?

There are possible ways:

Clone one of the existing ops.

Use the constant materializer. (value.getDefiningOp()->getDialect()->materializeConstant(...))

I believe the second option is safer. Cloning could be problematic because the op that defines the constant could have a side effect. Make sure to also account for the case where materializeConstant fails (E.g., because the dialect does not implement constant materialization.)

Add a canonicalization pattern that replaces block arguments with a common SSA value when all predecessors pass the same value for that argument. This allows the block argument to be removed by dead code elimination. Ref llvm#182711

) Add a canonicalization pattern that replaces block arguments with a common SSA value when all predecessors pass the same value for that argument. This allows the block argument to be removed by dead code elimination. First itteration Idea from #182711

joker-eph · 2026-03-04T13:35:07Z

mlir/lib/Transforms/RemoveDeadValues.cpp

-  if (failed(applyOpPatternsGreedily(opsToCanonicalize,
-                                     std::move(owningPatterns)))) {
+  });
+  if (failed(applyPatternsGreedily(module, std::move(owningPatterns)))) {


Why this change where we're not restricting ourselves to the list of pre-defined ops?

The old approach collected a list of ops and applied patterns only to those, but at the same time new canonicalizer needs to propagate across block boundaries to eliminate the corresponding block arguments. applyOpPatternsGreedily on a fixed op list can't do that applyPatternsGreedily on the module allows patterns to canonicalize wherever needed

Can you point to the code where this happens? Looks like a property of the driver unrelated to patterns.

If I do smth like this:

module->walk([&](Operation *op) { if (isa<RegionBranchOpInterface, BranchOpInterface>(op)) opsToCanonicalize.push_back(op); });

then block args will not be removed, only uses will be replaced. I guess it's not right, so I choose module instead specific ops.

Another way, if I add successor block implicitly inside the loop, smth like this:

for (Block *succ : branch->getSuccessors()) if (Operation *term = succ->getTerminator()) opsToCanonicalize.push_back(term);

example mlir where it doesn't work safely without additional bfs

func.func @ex(%cond: i1, %a: i32) -> i32 { cf.cond_br %cond, ^bb1, ^bb2 ^bb1: cf.br ^bb3(%a : i32) // branchOp - in the list ^bb2: cf.br ^bb3(%a : i32) // in the list as well ^bb3(%arg0: i32): // successor - terminator cf.br ^bb4(%arg0 : i32) // will be added as well like a terminator ^bb4(%arg1: i32): // but ^bb4 is not ... return %arg1 : i32 }

The module traversal seemed like prose at first glance, although it was necessary to change the approach. Apparently, there is better way to do it

Your implementation rewrites uses of block arguments, but the canonicalization pattern (to further clean up) is registered on cf.br. You could collect all "affected" branch ops (terminators of predecessor blocks). (Just collecting all branch ops, as you're doing here, would also be fine with me.)

Long term, I think we should remove canonicalization from this pass entirely. Users probably use -remove-dead-values in conjunction with the canonicalizer anyway. (This pass is supposed to clean up things that the canonicalizer cannot see, and after running this pass there may be additional cleanup opportunities for the canonicalizer. So you'd probably want to run the "full" canonicalizer.)

I actually don't really support such a direction: canonicalizer is heavy and costly, if a pass has specific knowledge that can do targeted simplification, I rather do these directly and as cheaply as possible.

Note: using ub ops instead of performing direct simplifications in the IR is already a bit unfortunate from this perspective.

OK, then what about running canonicalization patterns on all RegionBranchOpInterface ops and BranchOpInterface ops (kind of what this PR is doing in its current state)? That's already much more lightweight.

There's one problem with narrowing it down even further to only branch ops that are also somehow related to dropped uses of block arguments. By applying canonicalization, we get a few extra simplifications such as folding pass-through branches. It would be somewhat odd if these simplifications are applied only to basic block from which this pass happened to drop block arguments. (Makes it hard to explain what exactly this pass is doing.)

Note: using ub ops instead of performing direct simplifications in the IR is already a bit unfortunate from this perspective.

Would be nice, but not practical IMO. How many bugs did we have in this pass over the last year? I have given up on that approach, it's too complicated. I'd rather pay the extra compilation cost and have it correct in return...

Interestingly, another thing that removed a lot of complexity (related to region branch ops) was that it's no longer one large piece of code, but 3 smaller patterns that are applied to a fixed-point. With intermediate IR instead of internal C++ data structures / mappings. Definitely more expensive from a compilation time perspective, but also much easier to understand.

My thinking here was, when compilation time turns out to be a problem in your project, you can always implement op-specific cleanup passes that do not query interfaces and do not have to account for the full generality of what RegionBranchOpInterface etc. allow.

Here is a comparison of the three approaches on the same input:

// 1. main branch (old direct removal): func.func @acceptable_ir_has_cleanable_loop_of_conditional_and_branch_op(%arg0: i1) { cf.br ^bb1 ^bb1: // 2 preds: ^bb0, ^bb2 cf.br ^bb2 ^bb2: // pred: ^bb1 cf.cond_br %arg0, ^bb1, ^bb3 ^bb3: // pred: ^bb2 return } // ----- // 2. module-level traversal (this PR): func.func @acceptable_ir_has_cleanable_loop_of_conditional_and_branch_op(%arg0: i1) { cf.br ^bb1 ^bb1: // 2 preds: ^bb0, ^bb1 cf.cond_br %arg0, ^bb1, ^bb2 ^bb2: // pred: ^bb1 return } // ----- // 3. fixed op list (BranchOpInterface + RegionBranchOpInterface only): func.func @acceptable_ir_has_cleanable_loop_of_conditional_and_branch_op(%arg0: i1) { %0 = ub.poison : i32 cf.br ^bb1(%0 : i32) ^bb1(%1: i32): // 2 preds: ^bb0, ^bb1 %2 = ub.poison : i32 %3 = ub.poison : i32 cf.cond_br %arg0, ^bb1(%2 : i32), ^bb2(%3 : i32) ^bb2(%4: i32): // pred: ^bb1 return }

Option 3 is a regression on that way. Are we ok with that, relying on a follow-up --canonicalize to clean it up?

Would be nice, but not practical IMO. How many bugs did we have in this pass over the last year? I have given up on that approach, it's too complicated. I'd rather pay the extra compilation cost and have it correct in return...

To me the wrong approach was applied in the first place, with too many unforeseen holes, and then a whack-a-mole approach of fixing bugs in a very ad-hoc way was taken: that's hardly a convincing argument to me that this cannot just be done by starting from first principles and building on top of it.

My thinking here was, when compilation time turns out to be a problem in your project, you can always implement op-specific cleanup passes that do not query interfaces and do not have to account for the full generality of what RegionBranchOpInterface etc. allow.

That absolutely does not address the issue: when you perform transformation you have direct knowledge of the specific piece of IR that can be simplified. There is no "lightweight cleanup" replacement for this.

Let's agree to disagree. I already stamped the PR, so this good from my perspective, but feel free to keep iterating with @felichita.

…#183966) Add a canonicalization pattern that replaces block arguments with a common SSA value when all predecessors pass the same value for that argument. This allows the block argument to be removed by dead code elimination. First itteration Idea from llvm#182711

mlir/test/Transforms/remove-dead-values.mlir

…#183966) Add a canonicalization pattern that replaces block arguments with a common SSA value when all predecessors pass the same value for that argument. This allows the block argument to be removed by dead code elimination. First itteration Idea from llvm#182711

mlir/test/Transforms/remove-dead-values.mlir

Replace the complex block argument removal logic in processBranchOp with a simpler design based on ub.poison replacement, similar to the approach used for region branch ops. Now, dead successor operands are replaced with ub.poison and block arguments are left intact, relying on the canonicalizer to remove them once all incoming operands are poison. Fixes llvm#182263

felichita requested a review from fabianmcg as a code owner February 21, 2026 23:09

llvmbot added mlir:core MLIR Core Infrastructure mlir:gpu mlir labels Feb 21, 2026

felichita force-pushed the remove-dead-values-nested-region-crash branch from 9140ab3 to 657b854 Compare February 22, 2026 13:37

matthias-springer reviewed Feb 22, 2026

View reviewed changes

mlir/lib/Dialect/GPU/IR/GPUDialect.cpp Outdated Show resolved Hide resolved

matthias-springer requested changes Feb 22, 2026

View reviewed changes

felichita force-pushed the remove-dead-values-nested-region-crash branch from 657b854 to 193924b Compare February 22, 2026 14:47

felichita requested a review from matthias-springer February 22, 2026 14:48

matthias-springer reviewed Feb 23, 2026

View reviewed changes

felichita changed the title ~~[mlir][gpu] Fix crash in RemoveDeadValues pass with gpu.launch ops~~ [mlir][opt][RemoveDeadValues] Fix crash when processing ops with regions that don't implement RegionBranchOpInterface Feb 23, 2026

felichita force-pushed the remove-dead-values-nested-region-crash branch from 193924b to c0187d0 Compare February 23, 2026 10:17

felichita requested a review from matthias-springer February 23, 2026 10:22

joker-eph reviewed Feb 23, 2026

View reviewed changes

mlir/lib/Transforms/RemoveDeadValues.cpp Outdated Show resolved Hide resolved

felichita force-pushed the remove-dead-values-nested-region-crash branch 2 times, most recently from 4abe4e0 to 9514cd5 Compare February 23, 2026 16:42

felichita requested a review from joker-eph February 23, 2026 16:57

felichita force-pushed the remove-dead-values-nested-region-crash branch from 9514cd5 to 3e615e5 Compare February 23, 2026 20:33

matthias-springer reviewed Feb 25, 2026

View reviewed changes

mlir/test/Transforms/remove-dead-values.mlir Outdated Show resolved Hide resolved

matthias-springer reviewed Feb 25, 2026

View reviewed changes

felichita force-pushed the remove-dead-values-nested-region-crash branch from 3e615e5 to c59511f Compare February 25, 2026 23:24

felichita requested a review from matthias-springer February 25, 2026 23:25

felichita changed the title ~~[mlir][opt][RemoveDeadValues] Fix crash when processing ops with regions that don't implement RegionBranchOpInterface~~ [mlir][RemoveDeadValues] Simplify branch op handling using ub.poison Feb 25, 2026

matthias-springer reviewed Feb 26, 2026

View reviewed changes

felichita force-pushed the remove-dead-values-nested-region-crash branch from c59511f to 3994508 Compare February 26, 2026 14:58

llvmbot added the mlir:cf label Feb 26, 2026

felichita force-pushed the remove-dead-values-nested-region-crash branch from 3994508 to 43e539f Compare February 26, 2026 16:34

felichita requested review from antiagainst and kuhar as code owners February 26, 2026 16:34

llvmbot added the mlir:spirv label Feb 26, 2026

felichita requested a review from matthias-springer February 26, 2026 16:50

matthias-springer reviewed Feb 27, 2026

View reviewed changes

mlir/test/Dialect/ControlFlow/canonicalize.mlir Outdated Show resolved Hide resolved

mlir/test/Dialect/ControlFlow/canonicalize.mlir Outdated Show resolved Hide resolved

matthias-springer reviewed Feb 27, 2026

View reviewed changes

felichita mentioned this pull request Feb 28, 2026

[mlir][cf] Canonicalize block args with uniform incoming values #183966

Merged

felichita force-pushed the remove-dead-values-nested-region-crash branch from 43e539f to d8486e1 Compare March 4, 2026 13:31

joker-eph reviewed Mar 4, 2026

View reviewed changes

matthias-springer approved these changes Mar 4, 2026

View reviewed changes

mlir/test/Transforms/remove-dead-values.mlir Outdated Show resolved Hide resolved

felichita force-pushed the remove-dead-values-nested-region-crash branch from d8486e1 to 4624c48 Compare March 5, 2026 13:30

felichita requested a review from matthias-springer March 5, 2026 13:31

matthias-springer approved these changes Mar 6, 2026

View reviewed changes

mlir/test/Transforms/remove-dead-values.mlir Show resolved Hide resolved

mlir/test/Transforms/remove-dead-values.mlir Outdated Show resolved Hide resolved

felichita force-pushed the remove-dead-values-nested-region-crash branch from 4624c48 to f7b395c Compare March 6, 2026 11:11

Conversation

felichita commented Feb 21, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Feb 21, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

felichita commented Feb 21, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

github-actions bot commented Feb 21, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

🐧 Linux x64 Test Results

Uh oh!

github-actions bot commented Feb 21, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

🪟 Windows x64 Test Results

Uh oh!

Uh oh!

matthias-springer left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

matthias-springer left a comment

Choose a reason for hiding this comment

Uh oh!

felichita commented Feb 25, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

matthias-springer Feb 26, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

matthias-springer Feb 26, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

felichita Feb 26, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

matthias-springer left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

matthias-springer Feb 27, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

matthias-springer Mar 4, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

joker-eph Mar 4, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

felichita commented Feb 21, 2026 •

edited

Loading

llvmbot commented Feb 21, 2026 •

edited

Loading

felichita commented Feb 21, 2026 •

edited

Loading

github-actions bot commented Feb 21, 2026 •

edited

Loading

github-actions bot commented Feb 21, 2026 •

edited

Loading

felichita commented Feb 25, 2026 •

edited

Loading

matthias-springer Feb 26, 2026 •

edited

Loading

matthias-springer Feb 26, 2026 •

edited

Loading

felichita Feb 26, 2026 •

edited

Loading

matthias-springer Feb 27, 2026 •

edited

Loading

matthias-springer Mar 4, 2026 •

edited

Loading

joker-eph Mar 4, 2026 •

edited

Loading

matthias-springer Mar 4, 2026 •

edited

Loading

joker-eph Mar 4, 2026 •

edited

Loading

matthias-springer Mar 4, 2026 •

edited

Loading

matthias-springer Mar 5, 2026 •

edited

Loading