-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[llvm][instcombine] Add Missed Optimization for Folding Min Max intrinsic into PHI instruction #84619
base: main
Are you sure you want to change the base?
Conversation
…ics in phi instcombine pass
@llvm/pr-subscribers-llvm-transforms Author: None (PeterChou1) ChangesResolves: #84342 This implementation heavily borrows from foldICmpWithMinMax in InstCombineCompares.cpp This is also one my first non-doc pull request for llvm so apologies in advance if the code looks bad. Full diff: https://github.com/llvm/llvm-project/pull/84619.diff 3 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 57148d719d9b61..c9221dae533346 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -624,6 +624,9 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
Instruction *foldPHIArgLoadIntoPHI(PHINode &PN);
Instruction *foldPHIArgZextsIntoPHI(PHINode &PN);
Instruction *foldPHIArgIntToPtrToPHI(PHINode &PN);
+ Instruction *foldPHIWithMinMax(PHINode &PN);
+ Instruction *foldPHIWithMinMaxHelper(PHINode &PN, Instruction *I, Value *Z,
+ ICmpInst::Predicate Pred);
/// If an integer typed PHI has only one use which is an IntToPtr operation,
/// replace the PHI with an existing pointer typed PHI if it exists. Otherwise
diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 46bca4b722a03a..71f8cbeb9a9dbd 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -611,6 +611,94 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) {
return NewGEP;
}
+/// helper function for foldPHIWithMinMax
+Instruction *
+InstCombinerImpl::foldPHIWithMinMaxHelper(PHINode &PN, Instruction *I, Value *Z,
+ ICmpInst::Predicate Pred) {
+
+ auto IsCondKnownTrue = [](Value *Val) -> std::optional<bool> {
+ if (!Val)
+ return std::nullopt;
+ if (match(Val, m_One()))
+ return true;
+ if (match(Val, m_Zero()))
+ return false;
+ return std::nullopt;
+ };
+
+ ICmpInst::Predicate SwappedPred =
+ ICmpInst::getNonStrictPredicate(ICmpInst::getSwappedPredicate(Pred));
+ for (unsigned OpNum = 0; OpNum != PN.getNumIncomingValues(); ++OpNum) {
+ if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(PN.getIncomingValue(OpNum))) {
+ if (Pred != MinMax->getPredicate())
+ continue;
+
+ Value *X = MinMax->getLHS();
+ Value *Y = MinMax->getRHS();
+
+ SimplifyQuery Q = SQ.getWithInstruction(I);
+
+ auto CmpXZ = IsCondKnownTrue(simplifyICmpInst(SwappedPred, X, Z, Q));
+ auto CmpYZ = IsCondKnownTrue(simplifyICmpInst(SwappedPred, Y, Z, Q));
+
+ if (!CmpXZ.has_value() && !CmpYZ.has_value())
+ continue;
+ if (CmpXZ.has_value() && CmpYZ.has_value())
+ continue;
+
+ // swap XZ with YZ so XZ always has value
+ if (!CmpXZ.has_value()) {
+ std::swap(X, Y);
+ std::swap(CmpXZ, CmpYZ);
+ }
+
+ switch (Pred) {
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_UGT:
+ // if X >= Z
+ // %min = llvm.min ( X, Y )
+ // %phi = phi %min ... => %phi = phi Y ..
+ // %cmp = icmp lt %phi, Z %cmp = icmp %phi, Z
+ // if X <= Z
+ // %max = llvm.max ( X, Y )
+ // %phi = phi %max ... => %phi = phi Y ..
+ // %cmp = icmp gt %phi, Z %cmp = icmp %phi, Z
+ if (CmpXZ.value()) {
+ if (MinMax->hasOneUse()) {
+ MinMax->eraseFromParent();
+ }
+ PN.setIncomingValue(OpNum, Y);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ return nullptr;
+}
+
+/// Fold max min intrinsic into PHI instruction
+Instruction *InstCombinerImpl::foldPHIWithMinMax(PHINode &PN) {
+
+ if (!PN.hasOneUse())
+ return nullptr;
+
+ // The PHI instruction must only be used by a ICmp Instruction
+ if (ICmpInst *ICmp = dyn_cast<ICmpInst>(PN.getUniqueUndroppableUser())) {
+ Value *Op0 = ICmp->getOperand(0), *Op1 = ICmp->getOperand(1);
+ // case 1: icmp <op> %phi, %intrinsic
+ if (isa<PHINode>(Op0))
+ return foldPHIWithMinMaxHelper(PN, ICmp, Op1, ICmp->getPredicate());
+ // case 2: icmp <op> %intrinsic, %phi
+ else if (isa<PHINode>(Op1))
+ return foldPHIWithMinMaxHelper(PN, ICmp, Op0, ICmp->getSwappedPredicate());
+ }
+ return nullptr;
+}
+
/// Return true if we know that it is safe to sink the load out of the block
/// that defines it. This means that it must be obvious the value of the load is
/// not changed from the point of the load to the end of the block it is in.
@@ -1651,5 +1739,8 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
if (Value *Res = foldDependentIVs(PN, Builder))
return replaceInstUsesWith(PN, Res);
+ if (Instruction *Result = foldPHIWithMinMax(PN))
+ return Result;
+
return nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/fold-phi-minmax.ll b/llvm/test/Transforms/InstCombine/fold-phi-minmax.ll
new file mode 100644
index 00000000000000..55b464cdc58248
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-phi-minmax.ll
@@ -0,0 +1,314 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+
+; test phi combine less than (equal)
+define i1 @src0(i32 %a, i32 %b, i1 %c) #0 {
+; CHECK-LABEL: @src0(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[LOOP:%.*]]
+; CHECK: then:
+; CHECK-NEXT: br label [[LOOP]]
+; CHECK: loop:
+; CHECK-NEXT: [[IND:%.*]] = phi i32 [ [[A:%.*]], [[THEN]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IND]], 6
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ br i1 %c, label %then, label %loop
+then:
+ %min = call i32 @llvm.umin.i32(i32 6, i32 %a)
+ br label %loop
+loop:
+ %ind = phi i32 [ %min, %then ], [ %b, %entry ]
+ %cmp = icmp ult i32 %ind, 6
+ ret i1 %cmp
+}
+
+; test phi combine less than (swapped) good
+define i1 @src1(i32 %a, i32 %b, i1 %c) #0 {
+; CHECK-LABEL: @src1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[LOOP:%.*]]
+; CHECK: then:
+; CHECK-NEXT: br label [[LOOP]]
+; CHECK: loop:
+; CHECK-NEXT: [[IND:%.*]] = phi i32 [ [[A:%.*]], [[THEN]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[IND]], 6
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ br i1 %c, label %then, label %loop
+then:
+ %min = call i32 @llvm.umax.i32(i32 6, i32 %a)
+ br label %loop
+loop:
+ %ind = phi i32 [ %min, %then ], [ %b, %entry ]
+ %cmp = icmp ult i32 6, %ind
+ ret i1 %cmp
+}
+
+; test phi combine less than (swapped) bad
+define i1 @src2(i32 %a, i32 %b, i1 %c) #0 {
+; CHECK-LABEL: @src2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[LOOP:%.*]]
+; CHECK: then:
+; CHECK-NEXT: [[MIN:%.*]] = call i32 @llvm.umax.i32(i32 [[A:%.*]], i32 6)
+; CHECK-NEXT: br label [[LOOP]]
+; CHECK: loop:
+; CHECK-NEXT: [[IND:%.*]] = phi i32 [ [[MIN]], [[THEN]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IND]], 6
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ br i1 %c, label %then, label %loop
+then:
+ %min = call i32 @llvm.umax.i32(i32 6, i32 %a)
+ br label %loop
+loop:
+ %ind = phi i32 [ %min, %then ], [ %b, %entry ]
+ %cmp = icmp ugt i32 6, %ind
+ ret i1 %cmp
+}
+
+
+; test phi combine less than (reversed)
+define i1 @src3(i32 %a, i32 %b, i1 %c) #0 {
+; CHECK-LABEL: @src3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[LOOP:%.*]]
+; CHECK: then:
+; CHECK-NEXT: br label [[LOOP]]
+; CHECK: loop:
+; CHECK-NEXT: [[IND:%.*]] = phi i32 [ [[A:%.*]], [[THEN]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IND]], 6
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ br i1 %c, label %then, label %loop
+then:
+ %min = call i32 @llvm.umin.i32(i32 %a, i32 6)
+ br label %loop
+
+loop:
+ %ind = phi i32 [ %min, %then ], [ %b, %entry ]
+ %cmp = icmp ult i32 %ind, 6
+ ret i1 %cmp
+}
+
+; test phi combine less than (over)
+define i1 @src4(i32 %a, i32 %b, i1 %c) #0 {
+; CHECK-LABEL: @src4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[LOOP:%.*]]
+; CHECK: then:
+; CHECK-NEXT: br label [[LOOP]]
+; CHECK: loop:
+; CHECK-NEXT: [[IND:%.*]] = phi i32 [ [[A:%.*]], [[THEN]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IND]], 6
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ br i1 %c, label %then, label %loop
+then:
+ %min = call i32 @llvm.umin.i32(i32 %a, i32 7)
+ br label %loop
+
+loop:
+ %ind = phi i32 [ %min, %then ], [ %b, %entry ]
+ %cmp = icmp ult i32 %ind, 6
+ ret i1 %cmp
+}
+
+; test phi combine less than (under)
+define i1 @src5(i32 %a, i32 %b, i1 %c) #0 {
+; CHECK-LABEL: @src5(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[LOOP:%.*]]
+; CHECK: then:
+; CHECK-NEXT: [[MIN:%.*]] = call i32 @llvm.umin.i32(i32 [[A:%.*]], i32 5)
+; CHECK-NEXT: br label [[LOOP]]
+; CHECK: loop:
+; CHECK-NEXT: [[IND:%.*]] = phi i32 [ [[MIN]], [[THEN]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IND]], 6
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ br i1 %c, label %then, label %loop
+then:
+ %min = call i32 @llvm.umin.i32(i32 %a, i32 5)
+ br label %loop
+
+loop:
+ %ind = phi i32 [ %min, %then ], [ %b, %entry ]
+ %cmp = icmp ult i32 %ind, 6
+ ret i1 %cmp
+}
+
+; test phi combine greater than (equal)
+define i1 @src6(i32 %a, i32 %b, i1 %c) #0 {
+; CHECK-LABEL: @src6(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[LOOP:%.*]]
+; CHECK: then:
+; CHECK-NEXT: br label [[LOOP]]
+; CHECK: loop:
+; CHECK-NEXT: [[IND:%.*]] = phi i32 [ [[A:%.*]], [[THEN]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[IND]], 6
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ br i1 %c, label %then, label %loop
+then:
+ %min = call i32 @llvm.umax.i32(i32 %a, i32 6)
+ br label %loop
+
+loop:
+ %ind = phi i32 [ %min, %then ], [ %b, %entry ]
+ %cmp = icmp ugt i32 %ind, 6
+ ret i1 %cmp
+}
+
+; test phi combine greater than (over)
+define i1 @src7(i32 %a, i32 %b, i1 %c) #0 {
+; CHECK-LABEL: @src7(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[LOOP:%.*]]
+; CHECK: then:
+; CHECK-NEXT: [[MIN:%.*]] = call i32 @llvm.umax.i32(i32 [[A:%.*]], i32 7)
+; CHECK-NEXT: br label [[LOOP]]
+; CHECK: loop:
+; CHECK-NEXT: [[IND:%.*]] = phi i32 [ [[MIN]], [[THEN]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[IND]], 6
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ br i1 %c, label %then, label %loop
+then:
+ %min = call i32 @llvm.umax.i32(i32 %a, i32 7)
+ br label %loop
+
+loop:
+ %ind = phi i32 [ %min, %then ], [ %b, %entry ]
+ %cmp = icmp ugt i32 %ind, 6
+ ret i1 %cmp
+}
+
+; test phi combine greater than (under)
+define i1 @src8(i32 %a, i32 %b, i1 %c) #0 {
+; CHECK-LABEL: @src8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[LOOP:%.*]]
+; CHECK: then:
+; CHECK-NEXT: br label [[LOOP]]
+; CHECK: loop:
+; CHECK-NEXT: [[IND:%.*]] = phi i32 [ [[A:%.*]], [[THEN]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[IND]], 6
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ br i1 %c, label %then, label %loop
+then:
+ %min = call i32 @llvm.umax.i32(i32 %a, i32 5)
+ br label %loop
+
+loop:
+ %ind = phi i32 [ %min, %then ], [ %b, %entry ]
+ %cmp = icmp ugt i32 %ind, 6
+ ret i1 %cmp
+}
+
+; test phi combine greater than (swapped-equal) good
+define i1 @src9(i32 %a, i32 %b, i1 %c) #0 {
+; CHECK-LABEL: @src9(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[LOOP:%.*]]
+; CHECK: then:
+; CHECK-NEXT: br label [[LOOP]]
+; CHECK: loop:
+; CHECK-NEXT: [[IND:%.*]] = phi i32 [ [[A:%.*]], [[THEN]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IND]], 6
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ br i1 %c, label %then, label %loop
+then:
+ %min = call i32 @llvm.umin.i32(i32 %a, i32 6)
+ br label %loop
+loop:
+ %ind = phi i32 [ %min, %then ], [ %b, %entry ]
+ %cmp = icmp ugt i32 6, %ind
+ ret i1 %cmp
+}
+
+; test phi combine greater than (swapped-over) good
+define i1 @src11(i32 %a, i32 %b, i1 %c) #0 {
+; CHECK-LABEL: @src11(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[LOOP:%.*]]
+; CHECK: then:
+; CHECK-NEXT: br label [[LOOP]]
+; CHECK: loop:
+; CHECK-NEXT: [[IND:%.*]] = phi i32 [ [[A:%.*]], [[THEN]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IND]], 6
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ br i1 %c, label %then, label %loop
+then:
+ %min = call i32 @llvm.umin.i32(i32 %a, i32 7)
+ br label %loop
+loop:
+ %ind = phi i32 [ %min, %then ], [ %b, %entry ]
+ %cmp = icmp ugt i32 6, %ind
+ ret i1 %cmp
+}
+
+; test phi combine greater than (swapped-under) bad
+define i1 @src12(i32 %a, i32 %b, i1 %c) #0 {
+; CHECK-LABEL: @src12(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[LOOP:%.*]]
+; CHECK: then:
+; CHECK-NEXT: [[MIN:%.*]] = call i32 @llvm.umin.i32(i32 [[A:%.*]], i32 5)
+; CHECK-NEXT: br label [[LOOP]]
+; CHECK: loop:
+; CHECK-NEXT: [[IND:%.*]] = phi i32 [ [[MIN]], [[THEN]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IND]], 6
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ br i1 %c, label %then, label %loop
+then:
+ %min = call i32 @llvm.umin.i32(i32 %a, i32 5)
+ br label %loop
+loop:
+ %ind = phi i32 [ %min, %then ], [ %b, %entry ]
+ %cmp = icmp ugt i32 6, %ind
+ ret i1 %cmp
+}
+
+; test phi combine less than (swapped-equal) bad
+define i1 @src13(i32 %a, i32 %b, i1 %c) #0 {
+; CHECK-LABEL: @src13(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[LOOP:%.*]]
+; CHECK: then:
+; CHECK-NEXT: [[MIN:%.*]] = call i32 @llvm.umin.i32(i32 [[A:%.*]], i32 6)
+; CHECK-NEXT: br label [[LOOP]]
+; CHECK: loop:
+; CHECK-NEXT: [[IND:%.*]] = phi i32 [ [[MIN]], [[THEN]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[IND]], 6
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ br i1 %c, label %then, label %loop
+then:
+ %min = call i32 @llvm.umin.i32(i32 %a, i32 6)
+ br label %loop
+loop:
+ %ind = phi i32 [ %min, %then ], [ %b, %entry ]
+ %cmp = icmp ult i32 6, %ind
+ ret i1 %cmp
+}
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you please provide the alive2 proof for general cases?
Sure, I've construct alive2 proofs for every case in the optimization below min intrinsic optimization only works if x >= z
case 1. x = z should optimize: https://alive2.llvm.org/ce/z/IAXE_- swapped min intrinsic optimization only works if x >= z
case 5. x = z should optimize: https://alive2.llvm.org/ce/z/8RZEVx max intrinsic optimization only works if x <= z
case 8. x = z should optimize: https://alive2.llvm.org/ce/z/QYFiRR swapped max intrinsic optimization only works if x <= z
case 11. x = z should optimize: https://alive2.llvm.org/ce/z/B7VRzx These proofs generalize over smin, and smax intrinsics as well, but I will omit them for brevity |
6e36fbc
to
4156401
Compare
I think I've accounted for most of changes requested let me know if anything else changed |
if (CmpXZ.has_value() && CmpYZ.has_value()) { | ||
auto CmpXY = IsCondKnownTrue(simplifyICmpInst(SwappedPred, X, Y, Q)); | ||
if (!CmpXY.has_value()) | ||
continue; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you need to continue here?
I would think you could just ignore if both have a value and do you simplification of X
/Z
.
if Y
/Z
relationship is known, that will simplify later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see so we can revert to how it was implemented before. I assume some other pass will probably take care of it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The point is that irrelevant of the ordering of X
/Y
, if X >= Z
then min(X, Y) < Z
-> Y < Z
. Likewise for max.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also I think if we have an ordering for X
/Y
, the min
/max
will have simplified.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Apologies I think I've misunderstood your comment. I think you're suggesting to remove the code block
if (!CmpXY.has_value())
continue;
and just use X/Z to simplify since Y/Z relationship will be simplified later. I'm just failing to see how the Y/Z relationship will be simplified
Resolves: #84342
This implementation heavily borrows from foldICmpWithMinMax in InstCombineCompares.cpp
This is also one my first non-doc pull request for llvm so apologies in advance if the code looks bad.