[llvm][instcombine] Add Missed Optimization for Folding Min Max intrinsic into PHI instruction #84619

PeterChou1 · 2024-03-09T09:41:08Z

Resolves: #84342

This implementation heavily borrows from foldICmpWithMinMax in InstCombineCompares.cpp

This is also one my first non-doc pull request for llvm so apologies in advance if the code looks bad.

…ics in phi instcombine pass

…i instructions

llvmbot · 2024-03-09T09:41:38Z

@llvm/pr-subscribers-llvm-transforms

Author: None (PeterChou1)

Changes

Resolves: #84342

This implementation heavily borrows from foldICmpWithMinMax in InstCombineCompares.cpp

This is also one my first non-doc pull request for llvm so apologies in advance if the code looks bad.

Full diff: https://github.com/llvm/llvm-project/pull/84619.diff

3 Files Affected:

(modified) llvm/lib/Transforms/InstCombine/InstCombineInternal.h (+3)
(modified) llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp (+91)
(added) llvm/test/Transforms/InstCombine/fold-phi-minmax.ll (+314)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 57148d719d9b61..c9221dae533346 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -624,6 +624,9 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   Instruction *foldPHIArgLoadIntoPHI(PHINode &PN);
   Instruction *foldPHIArgZextsIntoPHI(PHINode &PN);
   Instruction *foldPHIArgIntToPtrToPHI(PHINode &PN);
+  Instruction *foldPHIWithMinMax(PHINode &PN);
+  Instruction *foldPHIWithMinMaxHelper(PHINode &PN, Instruction *I, Value *Z,
+                                       ICmpInst::Predicate Pred);
 
   /// If an integer typed PHI has only one use which is an IntToPtr operation,
   /// replace the PHI with an existing pointer typed PHI if it exists. Otherwise
diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 46bca4b722a03a..71f8cbeb9a9dbd 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -611,6 +611,94 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) {
   return NewGEP;
 }
 
+/// helper function for foldPHIWithMinMax
+Instruction *
+InstCombinerImpl::foldPHIWithMinMaxHelper(PHINode &PN, Instruction *I, Value *Z,
+                                          ICmpInst::Predicate Pred) {
+
+  auto IsCondKnownTrue = [](Value *Val) -> std::optional<bool> {
+    if (!Val)
+      return std::nullopt;
+    if (match(Val, m_One()))
+      return true;
+    if (match(Val, m_Zero()))
+      return false;
+    return std::nullopt;
+  };
+
+  ICmpInst::Predicate SwappedPred =
+      ICmpInst::getNonStrictPredicate(ICmpInst::getSwappedPredicate(Pred));
+  for (unsigned OpNum = 0; OpNum != PN.getNumIncomingValues(); ++OpNum) {
+    if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(PN.getIncomingValue(OpNum))) {
+      if (Pred != MinMax->getPredicate())
+        continue;
+
+      Value *X = MinMax->getLHS();
+      Value *Y = MinMax->getRHS();
+
+      SimplifyQuery Q = SQ.getWithInstruction(I);
+
+      auto CmpXZ = IsCondKnownTrue(simplifyICmpInst(SwappedPred, X, Z, Q));
+      auto CmpYZ = IsCondKnownTrue(simplifyICmpInst(SwappedPred, Y, Z, Q));
+
+      if (!CmpXZ.has_value() && !CmpYZ.has_value())
+        continue;
+      if (CmpXZ.has_value() && CmpYZ.has_value())
+        continue;
+
+      // swap XZ with YZ so XZ always has value
+      if (!CmpXZ.has_value()) {
+        std::swap(X, Y);
+        std::swap(CmpXZ, CmpYZ);
+      }
+
+      switch (Pred) {
+      case ICmpInst::ICMP_SLT:
+      case ICmpInst::ICMP_ULT:
+      case ICmpInst::ICMP_SGT:
+      case ICmpInst::ICMP_UGT:
+        // if X >= Z
+        // %min = llvm.min ( X, Y )
+        // %phi = phi %min ...         =>  %phi = phi Y ..
+        // %cmp = icmp lt %phi, Z          %cmp = icmp %phi, Z
+        // if X <= Z
+        // %max = llvm.max ( X, Y )
+        // %phi = phi %max ...         =>  %phi = phi Y ..
+        // %cmp = icmp gt %phi, Z          %cmp = icmp %phi, Z
+        if (CmpXZ.value()) {
+          if (MinMax->hasOneUse()) {
+            MinMax->eraseFromParent();
+          }
+          PN.setIncomingValue(OpNum, Y);
+        }
+        break;
+      default:
+        break;
+      }
+    }
+  }
+  return nullptr;
+}
+
+/// Fold max min intrinsic into PHI instruction
+Instruction *InstCombinerImpl::foldPHIWithMinMax(PHINode &PN) {
+
+  if (!PN.hasOneUse())
+    return nullptr;
+
+  // The PHI instruction must only be used by a ICmp Instruction
+  if (ICmpInst *ICmp = dyn_cast<ICmpInst>(PN.getUniqueUndroppableUser())) {
+    Value *Op0 = ICmp->getOperand(0), *Op1 = ICmp->getOperand(1);
+    // case 1: icmp <op> %phi, %intrinsic
+    if (isa<PHINode>(Op0))
+      return foldPHIWithMinMaxHelper(PN, ICmp, Op1, ICmp->getPredicate());
+    // case 2: icmp <op> %intrinsic, %phi
+    else if (isa<PHINode>(Op1))
+      return foldPHIWithMinMaxHelper(PN, ICmp, Op0, ICmp->getSwappedPredicate());
+  }
+  return nullptr;
+}
+
 /// Return true if we know that it is safe to sink the load out of the block
 /// that defines it. This means that it must be obvious the value of the load is
 /// not changed from the point of the load to the end of the block it is in.
@@ -1651,5 +1739,8 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
   if (Value *Res = foldDependentIVs(PN, Builder))
     return replaceInstUsesWith(PN, Res);
 
+  if (Instruction *Result = foldPHIWithMinMax(PN))
+    return Result;
+
   return nullptr;
 }
diff --git a/llvm/test/Transforms/InstCombine/fold-phi-minmax.ll b/llvm/test/Transforms/InstCombine/fold-phi-minmax.ll
new file mode 100644
index 00000000000000..55b464cdc58248
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-phi-minmax.ll
@@ -0,0 +1,314 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+
+; test phi combine less than (equal)
+define i1 @src0(i32 %a, i32 %b, i1 %c) #0 {
+; CHECK-LABEL: @src0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[LOOP:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    br label [[LOOP]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IND:%.*]] = phi i32 [ [[A:%.*]], [[THEN]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IND]], 6
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  br i1 %c, label %then, label %loop
+then:
+  %min = call i32 @llvm.umin.i32(i32 6, i32 %a)
+  br label %loop
+loop:
+  %ind = phi i32 [ %min, %then ], [ %b, %entry ]
+  %cmp = icmp ult i32 %ind, 6
+  ret i1 %cmp
+}
+
+; test phi combine less than (swapped) good
+define i1 @src1(i32 %a, i32 %b, i1 %c) #0 {
+; CHECK-LABEL: @src1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[LOOP:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    br label [[LOOP]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IND:%.*]] = phi i32 [ [[A:%.*]], [[THEN]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[IND]], 6
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  br i1 %c, label %then, label %loop
+then:
+  %min = call i32 @llvm.umax.i32(i32 6, i32 %a)
+  br label %loop
+loop:
+  %ind = phi i32 [ %min, %then ], [ %b, %entry ]
+  %cmp = icmp ult i32 6, %ind
+  ret i1 %cmp
+}
+
+; test phi combine less than (swapped) bad
+define i1 @src2(i32 %a, i32 %b, i1 %c) #0 {
+; CHECK-LABEL: @src2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[LOOP:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[MIN:%.*]] = call i32 @llvm.umax.i32(i32 [[A:%.*]], i32 6)
+; CHECK-NEXT:    br label [[LOOP]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IND:%.*]] = phi i32 [ [[MIN]], [[THEN]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IND]], 6
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  br i1 %c, label %then, label %loop
+then:
+  %min = call i32 @llvm.umax.i32(i32 6, i32 %a)
+  br label %loop
+loop:
+  %ind = phi i32 [ %min, %then ], [ %b, %entry ]
+  %cmp = icmp ugt i32 6, %ind
+  ret i1 %cmp
+}
+
+
+; test phi combine less than (reversed)
+define i1 @src3(i32 %a, i32 %b, i1 %c) #0 {
+; CHECK-LABEL: @src3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[LOOP:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    br label [[LOOP]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IND:%.*]] = phi i32 [ [[A:%.*]], [[THEN]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IND]], 6
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  br i1 %c, label %then, label %loop
+then:
+  %min = call i32 @llvm.umin.i32(i32 %a, i32 6)
+  br label %loop
+
+loop:
+  %ind = phi i32 [ %min, %then ], [ %b, %entry ]
+  %cmp = icmp ult i32 %ind, 6
+  ret i1 %cmp
+}
+
+; test phi combine less than (over)
+define i1 @src4(i32 %a, i32 %b, i1 %c) #0 {
+; CHECK-LABEL: @src4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[LOOP:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    br label [[LOOP]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IND:%.*]] = phi i32 [ [[A:%.*]], [[THEN]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IND]], 6
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  br i1 %c, label %then, label %loop
+then:
+  %min = call i32 @llvm.umin.i32(i32 %a, i32 7)
+  br label %loop
+
+loop:
+  %ind = phi i32 [ %min, %then ], [ %b, %entry ]
+  %cmp = icmp ult i32 %ind, 6
+  ret i1 %cmp
+}
+
+; test phi combine less than (under)
+define i1 @src5(i32 %a, i32 %b, i1 %c) #0 {
+; CHECK-LABEL: @src5(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[LOOP:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[MIN:%.*]] = call i32 @llvm.umin.i32(i32 [[A:%.*]], i32 5)
+; CHECK-NEXT:    br label [[LOOP]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IND:%.*]] = phi i32 [ [[MIN]], [[THEN]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IND]], 6
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  br i1 %c, label %then, label %loop
+then:
+  %min = call i32 @llvm.umin.i32(i32 %a, i32 5)
+  br label %loop
+
+loop:
+  %ind = phi i32 [ %min, %then ], [ %b, %entry ]
+  %cmp = icmp ult i32 %ind, 6
+  ret i1 %cmp
+}
+
+; test phi combine greater than (equal)
+define i1 @src6(i32 %a, i32 %b, i1 %c) #0 {
+; CHECK-LABEL: @src6(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[LOOP:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    br label [[LOOP]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IND:%.*]] = phi i32 [ [[A:%.*]], [[THEN]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[IND]], 6
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  br i1 %c, label %then, label %loop
+then:
+  %min = call i32 @llvm.umax.i32(i32 %a, i32 6)
+  br label %loop
+
+loop:
+  %ind = phi i32 [ %min, %then ], [ %b, %entry ]
+  %cmp = icmp ugt i32 %ind, 6
+  ret i1 %cmp
+}
+
+; test phi combine greater than (over)
+define i1 @src7(i32 %a, i32 %b, i1 %c) #0 {
+; CHECK-LABEL: @src7(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[LOOP:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[MIN:%.*]] = call i32 @llvm.umax.i32(i32 [[A:%.*]], i32 7)
+; CHECK-NEXT:    br label [[LOOP]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IND:%.*]] = phi i32 [ [[MIN]], [[THEN]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[IND]], 6
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  br i1 %c, label %then, label %loop
+then:
+  %min = call i32 @llvm.umax.i32(i32 %a, i32 7)
+  br label %loop
+
+loop:
+  %ind = phi i32 [ %min, %then ], [ %b, %entry ]
+  %cmp = icmp ugt i32 %ind, 6
+  ret i1 %cmp
+}
+
+; test phi combine greater than (under)
+define i1 @src8(i32 %a, i32 %b, i1 %c) #0 {
+; CHECK-LABEL: @src8(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[LOOP:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    br label [[LOOP]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IND:%.*]] = phi i32 [ [[A:%.*]], [[THEN]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[IND]], 6
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  br i1 %c, label %then, label %loop
+then:
+  %min = call i32 @llvm.umax.i32(i32 %a, i32 5)
+  br label %loop
+
+loop:
+  %ind = phi i32 [ %min, %then ], [ %b, %entry ]
+  %cmp = icmp ugt i32 %ind, 6
+  ret i1 %cmp
+}
+
+; test phi combine greater than (swapped-equal) good
+define i1 @src9(i32 %a, i32 %b, i1 %c) #0 {
+; CHECK-LABEL: @src9(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[LOOP:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    br label [[LOOP]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IND:%.*]] = phi i32 [ [[A:%.*]], [[THEN]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IND]], 6
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  br i1 %c, label %then, label %loop
+then:
+  %min = call i32 @llvm.umin.i32(i32 %a, i32 6)
+  br label %loop
+loop:
+  %ind = phi i32 [ %min, %then ], [ %b, %entry ]
+  %cmp = icmp ugt i32 6, %ind
+  ret i1 %cmp
+}
+
+; test phi combine greater than (swapped-over) good
+define i1 @src11(i32 %a, i32 %b, i1 %c) #0 {
+; CHECK-LABEL: @src11(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[LOOP:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    br label [[LOOP]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IND:%.*]] = phi i32 [ [[A:%.*]], [[THEN]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IND]], 6
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  br i1 %c, label %then, label %loop
+then:
+  %min = call i32 @llvm.umin.i32(i32 %a, i32 7)
+  br label %loop
+loop:
+  %ind = phi i32 [ %min, %then ], [ %b, %entry ]
+  %cmp = icmp ugt i32 6, %ind
+  ret i1 %cmp
+}
+
+; test phi combine greater than (swapped-under) bad
+define i1 @src12(i32 %a, i32 %b, i1 %c) #0 {
+; CHECK-LABEL: @src12(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[LOOP:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[MIN:%.*]] = call i32 @llvm.umin.i32(i32 [[A:%.*]], i32 5)
+; CHECK-NEXT:    br label [[LOOP]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IND:%.*]] = phi i32 [ [[MIN]], [[THEN]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IND]], 6
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  br i1 %c, label %then, label %loop
+then:
+  %min = call i32 @llvm.umin.i32(i32 %a, i32 5)
+  br label %loop
+loop:
+  %ind = phi i32 [ %min, %then ], [ %b, %entry ]
+  %cmp = icmp ugt i32 6, %ind
+  ret i1 %cmp
+}
+
+; test phi combine less than (swapped-equal) bad
+define i1 @src13(i32 %a, i32 %b, i1 %c) #0 {
+; CHECK-LABEL: @src13(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[LOOP:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[MIN:%.*]] = call i32 @llvm.umin.i32(i32 [[A:%.*]], i32 6)
+; CHECK-NEXT:    br label [[LOOP]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IND:%.*]] = phi i32 [ [[MIN]], [[THEN]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[IND]], 6
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  br i1 %c, label %then, label %loop
+then:
+  %min = call i32 @llvm.umin.i32(i32 %a, i32 6)
+  br label %loop
+loop:
+  %ind = phi i32 [ %min, %then ], [ %b, %entry ]
+  %cmp = icmp ult i32 6, %ind
+  ret i1 %cmp
+}

github-actions · 2024-03-09T09:43:30Z

✅ With the latest revision this PR passed the C/C++ code formatter.

dtcxzyw

Could you please provide the alive2 proof for general cases?

llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp

llvm/test/Transforms/InstCombine/fold-phi-minmax.ll

PeterChou1 · 2024-03-11T14:48:53Z

Could you please provide the alive2 proof for general cases?

Sure, I've construct alive2 proofs for every case in the optimization below

min intrinsic optimization only works if x >= z

%min = llvm.umin ( X, Y )
%phi = phi %min ...         =>  %phi = phi Y ..
%cmp = icmp ult %phi, Z          %cmp = icmp ult %phi, Z

case 1. x = z should optimize: https://alive2.llvm.org/ce/z/IAXE_-
case 2. x > z should optimize: https://alive2.llvm.org/ce/z/cSqDmr
case 3. x < z should NOT optimize: https://alive2.llvm.org/ce/z/7r0b4A

swapped min intrinsic optimization only works if x >= z

%min = llvm.umin ( X, Y )
%phi = phi %min ...             =>  %phi = phi Y ..
%cmp = icmp ugt Z, %phi          %cmp = icmp ugt %phi, Z

case 5. x = z should optimize: https://alive2.llvm.org/ce/z/8RZEVx
case 6. x > z should optimize: https://alive2.llvm.org/ce/z/8cgUAV
case 7. x < z should NOT optimize: https://alive2.llvm.org/ce/z/S_f5bU

max intrinsic optimization only works if x <= z

%max = llvm.umax ( X, Y )
%phi = phi %max ...         =>  %phi = phi Y ..
%cmp = icmp ugt %phi, Z          %cmp = icmp ugt %phi, Z

case 8. x = z should optimize: https://alive2.llvm.org/ce/z/QYFiRR
case 9. x < z should optimize: https://alive2.llvm.org/ce/z/djpU8u
case 10. x > z should NOT optimize: https://alive2.llvm.org/ce/z/2S3seD

swapped max intrinsic optimization only works if x <= z

%max = llvm.umax ( X, Y )
%phi  = phi %max ...         =>   %phi = phi Y ..
%cmp = icmp ult Z, %phi          %cmp = icmp ult Z, %phi

case 11. x = z should optimize: https://alive2.llvm.org/ce/z/B7VRzx
case 12. x < z should optimize: https://alive2.llvm.org/ce/z/5UuFHF
case 13. x > z should NOT optimize: https://alive2.llvm.org/ce/z/wAwfex

These proofs generalize over smin, and smax intrinsics as well, but I will omit them for brevity

llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp

…tCombineCompares

PeterChou1 · 2024-03-11T18:17:11Z

@dtcxzyw @nikic @goldsteinn

I think I've accounted for most of changes requested let me know if anything else changed

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

goldsteinn · 2024-03-11T18:27:46Z

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

+      if (CmpXZ.has_value() && CmpYZ.has_value()) {
+        auto CmpXY = IsCondKnownTrue(simplifyICmpInst(SwappedPred, X, Y, Q));
+        if (!CmpXY.has_value())
+          continue;


Do you need to continue here?
I would think you could just ignore if both have a value and do you simplification of X/Z.
if Y/Z relationship is known, that will simplify later.

I see so we can revert to how it was implemented before. I assume some other pass will probably take care of it?

The point is that irrelevant of the ordering of X/Y, if X >= Z then min(X, Y) < Z -> Y < Z. Likewise for max.

Also I think if we have an ordering for X/Y, the min/max will have simplified.

Apologies I think I've misunderstood your comment. I think you're suggesting to remove the code block

if (!CmpXY.has_value()) continue;

and just use X/Z to simplify since Y/Z relationship will be simplified later. I'm just failing to see how the Y/Z relationship will be simplified

PeterChou1 added 2 commits March 9, 2024 01:45

[llvm][instcombine] adds missed fold optimization for min max intrins…

93d3293

…ics in phi instcombine pass

[llvm][instcombine] added test for folding min/max intrinsics into ph…

d2798ee

…i instructions

PeterChou1 requested a review from nikic as a code owner March 9, 2024 09:41

llvmbot added the llvm:transforms label Mar 9, 2024

PeterChou1 changed the title ~~Instcombinephi~~ [llvm][instcombine] Add Missed Optimization for Folding Min Max intrinsic into PHI instruction Mar 9, 2024

[llvm][instcombine] format file for InstCombinePHI

6de82a7

nikic requested review from dtcxzyw and goldsteinn March 9, 2024 10:22

dtcxzyw requested changes Mar 9, 2024

View reviewed changes

goldsteinn reviewed Mar 11, 2024

View reviewed changes

llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp Outdated Show resolved Hide resolved

goldsteinn reviewed Mar 11, 2024

View reviewed changes

llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp Outdated Show resolved Hide resolved

[llvm][instcombine] refactor optimization for folding PHI ICmp to Ins…

4156401

…tCombineCompares

PeterChou1 force-pushed the instcombinephi branch from 6e36fbc to 4156401 Compare March 11, 2024 18:08

dtcxzyw reviewed Mar 11, 2024

View reviewed changes

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp Outdated Show resolved Hide resolved

goldsteinn reviewed Mar 11, 2024

View reviewed changes

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp Show resolved Hide resolved

goldsteinn reviewed Mar 11, 2024

View reviewed changes

[llvm][InstCombine] clang-tidy clean up for foldICmpPHIWithMinMax

a769f7b

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[llvm][instcombine] Add Missed Optimization for Folding Min Max intrinsic into PHI instruction #84619

[llvm][instcombine] Add Missed Optimization for Folding Min Max intrinsic into PHI instruction #84619

PeterChou1 commented Mar 9, 2024

llvmbot commented Mar 9, 2024

github-actions bot commented Mar 9, 2024 •

edited

dtcxzyw left a comment

PeterChou1 commented Mar 11, 2024

PeterChou1 commented Mar 11, 2024

goldsteinn Mar 11, 2024

PeterChou1 Mar 11, 2024

goldsteinn Mar 11, 2024

goldsteinn Mar 11, 2024

PeterChou1 Mar 12, 2024

[llvm][instcombine] Add Missed Optimization for Folding Min Max intrinsic into PHI instruction #84619

Are you sure you want to change the base?

[llvm][instcombine] Add Missed Optimization for Folding Min Max intrinsic into PHI instruction #84619

Conversation

PeterChou1 commented Mar 9, 2024

llvmbot commented Mar 9, 2024

github-actions bot commented Mar 9, 2024 • edited

dtcxzyw left a comment

Choose a reason for hiding this comment

PeterChou1 commented Mar 11, 2024

PeterChou1 commented Mar 11, 2024

goldsteinn Mar 11, 2024

Choose a reason for hiding this comment

PeterChou1 Mar 11, 2024

Choose a reason for hiding this comment

goldsteinn Mar 11, 2024

Choose a reason for hiding this comment

goldsteinn Mar 11, 2024

Choose a reason for hiding this comment

PeterChou1 Mar 12, 2024

Choose a reason for hiding this comment

github-actions bot commented Mar 9, 2024 •

edited