Conversation

@ParkHanbum
Contributor

This patch implements a fold for the pattern
icmp slt (sub nsw %x, umin(%x, %c)), %c2.

Typically, the sub nsw ... umin sequence is canonicalized to
@llvm.usub.sat. However, that transformation discards the nsw
constraint, which provides valuable range information
(guaranteeing no signed underflow). By detecting this pattern before
canonicalization, we can merge the range check into a single
comparison.

This optimization is valid under the following constraints:

  • %c is non-negative (C >= 0).
  • %c2 is strictly positive (C2 > 0).
  • The sum %c + %c2 does not cause a signed overflow.

Alive Proof : https://alive2.llvm.org/ce/z/JG3SnP
Fixed: #167055
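
For reviewers who want an executable cross-check in addition to the Alive2 proof, the fold can also be verified exhaustively at the i8 width with a small standalone C++ program (illustrative only, not part of this patch; the Alive2 link above remains the authoritative proof):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  for (int C = 0; C <= 127; ++C) {      // Constraint 1: C >= 0.
    for (int C2 = 1; C2 <= 127; ++C2) { // Constraint 2: C2 > 0.
      if (C + C2 > 127)
        continue;                       // Constraint 3: C + C2 must not overflow i8.
      for (int X = -128; X <= 127; ++X) {
        // umin compares the unsigned interpretations of the i8 operands.
        uint8_t UX = static_cast<uint8_t>(X);
        uint8_t Min = std::min<uint8_t>(UX, static_cast<uint8_t>(C));
        // sub nsw: the signed subtraction must not wrap; if it would,
        // the IR value is poison and the fold need not hold.
        int Sub = X - static_cast<int8_t>(Min);
        if (Sub < -128 || Sub > 127)
          continue;
        bool Before = Sub < C2;  // icmp slt (sub nsw X, umin(X, C)), C2
        bool After = X < C + C2; // icmp slt X, C + C2
        assert(Before == After);
      }
    }
  }
  std::puts("fold verified for every i8 X, C, C2 meeting the constraints");
  return 0;
}

It covers the same boundary cases exercised by the tests below (C = 0, C + C2 = SMAX, and nsw violations).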

@ParkHanbum ParkHanbum requested a review from nikic as a code owner December 3, 2025 05:44
@llvmbot llvmbot added llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms labels Dec 3, 2025
@llvmbot
Member

llvmbot commented Dec 3, 2025

@llvm/pr-subscribers-llvm-transforms

Author: hanbeom (ParkHanbum)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/170422.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp (+27-3)
  • (modified) llvm/test/Transforms/InstCombine/icmp-sub.ll (+121-1)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 9bee523c7b7e5..df096e7b806ad 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2857,9 +2857,33 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
         I, Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {X, Op1}));
 
   // Op0 - umin(X, Op0) --> usub.sat(Op0, X)
-  if (match(Op1, m_OneUse(m_c_UMin(m_Value(X), m_Specific(Op0)))))
-    return replaceInstUsesWith(
-        I, Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {Op0, X}));
+  if (match(Op1, m_OneUse(m_c_UMin(m_Value(X), m_Specific(Op0))))) {
+    // Op0 - umin(Op0, C) s< C2 --> Op0 s< C2 + C
+    // Constraints:
+    // 1. C >= 0: Range [0, C) must be valid.
+    // 2. C2 > 0: Range [C, C+C2) must imply extension.
+    // 3. No Signed Overflow: Merged upper bound (C + C2) must be safe.
+    const APInt *C;
+    if (I.hasOneUse() && I.hasNoSignedWrap() &&
+        match(X, m_APInt(C)) && C->isNonNegative()) {
+      const APInt *C2;
+      CmpPredicate Pred;
+      Instruction *MustICmp = cast<Instruction>(I.user_back());
+      if (match(MustICmp, m_ICmp(Pred, m_Specific(&I), m_APInt(C2))) &&
+          Pred == CmpInst::ICMP_SLT) {
+        bool Overflow;
+        APInt Sum = C->sadd_ov(*C2, Overflow);
+        if (C2->isStrictlyPositive() && !Overflow) {
+          Value *NewCmp = Builder.CreateICmpSLT(Op0, ConstantInt::get(Ty, Sum));
+          eraseInstFromFunction(*replaceInstUsesWith(*MustICmp, NewCmp));
+          // return eraseInstFromFunction(I);
+          return nullptr;
+        }
+      }
+    }
+    Value *USub = Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {Op0, X});
+    return replaceInstUsesWith(I, USub);
+  }
 
   // Op0 - umax(X, Op0) --> 0 - usub.sat(X, Op0)
   if (match(Op1, m_OneUse(m_c_UMax(m_Value(X), m_Specific(Op0))))) {
diff --git a/llvm/test/Transforms/InstCombine/icmp-sub.ll b/llvm/test/Transforms/InstCombine/icmp-sub.ll
index 13ed7ba0c1703..e261e38697c69 100644
--- a/llvm/test/Transforms/InstCombine/icmp-sub.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-sub.ll
@@ -3,6 +3,125 @@
 
 declare void @use(i32)
 declare void @use_vec(<2 x i8>)
+declare i8 @llvm.umin.i8(i8, i8)
+
+; 1. Positive Tests
+; Basic valid case
+define i1 @test_basic_opt(i8 %x) {
+; CHECK-LABEL: @test_basic_opt(
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], 118
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[TMP1]], -108
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %min = call i8 @llvm.umin.i8(i8 %x, i8 10)
+  %sub = sub nsw i8 %x, %min
+  %cmp = icmp slt i8 %sub, 20
+  ret i1 %cmp
+}
+
+; Boundary case - C is 0
+define i1 @test_c_is_zero(i8 %x) {
+; CHECK-LABEL: @test_c_is_zero(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 10
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %min = call i8 @llvm.umin.i8(i8 %x, i8 0)
+  %sub = sub nsw i8 %x, %min
+  %cmp = icmp slt i8 %sub, 10
+  ret i1 %cmp
+}
+
+; Boundary case - Sum is exactly SMAX (127)
+define i1 @test_sum_is_smax(i8 %x) {
+; CHECK-LABEL: @test_sum_is_smax(
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], 28
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[TMP1]], -101
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %min = call i8 @llvm.umin.i8(i8 %x, i8 100)
+  %sub = sub nsw i8 %x, %min
+  %cmp = icmp slt i8 %sub, 27
+  ret i1 %cmp
+}
+
+; 2. Negative Tests
+; Missing 'nsw' flag on sub
+define i1 @fail_no_nsw(i8 %x) {
+; CHECK-LABEL: @fail_no_nsw(
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], 118
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[TMP1]], -108
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %min = call i8 @llvm.umin.i8(i8 %x, i8 10)
+  %sub = sub i8 %x, %min
+  %cmp = icmp slt i8 %sub, 20
+  ret i1 %cmp
+}
+
+; C is Negative (Constraint 1 Violation)
+define i1 @fail_c_negative(i8 %x) {
+; CHECK-LABEL: @fail_c_negative(
+; CHECK-NEXT:    ret i1 true
+;
+  %min = call i8 @llvm.umin.i8(i8 %x, i8 -10)
+  %sub = sub nsw i8 %x, %min
+  %cmp = icmp slt i8 %sub, 20
+  ret i1 %cmp
+}
+
+; C2 is Zero (Constraint 2 Violation)
+define i1 @fail_c2_zero(i8 %x) {
+; CHECK-LABEL: @fail_c2_zero(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[X:%.*]], -119
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %min = call i8 @llvm.umin.i8(i8 %x, i8 10)
+  %sub = sub nsw i8 %x, %min
+  %cmp = icmp slt i8 %sub, 0
+  ret i1 %cmp
+}
+
+; C2 is Negative (Constraint 2 Violation)
+define i1 @fail_c2_negative(i8 %x) {
+; CHECK-LABEL: @fail_c2_negative(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[X:%.*]], -119
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %min = call i8 @llvm.umin.i8(i8 %x, i8 10)
+  %sub = sub nsw i8 %x, %min
+  %cmp = icmp slt i8 %sub, -5
+  ret i1 %cmp
+}
+
+; Signed Overflow in C + C2 (Constraint 3 Violation)
+define i1 @fail_sum_overflow(i8 %x) {
+; CHECK-LABEL: @fail_sum_overflow(
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], 28
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[TMP1]], -98
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %min = call i8 @llvm.umin.i8(i8 %x, i8 100)
+  %sub = sub nsw i8 %x, %min
+  %cmp = icmp slt i8 %sub, 30
+  ret i1 %cmp
+}
+
+; Multi-use of sub instruction
+define i8 @fail_multi_use(i8 %x) {
+; CHECK-LABEL: @fail_multi_use(
+; CHECK-NEXT:    [[SUB:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[X:%.*]], i8 10)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[SUB]], 20
+; CHECK-NEXT:    [[RES:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT:    [[RET:%.*]] = add nuw i8 [[SUB]], [[RES]]
+; CHECK-NEXT:    ret i8 [[RET]]
+;
+  %min = call i8 @llvm.umin.i8(i8 %x, i8 10)
+  %sub = sub nsw i8 %x, %min
+  %cmp = icmp slt i8 %sub, 20
+  %res = zext i1 %cmp to i8
+  %ret = add i8 %sub, %res
+  ret i8 %ret
+}
 
 define i1 @test_nuw_and_unsigned_pred(i64 %x) {
 ; CHECK-LABEL: @test_nuw_and_unsigned_pred(
@@ -290,7 +409,8 @@ define i1 @subC_nsw_ne(i32 %x) {
 ; CHECK-LABEL: @subC_nsw_ne(
 ; CHECK-NEXT:    [[SUBX:%.*]] = sub nsw i32 -2147483647, [[X:%.*]]
 ; CHECK-NEXT:    call void @use(i32 [[SUBX]])
-; CHECK-NEXT:    ret i1 true
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i32 [[X]], 2147483603
+; CHECK-NEXT:    ret i1 [[R]]
 ;
   %subx = sub nsw i32 -2147483647, %x
   call void @use(i32 %subx)

@github-actions

github-actions bot commented Dec 3, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@nikic nikic requested a review from dtcxzyw December 3, 2025 06:59
@nikic
Contributor

nikic commented Dec 3, 2025

Doesn't this go against the optimization approach suggested in #168880?

@ParkHanbum
Contributor Author

I attempted to implement it using the approach discussed in that issue, but I could find no alternative besides adding multiple instructions and removing them from the worklist. Since I hadn't seen that approach used elsewhere, I changed the implementation method.


Development

Successfully merging this pull request may close these issues.

Missed Optimization: Fold x - umin(x,13) < 2 into single compare x < 15
