Conversation

@Michael-Chen-NJU (Contributor)

Original pattern (simplified):
add nsw (select Cond, (sub nsw 0, B), smin ((sub nsw A, B), -1)), B

The pattern is folded into:
select Cond, 0, smin(A, B - 1)

Rationale

This transformation is based on the algebraic identity:
smin(A - B, -1) + B == smin(A, B - 1)
(assuming the nsw flags hold, so none of the arithmetic can wrap with signed overflow).
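
The identity itself is just distributivity of adding a constant over smin; nsw guarantees the IR arithmetic agrees with arithmetic over the integers:

smin(A - B, -1) + B == smin((A - B) + B, -1 + B) == smin(A, B - 1)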

Fixes #166885

@llvmbot added the llvm:instcombine (Covers the InstCombine, InstSimplify and AggressiveInstCombine passes) and llvm:transforms labels on Nov 11, 2025
@llvmbot (Member) commented Nov 11, 2025

@llvm/pr-subscribers-llvm-transforms

Author: 陈子昂 (Michael-Chen-NJU)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/167478.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp (+28)
  • (added) llvm/test/Transforms/InstCombine/add_smin_sub_fold.ll (+137)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 9bee523c7b7e5..b85d557a64540 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1870,6 +1870,34 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
         Builder.CreateIntrinsic(Intrinsic::umax, {I.getType()}, {A, B}));
   }
 
+  // add nsw (select Cond, (sub nsw 0, B), smin ((sub nsw A, B), -1)), B
+  //   -> select Cond, 0, smin(A, B - 1)
+  Value *Cond = nullptr;
+  Constant *K = nullptr;
+  Instruction *SubABInst = nullptr;
+  Value *V0_Captured = nullptr;
+  if (I.hasNoSignedWrap() &&
+      match(&I,
+            m_c_BinOp(m_Select(m_Value(Cond), m_Value(V0_Captured),
+                               m_OneUse(m_Intrinsic<Intrinsic::smin>(
+                                   m_Instruction(SubABInst), m_Constant(K)))),
+                      m_Value(B)))) {
+
+    if (match(V0_Captured, m_NSWSub(m_Zero(), m_Deferred(B))) &&
+        match(SubABInst, m_NSWSub(m_Value(A), m_Deferred(B))) && A != B) {
+
+      Constant *One = ConstantInt::get(I.getType(), 1);
+      Value *B_Minus_1 =
+          Builder.CreateSub(B, One, "", /*HasNUW=*/false, /*HasNSW=*/true);
+      Value *NewSMin = Builder.CreateIntrinsic(Intrinsic::smin, {I.getType()},
+                                               {A, B_Minus_1});
+      Value *Zero = Constant::getNullValue(I.getType());
+      Value *NewSelect = Builder.CreateSelect(Cond, Zero, NewSMin, "");
+
+      return replaceInstUsesWith(I, NewSelect);
+    }
+  }
+
   // ctpop(A) + ctpop(B) => ctpop(A | B) if A and B have no bits set in common.
   if (match(LHS, m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(A)))) &&
       match(RHS, m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(B)))) &&
diff --git a/llvm/test/Transforms/InstCombine/add_smin_sub_fold.ll b/llvm/test/Transforms/InstCombine/add_smin_sub_fold.ll
new file mode 100644
index 0000000000000..0f292fcd6eb81
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/add_smin_sub_fold.ll
@@ -0,0 +1,137 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+declare i16 @llvm.smin.i16(i16, i16)
+declare i32 @llvm.smin.i32(i32, i32)
+declare <2 x i16> @llvm.smin.v2i16(<2 x i16>, <2 x i16>)
+declare void @use_i16(i16)
+
+define i16 @test_issue_166885(i16 %arg0, i16 %arg1) {
+; CHECK-LABEL: @test_issue_166885(
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i16 [[ARG1:%.*]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.smin.i16(i16 [[ARG0:%.*]], i16 [[TMP1]])
+; CHECK-NEXT:    [[V2_INV:%.*]] = icmp sgt i16 [[ARG1]], 0
+; CHECK-NEXT:    [[V5:%.*]] = select i1 [[V2_INV]], i16 [[TMP2]], i16 0
+; CHECK-NEXT:    ret i16 [[V5]]
+;
+  %v0 = sub nsw i16 0, %arg1
+  %v1 = sub nsw i16 %arg0, %arg1
+  %v2 = icmp slt i16 %arg1, 1
+  %v3 = tail call i16 @llvm.smin.i16(i16 %v1, i16 -1)
+  %v4 = select i1 %v2, i16 %v0, i16 %v3
+  %v5 = add nsw i16 %v4, %arg1
+  ret i16 %v5
+}
+
+define i16 @test_commutative(i16 %a, i16 %b) {
+; CHECK-LABEL: @test_commutative(
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i16 [[B:%.*]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.smin.i16(i16 [[A:%.*]], i16 [[TMP1]])
+; CHECK-NEXT:    [[V2_INV:%.*]] = icmp sgt i16 [[B]], 0
+; CHECK-NEXT:    [[V5:%.*]] = select i1 [[V2_INV]], i16 [[TMP2]], i16 0
+; CHECK-NEXT:    ret i16 [[V5]]
+;
+  %v0 = sub nsw i16 0, %b
+  %v1 = sub nsw i16 %a, %b
+  %v2 = icmp slt i16 %b, 1
+  %v3 = tail call i16 @llvm.smin.i16(i16 %v1, i16 -1)
+  %v4 = select i1 %v2, i16 %v0, i16 %v3
+  %v5 = add nsw i16 %b, %v4
+  ret i16 %v5
+}
+
+define i32 @test_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: @test_i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i32 [[B:%.*]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[A:%.*]], i32 [[TMP1]])
+; CHECK-NEXT:    [[V2_INV:%.*]] = icmp sgt i32 [[B]], 0
+; CHECK-NEXT:    [[V5:%.*]] = select i1 [[V2_INV]], i32 [[TMP2]], i32 0
+; CHECK-NEXT:    ret i32 [[V5]]
+;
+  %v0 = sub nsw i32 0, %b
+  %v1 = sub nsw i32 %a, %b
+  %v2 = icmp slt i32 %b, 1
+  %v3 = tail call i32 @llvm.smin.i32(i32 %v1, i32 -1)
+  %v4 = select i1 %v2, i32 %v0, i32 %v3
+  %v5 = add nsw i32 %v4, %b
+  ret i32 %v5
+}
+
+define <2 x i16> @test_vector(<2 x i16> %a, <2 x i16> %b) {
+; CHECK-LABEL: @test_vector(
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <2 x i16> [[B:%.*]], splat (i16 -1)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[A:%.*]], <2 x i16> [[TMP1]])
+; CHECK-NEXT:    [[V2_INV:%.*]] = icmp sgt <2 x i16> [[B]], zeroinitializer
+; CHECK-NEXT:    [[V5:%.*]] = select <2 x i1> [[V2_INV]], <2 x i16> [[TMP2]], <2 x i16> zeroinitializer
+; CHECK-NEXT:    ret <2 x i16> [[V5]]
+;
+  %v0 = sub nsw <2 x i16> zeroinitializer, %b
+  %v1 = sub nsw <2 x i16> %a, %b
+  %v2 = icmp slt <2 x i16> %b, <i16 1, i16 1>
+  %v_minus_one = sub nsw <2 x i16> zeroinitializer, <i16 1, i16 1>
+
+  %v3 = tail call <2 x i16> @llvm.smin.v2i16(<2 x i16> %v1, <2 x i16> %v_minus_one)
+  %v4 = select <2 x i1> %v2, <2 x i16> %v0, <2 x i16> %v3
+  %v5 = add nsw <2 x i16> %v4, %b
+  ret <2 x i16> %v5
+}
+
+define i16 @test_multi_use(i16 %a, i16 %b) {
+; CHECK-LABEL: @test_multi_use(
+; CHECK-NEXT:    [[V0:%.*]] = sub nsw i16 0, [[B:%.*]]
+; CHECK-NEXT:    [[V1:%.*]] = sub nsw i16 [[A:%.*]], [[B]]
+; CHECK-NEXT:    [[V2:%.*]] = icmp slt i16 [[B]], 1
+; CHECK-NEXT:    [[V3:%.*]] = tail call i16 @llvm.smin.i16(i16 [[V1]], i16 -1)
+; CHECK-NEXT:    call void @use_i16(i16 [[V3]])
+; CHECK-NEXT:    [[V4:%.*]] = select i1 [[V2]], i16 [[V0]], i16 [[V3]]
+; CHECK-NEXT:    [[V5:%.*]] = add nsw i16 [[V4]], [[B]]
+; CHECK-NEXT:    ret i16 [[V5]]
+;
+  %v0 = sub nsw i16 0, %b
+  %v1 = sub nsw i16 %a, %b
+  %v2 = icmp slt i16 %b, 1
+  %v3 = tail call i16 @llvm.smin.i16(i16 %v1, i16 -1)
+  call void @use_i16(i16 %v3)
+  %v4 = select i1 %v2, i16 %v0, i16 %v3
+  %v5 = add nsw i16 %v4, %b
+  ret i16 %v5
+}
+
+define i16 @test_negative_no_nsw_add(i16 %a, i16 %b) {
+; CHECK-LABEL: @test_negative_no_nsw_add(
+; CHECK-NEXT:    [[V0:%.*]] = sub nsw i16 0, [[B:%.*]]
+; CHECK-NEXT:    [[V1:%.*]] = sub nsw i16 [[A:%.*]], [[B]]
+; CHECK-NEXT:    [[V2:%.*]] = icmp slt i16 [[B]], 1
+; CHECK-NEXT:    [[V3:%.*]] = tail call i16 @llvm.smin.i16(i16 [[V1]], i16 -1)
+; CHECK-NEXT:    [[V4:%.*]] = select i1 [[V2]], i16 [[V0]], i16 [[V3]]
+; CHECK-NEXT:    [[V5:%.*]] = add i16 [[V4]], [[B]]
+; CHECK-NEXT:    ret i16 [[V5]]
+;
+  %v0 = sub nsw i16 0, %b
+  %v1 = sub nsw i16 %a, %b
+  %v2 = icmp slt i16 %b, 1
+  %v3 = tail call i16 @llvm.smin.i16(i16 %v1, i16 -1)
+  %v4 = select i1 %v2, i16 %v0, i16 %v3
+  %v5 = add i16 %v4, %b
+  ret i16 %v5
+}
+
+define i16 @test_negative_no_nsw_sub(i16 %a, i16 %b) {
+; CHECK-LABEL: @test_negative_no_nsw_sub(
+; CHECK-NEXT:    [[V0:%.*]] = sub nsw i16 0, [[B:%.*]]
+; CHECK-NEXT:    [[V1:%.*]] = sub i16 [[A:%.*]], [[B]]
+; CHECK-NEXT:    [[V2:%.*]] = icmp slt i16 [[B]], 1
+; CHECK-NEXT:    [[V3:%.*]] = tail call i16 @llvm.smin.i16(i16 [[V1]], i16 -1)
+; CHECK-NEXT:    [[V4:%.*]] = select i1 [[V2]], i16 [[V0]], i16 [[V3]]
+; CHECK-NEXT:    [[V5:%.*]] = add nsw i16 [[V4]], [[B]]
+; CHECK-NEXT:    ret i16 [[V5]]
+;
+  %v0 = sub nsw i16 0, %b
+  %v1 = sub i16 %a, %b
+  %v2 = icmp slt i16 %b, 1
+  %v3 = tail call i16 @llvm.smin.i16(i16 %v1, i16 -1)
+  %v4 = select i1 %v2, i16 %v0, i16 %v3
+  %v5 = add nsw i16 %v4, %b
+  ret i16 %v5
+}

@dtcxzyw (Member) left a comment

Can we adjust foldOpIntoSelect to perform a simpler transform first?

add nsw (select Cond, (sub nsw 0, B), smin ((sub nsw A, B), -1)), B ->
select Cond, 0, (add nsw (smin ((sub nsw A, B), -1)), B)

Then add nsw smin ((sub nsw A, B), -1), B -> smin (A, B - 1) should be covered by #166878.
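
For concreteness, a hand-written sketch (mine, not from either patch) of the intermediate IR this two-step route would leave behind for the i16 test, assuming foldOpIntoSelect sinks the add into both select arms and the true arm (0 - %b) + %b constant-folds away:

  %v1 = sub nsw i16 %a, %b
  %v2 = icmp slt i16 %b, 1
  %v3 = call i16 @llvm.smin.i16(i16 %v1, i16 -1)
  ; add sunk into the false arm; the true arm (0 - %b) + %b folded to 0
  %add = add nsw i16 %v3, %b
  %v5 = select i1 %v2, i16 0, i16 %add

The fold from #166878 would then rewrite %add into smin(%a, %b - 1), reaching the same final form this PR produces in one step.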

@Michael-Chen-NJU (Contributor, Author)

Can we adjust foldOpIntoSelect to perform a simpler transform first?

add nsw (select Cond, (sub nsw 0, B), smin ((sub nsw A, B), -1)), B ->
select Cond, 0, (add nsw (smin ((sub nsw A, B), -1)), B)

Then add nsw smin ((sub nsw A, B), -1), B -> smin (A, B - 1) should be covered by #166878.

Thanks for the suggestion! I see your point: decomposing this transformation makes the code more modular.

I will investigate foldOpIntoSelect to see if we can enable the add sinking for this pattern (transforming (-B) + B into 0). If #166878 covers the algebraic simplification of smin(A - B, -1) + B -> smin(A, B - 1), then relying on that combination is definitely a cleaner approach.

I'll verify this locally and update the PR accordingly.

Development

Successfully merging this pull request may close these issues.

Missed Optimization: Fold smin(a - b, -1) + b into a clamp — select(b < 1, 0, smin(a, b - 1))
