-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[InstCombine] Fold smin(A - B, -1) + B into a clamp-like select #167478
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[InstCombine] Fold smin(A - B, -1) + B into a clamp-like select #167478
Conversation
|
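@llvm/pr-subscribers-llvm-transforms Author: 陈子昂 (Michael-Chen-NJU) Changes: Original pattern (simplified): `add nsw (select Cond, (sub nsw 0, B), (smin (sub nsw A, B), -1)), B`. The pattern is folded into: `select Cond, 0, smin(A, B - 1)`. Rationale: This transformation is based on the algebraic identity `smin(A - B, -1) + B == smin(A, B - 1)` (under the assumption that `add nsw` holds, preventing signed overflow). Fixes #166885. Full diff: https://github.com/llvm/llvm-project/pull/167478.diff 2 Files Affected: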
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 9bee523c7b7e5..b85d557a64540 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1870,6 +1870,34 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
Builder.CreateIntrinsic(Intrinsic::umax, {I.getType()}, {A, B}));
}
+ // add nsw (select Cond, (sub nsw 0, B), (smin (sub nsw A, B), -1)), B
+ // -> select Cond, 0, (smin A, (sub nsw B, 1))
+ Value *Cond = nullptr;
+ Constant *K = nullptr;
+ Instruction *SubABInst = nullptr;
+ Value *V0_Captured = nullptr;
+ if (I.hasNoSignedWrap() &&
+ match(&I,
+ m_c_BinOp(m_Select(m_Value(Cond), m_Value(V0_Captured),
+ m_OneUse(m_Intrinsic<Intrinsic::smin>(
+ m_Instruction(SubABInst), m_Constant(K)))),
+ m_Value(B)))) {
+
+ if (match(V0_Captured, m_NSWSub(m_Zero(), m_Deferred(B))) &&
+ match(SubABInst, m_NSWSub(m_Value(A), m_Deferred(B))) && A != B) {
+
+ Constant *One = ConstantInt::get(I.getType(), 1);
+ Value *B_Minus_1 =
+ Builder.CreateSub(B, One, "", /*HasNUW=*/false, /*HasNSW=*/true);
+ Value *NewSMin = Builder.CreateIntrinsic(Intrinsic::smin, {I.getType()},
+ {A, B_Minus_1});
+ Value *Zero = Constant::getNullValue(I.getType());
+ Value *NewSelect = Builder.CreateSelect(Cond, Zero, NewSMin, "");
+
+ return replaceInstUsesWith(I, NewSelect);
+ }
+ }
+
// ctpop(A) + ctpop(B) => ctpop(A | B) if A and B have no bits set in common.
if (match(LHS, m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(A)))) &&
match(RHS, m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(B)))) &&
diff --git a/llvm/test/Transforms/InstCombine/add_smin_sub_fold.ll b/llvm/test/Transforms/InstCombine/add_smin_sub_fold.ll
new file mode 100644
index 0000000000000..0f292fcd6eb81
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/add_smin_sub_fold.ll
@@ -0,0 +1,137 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+declare i16 @llvm.smin.i16(i16, i16)
+declare i32 @llvm.smin.i32(i32, i32)
+declare <2 x i16> @llvm.smin.v2i16(<2 x i16>, <2 x i16>)
+declare void @use_i16(i16)
+
+define i16 @test_issue_166885(i16 %arg0, i16 %arg1) {
+; CHECK-LABEL: @test_issue_166885(
+; CHECK-NEXT: [[TMP1:%.*]] = add nsw i16 [[ARG1:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.smin.i16(i16 [[ARG0:%.*]], i16 [[TMP1]])
+; CHECK-NEXT: [[V2_INV:%.*]] = icmp sgt i16 [[ARG1]], 0
+; CHECK-NEXT: [[V5:%.*]] = select i1 [[V2_INV]], i16 [[TMP2]], i16 0
+; CHECK-NEXT: ret i16 [[V5]]
+;
+ %v0 = sub nsw i16 0, %arg1
+ %v1 = sub nsw i16 %arg0, %arg1
+ %v2 = icmp slt i16 %arg1, 1
+ %v3 = tail call i16 @llvm.smin.i16(i16 %v1, i16 -1)
+ %v4 = select i1 %v2, i16 %v0, i16 %v3
+ %v5 = add nsw i16 %v4, %arg1
+ ret i16 %v5
+}
+
+define i16 @test_commutative(i16 %a, i16 %b) {
+; CHECK-LABEL: @test_commutative(
+; CHECK-NEXT: [[TMP1:%.*]] = add nsw i16 [[B:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.smin.i16(i16 [[A:%.*]], i16 [[TMP1]])
+; CHECK-NEXT: [[V2_INV:%.*]] = icmp sgt i16 [[B]], 0
+; CHECK-NEXT: [[V5:%.*]] = select i1 [[V2_INV]], i16 [[TMP2]], i16 0
+; CHECK-NEXT: ret i16 [[V5]]
+;
+ %v0 = sub nsw i16 0, %b
+ %v1 = sub nsw i16 %a, %b
+ %v2 = icmp slt i16 %b, 1
+ %v3 = tail call i16 @llvm.smin.i16(i16 %v1, i16 -1)
+ %v4 = select i1 %v2, i16 %v0, i16 %v3
+ %v5 = add nsw i16 %b, %v4
+ ret i16 %v5
+}
+
+define i32 @test_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: @test_i32(
+; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[B:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[A:%.*]], i32 [[TMP1]])
+; CHECK-NEXT: [[V2_INV:%.*]] = icmp sgt i32 [[B]], 0
+; CHECK-NEXT: [[V5:%.*]] = select i1 [[V2_INV]], i32 [[TMP2]], i32 0
+; CHECK-NEXT: ret i32 [[V5]]
+;
+ %v0 = sub nsw i32 0, %b
+ %v1 = sub nsw i32 %a, %b
+ %v2 = icmp slt i32 %b, 1
+ %v3 = tail call i32 @llvm.smin.i32(i32 %v1, i32 -1)
+ %v4 = select i1 %v2, i32 %v0, i32 %v3
+ %v5 = add nsw i32 %v4, %b
+ ret i32 %v5
+}
+
+define <2 x i16> @test_vector(<2 x i16> %a, <2 x i16> %b) {
+; CHECK-LABEL: @test_vector(
+; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i16> [[B:%.*]], splat (i16 -1)
+; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[A:%.*]], <2 x i16> [[TMP1]])
+; CHECK-NEXT: [[V2_INV:%.*]] = icmp sgt <2 x i16> [[B]], zeroinitializer
+; CHECK-NEXT: [[V5:%.*]] = select <2 x i1> [[V2_INV]], <2 x i16> [[TMP2]], <2 x i16> zeroinitializer
+; CHECK-NEXT: ret <2 x i16> [[V5]]
+;
+ %v0 = sub nsw <2 x i16> zeroinitializer, %b
+ %v1 = sub nsw <2 x i16> %a, %b
+ %v2 = icmp slt <2 x i16> %b, <i16 1, i16 1>
+ %v_minus_one = sub nsw <2 x i16> zeroinitializer, <i16 1, i16 1>
+
+ %v3 = tail call <2 x i16> @llvm.smin.v2i16(<2 x i16> %v1, <2 x i16> %v_minus_one)
+ %v4 = select <2 x i1> %v2, <2 x i16> %v0, <2 x i16> %v3
+ %v5 = add nsw <2 x i16> %v4, %b
+ ret <2 x i16> %v5
+}
+
+define i16 @test_multi_use(i16 %a, i16 %b) {
+; CHECK-LABEL: @test_multi_use(
+; CHECK-NEXT: [[V0:%.*]] = sub nsw i16 0, [[B:%.*]]
+; CHECK-NEXT: [[V1:%.*]] = sub nsw i16 [[A:%.*]], [[B]]
+; CHECK-NEXT: [[V2:%.*]] = icmp slt i16 [[B]], 1
+; CHECK-NEXT: [[V3:%.*]] = tail call i16 @llvm.smin.i16(i16 [[V1]], i16 -1)
+; CHECK-NEXT: call void @use_i16(i16 [[V3]])
+; CHECK-NEXT: [[V4:%.*]] = select i1 [[V2]], i16 [[V0]], i16 [[V3]]
+; CHECK-NEXT: [[V5:%.*]] = add nsw i16 [[V4]], [[B]]
+; CHECK-NEXT: ret i16 [[V5]]
+;
+ %v0 = sub nsw i16 0, %b
+ %v1 = sub nsw i16 %a, %b
+ %v2 = icmp slt i16 %b, 1
+ %v3 = tail call i16 @llvm.smin.i16(i16 %v1, i16 -1)
+ call void @use_i16(i16 %v3)
+ %v4 = select i1 %v2, i16 %v0, i16 %v3
+ %v5 = add nsw i16 %v4, %b
+ ret i16 %v5
+}
+
+
+define i16 @test_negative_no_nsw_add(i16 %a, i16 %b) {
+; CHECK-LABEL: @test_negative_no_nsw_add(
+; CHECK-NEXT: [[V0:%.*]] = sub nsw i16 0, [[B:%.*]]
+; CHECK-NEXT: [[V1:%.*]] = sub nsw i16 [[A:%.*]], [[B]]
+; CHECK-NEXT: [[V2:%.*]] = icmp slt i16 [[B]], 1
+; CHECK-NEXT: [[V3:%.*]] = tail call i16 @llvm.smin.i16(i16 [[V1]], i16 -1)
+; CHECK-NEXT: [[V4:%.*]] = select i1 [[V2]], i16 [[V0]], i16 [[V3]]
+; CHECK-NEXT: [[V5:%.*]] = add i16 [[V4]], [[B]]
+; CHECK-NEXT: ret i16 [[V5]]
+;
+ %v0 = sub nsw i16 0, %b
+ %v1 = sub nsw i16 %a, %b
+ %v2 = icmp slt i16 %b, 1
+ %v3 = tail call i16 @llvm.smin.i16(i16 %v1, i16 -1)
+ %v4 = select i1 %v2, i16 %v0, i16 %v3
+ %v5 = add i16 %v4, %b
+ ret i16 %v5
+}
+
+define i16 @test_negative_no_nsw_sub(i16 %a, i16 %b) {
+; CHECK-LABEL: @test_negative_no_nsw_sub(
+; CHECK-NEXT: [[V0:%.*]] = sub nsw i16 0, [[B:%.*]]
+; CHECK-NEXT: [[V1:%.*]] = sub i16 [[A:%.*]], [[B]]
+; CHECK-NEXT: [[V2:%.*]] = icmp slt i16 [[B]], 1
+; CHECK-NEXT: [[V3:%.*]] = tail call i16 @llvm.smin.i16(i16 [[V1]], i16 -1)
+; CHECK-NEXT: [[V4:%.*]] = select i1 [[V2]], i16 [[V0]], i16 [[V3]]
+; CHECK-NEXT: [[V5:%.*]] = add nsw i16 [[V4]], [[B]]
+; CHECK-NEXT: ret i16 [[V5]]
+;
+ %v0 = sub nsw i16 0, %b
+ %v1 = sub i16 %a, %b
+ %v2 = icmp slt i16 %b, 1
+ %v3 = tail call i16 @llvm.smin.i16(i16 %v1, i16 -1)
+ %v4 = select i1 %v2, i16 %v0, i16 %v3
+ %v5 = add nsw i16 %v4, %b
+ ret i16 %v5
+}
|
dtcxzyw
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we adjust foldOpIntoSelect to perform a simpler transform first?
add nsw (select Cond, (sub nsw 0, B), smin ((sub nsw A, B), -1)), B ->
select Cond, 0, (add nsw (smin ((sub nsw A, B), -1)), B)
Then add nsw smin ((sub nsw A, B), -1), B -> smin (A, B - 1) should be covered by #166878.
Thanks for the suggestion! I see your point — decomposing this transformation makes the code more modular. I'll investigate, verify this locally, and update the PR accordingly. |
Original pattern (simplified):
add nsw (select Cond, (sub nsw 0, B), (smin (sub nsw A, B), -1)), B
The pattern is folded into:
select Cond, 0, smin(A, B - 1)
Rationale
This transformation is based on the algebraic identity:
smin(A - B, -1) + B == smin(A, B - 1)
(Under the assumption that add nsw holds, preventing signed overflow.)
Fixes #166885