-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Generalize fold of fcmp + copysign
#86387
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Yingwei Zheng (dtcxzyw) ChangesThis patch generalize the fold of
It will improve the codegen of idiom Full diff: https://github.com/llvm/llvm-project/pull/86387.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index db302d7e526844..076452200fead5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -8101,22 +8101,22 @@ Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
}
// Convert a sign-bit test of an FP value into a cast and integer compare.
- // TODO: Simplify if the copysign constant is 0.0 or NaN.
- // TODO: Handle non-zero compare constants.
- // TODO: Handle other predicates.
if (match(Op0, m_OneUse(m_Intrinsic<Intrinsic::copysign>(m_APFloat(C),
m_Value(X)))) &&
- match(Op1, m_AnyZeroFP()) && !C->isZero() && !C->isNaN()) {
+ match(Op1, m_ImmConstant(RHSC))) {
Type *IntType = Builder.getIntNTy(X->getType()->getScalarSizeInBits());
if (auto *VecTy = dyn_cast<VectorType>(OpType))
IntType = VectorType::get(IntType, VecTy->getElementCount());
- // copysign(non-zero constant, X) < 0.0 --> (bitcast X) < 0
- if (Pred == FCmpInst::FCMP_OLT) {
- Value *IntX = Builder.CreateBitCast(X, IntType);
- return new ICmpInst(ICmpInst::ICMP_SLT, IntX,
- ConstantInt::getNullValue(IntType));
- }
+ APFloat PosC = abs(*C);
+ if (Value *CmpPos = ConstantFoldCompareInstOperands(
+ Pred, ConstantFP::get(X->getType(), PosC), RHSC, DL, &TLI, &I))
+ if (Value *CmpNeg = ConstantFoldCompareInstOperands(
+ Pred, ConstantFP::get(X->getType(), -PosC), RHSC, DL, &TLI, &I)) {
+ Value *IntX = Builder.CreateBitCast(X, IntType);
+ Value *NotNeg = Builder.CreateIsNotNeg(IntX);
+ return SelectInst::Create(NotNeg, CmpPos, CmpNeg);
+ }
}
{
diff --git a/llvm/test/Transforms/InstCombine/fcmp.ll b/llvm/test/Transforms/InstCombine/fcmp.ll
index f2701d16d0f3d1..cff533ce5e9cc8 100644
--- a/llvm/test/Transforms/InstCombine/fcmp.ll
+++ b/llvm/test/Transforms/InstCombine/fcmp.ll
@@ -596,8 +596,8 @@ define i1 @is_signbit_set(double %x) {
define i1 @is_signbit_set_1(double %x) {
; CHECK-LABEL: @is_signbit_set_1(
-; CHECK-NEXT: [[S:%.*]] = call double @llvm.copysign.f64(double 1.000000e+00, double [[X:%.*]])
-; CHECK-NEXT: [[R:%.*]] = fcmp ult double [[S]], 0.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT: [[R:%.*]] = icmp slt i64 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%s = call double @llvm.copysign.f64(double 1.0, double %x)
@@ -607,8 +607,8 @@ define i1 @is_signbit_set_1(double %x) {
define i1 @is_signbit_set_2(double %x) {
; CHECK-LABEL: @is_signbit_set_2(
-; CHECK-NEXT: [[S:%.*]] = call double @llvm.copysign.f64(double 1.000000e+00, double [[X:%.*]])
-; CHECK-NEXT: [[R:%.*]] = fcmp ole double [[S]], 0.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT: [[R:%.*]] = icmp slt i64 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%s = call double @llvm.copysign.f64(double 1.0, double %x)
@@ -618,8 +618,8 @@ define i1 @is_signbit_set_2(double %x) {
define i1 @is_signbit_set_3(double %x) {
; CHECK-LABEL: @is_signbit_set_3(
-; CHECK-NEXT: [[S:%.*]] = call double @llvm.copysign.f64(double 1.000000e+00, double [[X:%.*]])
-; CHECK-NEXT: [[R:%.*]] = fcmp ule double [[S]], 0.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT: [[R:%.*]] = icmp slt i64 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%s = call double @llvm.copysign.f64(double 1.0, double %x)
@@ -640,12 +640,10 @@ define <2 x i1> @is_signbit_set_anyzero(<2 x double> %x) {
ret <2 x i1> %r
}
-; TODO: Handle different predicates.
-
define i1 @is_signbit_clear(double %x) {
; CHECK-LABEL: @is_signbit_clear(
-; CHECK-NEXT: [[S:%.*]] = call double @llvm.copysign.f64(double 4.200000e+01, double [[X:%.*]])
-; CHECK-NEXT: [[R:%.*]] = fcmp ogt double [[S]], 0.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT: [[R:%.*]] = icmp sgt i64 [[TMP1]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%s = call double @llvm.copysign.f64(double -42.0, double %x)
@@ -655,8 +653,8 @@ define i1 @is_signbit_clear(double %x) {
define i1 @is_signbit_clear_1(double %x) {
; CHECK-LABEL: @is_signbit_clear_1(
-; CHECK-NEXT: [[S:%.*]] = call double @llvm.copysign.f64(double 4.200000e+01, double [[X:%.*]])
-; CHECK-NEXT: [[R:%.*]] = fcmp ugt double [[S]], 0.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT: [[R:%.*]] = icmp sgt i64 [[TMP1]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%s = call double @llvm.copysign.f64(double -42.0, double %x)
@@ -666,8 +664,8 @@ define i1 @is_signbit_clear_1(double %x) {
define i1 @is_signbit_clear_2(double %x) {
; CHECK-LABEL: @is_signbit_clear_2(
-; CHECK-NEXT: [[S:%.*]] = call double @llvm.copysign.f64(double 4.200000e+01, double [[X:%.*]])
-; CHECK-NEXT: [[R:%.*]] = fcmp oge double [[S]], 0.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT: [[R:%.*]] = icmp sgt i64 [[TMP1]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%s = call double @llvm.copysign.f64(double -42.0, double %x)
@@ -677,8 +675,8 @@ define i1 @is_signbit_clear_2(double %x) {
define i1 @is_signbit_clear_3(double %x) {
; CHECK-LABEL: @is_signbit_clear_3(
-; CHECK-NEXT: [[S:%.*]] = call double @llvm.copysign.f64(double 4.200000e+01, double [[X:%.*]])
-; CHECK-NEXT: [[R:%.*]] = fcmp uge double [[S]], 0.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT: [[R:%.*]] = icmp sgt i64 [[TMP1]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%s = call double @llvm.copysign.f64(double -42.0, double %x)
@@ -701,12 +699,10 @@ define i1 @is_signbit_set_extra_use(double %x, ptr %p) {
ret i1 %r
}
-; TODO: Handle non-zero compare constant.
-
define i1 @is_signbit_clear_nonzero(double %x) {
; CHECK-LABEL: @is_signbit_clear_nonzero(
-; CHECK-NEXT: [[S:%.*]] = call double @llvm.copysign.f64(double 4.200000e+01, double [[X:%.*]])
-; CHECK-NEXT: [[R:%.*]] = fcmp ogt double [[S]], 1.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT: [[R:%.*]] = icmp sgt i64 [[TMP1]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%s = call double @llvm.copysign.f64(double -42.0, double %x)
@@ -714,8 +710,6 @@ define i1 @is_signbit_clear_nonzero(double %x) {
ret i1 %r
}
-; TODO: Handle zero copysign constant.
-
define i1 @is_signbit_set_simplify_zero(double %x) {
; CHECK-LABEL: @is_signbit_set_simplify_zero(
; CHECK-NEXT: ret i1 false
@@ -725,8 +719,6 @@ define i1 @is_signbit_set_simplify_zero(double %x) {
ret i1 %r
}
-; TODO: Handle NaN copysign constant.
-
define i1 @is_signbit_set_simplify_nan(double %x) {
; CHECK-LABEL: @is_signbit_set_simplify_nan(
; CHECK-NEXT: ret i1 false
@@ -736,6 +728,54 @@ define i1 @is_signbit_set_simplify_nan(double %x) {
ret i1 %r
}
+define i1 @test_oeq(float %a) {
+; CHECK-LABEL: @test_oeq(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A:%.*]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ %res = call float @llvm.copysign.f32(float 1.0, float %a)
+ %cmp = fcmp oeq float %res, 1.0
+ ret i1 %cmp
+}
+
+define i1 @test_one(float %a) {
+; CHECK-LABEL: @test_one(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A:%.*]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ %res = call float @llvm.copysign.f32(float 1.0, float %a)
+ %cmp = fcmp one float %res, 1.0
+ ret i1 %cmp
+}
+
+define i1 @test_ogt_false(float %a) {
+; CHECK-LABEL: @test_ogt_false(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret i1 false
+;
+entry:
+ %res = call float @llvm.copysign.f32(float 1.0, float %a)
+ %cmp = fcmp ogt float %res, 2.0
+ ret i1 %cmp
+}
+
+define i1 @test_olt_true(float %a) {
+; CHECK-LABEL: @test_olt_true(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret i1 true
+;
+entry:
+ %res = call float @llvm.copysign.f32(float 1.0, float %a)
+ %cmp = fcmp olt float %res, 2.0
+ ret i1 %cmp
+}
+
define <2 x i1> @lossy_oeq(<2 x float> %x) {
; CHECK-LABEL: @lossy_oeq(
; CHECK-NEXT: ret <2 x i1> zeroinitializer
|
✅ With the latest revision this PR passed the Python code formatter. |
✅ With the latest revision this PR passed the C/C++ code formatter. |
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64 | ||
; CHECK-NEXT: [[R:%.*]] = icmp slt i64 [[TMP1]], 0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Given that we currently don't try to track FP class / sign bit through bitcast, this form seems worse? Plus, if we had fast math flags this would necessarily lose them
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we introduce a new intrinsic i1 @llvm.signbit
?
See also https://discourse.llvm.org/t/do-we-need-intrinsics-for-floating-point-classification-functions/58852/8
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If it returns an i1, that wouldn't really be any better. It still wouldn't be a candidate for tracking. If we returned the signbit encoded in the original FP type, it may be more useful (i.e., it returns 0 or -0)
25e1ce4
to
6085802
Compare
This patch generalize the fold of
fcmp + copysign
:It will improve the codegen of idiom
fcmp oeq (copysign C, X), +/-C
.Godbolt: https://godbolt.org/z/a8zPheTqT