[InstCombine] Generalize fold of `fcmp + copysign` #86387

dtcxzyw · 2024-03-23T09:51:20Z

This patch generalize the fold of fcmp + copysign:

fcmp pred (copysign C1, X), C2 --> select !signbit(X), (fcmp pred abs(C1), C2), (fcmp pred nabs(C1), C2)

It will improve the codegen of idiom fcmp oeq (copysign C, X), +/-C.
Godbolt: https://godbolt.org/z/a8zPheTqT

llvmbot · 2024-03-23T09:51:47Z

@llvm/pr-subscribers-llvm-transforms

Author: Yingwei Zheng (dtcxzyw)

Changes

This patch generalize the fold of fcmp + copysign:

fcmp pred (copysign C1, X), C2 --&gt; select !signbit(X), (fcmp pred abs(C1), C2), (fcmp pred nabs(C1), C2)

It will improve the codegen of idiom fcmp oeq (copysign C, X), +/-C.
Godbolt: https://godbolt.org/z/a8zPheTqT

Full diff: https://github.com/llvm/llvm-project/pull/86387.diff

2 Files Affected:

(modified) llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp (+10-10)
(modified) llvm/test/Transforms/InstCombine/fcmp.ll (+64-24)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index db302d7e526844..076452200fead5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -8101,22 +8101,22 @@ Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
   }
 
   // Convert a sign-bit test of an FP value into a cast and integer compare.
-  // TODO: Simplify if the copysign constant is 0.0 or NaN.
-  // TODO: Handle non-zero compare constants.
-  // TODO: Handle other predicates.
   if (match(Op0, m_OneUse(m_Intrinsic<Intrinsic::copysign>(m_APFloat(C),
                                                            m_Value(X)))) &&
-      match(Op1, m_AnyZeroFP()) && !C->isZero() && !C->isNaN()) {
+      match(Op1, m_ImmConstant(RHSC))) {
     Type *IntType = Builder.getIntNTy(X->getType()->getScalarSizeInBits());
     if (auto *VecTy = dyn_cast<VectorType>(OpType))
       IntType = VectorType::get(IntType, VecTy->getElementCount());
 
-    // copysign(non-zero constant, X) < 0.0 --> (bitcast X) < 0
-    if (Pred == FCmpInst::FCMP_OLT) {
-      Value *IntX = Builder.CreateBitCast(X, IntType);
-      return new ICmpInst(ICmpInst::ICMP_SLT, IntX,
-                          ConstantInt::getNullValue(IntType));
-    }
+    APFloat PosC = abs(*C);
+    if (Value *CmpPos = ConstantFoldCompareInstOperands(
+            Pred, ConstantFP::get(X->getType(), PosC), RHSC, DL, &TLI, &I))
+      if (Value *CmpNeg = ConstantFoldCompareInstOperands(
+              Pred, ConstantFP::get(X->getType(), -PosC), RHSC, DL, &TLI, &I)) {
+        Value *IntX = Builder.CreateBitCast(X, IntType);
+        Value *NotNeg = Builder.CreateIsNotNeg(IntX);
+        return SelectInst::Create(NotNeg, CmpPos, CmpNeg);
+      }
   }
 
   {
diff --git a/llvm/test/Transforms/InstCombine/fcmp.ll b/llvm/test/Transforms/InstCombine/fcmp.ll
index f2701d16d0f3d1..cff533ce5e9cc8 100644
--- a/llvm/test/Transforms/InstCombine/fcmp.ll
+++ b/llvm/test/Transforms/InstCombine/fcmp.ll
@@ -596,8 +596,8 @@ define i1 @is_signbit_set(double %x) {
 
 define i1 @is_signbit_set_1(double %x) {
 ; CHECK-LABEL: @is_signbit_set_1(
-; CHECK-NEXT:    [[S:%.*]] = call double @llvm.copysign.f64(double 1.000000e+00, double [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fcmp ult double [[S]], 0.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT:    [[R:%.*]] = icmp slt i64 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = call double @llvm.copysign.f64(double 1.0, double %x)
@@ -607,8 +607,8 @@ define i1 @is_signbit_set_1(double %x) {
 
 define i1 @is_signbit_set_2(double %x) {
 ; CHECK-LABEL: @is_signbit_set_2(
-; CHECK-NEXT:    [[S:%.*]] = call double @llvm.copysign.f64(double 1.000000e+00, double [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fcmp ole double [[S]], 0.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT:    [[R:%.*]] = icmp slt i64 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = call double @llvm.copysign.f64(double 1.0, double %x)
@@ -618,8 +618,8 @@ define i1 @is_signbit_set_2(double %x) {
 
 define i1 @is_signbit_set_3(double %x) {
 ; CHECK-LABEL: @is_signbit_set_3(
-; CHECK-NEXT:    [[S:%.*]] = call double @llvm.copysign.f64(double 1.000000e+00, double [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fcmp ule double [[S]], 0.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT:    [[R:%.*]] = icmp slt i64 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = call double @llvm.copysign.f64(double 1.0, double %x)
@@ -640,12 +640,10 @@ define <2 x i1> @is_signbit_set_anyzero(<2 x double> %x) {
   ret <2 x i1> %r
 }
 
-; TODO: Handle different predicates.
-
 define i1 @is_signbit_clear(double %x) {
 ; CHECK-LABEL: @is_signbit_clear(
-; CHECK-NEXT:    [[S:%.*]] = call double @llvm.copysign.f64(double 4.200000e+01, double [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fcmp ogt double [[S]], 0.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt i64 [[TMP1]], -1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = call double @llvm.copysign.f64(double -42.0, double %x)
@@ -655,8 +653,8 @@ define i1 @is_signbit_clear(double %x) {
 
 define i1 @is_signbit_clear_1(double %x) {
 ; CHECK-LABEL: @is_signbit_clear_1(
-; CHECK-NEXT:    [[S:%.*]] = call double @llvm.copysign.f64(double 4.200000e+01, double [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fcmp ugt double [[S]], 0.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt i64 [[TMP1]], -1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = call double @llvm.copysign.f64(double -42.0, double %x)
@@ -666,8 +664,8 @@ define i1 @is_signbit_clear_1(double %x) {
 
 define i1 @is_signbit_clear_2(double %x) {
 ; CHECK-LABEL: @is_signbit_clear_2(
-; CHECK-NEXT:    [[S:%.*]] = call double @llvm.copysign.f64(double 4.200000e+01, double [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fcmp oge double [[S]], 0.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt i64 [[TMP1]], -1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = call double @llvm.copysign.f64(double -42.0, double %x)
@@ -677,8 +675,8 @@ define i1 @is_signbit_clear_2(double %x) {
 
 define i1 @is_signbit_clear_3(double %x) {
 ; CHECK-LABEL: @is_signbit_clear_3(
-; CHECK-NEXT:    [[S:%.*]] = call double @llvm.copysign.f64(double 4.200000e+01, double [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fcmp uge double [[S]], 0.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt i64 [[TMP1]], -1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = call double @llvm.copysign.f64(double -42.0, double %x)
@@ -701,12 +699,10 @@ define i1 @is_signbit_set_extra_use(double %x, ptr %p) {
   ret i1 %r
 }
 
-; TODO: Handle non-zero compare constant.
-
 define i1 @is_signbit_clear_nonzero(double %x) {
 ; CHECK-LABEL: @is_signbit_clear_nonzero(
-; CHECK-NEXT:    [[S:%.*]] = call double @llvm.copysign.f64(double 4.200000e+01, double [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fcmp ogt double [[S]], 1.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt i64 [[TMP1]], -1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = call double @llvm.copysign.f64(double -42.0, double %x)
@@ -714,8 +710,6 @@ define i1 @is_signbit_clear_nonzero(double %x) {
   ret i1 %r
 }
 
-; TODO: Handle zero copysign constant.
-
 define i1 @is_signbit_set_simplify_zero(double %x) {
 ; CHECK-LABEL: @is_signbit_set_simplify_zero(
 ; CHECK-NEXT:    ret i1 false
@@ -725,8 +719,6 @@ define i1 @is_signbit_set_simplify_zero(double %x) {
   ret i1 %r
 }
 
-; TODO: Handle NaN copysign constant.
-
 define i1 @is_signbit_set_simplify_nan(double %x) {
 ; CHECK-LABEL: @is_signbit_set_simplify_nan(
 ; CHECK-NEXT:    ret i1 false
@@ -736,6 +728,54 @@ define i1 @is_signbit_set_simplify_nan(double %x) {
   ret i1 %r
 }
 
+define i1 @test_oeq(float %a) {
+; CHECK-LABEL: @test_oeq(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float [[A:%.*]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP0]], -1
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %res = call float @llvm.copysign.f32(float 1.0, float %a)
+  %cmp = fcmp oeq float %res, 1.0
+  ret i1 %cmp
+}
+
+define i1 @test_one(float %a) {
+; CHECK-LABEL: @test_one(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float [[A:%.*]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %res = call float @llvm.copysign.f32(float 1.0, float %a)
+  %cmp = fcmp one float %res, 1.0
+  ret i1 %cmp
+}
+
+define i1 @test_ogt_false(float %a) {
+; CHECK-LABEL: @test_ogt_false(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %res = call float @llvm.copysign.f32(float 1.0, float %a)
+  %cmp = fcmp ogt float %res, 2.0
+  ret i1 %cmp
+}
+
+define i1 @test_olt_true(float %a) {
+; CHECK-LABEL: @test_olt_true(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %res = call float @llvm.copysign.f32(float 1.0, float %a)
+  %cmp = fcmp olt float %res, 2.0
+  ret i1 %cmp
+}
+
 define <2 x i1> @lossy_oeq(<2 x float> %x) {
 ; CHECK-LABEL: @lossy_oeq(
 ; CHECK-NEXT:    ret <2 x i1> zeroinitializer

PR Link: llvm/llvm-project#86387

github-actions · 2024-03-23T09:54:05Z

✅ With the latest revision this PR passed the Python code formatter.

github-actions · 2024-03-23T09:54:05Z

✅ With the latest revision this PR passed the C/C++ code formatter.

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

llvm/test/Transforms/InstCombine/fcmp.ll

arsenm · 2024-03-26T06:17:55Z

llvm/test/Transforms/InstCombine/fcmp.ll

+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT:    [[R:%.*]] = icmp slt i64 [[TMP1]], 0


Given that we currently don't try to track FP class / sign bit through bitcast, this form seems worse? Plus, if we had fast math flags this would necessarily lose them

Should we introduce a new intrinsic i1 @llvm.signbit?
See also https://discourse.llvm.org/t/do-we-need-intrinsics-for-floating-point-classification-functions/58852/8

If it returns an i1, that wouldn't really be any better. It still wouldn't be a candidate for tracking. If we returned the signbit encoded in the original FP type, it may be more useful (i.e., it returns 0 or -0)

llvm/test/Transforms/InstCombine/fcmp.ll

dtcxzyw requested review from arsenm, goldsteinn, andykaylor and jcranmer-intel March 23, 2024 09:51

dtcxzyw requested a review from nikic as a code owner March 23, 2024 09:51

llvmbot added the llvm:transforms label Mar 23, 2024

dtcxzyw added a commit to dtcxzyw/llvm-opt-benchmark that referenced this pull request Mar 23, 2024

pre-commit: test PR86387

2d345ad

PR Link: llvm/llvm-project#86387

dtcxzyw mentioned this pull request Mar 23, 2024

pre-commit: test PR86387 dtcxzyw/llvm-opt-benchmark#449

Open

andykaylor reviewed Mar 25, 2024

View reviewed changes

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp Show resolved Hide resolved

llvm/test/Transforms/InstCombine/fcmp.ll Show resolved Hide resolved

arsenm reviewed Mar 26, 2024

View reviewed changes

dtcxzyw added 2 commits April 11, 2024 21:55

[InstCombine] Add pre-commit tests. NFC.

17224b3

[InstCombine] Generalize fold of fcmp + copysign

6085802

dtcxzyw force-pushed the perf/fcmp-copysign branch from 25e1ce4 to 6085802 Compare April 11, 2024 14:03

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[InstCombine] Generalize fold of `fcmp + copysign` #86387

[InstCombine] Generalize fold of `fcmp + copysign` #86387

dtcxzyw commented Mar 23, 2024

llvmbot commented Mar 23, 2024

github-actions bot commented Mar 23, 2024

github-actions bot commented Mar 23, 2024

arsenm Mar 26, 2024

dtcxzyw Apr 11, 2024 •

edited

arsenm Apr 17, 2024 •

edited

		; CHECK-NEXT: [[TMP1:%.]] = bitcast double [[X:%.]] to i64
		; CHECK-NEXT: [[R:%.*]] = icmp slt i64 [[TMP1]], 0

[InstCombine] Generalize fold of fcmp + copysign #86387

Are you sure you want to change the base?

[InstCombine] Generalize fold of fcmp + copysign #86387

Conversation

dtcxzyw commented Mar 23, 2024

llvmbot commented Mar 23, 2024

github-actions bot commented Mar 23, 2024

github-actions bot commented Mar 23, 2024

arsenm Mar 26, 2024

Choose a reason for hiding this comment

dtcxzyw Apr 11, 2024 • edited

Choose a reason for hiding this comment

arsenm Apr 17, 2024 • edited

Choose a reason for hiding this comment

[InstCombine] Generalize fold of `fcmp + copysign` #86387

[InstCombine] Generalize fold of `fcmp + copysign` #86387

dtcxzyw Apr 11, 2024 •

edited

arsenm Apr 17, 2024 •

edited