Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[InstCombine] Generalize fold of fcmp + copysign #86387

Open
wants to merge 2 commits into
base: main
Choose a base branch
from

Conversation

dtcxzyw
Copy link
Member

@dtcxzyw dtcxzyw commented Mar 23, 2024

This patch generalize the fold of fcmp + copysign:

fcmp pred (copysign C1, X), C2 --> select !signbit(X), (fcmp pred abs(C1), C2), (fcmp pred nabs(C1), C2)

It will improve the codegen of idiom fcmp oeq (copysign C, X), +/-C.
Godbolt: https://godbolt.org/z/a8zPheTqT

@llvmbot
Copy link
Collaborator

llvmbot commented Mar 23, 2024

@llvm/pr-subscribers-llvm-transforms

Author: Yingwei Zheng (dtcxzyw)

Changes

This patch generalize the fold of fcmp + copysign:

fcmp pred (copysign C1, X), C2 --> select !signbit(X), (fcmp pred abs(C1), C2), (fcmp pred nabs(C1), C2)

It will improve the codegen of idiom fcmp oeq (copysign C, X), +/-C.
Godbolt: https://godbolt.org/z/a8zPheTqT


Full diff: https://github.com/llvm/llvm-project/pull/86387.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp (+10-10)
  • (modified) llvm/test/Transforms/InstCombine/fcmp.ll (+64-24)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index db302d7e526844..076452200fead5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -8101,22 +8101,22 @@ Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
   }
 
   // Convert a sign-bit test of an FP value into a cast and integer compare.
-  // TODO: Simplify if the copysign constant is 0.0 or NaN.
-  // TODO: Handle non-zero compare constants.
-  // TODO: Handle other predicates.
   if (match(Op0, m_OneUse(m_Intrinsic<Intrinsic::copysign>(m_APFloat(C),
                                                            m_Value(X)))) &&
-      match(Op1, m_AnyZeroFP()) && !C->isZero() && !C->isNaN()) {
+      match(Op1, m_ImmConstant(RHSC))) {
     Type *IntType = Builder.getIntNTy(X->getType()->getScalarSizeInBits());
     if (auto *VecTy = dyn_cast<VectorType>(OpType))
       IntType = VectorType::get(IntType, VecTy->getElementCount());
 
-    // copysign(non-zero constant, X) < 0.0 --> (bitcast X) < 0
-    if (Pred == FCmpInst::FCMP_OLT) {
-      Value *IntX = Builder.CreateBitCast(X, IntType);
-      return new ICmpInst(ICmpInst::ICMP_SLT, IntX,
-                          ConstantInt::getNullValue(IntType));
-    }
+    APFloat PosC = abs(*C);
+    if (Value *CmpPos = ConstantFoldCompareInstOperands(
+            Pred, ConstantFP::get(X->getType(), PosC), RHSC, DL, &TLI, &I))
+      if (Value *CmpNeg = ConstantFoldCompareInstOperands(
+              Pred, ConstantFP::get(X->getType(), -PosC), RHSC, DL, &TLI, &I)) {
+        Value *IntX = Builder.CreateBitCast(X, IntType);
+        Value *NotNeg = Builder.CreateIsNotNeg(IntX);
+        return SelectInst::Create(NotNeg, CmpPos, CmpNeg);
+      }
   }
 
   {
diff --git a/llvm/test/Transforms/InstCombine/fcmp.ll b/llvm/test/Transforms/InstCombine/fcmp.ll
index f2701d16d0f3d1..cff533ce5e9cc8 100644
--- a/llvm/test/Transforms/InstCombine/fcmp.ll
+++ b/llvm/test/Transforms/InstCombine/fcmp.ll
@@ -596,8 +596,8 @@ define i1 @is_signbit_set(double %x) {
 
 define i1 @is_signbit_set_1(double %x) {
 ; CHECK-LABEL: @is_signbit_set_1(
-; CHECK-NEXT:    [[S:%.*]] = call double @llvm.copysign.f64(double 1.000000e+00, double [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fcmp ult double [[S]], 0.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT:    [[R:%.*]] = icmp slt i64 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = call double @llvm.copysign.f64(double 1.0, double %x)
@@ -607,8 +607,8 @@ define i1 @is_signbit_set_1(double %x) {
 
 define i1 @is_signbit_set_2(double %x) {
 ; CHECK-LABEL: @is_signbit_set_2(
-; CHECK-NEXT:    [[S:%.*]] = call double @llvm.copysign.f64(double 1.000000e+00, double [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fcmp ole double [[S]], 0.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT:    [[R:%.*]] = icmp slt i64 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = call double @llvm.copysign.f64(double 1.0, double %x)
@@ -618,8 +618,8 @@ define i1 @is_signbit_set_2(double %x) {
 
 define i1 @is_signbit_set_3(double %x) {
 ; CHECK-LABEL: @is_signbit_set_3(
-; CHECK-NEXT:    [[S:%.*]] = call double @llvm.copysign.f64(double 1.000000e+00, double [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fcmp ule double [[S]], 0.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT:    [[R:%.*]] = icmp slt i64 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = call double @llvm.copysign.f64(double 1.0, double %x)
@@ -640,12 +640,10 @@ define <2 x i1> @is_signbit_set_anyzero(<2 x double> %x) {
   ret <2 x i1> %r
 }
 
-; TODO: Handle different predicates.
-
 define i1 @is_signbit_clear(double %x) {
 ; CHECK-LABEL: @is_signbit_clear(
-; CHECK-NEXT:    [[S:%.*]] = call double @llvm.copysign.f64(double 4.200000e+01, double [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fcmp ogt double [[S]], 0.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt i64 [[TMP1]], -1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = call double @llvm.copysign.f64(double -42.0, double %x)
@@ -655,8 +653,8 @@ define i1 @is_signbit_clear(double %x) {
 
 define i1 @is_signbit_clear_1(double %x) {
 ; CHECK-LABEL: @is_signbit_clear_1(
-; CHECK-NEXT:    [[S:%.*]] = call double @llvm.copysign.f64(double 4.200000e+01, double [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fcmp ugt double [[S]], 0.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt i64 [[TMP1]], -1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = call double @llvm.copysign.f64(double -42.0, double %x)
@@ -666,8 +664,8 @@ define i1 @is_signbit_clear_1(double %x) {
 
 define i1 @is_signbit_clear_2(double %x) {
 ; CHECK-LABEL: @is_signbit_clear_2(
-; CHECK-NEXT:    [[S:%.*]] = call double @llvm.copysign.f64(double 4.200000e+01, double [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fcmp oge double [[S]], 0.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt i64 [[TMP1]], -1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = call double @llvm.copysign.f64(double -42.0, double %x)
@@ -677,8 +675,8 @@ define i1 @is_signbit_clear_2(double %x) {
 
 define i1 @is_signbit_clear_3(double %x) {
 ; CHECK-LABEL: @is_signbit_clear_3(
-; CHECK-NEXT:    [[S:%.*]] = call double @llvm.copysign.f64(double 4.200000e+01, double [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fcmp uge double [[S]], 0.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt i64 [[TMP1]], -1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = call double @llvm.copysign.f64(double -42.0, double %x)
@@ -701,12 +699,10 @@ define i1 @is_signbit_set_extra_use(double %x, ptr %p) {
   ret i1 %r
 }
 
-; TODO: Handle non-zero compare constant.
-
 define i1 @is_signbit_clear_nonzero(double %x) {
 ; CHECK-LABEL: @is_signbit_clear_nonzero(
-; CHECK-NEXT:    [[S:%.*]] = call double @llvm.copysign.f64(double 4.200000e+01, double [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fcmp ogt double [[S]], 1.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt i64 [[TMP1]], -1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = call double @llvm.copysign.f64(double -42.0, double %x)
@@ -714,8 +710,6 @@ define i1 @is_signbit_clear_nonzero(double %x) {
   ret i1 %r
 }
 
-; TODO: Handle zero copysign constant.
-
 define i1 @is_signbit_set_simplify_zero(double %x) {
 ; CHECK-LABEL: @is_signbit_set_simplify_zero(
 ; CHECK-NEXT:    ret i1 false
@@ -725,8 +719,6 @@ define i1 @is_signbit_set_simplify_zero(double %x) {
   ret i1 %r
 }
 
-; TODO: Handle NaN copysign constant.
-
 define i1 @is_signbit_set_simplify_nan(double %x) {
 ; CHECK-LABEL: @is_signbit_set_simplify_nan(
 ; CHECK-NEXT:    ret i1 false
@@ -736,6 +728,54 @@ define i1 @is_signbit_set_simplify_nan(double %x) {
   ret i1 %r
 }
 
+define i1 @test_oeq(float %a) {
+; CHECK-LABEL: @test_oeq(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float [[A:%.*]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP0]], -1
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %res = call float @llvm.copysign.f32(float 1.0, float %a)
+  %cmp = fcmp oeq float %res, 1.0
+  ret i1 %cmp
+}
+
+define i1 @test_one(float %a) {
+; CHECK-LABEL: @test_one(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float [[A:%.*]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %res = call float @llvm.copysign.f32(float 1.0, float %a)
+  %cmp = fcmp one float %res, 1.0
+  ret i1 %cmp
+}
+
+define i1 @test_ogt_false(float %a) {
+; CHECK-LABEL: @test_ogt_false(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %res = call float @llvm.copysign.f32(float 1.0, float %a)
+  %cmp = fcmp ogt float %res, 2.0
+  ret i1 %cmp
+}
+
+define i1 @test_olt_true(float %a) {
+; CHECK-LABEL: @test_olt_true(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %res = call float @llvm.copysign.f32(float 1.0, float %a)
+  %cmp = fcmp olt float %res, 2.0
+  ret i1 %cmp
+}
+
 define <2 x i1> @lossy_oeq(<2 x float> %x) {
 ; CHECK-LABEL: @lossy_oeq(
 ; CHECK-NEXT:    ret <2 x i1> zeroinitializer

dtcxzyw added a commit to dtcxzyw/llvm-opt-benchmark that referenced this pull request Mar 23, 2024
Copy link

✅ With the latest revision this PR passed the Python code formatter.

Copy link

✅ With the latest revision this PR passed the C/C++ code formatter.

Comment on lines +599 to +600
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
; CHECK-NEXT: [[R:%.*]] = icmp slt i64 [[TMP1]], 0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that we currently don't try to track FP class / sign bit through bitcast, this form seems worse? Plus, if we had fast math flags this would necessarily lose them

Copy link
Member Author

@dtcxzyw dtcxzyw Apr 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

@arsenm arsenm Apr 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it returns an i1, that wouldn't really be any better. It still wouldn't be a candidate for tracking. If we returned the signbit encoded in the original FP type, it may be more useful (i.e., it returns 0 or -0)

llvm/test/Transforms/InstCombine/fcmp.ll Show resolved Hide resolved
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

4 participants