InstCombine: Handle folding fcmp of 0 into llvm.is.fpclass

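The fold needs to consider the function's input denormal mode: an equality compare with 0 is only exactly equivalent to an fcZero class test when input denormals are not flushed, so the fold is skipped unless the input mode is IEEE.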
llvm · Mar 15, 2023 · 0d18f31 · 0d18f31
1 parent 4109e3f
commit 0d18f31
Show file tree

Hide file tree

Showing 3 changed files with 78 additions and 167 deletions.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1273,17 +1273,37 @@ static Value *matchIsFiniteTest(InstCombiner::BuilderTy &Builder, FCmpInst *LHS,
   return Builder.CreateFCmp(FCmpInst::getOrderedPredicate(PredR), RHS0, RHS1);
 }
 
+/// Return true if it's possible to assume IEEE treatment of input denormals in
+/// \p F for \p Val, i.e. the input denormal mode that \p F reports for the
+/// floating-point semantics of \p Val's scalar type is DenormalMode::IEEE.
+static bool inputDenormalIsIEEE(const Function &F, const Value *Val) {
+  // The denormal mode is looked up by float semantics, which are taken from
+  // the scalar type of the value.
+  Type *Ty = Val->getType()->getScalarType();
+  return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE;
+}
+
 /// Returns a pair of values, which if passed to llvm.is.fpclass, returns the
 /// same result as an fcmp with the given operands.
 static std::pair<Value *, unsigned> fcmpToClassTest(FCmpInst::Predicate Pred,
+                                                    const Function &F,
                                                     Value *LHS, Value *RHS) {
   const APFloat *ConstRHS;
   if (!match(RHS, m_APFloat(ConstRHS)))
     return {nullptr, 0};
 
   if (ConstRHS->isZero()) {
+    // Compares with 0 are only exactly equal to fcZero if input denormals are
+    // not flushed.
+    if (FCmpInst::isEquality(Pred) && !inputDenormalIsIEEE(F, LHS))
+      return {nullptr, 0};
+
     switch (Pred) {
-    // TODO: Compares eq/ne with 0 depends on the denormal handling mode.
+    case FCmpInst::FCMP_OEQ: // Match x == 0.0
+      return {LHS, fcZero};
+    case FCmpInst::FCMP_UEQ: // Match isnan(x) || (x == 0.0)
+      return {LHS, fcZero | fcNan};
+    case FCmpInst::FCMP_UNE: // Match (x != 0.0)
+      return {LHS, ~fcZero & fcAllFlags};
+    case FCmpInst::FCMP_ONE: // Match !isnan(x) && x != 0.0
+      return {LHS, ~fcNan & ~fcZero & fcAllFlags};
     case FCmpInst::FCMP_ORD:
       // Canonical form of ord/uno is with a zero. We could also handle
       // non-canonical other non-NaN constants or LHS == RHS.
@@ -1501,9 +1521,11 @@ Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS,
   // potentially eliminate 4-6 instructions. If we can represent a test with a
   // single fcmp with fneg and fabs, that's likely a better canonical form.
   if (LHS->hasOneUse() && RHS->hasOneUse()) {
-    auto [ClassValRHS, ClassMaskRHS] = fcmpToClassTest(PredR, RHS0, RHS1);
+    auto [ClassValRHS, ClassMaskRHS] =
+        fcmpToClassTest(PredR, *RHS->getFunction(), RHS0, RHS1);
     if (ClassValRHS) {
-      auto [ClassValLHS, ClassMaskLHS] = fcmpToClassTest(PredL, LHS0, LHS1);
+      auto [ClassValLHS, ClassMaskLHS] =
+          fcmpToClassTest(PredL, *LHS->getFunction(), LHS0, LHS1);
       if (ClassValLHS == ClassValRHS) {
         unsigned CombinedMask = IsAnd ? (ClassMaskLHS & ClassMaskRHS)
                                       : (ClassMaskLHS | ClassMaskRHS);
@@ -1525,8 +1547,9 @@ static bool matchIsFPClassLikeFCmp(Value *Op, Value *&ClassVal,
   if (!FCmp || !FCmp->hasOneUse())
     return false;
 
-  std::tie(ClassVal, ClassMask) = fcmpToClassTest(
-      FCmp->getPredicate(), FCmp->getOperand(0), FCmp->getOperand(1));
+  std::tie(ClassVal, ClassMask) =
+      fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(),
+                      FCmp->getOperand(0), FCmp->getOperand(1));
   return ClassVal != nullptr;
 }
 

diff --git a/llvm/test/Transforms/InstCombine/combine-is.fpclass-and-fcmp.ll b/llvm/test/Transforms/InstCombine/combine-is.fpclass-and-fcmp.ll
@@ -211,10 +211,8 @@ define <2 x i1> @class_finite_or_fcmp_issubnormal_vector(<2 x half> %x) {
 
 define i1 @fcmp_oeq_zero_or_class_normal(half %x) {
 ; CHECK-LABEL: @fcmp_oeq_zero_or_class_normal(
-; CHECK-NEXT:    [[OEQ_INF:%.*]] = fcmp oeq half [[X:%.*]], 0xH0000
-; CHECK-NEXT:    [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X]], i32 264)
-; CHECK-NEXT:    [[OR:%.*]] = or i1 [[OEQ_INF]], [[CLASS]]
-; CHECK-NEXT:    ret i1 [[OR]]
+; CHECK-NEXT:    [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 360)
+; CHECK-NEXT:    ret i1 [[CLASS]]
 ;
   %oeq.inf = fcmp oeq half %x, 0.0
   %class = call i1 @llvm.is.fpclass.f16(half %x, i32 264)
@@ -276,10 +274,8 @@ define <2 x i1> @fcmp_oeq_zero_or_class_normal_dynamic_v2f16(<2 x half> %x) #2 {
 
 define i1 @class_normal_or_fcmp_oeq_zero(half %x) {
 ; CHECK-LABEL: @class_normal_or_fcmp_oeq_zero(
-; CHECK-NEXT:    [[OEQ_INF:%.*]] = fcmp oeq half [[X:%.*]], 0xH0000
-; CHECK-NEXT:    [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X]], i32 264)
-; CHECK-NEXT:    [[OR:%.*]] = or i1 [[OEQ_INF]], [[CLASS]]
-; CHECK-NEXT:    ret i1 [[OR]]
+; CHECK-NEXT:    [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 360)
+; CHECK-NEXT:    ret i1 [[CLASS]]
 ;
   %oeq.inf = fcmp oeq half %x, 0.0
   %class = call i1 @llvm.is.fpclass.f16(half %x, i32 264)
@@ -289,10 +285,8 @@ define i1 @class_normal_or_fcmp_oeq_zero(half %x) {
 
 define i1 @fcmp_ueq_zero_or_class_normal(half %x) {
 ; CHECK-LABEL: @fcmp_ueq_zero_or_class_normal(
-; CHECK-NEXT:    [[UEQ_INF:%.*]] = fcmp ueq half [[X:%.*]], 0xH0000
-; CHECK-NEXT:    [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X]], i32 264)
-; CHECK-NEXT:    [[OR:%.*]] = or i1 [[UEQ_INF]], [[CLASS]]
-; CHECK-NEXT:    ret i1 [[OR]]
+; CHECK-NEXT:    [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 363)
+; CHECK-NEXT:    ret i1 [[CLASS]]
 ;
   %ueq.inf = fcmp ueq half %x, 0.0
   %class = call i1 @llvm.is.fpclass.f16(half %x, i32 264)
@@ -302,10 +296,8 @@ define i1 @fcmp_ueq_zero_or_class_normal(half %x) {
 
 define i1 @class_normal_or_fcmp_ueq_zero(half %x) {
 ; CHECK-LABEL: @class_normal_or_fcmp_ueq_zero(
-; CHECK-NEXT:    [[UEQ_INF:%.*]] = fcmp ueq half [[X:%.*]], 0xH0000
-; CHECK-NEXT:    [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X]], i32 264)
-; CHECK-NEXT:    [[OR:%.*]] = or i1 [[UEQ_INF]], [[CLASS]]
-; CHECK-NEXT:    ret i1 [[OR]]
+; CHECK-NEXT:    [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 363)
+; CHECK-NEXT:    ret i1 [[CLASS]]
 ;
   %ueq.inf = fcmp ueq half %x, 0.0
   %class = call i1 @llvm.is.fpclass.f16(half %x, i32 264)
@@ -315,10 +307,8 @@ define i1 @class_normal_or_fcmp_ueq_zero(half %x) {
 
 define i1 @fcmp_one_zero_or_class_normal(half %x) {
 ; CHECK-LABEL: @fcmp_one_zero_or_class_normal(
-; CHECK-NEXT:    [[ONE_INF:%.*]] = fcmp one half [[X:%.*]], 0xH0000
-; CHECK-NEXT:    [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X]], i32 264)
-; CHECK-NEXT:    [[OR:%.*]] = or i1 [[ONE_INF]], [[CLASS]]
-; CHECK-NEXT:    ret i1 [[OR]]
+; CHECK-NEXT:    [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 924)
+; CHECK-NEXT:    ret i1 [[CLASS]]
 ;
   %one.inf = fcmp one half %x, 0.0
   %class = call i1 @llvm.is.fpclass.f16(half %x, i32 264)
@@ -354,10 +344,8 @@ define i1 @fcmp_one_zero_or_class_normal_dynamic(half %x) #2 {
 
 define i1 @class_normal_or_fcmp_one_zero(half %x) {
 ; CHECK-LABEL: @class_normal_or_fcmp_one_zero(
-; CHECK-NEXT:    [[ONE_INF:%.*]] = fcmp one half [[X:%.*]], 0xH0000
-; CHECK-NEXT:    [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X]], i32 264)
-; CHECK-NEXT:    [[OR:%.*]] = or i1 [[ONE_INF]], [[CLASS]]
-; CHECK-NEXT:    ret i1 [[OR]]
+; CHECK-NEXT:    [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 924)
+; CHECK-NEXT:    ret i1 [[CLASS]]
 ;
   %one.inf = fcmp one half %x, 0.0
   %class = call i1 @llvm.is.fpclass.f16(half %x, i32 264)
@@ -367,10 +355,8 @@ define i1 @class_normal_or_fcmp_one_zero(half %x) {
 
 define i1 @fcmp_une_zero_or_class_normal(half %x) {
 ; CHECK-LABEL: @fcmp_une_zero_or_class_normal(
-; CHECK-NEXT:    [[UNE_INF:%.*]] = fcmp une half [[X:%.*]], 0xH0000
-; CHECK-NEXT:    [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X]], i32 264)
-; CHECK-NEXT:    [[OR:%.*]] = or i1 [[UNE_INF]], [[CLASS]]
-; CHECK-NEXT:    ret i1 [[OR]]
+; CHECK-NEXT:    [[CLASS:%.*]] = fcmp une half [[X:%.*]], 0xH0000
+; CHECK-NEXT:    ret i1 [[CLASS]]
 ;
   %une.inf = fcmp une half %x, 0.0
   %class = call i1 @llvm.is.fpclass.f16(half %x, i32 264)
@@ -380,10 +366,8 @@ define i1 @fcmp_une_zero_or_class_normal(half %x) {
 
 define i1 @class_normal_or_fcmp_une_zero(half %x) {
 ; CHECK-LABEL: @class_normal_or_fcmp_une_zero(
-; CHECK-NEXT:    [[UNE_INF:%.*]] = fcmp une half [[X:%.*]], 0xH0000
-; CHECK-NEXT:    [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X]], i32 264)
-; CHECK-NEXT:    [[OR:%.*]] = or i1 [[UNE_INF]], [[CLASS]]
-; CHECK-NEXT:    ret i1 [[OR]]
+; CHECK-NEXT:    [[CLASS:%.*]] = fcmp une half [[X:%.*]], 0xH0000
+; CHECK-NEXT:    ret i1 [[CLASS]]
 ;
   %une.inf = fcmp une half %x, 0.0
   %class = call i1 @llvm.is.fpclass.f16(half %x, i32 264)