[DAGCombiner] Change foldAndOrOfSETCC() to optimize and/or patterns with floating points.

This reverts commit 48fa79a. Reviewed By: brooksmoses Differential Revision: https://reviews.llvm.org/D159240
llvm · Aug 31, 2023 · 17fc78e · 17fc78e
1 parent 7b33f60
commit 17fc78e
Show file tree

Hide file tree

Showing 6 changed files with 2,098 additions and 1,548 deletions.
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1544,6 +1544,12 @@ inline bool isIntEqualitySetCC(CondCode Code) {
   return Code == SETEQ || Code == SETNE;
 }
 
+/// Return true if \p Code is one of the setcc condition codes that perform
+/// an equality comparison on floating point operands: SETOEQ, SETONE,
+/// SETUEQ or SETUNE.
+inline bool isFPEqualitySetCC(CondCode Code) {
+  switch (Code) {
+  case SETOEQ:
+  case SETONE:
+  case SETUEQ:
+  case SETUNE:
+    return true;
+  default:
+    return false;
+  }
+}
+
 /// Return true if the specified condition returns true if the two operands to
 /// the condition are equal. Note that if one of the two operands is a NaN,
 /// this value is meaningless.

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6042,6 +6042,72 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
   return SDValue();
 }
 
+/// Return true only when DAG.isKnownNeverSNaN() holds for both \p Operand1
+/// and \p Operand2, i.e. neither value can be a signaling NaN.
+static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
+                                   SelectionDAG &DAG) {
+  // Operand2 is queried first; Operand1 is only inspected if that succeeds.
+  if (!DAG.isKnownNeverSNaN(Operand2))
+    return false;
+  return DAG.isKnownNeverSNaN(Operand1);
+}
+
+/// Return true only when DAG.isKnownNeverNaN() holds for both \p Operand1
+/// and \p Operand2, i.e. neither value can be a NaN.
+static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
+                                  SelectionDAG &DAG) {
+  // Operand2 is queried first; Operand1 is only inspected if that succeeds.
+  if (!DAG.isKnownNeverNaN(Operand2))
+    return false;
+  return DAG.isKnownNeverNaN(Operand1);
+}
+
+/// Select the floating-point min/max opcode that allows an AND/OR of two
+/// setcc nodes with predicate \p CC to be expressed as a single setcc on
+/// min/max(\p Operand1, \p Operand2).
+///
+/// \param Operand1, Operand2 the values inspected for NaN / signaling NaN.
+/// \param CC the condition code of the comparisons.
+/// \param OrAndOpcode opcode of the logic node; only ISD::OR and ISD::AND
+///        can select an opcode.
+/// \param DAG used for the NaN queries on the operands.
+/// \param isFMAXNUMFMINNUM_IEEE true if FMINNUM_IEEE/FMAXNUM_IEEE may be
+///        returned.
+/// \param isFMAXNUMFMINNUM true if FMINNUM/FMAXNUM may be returned.
+/// \returns the selected opcode, or ISD::DELETED_NODE when none applies.
+static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
+                                     ISD::CondCode CC, unsigned OrAndOpcode,
+                                     SelectionDAG &DAG,
+                                     bool isFMAXNUMFMINNUM_IEEE,
+                                     bool isFMAXNUMFMINNUM) {
+  // The optimization cannot be applied for all the predicates because
+  // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
+  // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
+  // applied at all if one of the operands is a signaling NaN.
+  const bool IsOr = OrAndOpcode == ISD::OR;
+  const bool IsAnd = OrAndOpcode == ISD::AND;
+  if (!IsOr && !IsAnd)
+    return ISD::DELETED_NODE;
+
+  // Predicates without an ordered/unordered qualifier: it is safe to use
+  // FMINNUM_IEEE/FMAXNUM_IEEE only if all the operands are non NaN values.
+  const bool IsLess = CC == ISD::SETLT || CC == ISD::SETLE;
+  const bool IsGreater = CC == ISD::SETGT || CC == ISD::SETGE;
+  if (IsLess || IsGreater) {
+    // Test the cheap flag before running the NaN analysis on the operands.
+    if (!isFMAXNUMFMINNUM_IEEE ||
+        !arebothOperandsNotNan(Operand1, Operand2, DAG))
+      return ISD::DELETED_NODE;
+    // (less, OR) and (greater, AND) select the minimum; the two remaining
+    // combinations select the maximum.
+    return (IsLess == IsOr) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
+  }
+
+  // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
+  // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
+  // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
+  // that there are not any sNaNs, then the optimization is not valid
+  // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
+  // the optimization using FMINNUM/FMAXNUM for the following cases. If
+  // we can prove that we do not have any sNaNs, then we can do the
+  // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
+  // cases.
+  const bool IsOLess = CC == ISD::SETOLT || CC == ISD::SETOLE;
+  const bool IsOGreater = CC == ISD::SETOGT || CC == ISD::SETOGE;
+  const bool IsULess = CC == ISD::SETULT || CC == ISD::SETULE;
+  const bool IsUGreater = CC == ISD::SETUGT || CC == ISD::SETUGE;
+  const bool WantsMin = (IsOLess && IsOr) || (IsUGreater && IsAnd);
+  const bool WantsMax = (IsOGreater && IsOr) || (IsULess && IsAnd);
+  if (!WantsMin && !WantsMax)
+    return ISD::DELETED_NODE;
+
+  // FMINNUM/FMAXNUM take priority whenever they may be used.
+  if (isFMAXNUMFMINNUM)
+    return WantsMin ? ISD::FMINNUM : ISD::FMAXNUM;
+
+  // Otherwise fall back to the IEEE variants, which additionally require
+  // that neither operand can be a signaling NaN.
+  if (isFMAXNUMFMINNUM_IEEE &&
+      arebothOperandsNotSNan(Operand1, Operand2, DAG))
+    return WantsMin ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
+
+  return ISD::DELETED_NODE;
+}
+
 static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
   using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
   assert(
@@ -6083,12 +6149,21 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
   // The optimization does not work for `==` or `!=` .
   // The two comparisons should have either the same predicate or the
   // predicate of one of the comparisons is the opposite of the other one.
-  if (OpVT.isInteger() && !ISD::isIntEqualitySetCC(CCL) &&
-      (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR)) &&
-      TLI.isOperationLegal(ISD::UMAX, OpVT) &&
-      TLI.isOperationLegal(ISD::SMAX, OpVT) &&
-      TLI.isOperationLegal(ISD::UMIN, OpVT) &&
-      TLI.isOperationLegal(ISD::SMIN, OpVT)) {
+  bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
+                               TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT);
+  bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
+                          TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT);
+  if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
+        TLI.isOperationLegal(ISD::SMAX, OpVT) &&
+        TLI.isOperationLegal(ISD::UMIN, OpVT) &&
+        TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
+       (OpVT.isFloatingPoint() &&
+        (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
+      !ISD::isIntEqualitySetCC(CCL) && !ISD::isFPEqualitySetCC(CCL) &&
+      CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
+      CCL != ISD::SETTRUE &&
+      (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
+
     SDValue CommonValue, Operand1, Operand2;
     ISD::CondCode CC = ISD::SETCC_INVALID;
     if (CCL == CCR) {
@@ -6126,19 +6201,26 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
       CC = ISD::SETCC_INVALID;
 
     if (CC != ISD::SETCC_INVALID) {
-      unsigned NewOpcode;
+      unsigned NewOpcode = ISD::DELETED_NODE;
       bool IsSigned = isSignedIntSetCC(CC);
-      bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
-                     CC == ISD::SETLT || CC == ISD::SETULT);
-      bool IsOr = (LogicOp->getOpcode() == ISD::OR);
-      if (IsLess == IsOr)
-        NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
-      else
-        NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
-
-      SDValue MinMaxValue =
-          DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
-      return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
+      if (OpVT.isInteger()) {
+        bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
+                       CC == ISD::SETLT || CC == ISD::SETULT);
+        bool IsOr = (LogicOp->getOpcode() == ISD::OR);
+        if (IsLess == IsOr)
+          NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
+        else
+          NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
+      } else if (OpVT.isFloatingPoint())
+        NewOpcode =
+            getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
+                                 DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
+
+      if (NewOpcode != ISD::DELETED_NODE) {
+        SDValue MinMaxValue =
+            DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
+        return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
+      }
     }
   }
 

diff --git a/llvm/test/CodeGen/AArch64/combine_andor_with_cmps.ll b/llvm/test/CodeGen/AArch64/combine_andor_with_cmps.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=neoverse-n1 -verify-machineinstrs < %s | FileCheck %s
+
+; The tests check the following optimization of DAGCombiner for AArch64:
+; CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
+; CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
+
+; `or` of two `fcmp olt` against the common operand %arg3. The CHECK lines
+; expect a single fminnm of the first two arguments followed by one fcmp.
+define i1 @test1(float %arg1, float %arg2, float %arg3) #0 {
+; CHECK-LABEL: test1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminnm s0, s0, s1
+; CHECK-NEXT:    fcmp s0, s2
+; CHECK-NEXT:    cset w0, mi
+; CHECK-NEXT:    ret
+  %cmp1 = fcmp olt float %arg1, %arg3
+  %cmp2 = fcmp olt float %arg2, %arg3
+  %or1  = or i1 %cmp1, %cmp2
+  ret i1 %or1
+}
+
+; `or` of two `fcmp ogt` against the common operand %arg3. The CHECK lines
+; expect a single fmaxnm of the first two arguments followed by one fcmp.
+define i1 @test2(double %arg1, double %arg2, double %arg3) #0 {
+; CHECK-LABEL: test2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxnm d0, d0, d1
+; CHECK-NEXT:    fcmp d0, d2
+; CHECK-NEXT:    cset w0, gt
+; CHECK-NEXT:    ret
+  %cmp1 = fcmp ogt double %arg1, %arg3
+  %cmp2 = fcmp ogt double %arg2, %arg3
+  %or1  = or i1 %cmp1, %cmp2
+  ret i1 %or1
+}
+
+; It is illegal to apply the optimization in the following two test cases
+; because FMINNUM_IEEE and FMAXNUM_IEEE are not supported.
+
+; `and` of two `fcmp nnan olt` on nnan fadd results against %arg3. The CHECK
+; lines expect the two compares to stay separate (fcmp + fccmp), with no
+; min/max instruction. Note: despite its name, %or1 holds the `and` result.
+define i1 @test3(float %arg1, float %arg2, float %arg3) {
+; CHECK-LABEL: test3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov s3, #1.00000000
+; CHECK-NEXT:    fadd s0, s0, s3
+; CHECK-NEXT:    fmov s3, #2.00000000
+; CHECK-NEXT:    fadd s1, s1, s3
+; CHECK-NEXT:    fcmp s1, s2
+; CHECK-NEXT:    fccmp s0, s2, #0, lt
+; CHECK-NEXT:    cset w0, lt
+; CHECK-NEXT:    ret
+  %add1 = fadd nnan float %arg1, 1.0
+  %add2 = fadd nnan float %arg2, 2.0
+  %cmp1 = fcmp nnan olt float %add1, %arg3
+  %cmp2 = fcmp nnan olt float %add2, %arg3
+  %or1  = and i1 %cmp1, %cmp2
+  ret i1 %or1
+}
+
+; `and` of two `fcmp nnan ogt` on nnan fadd results against %arg3. The CHECK
+; lines expect the two compares to stay separate (fcmp + fccmp), with no
+; min/max instruction. Note: despite its name, %or1 holds the `and` result.
+define i1 @test4(float %arg1, float %arg2, float %arg3) {
+; CHECK-LABEL: test4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov s3, #1.00000000
+; CHECK-NEXT:    fadd s0, s0, s3
+; CHECK-NEXT:    fmov s3, #2.00000000
+; CHECK-NEXT:    fadd s1, s1, s3
+; CHECK-NEXT:    fcmp s1, s2
+; CHECK-NEXT:    fccmp s0, s2, #4, gt
+; CHECK-NEXT:    cset w0, gt
+; CHECK-NEXT:    ret
+  %add1 = fadd nnan float %arg1, 1.0
+  %add2 = fadd nnan float %arg2, 2.0
+  %cmp1 = fcmp nnan ogt float %add1, %arg3
+  %cmp2 = fcmp nnan ogt float %add2, %arg3
+  %or1  = and i1 %cmp1, %cmp2
+  ret i1 %or1
+}
+