[PowerPC] Respect SDNodeFlags in lowering SELECT_CC

Legalizer should respect both command-line options or SDNode-level fast-math flags. Also, this patch propagates other flags during custom simplifying. Reviewed By: steven.zhang Differential Revision: https://reviews.llvm.org/D79074
llvm · May 13, 2020 · e975382 · e975382
1 parent 5440d0a
commit e975382
Show file tree

Hide file tree

Showing 4 changed files with 113 additions and 155 deletions.
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7992,6 +7992,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
   SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
   SDLoc dl(Op);
+  SDNodeFlags Flags = Op.getNode()->getFlags();
 
   // We have xsmaxcdp/xsmincdp which are OK to emit even in the
   // presence of infinities.
@@ -8012,15 +8013,10 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   // general, fsel-based lowering of select is a finite-math-only optimization.
   // For more information, see section F.3 of the 2.06 ISA specification.
   // With ISA 3.0
-  if (!DAG.getTarget().Options.NoInfsFPMath ||
-      !DAG.getTarget().Options.NoNaNsFPMath)
+  if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
+      (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
     return Op;
 
-  // TODO: Propagate flags from the select rather than global settings.
-  SDNodeFlags Flags;
-  Flags.setNoInfs(true);
-  Flags.setNoNaNs(true);
-
   // If the RHS of the comparison is a 0.0, we don't need to do the
   // subtraction at all.
   SDValue Sel1;

diff --git a/llvm/test/CodeGen/PowerPC/scalar-equal.ll b/llvm/test/CodeGen/PowerPC/scalar-equal.ll
@@ -51,6 +51,44 @@ define double @testoeq(double %a, double %b, double %c, double %d) {
 ; NO-FAST-P8-NEXT:  # %bb.1: # %entry
 ; NO-FAST-P8-NEXT:    fmr f1, f4
 ; NO-FAST-P8-NEXT:    blr
+entry:
+  %cmp = fcmp oeq double %a, %b
+  %cond = select i1 %cmp, double %c, double %d
+  ret double %cond
+}
+
+define double @testoeq_fast(double %a, double %b, double %c, double %d) {
+; FAST-P8-LABEL: testoeq_fast:
+; FAST-P8:       # %bb.0: # %entry
+; FAST-P8-NEXT:    xssubdp f0, f1, f2
+; FAST-P8-NEXT:    xsnegdp f1, f0
+; FAST-P8-NEXT:    fsel f0, f0, f3, f4
+; FAST-P8-NEXT:    fsel f1, f1, f0, f4
+; FAST-P8-NEXT:    blr
+;
+; FAST-P9-LABEL: testoeq_fast:
+; FAST-P9:       # %bb.0: # %entry
+; FAST-P9-NEXT:    xssubdp f0, f1, f2
+; FAST-P9-NEXT:    fsel f1, f0, f3, f4
+; FAST-P9-NEXT:    xsnegdp f0, f0
+; FAST-P9-NEXT:    fsel f1, f0, f1, f4
+; FAST-P9-NEXT:    blr
+;
+; NO-FAST-P9-LABEL: testoeq_fast:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xssubdp f0, f1, f2
+; NO-FAST-P9-NEXT:    fsel f1, f0, f3, f4
+; NO-FAST-P9-NEXT:    xsnegdp f0, f0
+; NO-FAST-P9-NEXT:    fsel f1, f0, f1, f4
+; NO-FAST-P9-NEXT:    blr
+;
+; NO-FAST-P8-LABEL: testoeq_fast:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    xssubdp f0, f1, f2
+; NO-FAST-P8-NEXT:    xsnegdp f1, f0
+; NO-FAST-P8-NEXT:    fsel f0, f0, f3, f4
+; NO-FAST-P8-NEXT:    fsel f1, f1, f0, f4
+; NO-FAST-P8-NEXT:    blr
 entry:
   %cmp = fcmp fast oeq double %a, %b
   %cond = select fast i1 %cmp, double %c, double %d

diff --git a/llvm/test/CodeGen/PowerPC/scalar-min-max.ll b/llvm/test/CodeGen/PowerPC/scalar-min-max.ll
@@ -122,10 +122,8 @@ define dso_local float @testfmax_fast(float %a, float %b) local_unnamed_addr {
 ;
 ; NO-FAST-P8-LABEL: testfmax_fast:
 ; NO-FAST-P8:       # %bb.0: # %entry
-; NO-FAST-P8-NEXT:    fcmpu cr0, f1, f2
-; NO-FAST-P8-NEXT:    bgtlr cr0
-; NO-FAST-P8-NEXT:  # %bb.1: # %entry
-; NO-FAST-P8-NEXT:    fmr f1, f2
+; NO-FAST-P8-NEXT:    xssubsp f0, f2, f1
+; NO-FAST-P8-NEXT:    fsel f1, f0, f2, f1
 ; NO-FAST-P8-NEXT:    blr
 entry:
   %cmp = fcmp fast ogt float %a, %b
@@ -145,10 +143,8 @@ define dso_local double @testdmax_fast(double %a, double %b) local_unnamed_addr
 ;
 ; NO-FAST-P8-LABEL: testdmax_fast:
 ; NO-FAST-P8:       # %bb.0: # %entry
-; NO-FAST-P8-NEXT:    xscmpudp cr0, f1, f2
-; NO-FAST-P8-NEXT:    bgtlr cr0
-; NO-FAST-P8-NEXT:  # %bb.1: # %entry
-; NO-FAST-P8-NEXT:    fmr f1, f2
+; NO-FAST-P8-NEXT:    xssubdp f0, f2, f1
+; NO-FAST-P8-NEXT:    fsel f1, f0, f2, f1
 ; NO-FAST-P8-NEXT:    blr
 entry:
   %cmp = fcmp fast ogt double %a, %b
@@ -168,10 +164,8 @@ define dso_local float @testfmin_fast(float %a, float %b) local_unnamed_addr {
 ;
 ; NO-FAST-P8-LABEL: testfmin_fast:
 ; NO-FAST-P8:       # %bb.0: # %entry
-; NO-FAST-P8-NEXT:    fcmpu cr0, f1, f2
-; NO-FAST-P8-NEXT:    bltlr cr0
-; NO-FAST-P8-NEXT:  # %bb.1: # %entry
-; NO-FAST-P8-NEXT:    fmr f1, f2
+; NO-FAST-P8-NEXT:    xssubsp f0, f1, f2
+; NO-FAST-P8-NEXT:    fsel f1, f0, f2, f1
 ; NO-FAST-P8-NEXT:    blr
 entry:
   %cmp = fcmp fast olt float %a, %b
@@ -191,10 +185,8 @@ define dso_local double @testdmin_fast(double %a, double %b) local_unnamed_addr
 ;
 ; NO-FAST-P8-LABEL: testdmin_fast:
 ; NO-FAST-P8:       # %bb.0: # %entry
-; NO-FAST-P8-NEXT:    xscmpudp cr0, f1, f2
-; NO-FAST-P8-NEXT:    bltlr cr0
-; NO-FAST-P8-NEXT:  # %bb.1: # %entry
-; NO-FAST-P8-NEXT:    fmr f1, f2
+; NO-FAST-P8-NEXT:    xssubdp f0, f1, f2
+; NO-FAST-P8-NEXT:    fsel f1, f0, f2, f1
 ; NO-FAST-P8-NEXT:    blr
 entry:
   %cmp = fcmp fast olt double %a, %b