Revert "Transform vector SET{LE/ULT/ULE} -> SETLT and SET{GE/UGT/UGE}…
Browse files Browse the repository at this point in the history
… -> SETGT if possible"

This reverts commit f3732c2.
goldsteinn committed Feb 15, 2023
1 parent 7adf26e commit 8bd0e94
Showing 6 changed files with 3,611 additions and 2,817 deletions.
119 changes: 16 additions & 103 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24725,8 +24725,7 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
/// incremented or decremented. If incrementing or decrementing would result in
/// unsigned overflow or underflow or this is not a simple vector constant,
/// return an empty value.
static SDValue incDecVectorConstant(SDValue V, SelectionDAG &DAG, bool IsInc,
bool NSW) {
static SDValue incDecVectorConstant(SDValue V, SelectionDAG &DAG, bool IsInc) {
auto *BV = dyn_cast<BuildVectorSDNode>(V.getNode());
if (!BV)
return SDValue();
@@ -24745,9 +24744,6 @@ static SDValue incDecVectorConstant(SDValue V, SelectionDAG &DAG, bool IsInc,
const APInt &EltC = Elt->getAPIntValue();
if ((IsInc && EltC.isMaxValue()) || (!IsInc && EltC.isZero()))
return SDValue();
if (NSW && ((IsInc && EltC.isMaxSignedValue()) ||
(!IsInc && EltC.isMinSignedValue())))
return SDValue();

NewVecC.push_back(DAG.getConstant(EltC + (IsInc ? 1 : -1), DL, EltVT));
}
@@ -24781,8 +24777,7 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
// Only do this pre-AVX since vpcmp* is no longer destructive.
if (Subtarget.hasAVX())
return SDValue();
SDValue ULEOp1 =
incDecVectorConstant(Op1, DAG, /*IsInc*/ false, /*NSW*/ false);
SDValue ULEOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/false);
if (!ULEOp1)
return SDValue();
Op1 = ULEOp1;
@@ -24793,8 +24788,7 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
// This is beneficial because materializing a constant 0 for the PCMPEQ is
// probably cheaper than XOR+PCMPGT using 2 different vector constants:
// cmpgt (xor X, SignMaskC) CmpC --> cmpeq (usubsat (CmpC+1), X), 0
SDValue UGEOp1 =
incDecVectorConstant(Op1, DAG, /*IsInc*/ true, /*NSW*/ false);
SDValue UGEOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/true);
if (!UGEOp1)
return SDValue();
Op1 = Op0;
@@ -25087,16 +25081,14 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
// condition to avoid an invert.
if (Cond == ISD::SETUGT) {
// X > C --> X >= (C+1) --> X == umax(X, C+1)
if (SDValue UGTOp1 =
incDecVectorConstant(Op1, DAG, /*IsInc*/ true, /*NSW*/ false)) {
if (SDValue UGTOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/true)) {
Op1 = UGTOp1;
Cond = ISD::SETUGE;
}
}
if (Cond == ISD::SETULT) {
// X < C --> X <= (C-1) --> X == umin(X, C-1)
if (SDValue ULTOp1 =
incDecVectorConstant(Op1, DAG, /*IsInc*/ false, /*NSW*/ false)) {
if (SDValue ULTOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/false)) {
Op1 = ULTOp1;
Cond = ISD::SETULE;
}
@@ -53897,25 +53889,6 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
return SDValue();
}

/// If we have AVX512, but not BWI and this is a vXi16/vXi8 setcc, just
/// pre-promote its result type since vXi1 vectors don't get promoted
/// during type legalization.
/// NOTE: The element count check is to ignore operand types that need to
/// go through type promotion to a 128-bit vector.
static SDValue truncateAVX512SetCCNoBWI(EVT VT, EVT OpVT, SDValue LHS,
SDValue RHS, ISD::CondCode CC, SDLoc DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.isVector() &&
VT.getVectorElementType() == MVT::i1 &&
(OpVT.getVectorElementType() == MVT::i8 ||
OpVT.getVectorElementType() == MVT::i16)) {
SDValue Setcc = DAG.getSetCC(DL, OpVT, LHS, RHS, CC);
return DAG.getNode(ISD::TRUNCATE, DL, VT, Setcc);
}
return SDValue();
}

static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -54051,79 +54024,19 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
}
}

// Try and make unsigned vector comparison signed. On pre AVX512 targets there
// only are unsigned comparisons (`PCMPGT`) and on AVX512 its often better to
// use `PCMPGT` if the result is mean to stay in a vector (and if its going to
// a mask, there are signed AVX512 comparisons).
if (VT.isVector() && OpVT.isVector() && OpVT.isInteger()) {
bool CanMakeSigned = false;
if (ISD::isUnsignedIntSetCC(CC)) {
KnownBits CmpKnown = KnownBits::commonBits(DAG.computeKnownBits(LHS),
DAG.computeKnownBits(RHS));
// If we know LHS/RHS share the same sign bit at each element we can
// make this signed.
// NOTE: `computeKnownBits` on a vector type aggregates common bits
// across all lanes. So a pattern where the sign varies from lane to
// lane, but at each lane Sign(LHS) is known to equal Sign(RHS), will be
// missed. We could get around this by demanding each lane
// independently, but this isn't the most important optimization and
// that may eat into compile time.
CanMakeSigned =
CmpKnown.Zero.isSignBitSet() || CmpKnown.One.isSignBitSet();
}
if (CanMakeSigned || ISD::isSignedIntSetCC(CC)) {
SDValue LHSOut = LHS;
SDValue RHSOut = RHS;
ISD::CondCode NewCC = CC;
switch (CC) {
case ISD::SETGE:
case ISD::SETUGE:
if (SDValue NewLHS = incDecVectorConstant(LHS, DAG, /*IsInc*/ true,
/*NSW*/ true))
LHSOut = NewLHS;
else if (SDValue NewRHS = incDecVectorConstant(
RHS, DAG, /*IsInc*/ false, /*NSW*/ true))
RHSOut = NewRHS;
else
break;

[[fallthrough]];
case ISD::SETUGT:
NewCC = ISD::SETGT;
break;

case ISD::SETLE:
case ISD::SETULE:
if (SDValue NewLHS = incDecVectorConstant(LHS, DAG, /*IsInc*/ false,
/*NSW*/ true))
LHSOut = NewLHS;
else if (SDValue NewRHS = incDecVectorConstant(RHS, DAG, /*IsInc*/ true,
/*NSW*/ true))
RHSOut = NewRHS;
else
break;

[[fallthrough]];
case ISD::SETULT:
// Will be swapped to SETGT in LowerVSETCC*.
NewCC = ISD::SETLT;
break;
default:
break;
}
if (NewCC != CC) {
if (SDValue R = truncateAVX512SetCCNoBWI(VT, OpVT, LHSOut, RHSOut,
NewCC, DL, DAG, Subtarget))
return R;
return DAG.getSetCC(DL, VT, LHSOut, RHSOut, NewCC);
}
}
// If we have AVX512, but not BWI and this is a vXi16/vXi8 setcc, just
// pre-promote its result type since vXi1 vectors don't get promoted
// during type legalization.
// NOTE: The element count check is to ignore operand types that need to
// go through type promotion to a 128-bit vector.
if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.isVector() &&
VT.getVectorElementType() == MVT::i1 &&
(OpVT.getVectorElementType() == MVT::i8 ||
OpVT.getVectorElementType() == MVT::i16)) {
SDValue Setcc = DAG.getSetCC(DL, OpVT, LHS, RHS, CC);
return DAG.getNode(ISD::TRUNCATE, DL, VT, Setcc);
}

if (SDValue R =
truncateAVX512SetCCNoBWI(VT, OpVT, LHS, RHS, CC, DL, DAG, Subtarget))
return R;

// For an SSE1-only target, lower a comparison of v4f32 to X86ISD::CMPP early
// to avoid scalarization via legalization because v4i32 is not a legal type.
if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32 &&
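The hunks above restore `incDecVectorConstant` to its single-flag form, dropping the `NSW` parameter added by the reverted commit. As a reading aid, here is a minimal plain-C++ sketch of what the helper does after the revert: adjust every constant lane by +1 or -1, and give up if any lane would wrap. The name `incDecVectorConstantSketch` is illustrative and this is a standalone analogue, not the LLVM SelectionDAG API.

```cpp
#include <cstdint>
#include <limits>
#include <optional>
#include <vector>

// Plain C++ analogue of incDecVectorConstant without the NSW flag:
// bump each lane by +1 (IsInc) or -1, or report failure if any lane
// would overflow on increment or underflow on decrement.
std::optional<std::vector<uint32_t>>
incDecVectorConstantSketch(const std::vector<uint32_t> &Elts, bool IsInc) {
  std::vector<uint32_t> NewElts;
  for (uint32_t E : Elts) {
    if ((IsInc && E == std::numeric_limits<uint32_t>::max()) ||
        (!IsInc && E == 0))
      return std::nullopt; // would wrap: caller keeps the original compare
    NewElts.push_back(IsInc ? E + 1 : E - 1);
  }
  return NewElts;
}
```

LowerVSETCC then uses the adjusted constant for rewrites such as `X > C --> X >= (C+1) --> X == umax(X, C+1)`, as noted in the comments in the hunk above.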
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll
@@ -246,7 +246,7 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; AVX512VL-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogd $200, %xmm1, %xmm2, %xmm0
; AVX512VL-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
; AVX512VL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
; AVX512VL-NEXT: kshiftrw $1, %k0, %k1
; AVX512VL-NEXT: kmovw %k1, %edx
; AVX512VL-NEXT: kshiftrw $2, %k0, %k1
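This hunk swaps between an unsigned mask compare (`vpcmpnleud`) and a signed one (`vpcmpgtd`). The reverted combine was only allowed to replace an unsigned predicate with a signed one when both operands were known to share their sign bit (the `CanMakeSigned` logic removed above). A quick scalar sketch of why that is sound, with illustrative names and sample values that are not part of the patch:

```cpp
#include <cassert>
#include <cstdint>

// When two 32-bit values have the same sign bit, the unsigned and the
// signed interpretation order them identically, so an unsigned compare
// may be replaced by a signed one (two's-complement conversion assumed).
bool sameSignBit(uint32_t A, uint32_t B) { return ((A ^ B) >> 31) == 0; }

int main() {
  uint32_t Samples[] = {0u, 1u, 42u, 0x7fffffffu,
                        0x80000000u, 0xfffffffeu, 0xffffffffu};
  for (uint32_t A : Samples)
    for (uint32_t B : Samples)
      if (sameSignBit(A, B))
        assert((A > B) == (static_cast<int32_t>(A) > static_cast<int32_t>(B)));
  return 0;
}
```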
20 changes: 14 additions & 6 deletions llvm/test/CodeGen/X86/vector-compare-simplify.ll
@@ -36,7 +36,9 @@ define <4 x i32> @sgt_min(<4 x i32> %x) {
define <4 x i32> @sle_min(<4 x i32> %x) {
; CHECK-LABEL: sle_min:
; CHECK: # %bb.0:
; CHECK-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: retq
%cmp = icmp sle <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
%r = sext <4 x i1> %cmp to <4 x i32>
@@ -78,7 +80,10 @@ define <4 x i32> @slt_max(<4 x i32> %x) {
define <4 x i32> @sge_max(<4 x i32> %x) {
; CHECK-LABEL: sge_max:
; CHECK: # %bb.0:
; CHECK-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2147483647,2147483647,2147483647,2147483647]
; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: retq
%cmp = icmp sge <4 x i32> %x, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
%r = sext <4 x i1> %cmp to <4 x i32>
@@ -192,7 +197,10 @@ define <4 x i32> @slt_min_plus1(<4 x i32> %x) {
define <4 x i32> @sge_min_plus1(<4 x i32> %x) {
; CHECK-LABEL: sge_min_plus1:
; CHECK: # %bb.0:
; CHECK-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2147483649,2147483649,2147483649,2147483649]
; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: retq
%cmp = icmp sge <4 x i32> %x, <i32 -2147483647, i32 -2147483647, i32 -2147483647, i32 -2147483647>
%r = sext <4 x i1> %cmp to <4 x i32>
@@ -212,9 +220,9 @@ define <4 x i32> @sgt_max_minus1(<4 x i32> %x) {
define <4 x i32> @sle_max_minus1(<4 x i32> %x) {
; CHECK-LABEL: sle_max_minus1:
; CHECK: # %bb.0:
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2147483647,2147483647,2147483647,2147483647]
; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: retq
%cmp = icmp sle <4 x i32> %x, <i32 2147483646, i32 2147483646, i32 2147483646, i32 2147483646>
%r = sext <4 x i1> %cmp to <4 x i32>
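Several of the CHECK sequences in this file (`sle_min`, `sge_max`, `sle_max_minus1`) express a signed less-or-equal or greater-or-equal on SSE2 by computing the opposite strict comparison with `pcmpgtd` and then inverting the lane mask with `pxor` against all-ones, since SSE2 only provides packed-integer equal and signed greater-than compares. A small intrinsics sketch of the `sle` form of that pattern, with an illustrative helper name and assuming an SSE2 target (not taken from the patch):

```cpp
#include <emmintrin.h>

// x <= c per signed 32-bit lane, built from the only packed-integer
// compares SSE2 offers (pcmpeqd / pcmpgtd): compute x > c, then invert.
static __m128i sle_epi32(__m128i X, __m128i C) {
  __m128i Gt = _mm_cmpgt_epi32(X, C);        // pcmpgtd: X > C
  __m128i AllOnes = _mm_cmpeq_epi32(Gt, Gt); // pcmpeqd reg,reg: all-ones
  return _mm_xor_si128(Gt, AllOnes);         // pxor: NOT(X > C) == X <= C
}
```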
