Revert "Transform vector SET{LE/ULT/ULE} -> SETLT and SET{GE/UGT/UGE}…
Browse files Browse the repository at this point in the history
… -> SETGT if possible"

This reverts commit f3732c2.
goldsteinn committed Feb 15, 2023
1 parent 7adf26e commit 8bd0e94
Showing 6 changed files with 3,611 additions and 2,817 deletions.
119 changes: 16 additions & 103 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24725,8 +24725,7 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
/// incremented or decremented. If incrementing or decrementing would result in
/// unsigned overflow or underflow or this is not a simple vector constant,
/// return an empty value.
static SDValue incDecVectorConstant(SDValue V, SelectionDAG &DAG, bool IsInc,
bool NSW) {
static SDValue incDecVectorConstant(SDValue V, SelectionDAG &DAG, bool IsInc) {
auto *BV = dyn_cast<BuildVectorSDNode>(V.getNode());
if (!BV)
return SDValue();
@@ -24745,9 +24744,6 @@ static SDValue incDecVectorConstant(SDValue V, SelectionDAG &DAG, bool IsInc,
const APInt &EltC = Elt->getAPIntValue();
if ((IsInc && EltC.isMaxValue()) || (!IsInc && EltC.isZero()))
return SDValue();
if (NSW && ((IsInc && EltC.isMaxSignedValue()) ||
(!IsInc && EltC.isMinSignedValue())))
return SDValue();

NewVecC.push_back(DAG.getConstant(EltC + (IsInc ? 1 : -1), DL, EltVT));
}
@@ -24781,8 +24777,7 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
// Only do this pre-AVX since vpcmp* is no longer destructive.
if (Subtarget.hasAVX())
return SDValue();
SDValue ULEOp1 =
incDecVectorConstant(Op1, DAG, /*IsInc*/ false, /*NSW*/ false);
SDValue ULEOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/false);
if (!ULEOp1)
return SDValue();
Op1 = ULEOp1;
@@ -24793,8 +24788,7 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
// This is beneficial because materializing a constant 0 for the PCMPEQ is
// probably cheaper than XOR+PCMPGT using 2 different vector constants:
// cmpgt (xor X, SignMaskC) CmpC --> cmpeq (usubsat (CmpC+1), X), 0
SDValue UGEOp1 =
incDecVectorConstant(Op1, DAG, /*IsInc*/ true, /*NSW*/ false);
SDValue UGEOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/true);
if (!UGEOp1)
return SDValue();
Op1 = Op0;
@@ -25087,16 +25081,14 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
// condition to avoid an invert.
if (Cond == ISD::SETUGT) {
// X > C --> X >= (C+1) --> X == umax(X, C+1)
if (SDValue UGTOp1 =
incDecVectorConstant(Op1, DAG, /*IsInc*/ true, /*NSW*/ false)) {
if (SDValue UGTOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/true)) {
Op1 = UGTOp1;
Cond = ISD::SETUGE;
}
}
if (Cond == ISD::SETULT) {
// X < C --> X <= (C-1) --> X == umin(X, C-1)
if (SDValue ULTOp1 =
incDecVectorConstant(Op1, DAG, /*IsInc*/ false, /*NSW*/ false)) {
if (SDValue ULTOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/false)) {
Op1 = ULTOp1;
Cond = ISD::SETULE;
}
@@ -53897,25 +53889,6 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
return SDValue();
}

/// If we have AVX512, but not BWI and this is a vXi16/vXi8 setcc, just
/// pre-promote its result type since vXi1 vectors don't get promoted
/// during type legalization.
/// NOTE: The element count check is to ignore operand types that need to
/// go through type promotion to a 128-bit vector.
static SDValue truncateAVX512SetCCNoBWI(EVT VT, EVT OpVT, SDValue LHS,
SDValue RHS, ISD::CondCode CC, SDLoc DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.isVector() &&
VT.getVectorElementType() == MVT::i1 &&
(OpVT.getVectorElementType() == MVT::i8 ||
OpVT.getVectorElementType() == MVT::i16)) {
SDValue Setcc = DAG.getSetCC(DL, OpVT, LHS, RHS, CC);
return DAG.getNode(ISD::TRUNCATE, DL, VT, Setcc);
}
return SDValue();
}

static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -54051,79 +54024,19 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
}
}

// Try and make unsigned vector comparison signed. On pre AVX512 targets there
// only are unsigned comparisons (`PCMPGT`) and on AVX512 its often better to
// use `PCMPGT` if the result is mean to stay in a vector (and if its going to
// a mask, there are signed AVX512 comparisons).
if (VT.isVector() && OpVT.isVector() && OpVT.isInteger()) {
bool CanMakeSigned = false;
if (ISD::isUnsignedIntSetCC(CC)) {
KnownBits CmpKnown = KnownBits::commonBits(DAG.computeKnownBits(LHS),
DAG.computeKnownBits(RHS));
// If we know LHS/RHS share the same sign bit at each element we can
// make this signed.
// NOTE: `computeKnownBits` on a vector type aggregates common bits
// across all lanes. So a pattern where the sign varies from lane to
// lane, but at each lane Sign(LHS) is known to equal Sign(RHS), will be
// missed. We could get around this by demanding each lane
// independently, but this isn't the most important optimization and
// that may eat into compile time.
CanMakeSigned =
CmpKnown.Zero.isSignBitSet() || CmpKnown.One.isSignBitSet();
}
if (CanMakeSigned || ISD::isSignedIntSetCC(CC)) {
SDValue LHSOut = LHS;
SDValue RHSOut = RHS;
ISD::CondCode NewCC = CC;
switch (CC) {
case ISD::SETGE:
case ISD::SETUGE:
if (SDValue NewLHS = incDecVectorConstant(LHS, DAG, /*IsInc*/ true,
/*NSW*/ true))
LHSOut = NewLHS;
else if (SDValue NewRHS = incDecVectorConstant(
RHS, DAG, /*IsInc*/ false, /*NSW*/ true))
RHSOut = NewRHS;
else
break;

[[fallthrough]];
case ISD::SETUGT:
NewCC = ISD::SETGT;
break;

case ISD::SETLE:
case ISD::SETULE:
if (SDValue NewLHS = incDecVectorConstant(LHS, DAG, /*IsInc*/ false,
/*NSW*/ true))
LHSOut = NewLHS;
else if (SDValue NewRHS = incDecVectorConstant(RHS, DAG, /*IsInc*/ true,
/*NSW*/ true))
RHSOut = NewRHS;
else
break;

[[fallthrough]];
case ISD::SETULT:
// Will be swapped to SETGT in LowerVSETCC*.
NewCC = ISD::SETLT;
break;
default:
break;
}
if (NewCC != CC) {
if (SDValue R = truncateAVX512SetCCNoBWI(VT, OpVT, LHSOut, RHSOut,
NewCC, DL, DAG, Subtarget))
return R;
return DAG.getSetCC(DL, VT, LHSOut, RHSOut, NewCC);
}
}
// If we have AVX512, but not BWI and this is a vXi16/vXi8 setcc, just
// pre-promote its result type since vXi1 vectors don't get promoted
// during type legalization.
// NOTE: The element count check is to ignore operand types that need to
// go through type promotion to a 128-bit vector.
if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.isVector() &&
VT.getVectorElementType() == MVT::i1 &&
(OpVT.getVectorElementType() == MVT::i8 ||
OpVT.getVectorElementType() == MVT::i16)) {
SDValue Setcc = DAG.getSetCC(DL, OpVT, LHS, RHS, CC);
return DAG.getNode(ISD::TRUNCATE, DL, VT, Setcc);
}

if (SDValue R =
truncateAVX512SetCCNoBWI(VT, OpVT, LHS, RHS, CC, DL, DAG, Subtarget))
return R;

// For an SSE1-only target, lower a comparison of v4f32 to X86ISD::CMPP early
// to avoid scalarization via legalization because v4i32 is not a legal type.
if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32 &&
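The hunks above restore `incDecVectorConstant` to its single-flag form, dropping the `NSW` parameter added by the reverted commit. As a reading aid, here is a minimal plain-C++ sketch of what the helper does after the revert: adjust every constant lane by +1 or -1, and give up if any lane would wrap. The name `incDecVectorConstantSketch` is illustrative and this is a standalone analogue, not the LLVM SelectionDAG API.

```cpp
#include <cstdint>
#include <limits>
#include <optional>
#include <vector>

// Plain C++ analogue of incDecVectorConstant without the NSW flag:
// bump each lane by +1 (IsInc) or -1, or report failure if any lane
// would overflow on increment or underflow on decrement.
std::optional<std::vector<uint32_t>>
incDecVectorConstantSketch(const std::vector<uint32_t> &Elts, bool IsInc) {
  std::vector<uint32_t> NewElts;
  for (uint32_t E : Elts) {
    if ((IsInc && E == std::numeric_limits<uint32_t>::max()) ||
        (!IsInc && E == 0))
      return std::nullopt; // would wrap: caller keeps the original compare
    NewElts.push_back(IsInc ? E + 1 : E - 1);
  }
  return NewElts;
}
```

LowerVSETCC then uses the adjusted constant for rewrites such as `X > C --> X >= (C+1) --> X == umax(X, C+1)`, as noted in the comments in the hunk above.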
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll
@@ -246,7 +246,7 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; AVX512VL-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogd $200, %xmm1, %xmm2, %xmm0
; AVX512VL-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
; AVX512VL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
; AVX512VL-NEXT: kshiftrw $1, %k0, %k1
; AVX512VL-NEXT: kmovw %k1, %edx
; AVX512VL-NEXT: kshiftrw $2, %k0, %k1
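This hunk swaps between an unsigned mask compare (`vpcmpnleud`) and a signed one (`vpcmpgtd`). The reverted combine was only allowed to replace an unsigned predicate with a signed one when both operands were known to share their sign bit (the `CanMakeSigned` logic removed above). A quick scalar sketch of why that is sound, with illustrative names and sample values that are not part of the patch:

```cpp
#include <cassert>
#include <cstdint>

// When two 32-bit values have the same sign bit, the unsigned and the
// signed interpretation order them identically, so an unsigned compare
// may be replaced by a signed one (two's-complement conversion assumed).
bool sameSignBit(uint32_t A, uint32_t B) { return ((A ^ B) >> 31) == 0; }

int main() {
  uint32_t Samples[] = {0u, 1u, 42u, 0x7fffffffu,
                        0x80000000u, 0xfffffffeu, 0xffffffffu};
  for (uint32_t A : Samples)
    for (uint32_t B : Samples)
      if (sameSignBit(A, B))
        assert((A > B) == (static_cast<int32_t>(A) > static_cast<int32_t>(B)));
  return 0;
}
```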
20 changes: 14 additions & 6 deletions llvm/test/CodeGen/X86/vector-compare-simplify.ll
@@ -36,7 +36,9 @@ define <4 x i32> @sgt_min(<4 x i32> %x) {
define <4 x i32> @sle_min(<4 x i32> %x) {
; CHECK-LABEL: sle_min:
; CHECK: # %bb.0:
; CHECK-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: retq
%cmp = icmp sle <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
%r = sext <4 x i1> %cmp to <4 x i32>
@@ -78,7 +80,10 @@ define <4 x i32> @slt_max(<4 x i32> %x) {
define <4 x i32> @sge_max(<4 x i32> %x) {
; CHECK-LABEL: sge_max:
; CHECK: # %bb.0:
; CHECK-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2147483647,2147483647,2147483647,2147483647]
; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: retq
%cmp = icmp sge <4 x i32> %x, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
%r = sext <4 x i1> %cmp to <4 x i32>
@@ -192,7 +197,10 @@ define <4 x i32> @slt_min_plus1(<4 x i32> %x) {
define <4 x i32> @sge_min_plus1(<4 x i32> %x) {
; CHECK-LABEL: sge_min_plus1:
; CHECK: # %bb.0:
; CHECK-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2147483649,2147483649,2147483649,2147483649]
; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: retq
%cmp = icmp sge <4 x i32> %x, <i32 -2147483647, i32 -2147483647, i32 -2147483647, i32 -2147483647>
%r = sext <4 x i1> %cmp to <4 x i32>
@@ -212,9 +220,9 @@ define <4 x i32> @sgt_max_minus1(<4 x i32> %x) {
define <4 x i32> @sle_max_minus1(<4 x i32> %x) {
; CHECK-LABEL: sle_max_minus1:
; CHECK: # %bb.0:
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2147483647,2147483647,2147483647,2147483647]
; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: retq
%cmp = icmp sle <4 x i32> %x, <i32 2147483646, i32 2147483646, i32 2147483646, i32 2147483646>
%r = sext <4 x i1> %cmp to <4 x i32>
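Several of the CHECK sequences in this file (`sle_min`, `sge_max`, `sle_max_minus1`) express a signed less-or-equal or greater-or-equal on SSE2 by computing the opposite strict comparison with `pcmpgtd` and then inverting the lane mask with `pxor` against all-ones, since SSE2 only provides packed-integer equal and signed greater-than compares. A small intrinsics sketch of the `sle` form of that pattern, with an illustrative helper name and assuming an SSE2 target (not taken from the patch):

```cpp
#include <emmintrin.h>

// x <= c per signed 32-bit lane, built from the only packed-integer
// compares SSE2 offers (pcmpeqd / pcmpgtd): compute x > c, then invert.
static __m128i sle_epi32(__m128i X, __m128i C) {
  __m128i Gt = _mm_cmpgt_epi32(X, C);        // pcmpgtd: X > C
  __m128i AllOnes = _mm_cmpeq_epi32(Gt, Gt); // pcmpeqd reg,reg: all-ones
  return _mm_xor_si128(Gt, AllOnes);         // pxor: NOT(X > C) == X <= C
}
```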
