[X86] Improve lowering of (v2i64 (setgt X, -1)) on pre-SSE42 targets. Enable v2i64 in foldVectorXorShiftIntoCmp.

Similar to D72302, but for the canonical form of the opposite case. I've changed foldVectorXorShiftIntoCmp to form a target-independent setcc node instead of an X86ISD::PCMPGT node, and enabled it for v2i64 on pre-SSE4.2 targets. The setcc should eventually get lowered to PCMPGT or the new v2i64 sequence.

Differential Revision: https://reviews.llvm.org/D72318
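
As an illustration only (not part of the commit), here is a rough SSE2 intrinsics sketch of the three-instruction sequence this lowering targets for (v2i64 (setgt X, -1)): materialize all-ones with PCMPEQD, compare with PCMPGTD, then PSHUFD the sign-carrying odd dwords into both halves of each 64-bit lane. The function name and test values below are invented for the example.

```cpp
// Sketch: the new SSE2 sequence for (v2i64 (setgt X, -1)), checked against a
// scalar reference.  Names and inputs are invented for this example.
#include <emmintrin.h>
#include <cstdint>
#include <cstdio>
#include <cstring>

static __m128i sgt_minus1_v2i64_sse2(__m128i x) {
  __m128i ones = _mm_cmpeq_epi32(x, x);                   // pcmpeqd: all-ones
  __m128i gt = _mm_cmpgt_epi32(x, ones);                  // pcmpgtd vs -1
  return _mm_shuffle_epi32(gt, _MM_SHUFFLE(3, 3, 1, 1));  // pshufd [1,1,3,3]
}

int main() {
  int64_t in[2] = {-5, 42}, out[2];
  __m128i x;
  std::memcpy(&x, in, sizeof(x));
  __m128i r = sgt_minus1_v2i64_sse2(x);
  std::memcpy(out, &r, sizeof(out));
  for (int i = 0; i < 2; ++i)
    std::printf("lane %d: got %lld, expected %lld\n", i, (long long)out[i],
                (long long)(in[i] > -1 ? -1 : 0));  // scalar reference mask
  return 0;
}
```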
topperc committed Jan 7, 2020
1 parent b937669 commit afa8211
Showing 2 changed files with 21 additions and 13 deletions.
17 changes: 14 additions & 3 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -21597,6 +21597,17 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getBitcast(VT, Result);
}

+if (!FlipSigns && !Invert && ISD::isBuildVectorAllOnes(Op1.getNode())) {
+  Op0 = DAG.getBitcast(MVT::v4i32, Op0);
+  Op1 = DAG.getConstant(-1, dl, MVT::v4i32);
+
+  SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
+  static const int MaskHi[] = { 1, 1, 3, 3 };
+  SDValue Result = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);
+
+  return DAG.getBitcast(VT, Result);
+}

// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations. The lower
// compare is always unsigned.
@@ -40814,8 +40825,8 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
default: return SDValue();
case MVT::v16i8:
case MVT::v8i16:
-case MVT::v4i32: if (!Subtarget.hasSSE2()) return SDValue(); break;
-case MVT::v2i64: if (!Subtarget.hasSSE42()) return SDValue(); break;
+case MVT::v4i32:
+case MVT::v2i64: if (!Subtarget.hasSSE2()) return SDValue(); break;
case MVT::v32i8:
case MVT::v16i16:
case MVT::v8i32:
@@ -40839,7 +40850,7 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,

// Create a greater-than comparison against -1. We don't use the more obvious
// greater-than-or-equal-to-zero because SSE/AVX don't have that instruction.
-return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones);
+return DAG.getSetCC(SDLoc(N), VT, Shift.getOperand(0), Ones, ISD::SETGT);
}

/// Detect patterns of truncation with unsigned saturation:
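Aside (not from the repository): the combine's correctness rests on the identity that inverting an arithmetic-shift sign splat yields the same mask as a signed greater-than against -1, which is why emitting a generic setcc here is safe. A minimal scalar check of that identity, with made-up names:

```cpp
// Scalar sketch (illustrative only) of the identity behind
// foldVectorXorShiftIntoCmp: not(ashr(x, width-1)) is the same mask as
// "setgt x, -1", i.e. all-ones when x is non-negative and zero otherwise.
#include <cstdint>
#include <cstdio>

int main() {
  const int64_t samples[] = {0, 1, -1, 42, -42, INT64_MAX, INT64_MIN};
  for (int64_t x : samples) {
    int64_t not_sign_splat = ~(x >> 63);     // xor(ashr(x, 63), -1); signed >>
                                             // is arithmetic on mainstream ABIs
    int64_t setgt_mask = (x > -1) ? -1 : 0;  // setgt x, -1 as an i64 mask
    if (not_sign_splat != setgt_mask) {
      std::printf("mismatch for %lld\n", (long long)x);
      return 1;
    }
  }
  std::printf("identity holds for all samples\n");
  return 0;
}
```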
17 changes: 7 additions & 10 deletions llvm/test/CodeGen/X86/vector-pcmp.ll
@@ -61,10 +61,9 @@ define <4 x i32> @test_pcmpgtd(<4 x i32> %x) {
define <2 x i64> @test_pcmpgtq(<2 x i64> %x) {
; SSE2-LABEL: test_pcmpgtq:
; SSE2: # %bb.0:
-; SSE2-NEXT: psrad $31, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: retq
;
; SSE42-LABEL: test_pcmpgtq:
@@ -187,13 +186,11 @@ define <8 x i32> @test_pcmpgtd_256(<8 x i32> %x) {
define <4 x i64> @test_pcmpgtq_256(<4 x i64> %x) {
; SSE2-LABEL: test_pcmpgtq_256:
; SSE2: # %bb.0:
-; SSE2-NEXT: psrad $31, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; SSE2-NEXT: psrad $31, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE2-NEXT: pxor %xmm2, %xmm0
-; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: retq
;
; SSE42-LABEL: test_pcmpgtq_256:

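For comparison, an intrinsics sketch (illustrative, not taken from the test) of the old four-instruction SSE2 pattern the check lines used to expect (PSRAD + PSHUFD + PCMPEQD + PXOR) next to the new three-instruction pattern (PCMPEQD + PCMPGTD + PSHUFD); both produce the same per-lane mask, which is what the updated FileCheck lines reflect. Helper names and inputs are invented for this example.

```cpp
// Old pattern: splat the sign word, then invert it.  New pattern: compare the
// sign word against -1, then splat the result.  Same mask, one fewer
// instruction per vector.
#include <emmintrin.h>
#include <cstdint>
#include <cstdio>
#include <cstring>

static __m128i old_pattern(__m128i x) {
  __m128i sign = _mm_srai_epi32(x, 31);                     // psrad $31
  sign = _mm_shuffle_epi32(sign, _MM_SHUFFLE(3, 3, 1, 1));  // pshufd [1,1,3,3]
  return _mm_xor_si128(sign, _mm_cmpeq_epi32(x, x));        // pcmpeqd + pxor
}

static __m128i new_pattern(__m128i x) {
  __m128i gt = _mm_cmpgt_epi32(x, _mm_cmpeq_epi32(x, x));   // pcmpeqd + pcmpgtd
  return _mm_shuffle_epi32(gt, _MM_SHUFFLE(3, 3, 1, 1));    // pshufd [1,1,3,3]
}

int main() {
  int64_t in[2] = {-7, 7};
  __m128i x;
  std::memcpy(&x, in, sizeof(x));
  __m128i a = old_pattern(x), b = new_pattern(x);
  std::printf("patterns match: %d\n", std::memcmp(&a, &b, sizeof(a)) == 0);
  return 0;
}
```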