Skip to content

Commit

Permalink
[X86] combineMOVMSK - fold movmsk(icmp_eq(and(x,c1),c1)) -> movmsk(shl(x,c2)) iff pow2splat(c1)
Browse files Browse the repository at this point in the history

We already have a similar fold for movmsk(icmp_eq(and(x,c1),0)), which this fold could probably be merged with, but doing so will involve generalizing a lot of the KnownBits code.
  • Loading branch information
RKSimon committed Apr 3, 2023
1 parent 39d7bf6 commit 6865cff
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 12 deletions.
26 changes: 26 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54537,6 +54537,32 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(NotMask, DL, VT));
}

// Fold movmsk(icmp_eq(and(x,c1),c1)) -> movmsk(shl(x,c2))
// iff pow2splat(c1).
// Use KnownBits to determine if only a single bit is non-zero
// in each element (pow2 or zero), and shift that bit to the msb.
// TODO: Merge with the movmsk(icmp_eq(and(x,c1),0)) fold below?
if (Src.getOpcode() == X86ISD::PCMPEQ &&
Src.getOperand(0).getOpcode() == ISD::AND &&
Src.getOperand(1) == Src.getOperand(0).getOperand(1)) {
KnownBits KnownSrc = DAG.computeKnownBits(Src.getOperand(1));
if (KnownSrc.countMaxPopulation() == 1) {
SDLoc DL(N);
MVT ShiftVT = SrcVT;
SDValue ShiftSrc = Src.getOperand(0);
if (ShiftVT.getScalarType() == MVT::i8) {
// vXi8 shifts - we only care about the signbit so can use PSLLW.
ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
ShiftSrc = DAG.getBitcast(ShiftVT, ShiftSrc);
}
unsigned ShiftAmt = KnownSrc.countMinLeadingZeros();
ShiftSrc = getTargetVShiftByConstNode(X86ISD::VSHLI, DL, ShiftVT,
ShiftSrc, ShiftAmt, DAG);
ShiftSrc = DAG.getBitcast(SrcVT, ShiftSrc);
return DAG.getNode(X86ISD::MOVMSK, DL, VT, ShiftSrc);
}
}

// Fold movmsk(icmp_eq(and(x,c1),0)) -> movmsk(not(shl(x,c2)))
// iff pow2splat(c1).
// Use KnownBits to determine if only a single bit is non-zero
Expand Down
16 changes: 4 additions & 12 deletions llvm/test/CodeGen/X86/bitcast-vector-bool.ll
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,7 @@ define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind {
define i1 @trunc_v4i32_cmp(<4 x i32> %a0) nounwind {
; SSE2-SSSE3-LABEL: trunc_v4i32_cmp:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pslld $31, %xmm0
; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT: xorl $15, %eax
; SSE2-SSSE3-NEXT: sete %al
Expand Down Expand Up @@ -263,9 +261,7 @@ define i8 @bitcast_v16i8_to_v2i8(<16 x i8> %a0) nounwind {
define i1 @trunc_v16i8_cmp(<16 x i8> %a0) nounwind {
; SSE2-SSSE3-LABEL: trunc_v16i8_cmp:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
; SSE2-SSSE3-NEXT: psllw $7, %xmm0
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: xorl $65535, %eax # imm = 0xFFFF
; SSE2-SSSE3-NEXT: setne %al
Expand Down Expand Up @@ -402,9 +398,7 @@ define i1 @trunc_v8i132_cmp(<8 x i32> %a0) nounwind {
; SSE2-SSSE3-LABEL: trunc_v8i132_cmp:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pslld $31, %xmm0
; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT: xorl $15, %eax
; SSE2-SSSE3-NEXT: setne %al
Expand Down Expand Up @@ -588,9 +582,7 @@ define i1 @trunc_v32i8_cmp(<32 x i8> %a0) nounwind {
; SSE2-SSSE3-LABEL: trunc_v32i8_cmp:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
; SSE2-SSSE3-NEXT: psllw $7, %xmm0
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: xorl $65535, %eax # imm = 0xFFFF
; SSE2-SSSE3-NEXT: sete %al
Expand Down

0 comments on commit 6865cff

Please sign in to comment.