Skip to content

Commit

Permalink
[X86] Fold PACKSS(NOT(X),NOT(Y)) -> NOT(PACKSS(X,Y))
Browse files Browse the repository at this point in the history
  • Loading branch information
RKSimon committed Jul 14, 2023
1 parent f8ffd67 commit 720debc
Show file tree
Hide file tree
Showing 13 changed files with 378 additions and 533 deletions.
17 changes: 17 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49538,6 +49538,23 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineHorizOpWithShuffle(N, DAG, Subtarget))
return V;

// Try to fold PACKSS(NOT(X),NOT(Y)) -> NOT(PACKSS(X,Y)).
// Currently limit this to allsignbits cases only.
// An operand qualifies when it is undef, or when ComputeNumSignBits reports
// SrcBitsPerElt sign bits for it (presumably meaning every source element is
// all-zeros or all-ones, so the saturating pack cannot change its value -
// confirm against the definition of SrcBitsPerElt earlier in this function).
if (IsSigned &&
(N0.isUndef() || DAG.ComputeNumSignBits(N0) == SrcBitsPerElt) &&
(N1.isUndef() || DAG.ComputeNumSignBits(N1) == SrcBitsPerElt)) {
// Undef operands are passed through unchanged. Otherwise IsNOT is expected
// to yield the un-inverted source, or a null SDValue when the operand is not
// a NOT (helper is defined outside this hunk - verify).
SDValue Not0 = N0.isUndef() ? N0 : IsNOT(N0, DAG);
SDValue Not1 = N1.isUndef() ? N1 : IsNOT(N1, DAG);
// Only fold when both operands produced a usable value.
if (Not0 && Not1) {
SDLoc DL(N);
MVT SrcVT = N0.getSimpleValueType();
// Bitcast each un-inverted value to the original operand type before
// re-packing (presumably IsNOT may return a differently-typed vector -
// TODO confirm).
SDValue Pack =
DAG.getNode(X86ISD::PACKSS, DL, VT, DAG.getBitcast(SrcVT, Not0),
DAG.getBitcast(SrcVT, Not1));
// Apply the inversion once, to the packed result.
return DAG.getNOT(DL, Pack, VT);
}
}

// Try to combine a PACKUSWB/PACKSSWB implemented truncate with a regular
// truncate to create a larger truncate.
if (Subtarget.hasAVX512() &&
Expand Down
15 changes: 6 additions & 9 deletions llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -245,12 +245,10 @@ define <4 x i1> @illegal_abs_to_ne_and(<4 x i64> %x) {
; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [129,129]
; SSE41-NEXT: pcmpeqq %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm3, %xmm3
; SSE41-NEXT: pxor %xmm3, %xmm1
; SSE41-NEXT: pcmpeqq %xmm0, %xmm2
; SSE41-NEXT: pxor %xmm3, %xmm2
; SSE41-NEXT: packssdw %xmm1, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm2, %xmm0
; SSE41-NEXT: retq
;
; SSE2-LABEL: illegal_abs_to_ne_and:
Expand All @@ -267,13 +265,12 @@ define <4 x i1> @illegal_abs_to_ne_and(<4 x i64> %x) {
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
; SSE2-NEXT: pand %xmm1, %xmm3
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm1, %xmm3
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2]
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: packssdw %xmm3, %xmm0
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: retq
%abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %x, i1 true)
%cmp = icmp ne <4 x i64> %abs, <i64 129, i64 129, i64 129, i64 129>
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/X86/ispow2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -139,15 +139,14 @@ define <4 x i1> @neither_pow2_non_zero_4xv64(<4 x i64> %xin) {
; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm3
; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,0,3,2]
; CHECK-NOBMI-NEXT: pand %xmm3, %xmm4
; CHECK-NOBMI-NEXT: pxor %xmm2, %xmm4
; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm3
; CHECK-NOBMI-NEXT: paddq %xmm2, %xmm3
; CHECK-NOBMI-NEXT: pand %xmm3, %xmm0
; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
; CHECK-NOBMI-NEXT: pand %xmm1, %xmm0
; CHECK-NOBMI-NEXT: pxor %xmm2, %xmm0
; CHECK-NOBMI-NEXT: packssdw %xmm4, %xmm0
; CHECK-NOBMI-NEXT: pxor %xmm2, %xmm0
; CHECK-NOBMI-NEXT: retq
;
; CHECK-AVX2-LABEL: neither_pow2_non_zero_4xv64:
Expand Down

0 comments on commit 720debc

Please sign in to comment.