Skip to content

Commit

Permalink
Revert rG6aa7cc037f2f95c237c1d82c523f8857fa3a10c3 - "[X86] LowerVectorAllZero - add 512-bit support with AVX512 vptestnmd+kortestw patterns"
Browse files Browse the repository at this point in the history

Reverted - I need to adjust the implementation so that it can be properly refactored into a "LowerVectorAllEqual" function.
  • Loading branch information
RKSimon committed Mar 23, 2023
1 parent 3d65cd4 commit 7fef15e
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 35 deletions.
13 changes: 2 additions & 11 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24192,23 +24192,14 @@ static SDValue LowerVectorAllZero(const SDLoc &DL, SDValue V, ISD::CondCode CC,
DAG.getConstant(0, DL, IntVT));
}

// Split down to 128/256/512-bit vector.
unsigned TestSize =
Subtarget.useAVX512Regs() ? 512 : (Subtarget.hasAVX() ? 256 : 128);
// Split down to 128/256-bit vector.
unsigned TestSize = Subtarget.hasAVX() ? 256 : 128;
while (VT.getSizeInBits() > TestSize) {
auto Split = DAG.SplitVector(V, DL);
VT = Split.first.getValueType();
V = DAG.getNode(ISD::OR, DL, VT, Split.first, Split.second);
}

bool UseKORTEST = Subtarget.useAVX512Regs();
if (UseKORTEST && VT.is512BitVector()) {
V = DAG.getBitcast(MVT::v16i32, MaskBits(V));
V = DAG.getSetCC(DL, MVT::v16i1, V,
getZeroVector(MVT::v16i32, Subtarget, DAG, DL), ISD::SETEQ);
return DAG.getNode(X86ISD::KORTEST, DL, MVT::i32, V, V);
}

bool UsePTEST = Subtarget.hasSSE41();
if (UsePTEST) {
MVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
Expand Down
15 changes: 9 additions & 6 deletions llvm/test/CodeGen/X86/ptest.ll
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,9 @@ define i32 @veccond512(<16 x i32> %input) {
;
; AVX512-LABEL: veccond512:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vptest %ymm0, %ymm0
; AVX512-NEXT: je .LBB2_2
; AVX512-NEXT: # %bb.1: # %if-true-block
; AVX512-NEXT: xorl %eax, %eax
Expand Down Expand Up @@ -267,9 +268,10 @@ define i32 @vectest512(<16 x i32> %input) {
;
; AVX512-LABEL: vectest512:
; AVX512: # %bb.0:
; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: xorl %eax, %eax
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: vptest %ymm0, %ymm0
; AVX512-NEXT: setne %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
Expand Down Expand Up @@ -378,8 +380,9 @@ define i32 @vecsel512(<16 x i32> %input, i32 %a, i32 %b) {
; AVX512-LABEL: vecsel512:
; AVX512: # %bb.0:
; AVX512-NEXT: movl %edi, %eax
; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vptest %ymm0, %ymm0
; AVX512-NEXT: cmovel %esi, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
Expand Down
46 changes: 28 additions & 18 deletions llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,9 @@ define i1 @test_v8i64(<8 x i64> %a0) {
;
; AVX512-LABEL: test_v8i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vptest %ymm0, %ymm0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
Expand Down Expand Up @@ -168,8 +169,9 @@ define i1 @test_v16i64(<16 x i64> %a0) {
; AVX512-LABEL: test_v16i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vptest %ymm0, %ymm0
; AVX512-NEXT: setne %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
Expand Down Expand Up @@ -296,8 +298,9 @@ define i1 @test_v16i32(<16 x i32> %a0) {
;
; AVX512-LABEL: test_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vptest %ymm0, %ymm0
; AVX512-NEXT: setne %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
Expand Down Expand Up @@ -359,8 +362,9 @@ define i1 @test_v32i32(<32 x i32> %a0) {
; AVX512-LABEL: test_v32i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vptest %ymm0, %ymm0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
Expand Down Expand Up @@ -506,8 +510,9 @@ define i1 @test_v32i16(<32 x i16> %a0) {
;
; AVX512-LABEL: test_v32i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vptest %ymm0, %ymm0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
Expand Down Expand Up @@ -569,8 +574,9 @@ define i1 @test_v64i16(<64 x i16> %a0) {
; AVX512-LABEL: test_v64i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vptest %ymm0, %ymm0
; AVX512-NEXT: setne %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
Expand Down Expand Up @@ -735,8 +741,9 @@ define i1 @test_v64i8(<64 x i8> %a0) {
;
; AVX512-LABEL: test_v64i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vptest %ymm0, %ymm0
; AVX512-NEXT: setne %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
Expand Down Expand Up @@ -798,8 +805,9 @@ define i1 @test_v128i8(<128 x i8> %a0) {
; AVX512-LABEL: test_v128i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vptest %ymm0, %ymm0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
Expand Down Expand Up @@ -1006,8 +1014,10 @@ define i1 @mask_v128i8(<128 x i8> %a0) {
; AVX512-LABEL: mask_v128i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673]
; AVX512-NEXT: vptest %ymm1, %ymm0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
Expand Down

0 comments on commit 7fef15e

Please sign in to comment.