diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e006388b6e928..e828fe4b9dd15 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -24192,23 +24192,14 @@ static SDValue LowerVectorAllZero(const SDLoc &DL, SDValue V, ISD::CondCode CC, DAG.getConstant(0, DL, IntVT)); } - // Split down to 128/256/512-bit vector. - unsigned TestSize = - Subtarget.useAVX512Regs() ? 512 : (Subtarget.hasAVX() ? 256 : 128); + // Split down to 128/256-bit vector. + unsigned TestSize = Subtarget.hasAVX() ? 256 : 128; while (VT.getSizeInBits() > TestSize) { auto Split = DAG.SplitVector(V, DL); VT = Split.first.getValueType(); V = DAG.getNode(ISD::OR, DL, VT, Split.first, Split.second); } - bool UseKORTEST = Subtarget.useAVX512Regs(); - if (UseKORTEST && VT.is512BitVector()) { - V = DAG.getBitcast(MVT::v16i32, MaskBits(V)); - V = DAG.getSetCC(DL, MVT::v16i1, V, - getZeroVector(MVT::v16i32, Subtarget, DAG, DL), ISD::SETEQ); - return DAG.getNode(X86ISD::KORTEST, DL, MVT::i32, V, V); - } - bool UsePTEST = Subtarget.hasSSE41(); if (UsePTEST) { MVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64; diff --git a/llvm/test/CodeGen/X86/ptest.ll b/llvm/test/CodeGen/X86/ptest.ll index c417c5d15b874..066cbb6193317 100644 --- a/llvm/test/CodeGen/X86/ptest.ll +++ b/llvm/test/CodeGen/X86/ptest.ll @@ -148,8 +148,9 @@ define i32 @veccond512(<16 x i32> %input) { ; ; AVX512-LABEL: veccond512: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 -; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vptest %ymm0, %ymm0 ; AVX512-NEXT: je .LBB2_2 ; AVX512-NEXT: # %bb.1: # %if-true-block ; AVX512-NEXT: xorl %eax, %eax @@ -267,9 +268,10 @@ define i32 @vectest512(<16 x i32> %input) { ; ; AVX512-LABEL: vectest512: ; AVX512: # %bb.0: -; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: xorl %eax, %eax -; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: vptest %ymm0, %ymm0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -378,8 +380,9 @@ define i32 @vecsel512(<16 x i32> %input, i32 %a, i32 %b) { ; AVX512-LABEL: vecsel512: ; AVX512: # %bb.0: ; AVX512-NEXT: movl %edi, %eax -; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 -; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vptest %ymm0, %ymm0 ; AVX512-NEXT: cmovel %esi, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll index 5d921c0aa2c62..fcb0ab6090398 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll @@ -105,8 +105,9 @@ define i1 @test_v8i64(<8 x i64> %a0) { ; ; AVX512-LABEL: test_v8i64: ; AVX512: # %bb.0: -; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 -; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vptest %ymm0, %ymm0 ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -168,8 +169,9 @@ define i1 @test_v16i64(<16 x i64> %a0) { ; AVX512-LABEL: test_v16i64: ; AVX512: # %bb.0: ; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 -; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vptest %ymm0, %ymm0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -296,8 +298,9 @@ define i1 @test_v16i32(<16 x i32> %a0) { ; ; AVX512-LABEL: test_v16i32: ; AVX512: # %bb.0: -; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 -; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vptest %ymm0, %ymm0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -359,8 +362,9 @@ define i1 @test_v32i32(<32 x i32> %a0) { ; AVX512-LABEL: test_v32i32: ; AVX512: # %bb.0: ; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 -; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vptest %ymm0, %ymm0 ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -506,8 +510,9 @@ define i1 @test_v32i16(<32 x i16> %a0) { ; ; AVX512-LABEL: test_v32i16: ; AVX512: # %bb.0: -; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 -; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vptest %ymm0, %ymm0 ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -569,8 +574,9 @@ define i1 @test_v64i16(<64 x i16> %a0) { ; AVX512-LABEL: test_v64i16: ; AVX512: # %bb.0: ; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 -; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vptest %ymm0, %ymm0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -735,8 +741,9 @@ define i1 @test_v64i8(<64 x i8> %a0) { ; ; AVX512-LABEL: test_v64i8: ; AVX512: # %bb.0: -; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 -; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vptest %ymm0, %ymm0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -798,8 +805,9 @@ define i1 @test_v128i8(<128 x i8> %a0) { ; AVX512-LABEL: test_v128i8: ; AVX512: # %bb.0: ; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 -; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vptest %ymm0, %ymm0 ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1006,8 +1014,10 @@ define i1 @mask_v128i8(<128 x i8> %a0) { ; AVX512-LABEL: mask_v128i8: ; AVX512: # %bb.0: ; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0 -; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673] +; AVX512-NEXT: vptest %ymm1, %ymm0 ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq