[X86] Prefer vpternlog instead of blendv for vselect on masks.
This rarely comes up because most `vselect` nodes are lowered with actual
AVX512 mask instructions, but it is an improvement in the rare cases where the
condition is not a mask.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D145221
goldsteinn committed Apr 4, 2023
1 parent 443825c commit 4b2be14
Showing 6 changed files with 260 additions and 132 deletions.
21 changes: 15 additions & 6 deletions llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1034,18 +1034,27 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
break;
}
case ISD::VSELECT: {
// Replace VSELECT with non-mask conditions with BLENDV.
if (N->getOperand(0).getValueType().getVectorElementType() == MVT::i1)
// Replace VSELECT with non-mask conditions with BLENDV/VPTERNLOG.
EVT EleVT = N->getOperand(0).getValueType().getVectorElementType();
if (EleVT == MVT::i1)
break;

assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!");
assert(N->getValueType(0).getVectorElementType() != MVT::i16 &&
"We can't replace VSELECT with BLENDV in vXi16!");
SDValue Blendv =
CurDAG->getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0),
N->getOperand(0), N->getOperand(1), N->getOperand(2));
SDValue R;
if (Subtarget->hasVLX() && CurDAG->ComputeNumSignBits(N->getOperand(0)) ==
EleVT.getSizeInBits()) {
R = CurDAG->getNode(X86ISD::VPTERNLOG, SDLoc(N), N->getValueType(0),
N->getOperand(0), N->getOperand(1), N->getOperand(2),
CurDAG->getTargetConstant(0xCA, SDLoc(N), MVT::i8));
} else {
R = CurDAG->getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0),
N->getOperand(0), N->getOperand(1),
N->getOperand(2));
}
--I;
CurDAG->ReplaceAllUsesWith(N, Blendv.getNode());
CurDAG->ReplaceAllUsesWith(N, R.getNode());
++I;
MadeChange = true;
continue;
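Background on the guarded path above: X86ISD::BLENDV selects each lane using only the sign bit of the condition element, while VPTERNLOG with immediate 0xCA computes the pure bitwise select (cond & t) | (~cond & f). The two agree only when every bit of a condition lane matches its sign bit, i.e. the lane is all-ones or all-zeros, which is what the ComputeNumSignBits(...) == EleVT.getSizeInBits() check establishes; the hasVLX() check is needed because vpternlog on 128/256-bit vectors requires AVX512VL. A minimal scalar model of one 8-bit lane, illustrative only and not taken from LLVM:

#include <cassert>
#include <cstdint>

// blendv-style lane select: only the sign bit of the condition matters.
static uint8_t blendv_lane(uint8_t cond, uint8_t t, uint8_t f) {
  return (cond & 0x80) ? t : f;
}

// vpternlog with imm 0xCA: bitwise select, every bit of cond participates.
static uint8_t ternlog_ca_lane(uint8_t cond, uint8_t t, uint8_t f) {
  return (cond & t) | (~cond & f);
}

int main() {
  // With an all-ones or all-zeros condition lane the two lowerings agree...
  assert(blendv_lane(0xFF, 0xAA, 0x55) == ternlog_ca_lane(0xFF, 0xAA, 0x55));
  assert(blendv_lane(0x00, 0xAA, 0x55) == ternlog_ca_lane(0x00, 0xAA, 0x55));
  // ...but not for an arbitrary condition, hence the sign-bit-count guard.
  assert(blendv_lane(0x81, 0xAA, 0x55) != ternlog_ca_lane(0x81, 0xAA, 0x55));
  return 0;
}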
52 changes: 26 additions & 26 deletions llvm/test/CodeGen/X86/avx512-gfni-intrinsics.ll
@@ -35,7 +35,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_vgf2p8affineinvqb_128(<16 x i8>
; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
; X86NOBW-NEXT: vpmovdb %zmm1, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xcd]
; X86NOBW-NEXT: vpand %xmm4, %xmm5, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xd1,0xdb,0xcc]
; X86NOBW-NEXT: vpblendvb %xmm5, %xmm0, %xmm2, %xmm2 # encoding: [0xc4,0xe3,0x69,0x4c,0xd0,0x50]
; X86NOBW-NEXT: vpternlogq $184, %xmm0, %xmm5, %xmm2 # encoding: [0x62,0xf3,0xd5,0x08,0x25,0xd0,0xb8]
; X86NOBW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X86NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86NOBW-NEXT: retl # encoding: [0xc3]
Expand All @@ -49,7 +49,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_vgf2p8affineinvqb_128(<16 x i8>
; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
; X64NOBW-NEXT: vpmovdb %zmm1, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xcd]
; X64NOBW-NEXT: vpand %xmm4, %xmm5, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xd1,0xdb,0xcc]
; X64NOBW-NEXT: vpblendvb %xmm5, %xmm0, %xmm2, %xmm2 # encoding: [0xc4,0xe3,0x69,0x4c,0xd0,0x50]
; X64NOBW-NEXT: vpternlogq $184, %xmm0, %xmm5, %xmm2 # encoding: [0x62,0xf3,0xd5,0x08,0x25,0xd0,0xb8]
; X64NOBW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X64NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64NOBW-NEXT: retq # encoding: [0xc3]
@@ -100,7 +100,7 @@ define { <32 x i8>, <32 x i8>, <32 x i8> } @test_vgf2p8affineinvqb_256(<32 x i8>
; X86NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
; X86NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xed,0x01]
; X86NOBW-NEXT: vpand %ymm4, %ymm5, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xd5,0xdb,0xcc]
; X86NOBW-NEXT: vpblendvb %ymm5, %ymm0, %ymm2, %ymm2 # encoding: [0xc4,0xe3,0x6d,0x4c,0xd0,0x50]
; X86NOBW-NEXT: vpternlogq $184, %ymm0, %ymm5, %ymm2 # encoding: [0x62,0xf3,0xd5,0x28,0x25,0xd0,0xb8]
; X86NOBW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X86NOBW-NEXT: retl # encoding: [0xc3]
;
@@ -118,7 +118,7 @@ define { <32 x i8>, <32 x i8>, <32 x i8> } @test_vgf2p8affineinvqb_256(<32 x i8>
; X64NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
; X64NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xed,0x01]
; X64NOBW-NEXT: vpand %ymm4, %ymm5, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xd5,0xdb,0xcc]
; X64NOBW-NEXT: vpblendvb %ymm5, %ymm0, %ymm2, %ymm2 # encoding: [0xc4,0xe3,0x6d,0x4c,0xd0,0x50]
; X64NOBW-NEXT: vpternlogq $184, %ymm0, %ymm5, %ymm2 # encoding: [0x62,0xf3,0xd5,0x28,0x25,0xd0,0xb8]
; X64NOBW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i32 %mask to <32 x i1>
@@ -252,7 +252,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_vgf2p8affineqb_128(<16 x i8> %s
; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
; X86NOBW-NEXT: vpmovdb %zmm1, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xcd]
; X86NOBW-NEXT: vpand %xmm4, %xmm5, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xd1,0xdb,0xcc]
; X86NOBW-NEXT: vpblendvb %xmm5, %xmm0, %xmm2, %xmm2 # encoding: [0xc4,0xe3,0x69,0x4c,0xd0,0x50]
; X86NOBW-NEXT: vpternlogq $184, %xmm0, %xmm5, %xmm2 # encoding: [0x62,0xf3,0xd5,0x08,0x25,0xd0,0xb8]
; X86NOBW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X86NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86NOBW-NEXT: retl # encoding: [0xc3]
@@ -266,7 +266,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_vgf2p8affineqb_128(<16 x i8> %s
; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
; X64NOBW-NEXT: vpmovdb %zmm1, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xcd]
; X64NOBW-NEXT: vpand %xmm4, %xmm5, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xd1,0xdb,0xcc]
; X64NOBW-NEXT: vpblendvb %xmm5, %xmm0, %xmm2, %xmm2 # encoding: [0xc4,0xe3,0x69,0x4c,0xd0,0x50]
; X64NOBW-NEXT: vpternlogq $184, %xmm0, %xmm5, %xmm2 # encoding: [0x62,0xf3,0xd5,0x08,0x25,0xd0,0xb8]
; X64NOBW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X64NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64NOBW-NEXT: retq # encoding: [0xc3]
@@ -317,7 +317,7 @@ define { <32 x i8>, <32 x i8>, <32 x i8> } @test_vgf2p8affineqb_256(<32 x i8> %s
; X86NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
; X86NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xed,0x01]
; X86NOBW-NEXT: vpand %ymm4, %ymm5, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xd5,0xdb,0xcc]
; X86NOBW-NEXT: vpblendvb %ymm5, %ymm0, %ymm2, %ymm2 # encoding: [0xc4,0xe3,0x6d,0x4c,0xd0,0x50]
; X86NOBW-NEXT: vpternlogq $184, %ymm0, %ymm5, %ymm2 # encoding: [0x62,0xf3,0xd5,0x28,0x25,0xd0,0xb8]
; X86NOBW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X86NOBW-NEXT: retl # encoding: [0xc3]
;
@@ -335,7 +335,7 @@ define { <32 x i8>, <32 x i8>, <32 x i8> } @test_vgf2p8affineqb_256(<32 x i8> %s
; X64NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
; X64NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xed,0x01]
; X64NOBW-NEXT: vpand %ymm4, %ymm5, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xd5,0xdb,0xcc]
; X64NOBW-NEXT: vpblendvb %ymm5, %ymm0, %ymm2, %ymm2 # encoding: [0xc4,0xe3,0x6d,0x4c,0xd0,0x50]
; X64NOBW-NEXT: vpternlogq $184, %ymm0, %ymm5, %ymm2 # encoding: [0x62,0xf3,0xd5,0x28,0x25,0xd0,0xb8]
; X64NOBW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i32 %mask to <32 x i1>
@@ -466,20 +466,20 @@ define <16 x i8> @test_vgf2p8mulb_128_mask(<16 x i8> %src1, <16 x i8> %src2, <16
; X86NOBW-LABEL: test_vgf2p8mulb_128_mask:
; X86NOBW: # %bb.0:
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86NOBW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xc1]
; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
; X86NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
; X86NOBW-NEXT: vpblendvb %xmm1, %xmm0, %xmm2, %xmm0 # encoding: [0xc4,0xe3,0x69,0x4c,0xc0,0x10]
; X86NOBW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xc9]
; X86NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
; X86NOBW-NEXT: vpmovdb %zmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
; X86NOBW-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xc2,0xca]
; X86NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86NOBW-NEXT: retl # encoding: [0xc3]
;
; X64NOBW-LABEL: test_vgf2p8mulb_128_mask:
; X64NOBW: # %bb.0:
; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64NOBW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xc1]
; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
; X64NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
; X64NOBW-NEXT: vpblendvb %xmm1, %xmm0, %xmm2, %xmm0 # encoding: [0xc4,0xe3,0x69,0x4c,0xc0,0x10]
; X64NOBW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xc9]
; X64NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
; X64NOBW-NEXT: vpmovdb %zmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
; X64NOBW-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xc2,0xca]
; X64NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i16 %mask to <16 x i1>
@@ -555,27 +555,27 @@ define <32 x i8> @test_vgf2p8mulb_256_mask(<32 x i8> %src1, <32 x i8> %src2, <32
; X86NOBW: # %bb.0:
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k2 # encoding: [0xc5,0xf8,0x90,0x54,0x24,0x06]
; X86NOBW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xc1]
; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
; X86NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
; X86NOBW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xc9]
; X86NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
; X86NOBW-NEXT: vpmovdb %zmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
; X86NOBW-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} # encoding: [0x62,0xf3,0x65,0xca,0x25,0xdb,0xff]
; X86NOBW-NEXT: vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
; X86NOBW-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xcb,0x01]
; X86NOBW-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0 # encoding: [0xc4,0xe3,0x6d,0x4c,0xc0,0x10]
; X86NOBW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xc3,0x01]
; X86NOBW-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xc2,0xca]
; X86NOBW-NEXT: retl # encoding: [0xc3]
;
; X64NOBW-LABEL: test_vgf2p8mulb_256_mask:
; X64NOBW: # %bb.0:
; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64NOBW-NEXT: shrl $16, %edi # encoding: [0xc1,0xef,0x10]
; X64NOBW-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
; X64NOBW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xc1]
; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
; X64NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
; X64NOBW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xc9]
; X64NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
; X64NOBW-NEXT: vpmovdb %zmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
; X64NOBW-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} # encoding: [0x62,0xf3,0x65,0xca,0x25,0xdb,0xff]
; X64NOBW-NEXT: vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
; X64NOBW-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xcb,0x01]
; X64NOBW-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0 # encoding: [0xc4,0xe3,0x6d,0x4c,0xc0,0x10]
; X64NOBW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xc3,0x01]
; X64NOBW-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xc2,0xca]
; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i32 %mask to <32 x i1>
%2 = call <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8> %src1, <32 x i8> %src2)
30 changes: 15 additions & 15 deletions llvm/test/CodeGen/X86/var-permute-256.ll
@@ -194,12 +194,12 @@ define <16 x i16> @var_shuffle_v16i16(<16 x i16> %v, <16 x i16> %indices) nounwi
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX512VLDQ-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX512VLDQ-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,2,3]
; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2
; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm2, %ymm2
; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm0
; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX512VLDQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512VLDQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm3
; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; AVX512VLDQ-NEXT: vpternlogq $202, %ymm2, %ymm3, %ymm0
; AVX512VLDQ-NEXT: retq
;
; AVX512VLBW-LABEL: var_shuffle_v16i16:
@@ -313,9 +313,9 @@ define <32 x i8> @var_shuffle_v32i8(<32 x i8> %v, <32 x i8> %indices) nounwind {
; AVX512VLDQ-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,2,3]
; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm2, %ymm2
; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm0
; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX512VLDQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm3
; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; AVX512VLDQ-NEXT: vpternlogq $202, %ymm3, %ymm2, %ymm0
; AVX512VLDQ-NEXT: retq
;
; AVX512VLBW-LABEL: var_shuffle_v32i8:
@@ -739,11 +739,11 @@ define <16 x i16> @var_shuffle_v16i16_from_v8i16(<8 x i16> %v, <16 x i16> %indic
; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512VLDQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX512VLDQ-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm2
; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm0
; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX512VLDQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm2
; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm3
; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; AVX512VLDQ-NEXT: vpternlogq $202, %ymm2, %ymm3, %ymm0
; AVX512VLDQ-NEXT: retq
;
; AVX512VLBW-LABEL: var_shuffle_v16i16_from_v8i16:
@@ -857,9 +857,9 @@ define <32 x i8> @var_shuffle_v32i8_from_v16i8(<16 x i8> %v, <32 x i8> %indices)
; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm2
; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm0
; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX512VLDQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm3
; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; AVX512VLDQ-NEXT: vpternlogq $202, %ymm3, %ymm2, %ymm0
; AVX512VLDQ-NEXT: retq
;
; AVX512VLBW-LABEL: var_shuffle_v32i8_from_v16i8:
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/vector-shuffle-v1.ll
@@ -361,7 +361,7 @@ define <32 x i8> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512VL-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0
; AVX512VL-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_icmp_v32i8:
@@ -459,7 +459,7 @@ define <32 x i8> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512VL-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm0
; AVX512VL-NEXT: vpternlogq $202, %ymm3, %ymm2, %ymm0
; AVX512VL-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_icmp_v32i8_split:
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/vselect-avx.ll
@@ -134,7 +134,7 @@ define void @test3(<4 x i32> %induction30, ptr %tmp16, ptr %tmp17, <4 x i16> %t
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: vpmovdw %ymm0, %xmm0
; AVX512-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm1
; AVX512-NEXT: vpternlogq $226, %xmm2, %xmm0, %xmm1
; AVX512-NEXT: vmovq %xmm0, (%rdi)
; AVX512-NEXT: vmovq %xmm1, (%rsi)
; AVX512-NEXT: vzeroupper
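For decoding the vpternlog immediates in the regenerated CHECK lines: the imm8 is a truth table indexed, bit by bit, by the corresponding bits of dst, src2 and src3. The values that appear in these tests (0xCA/202, 0xB8/184, 0xE2/226) are all the same bitwise select, differing only in which operand slot holds the condition mask. A short sketch of the decoding; the helper below is illustrative only, not an LLVM or intrinsics API:

#include <cstdint>

// One result bit of vpternlog: the bits a (from dst), b (from src2) and
// c (from src3) form a 3-bit index into the 8-bit immediate.
static bool ternlog_bit(uint8_t imm, bool a, bool b, bool c) {
  unsigned idx = (a << 2) | (b << 1) | (c << 0);
  return (imm >> idx) & 1;
}

// Truth tables seen in the updated tests, all variants of a bitwise select:
//   0xCA (202): result = a ? b : c   (condition mask already lives in dst)
//   0xB8 (184): result = b ? c : a   (mask in src2, false value in dst)
//   0xE2 (226): result = b ? a : c   (mask in src2, true value in dst)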