diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5c9313d580142..fb8156b9ac455 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36400,6 +36400,23 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
           insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
       return TLO.CombineTo(Op, Insert);
     }
+    // Vector blend by immediate.
+    case X86ISD::BLENDI: {
+      SDLoc DL(Op);
+      MVT ExtVT = VT.getSimpleVT();
+      ExtVT = MVT::getVectorVT(ExtVT.getScalarType(),
+                               ExtSizeInBits / ExtVT.getScalarSizeInBits());
+      SDValue Ext0 =
+          extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits);
+      SDValue Ext1 =
+          extractSubVector(Op.getOperand(1), 0, TLO.DAG, DL, ExtSizeInBits);
+      SDValue ExtOp =
+          TLO.DAG.getNode(Opc, DL, ExtVT, Ext0, Ext1, Op.getOperand(2));
+      SDValue UndefVec = TLO.DAG.getUNDEF(VT);
+      SDValue Insert =
+          insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
+      return TLO.CombineTo(Op, Insert);
+    }
     }
   }
 
diff --git a/llvm/test/CodeGen/X86/insertelement-ones.ll b/llvm/test/CodeGen/X86/insertelement-ones.ll
index ab95e18faea19..9de1d8e37c6b7 100644
--- a/llvm/test/CodeGen/X86/insertelement-ones.ll
+++ b/llvm/test/CodeGen/X86/insertelement-ones.ll
@@ -290,18 +290,20 @@ define <16 x i16> @insert_v16i16_x12345x789ABCDEx(<16 x i16> %a) {
 ;
 ; AVX2-LABEL: insert_v16i16_x12345x789ABCDEx:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
-; AVX2-NEXT:    vpblendw {{.*#+}} ymm2 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7],ymm1[8],ymm0[9,10,11,12,13],ymm1[14],ymm0[15]
-; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
-; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
+; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm2[7],ymm0[8,9,10,11,12,13,14],ymm2[15]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: insert_v16i16_x12345x789ABCDEx:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
-; AVX512F-NEXT:    vpblendw {{.*#+}} ymm2 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7],ymm1[8],ymm0[9,10,11,12,13],ymm1[14],ymm0[15]
-; AVX512F-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
-; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512F-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
+; AVX512F-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX512F-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm2[7],ymm0[8,9,10,11,12,13,14],ymm2[15]
+; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: insert_v16i16_x12345x789ABCDEx:
diff --git a/llvm/test/CodeGen/X86/pr31956.ll b/llvm/test/CodeGen/X86/pr31956.ll
index 0845051bfd4ef..b8c8b9c129199 100644
--- a/llvm/test/CodeGen/X86/pr31956.ll
+++ b/llvm/test/CodeGen/X86/pr31956.ll
@@ -9,11 +9,10 @@ target triple = "x86_64-scei-ps4"
 define <4 x float> @foo() {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2,3,4,5,6,7]
+; CHECK-NEXT:    vmovaps {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    vmovlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
 ; CHECK-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,0],mem[0,2]
 ; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,1]
-; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
 entry:
   %V = load <2 x float>, <2 x float>* @G1, align 8
diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll
index 1c78d4e1a0cd5..dee266a93b5f6 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll
@@ -1907,7 +1907,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
 ; AVX2-NEXT:    vpmulhuw %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    vpmullw %ymm2, %ymm0, %ymm2
 ; AVX2-NEXT:    vpor %ymm1, %ymm2, %ymm1
-; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; AVX2-NEXT:    retq
 ;
@@ -1917,7 +1917,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
 ; AVX512F-NEXT:    vpmulhuw %ymm2, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpmullw %ymm2, %ymm0, %ymm2
 ; AVX512F-NEXT:    vpor %ymm1, %ymm2, %ymm1
-; AVX512F-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX512F-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
 ; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; AVX512F-NEXT:    retq
 ;
@@ -1927,7 +1927,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
 ; AVX512VL-NEXT:    vpmulhuw %ymm2, %ymm1, %ymm1
 ; AVX512VL-NEXT:    vpmullw %ymm2, %ymm0, %ymm2
 ; AVX512VL-NEXT:    vpor %ymm1, %ymm2, %ymm1
-; AVX512VL-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX512VL-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
 ; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; AVX512VL-NEXT:    retq
 ;
@@ -1940,7 +1940,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm2 =
 ; AVX512BW-NEXT:    vpsllvw %zmm2, %zmm0, %zmm2
 ; AVX512BW-NEXT:    vpor %ymm1, %ymm2, %ymm1
-; AVX512BW-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX512BW-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
 ; AVX512BW-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; AVX512BW-NEXT:    retq
 ;
@@ -1953,7 +1953,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
 ; AVX512VBMI2-NEXT:    vmovdqa {{.*#+}} ymm2 =
 ; AVX512VBMI2-NEXT:    vpsllvw %zmm2, %zmm0, %zmm2
 ; AVX512VBMI2-NEXT:    vpor %ymm1, %ymm2, %ymm1
-; AVX512VBMI2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX512VBMI2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
 ; AVX512VBMI2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; AVX512VBMI2-NEXT:    retq
 ;
@@ -1992,7 +1992,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
 ; XOPAVX2-NEXT:    vpmulhuw %ymm2, %ymm1, %ymm1
 ; XOPAVX2-NEXT:    vpmullw %ymm2, %ymm0, %ymm2
 ; XOPAVX2-NEXT:    vpor %ymm1, %ymm2, %ymm1
-; XOPAVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; XOPAVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
 ; XOPAVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; XOPAVX2-NEXT:    retq
   %res = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> )
diff --git a/llvm/test/CodeGen/X86/vector-fshl-512.ll b/llvm/test/CodeGen/X86/vector-fshl-512.ll
index e45428d8fff53..40706f415b946 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-512.ll
@@ -1078,12 +1078,12 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) nounwin
 ; AVX512F-NEXT:    vpmulhuw %ymm4, %ymm3, %ymm3
 ; AVX512F-NEXT:    vpmullw %ymm4, %ymm2, %ymm5
 ; AVX512F-NEXT:    vpor %ymm3, %ymm5, %ymm3
-; AVX512F-NEXT:    vpblendw {{.*#+}} ymm2 = ymm2[0],ymm3[1,2,3,4,5,6,7],ymm2[8],ymm3[9,10,11,12,13,14,15]
+; AVX512F-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3,4,5,6,7]
 ; AVX512F-NEXT:    vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
 ; AVX512F-NEXT:    vpmulhuw %ymm4, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpmullw %ymm4, %ymm0, %ymm3
 ; AVX512F-NEXT:    vpor %ymm1, %ymm3, %ymm1
-; AVX512F-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX512F-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
 ; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
 ; AVX512F-NEXT:    retq
@@ -1096,12 +1096,12 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) nounwin
 ; AVX512VL-NEXT:    vpmulhuw %ymm4, %ymm3, %ymm3
 ; AVX512VL-NEXT:    vpmullw %ymm4, %ymm2, %ymm5
 ; AVX512VL-NEXT:    vpor %ymm3, %ymm5, %ymm3
-; AVX512VL-NEXT:    vpblendw {{.*#+}} ymm2 = ymm2[0],ymm3[1,2,3,4,5,6,7],ymm2[8],ymm3[9,10,11,12,13,14,15]
+; AVX512VL-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3,4,5,6,7]
 ; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
 ; AVX512VL-NEXT:    vpmulhuw %ymm4, %ymm1, %ymm1
 ; AVX512VL-NEXT:    vpmullw %ymm4, %ymm0, %ymm3
 ; AVX512VL-NEXT:    vpor %ymm1, %ymm3, %ymm1
-; AVX512VL-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX512VL-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
 ; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; AVX512VL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
 ; AVX512VL-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
index 6a7865e51d2b4..1c37b721b9c2a 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
@@ -523,13 +523,13 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x) nounwind {
 ; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 =
 ; AVX512F-NEXT:    vpmulhuw %ymm2, %ymm1, %ymm3
-; AVX512F-NEXT:    vpblendw {{.*#+}} ymm4 = ymm1[0],ymm3[1,2,3,4,5,6,7],ymm1[8],ymm3[9,10,11,12,13,14,15]
+; AVX512F-NEXT:    vpblendw {{.*#+}} xmm4 = xmm1[0],xmm3[1,2,3,4,5,6,7]
 ; AVX512F-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm4 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
 ; AVX512F-NEXT:    vpmullw %ymm4, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpor %ymm3, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpmulhuw %ymm2, %ymm0, %ymm2
-; AVX512F-NEXT:    vpblendw {{.*#+}} ymm3 = ymm0[0],ymm2[1,2,3,4,5,6,7],ymm0[8],ymm2[9,10,11,12,13,14,15]
+; AVX512F-NEXT:    vpblendw {{.*#+}} xmm3 = xmm0[0],xmm2[1,2,3,4,5,6,7]
 ; AVX512F-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
 ; AVX512F-NEXT:    vpmullw %ymm4, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpor %ymm2, %ymm0, %ymm0
@@ -541,13 +541,13 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x) nounwind {
 ; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 =
 ; AVX512VL-NEXT:    vpmulhuw %ymm2, %ymm1, %ymm3
-; AVX512VL-NEXT:    vpblendw {{.*#+}} ymm4 = ymm1[0],ymm3[1,2,3,4,5,6,7],ymm1[8],ymm3[9,10,11,12,13,14,15]
+; AVX512VL-NEXT:    vpblendw {{.*#+}} xmm4 = xmm1[0],xmm3[1,2,3,4,5,6,7]
 ; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm4 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
 ; AVX512VL-NEXT:    vpmullw %ymm4, %ymm1, %ymm1
 ; AVX512VL-NEXT:    vpor %ymm3, %ymm1, %ymm1
 ; AVX512VL-NEXT:    vpmulhuw %ymm2, %ymm0, %ymm2
-; AVX512VL-NEXT:    vpblendw {{.*#+}} ymm3 = ymm0[0],ymm2[1,2,3,4,5,6,7],ymm0[8],ymm2[9,10,11,12,13,14,15]
+; AVX512VL-NEXT:    vpblendw {{.*#+}} xmm3 = xmm0[0],xmm2[1,2,3,4,5,6,7]
 ; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
 ; AVX512VL-NEXT:    vpmullw %ymm4, %ymm0, %ymm0
 ; AVX512VL-NEXT:    vpor %ymm2, %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll
index 963cc54441195..4a1c3092acabe 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll
@@ -1910,7 +1910,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
 ; AVX2-NEXT:    vpmulhuw %ymm2, %ymm1, %ymm3
 ; AVX2-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpor %ymm3, %ymm0, %ymm0
-; AVX2-NEXT:    vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
+; AVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
 ; AVX2-NEXT:    retq
 ;
@@ -1920,7 +1920,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
 ; AVX512F-NEXT:    vpmulhuw %ymm2, %ymm1, %ymm3
 ; AVX512F-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpor %ymm3, %ymm0, %ymm0
-; AVX512F-NEXT:    vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
+; AVX512F-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
 ; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
 ; AVX512F-NEXT:    retq
 ;
@@ -1930,7 +1930,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
 ; AVX512VL-NEXT:    vpmulhuw %ymm2, %ymm1, %ymm3
 ; AVX512VL-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
 ; AVX512VL-NEXT:    vpor %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
+; AVX512VL-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
 ; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
 ; AVX512VL-NEXT:    retq
 ;
@@ -1943,7 +1943,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm3 =
 ; AVX512BW-NEXT:    vpsllvw %zmm3, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpor %ymm2, %ymm0, %ymm0
-; AVX512BW-NEXT:    vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
+; AVX512BW-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
 ; AVX512BW-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
 ; AVX512BW-NEXT:    retq
 ;
@@ -1956,7 +1956,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
 ; AVX512VBMI2-NEXT:    vmovdqa {{.*#+}} ymm3 =
 ; AVX512VBMI2-NEXT:    vpsllvw %zmm3, %zmm0, %zmm0
 ; AVX512VBMI2-NEXT:    vpor %ymm2, %ymm0, %ymm0
-; AVX512VBMI2-NEXT:    vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
+; AVX512VBMI2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
 ; AVX512VBMI2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
 ; AVX512VBMI2-NEXT:    retq
 ;
@@ -1995,7 +1995,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
 ; XOPAVX2-NEXT:    vpmulhuw %ymm2, %ymm1, %ymm3
 ; XOPAVX2-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
 ; XOPAVX2-NEXT:    vpor %ymm3, %ymm0, %ymm0
-; XOPAVX2-NEXT:    vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
+; XOPAVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
 ; XOPAVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
 ; XOPAVX2-NEXT:    retq
   %res = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> )
diff --git a/llvm/test/CodeGen/X86/vector-fshr-512.ll b/llvm/test/CodeGen/X86/vector-fshr-512.ll
index 6df29e86ce35c..bbacb68ffa949 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-512.ll
@@ -1066,12 +1066,12 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) nounwin
 ; AVX512F-NEXT:    vpmulhuw %ymm4, %ymm3, %ymm5
 ; AVX512F-NEXT:    vpmullw %ymm4, %ymm2, %ymm2
 ; AVX512F-NEXT:    vpor %ymm5, %ymm2, %ymm2
-; AVX512F-NEXT:    vpblendw {{.*#+}} ymm3 = ymm3[0],ymm2[1,2,3,4,5,6,7],ymm3[8],ymm2[9,10,11,12,13,14,15]
+; AVX512F-NEXT:    vpblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1,2,3,4,5,6,7]
 ; AVX512F-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
 ; AVX512F-NEXT:    vpmulhuw %ymm4, %ymm1, %ymm3
 ; AVX512F-NEXT:    vpmullw %ymm4, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpor %ymm3, %ymm0, %ymm0
-; AVX512F-NEXT:    vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
+; AVX512F-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
 ; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
 ; AVX512F-NEXT:    retq
@@ -1084,12 +1084,12 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) nounwin
 ; AVX512VL-NEXT:    vpmulhuw %ymm4, %ymm3, %ymm5
 ; AVX512VL-NEXT:    vpmullw %ymm4, %ymm2, %ymm2
 ; AVX512VL-NEXT:    vpor %ymm5, %ymm2, %ymm2
-; AVX512VL-NEXT:    vpblendw {{.*#+}} ymm3 = ymm3[0],ymm2[1,2,3,4,5,6,7],ymm3[8],ymm2[9,10,11,12,13,14,15]
+; AVX512VL-NEXT:    vpblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1,2,3,4,5,6,7]
 ; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
 ; AVX512VL-NEXT:    vpmulhuw %ymm4, %ymm1, %ymm3
 ; AVX512VL-NEXT:    vpmullw %ymm4, %ymm0, %ymm0
 ; AVX512VL-NEXT:    vpor %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
+; AVX512VL-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
 ; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
 ; AVX512VL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
 ; AVX512VL-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
index d8d300f66c59c..4862a88e4bf74 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
@@ -543,13 +543,13 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x) nounwind {
 ; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 =
 ; AVX512F-NEXT:    vpmulhuw %ymm2, %ymm1, %ymm3
-; AVX512F-NEXT:    vpblendw {{.*#+}} ymm4 = ymm1[0],ymm3[1,2,3,4,5,6,7],ymm1[8],ymm3[9,10,11,12,13,14,15]
+; AVX512F-NEXT:    vpblendw {{.*#+}} xmm4 = xmm1[0],xmm3[1,2,3,4,5,6,7]
 ; AVX512F-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm4 = [1,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2]
 ; AVX512F-NEXT:    vpmullw %ymm4, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpor %ymm3, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpmulhuw %ymm2, %ymm0, %ymm2
-; AVX512F-NEXT:    vpblendw {{.*#+}} ymm3 = ymm0[0],ymm2[1,2,3,4,5,6,7],ymm0[8],ymm2[9,10,11,12,13,14,15]
+; AVX512F-NEXT:    vpblendw {{.*#+}} xmm3 = xmm0[0],xmm2[1,2,3,4,5,6,7]
 ; AVX512F-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
 ; AVX512F-NEXT:    vpmullw %ymm4, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpor %ymm2, %ymm0, %ymm0
@@ -561,13 +561,13 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x) nounwind {
 ; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 =
 ; AVX512VL-NEXT:    vpmulhuw %ymm2, %ymm1, %ymm3
-; AVX512VL-NEXT:    vpblendw {{.*#+}} ymm4 = ymm1[0],ymm3[1,2,3,4,5,6,7],ymm1[8],ymm3[9,10,11,12,13,14,15]
+; AVX512VL-NEXT:    vpblendw {{.*#+}} xmm4 = xmm1[0],xmm3[1,2,3,4,5,6,7]
 ; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm4 = [1,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2]
 ; AVX512VL-NEXT:    vpmullw %ymm4, %ymm1, %ymm1
 ; AVX512VL-NEXT:    vpor %ymm3, %ymm1, %ymm1
 ; AVX512VL-NEXT:    vpmulhuw %ymm2, %ymm0, %ymm2
-; AVX512VL-NEXT:    vpblendw {{.*#+}} ymm3 = ymm0[0],ymm2[1,2,3,4,5,6,7],ymm0[8],ymm2[9,10,11,12,13,14,15]
+; AVX512VL-NEXT:    vpblendw {{.*#+}} xmm3 = xmm0[0],xmm2[1,2,3,4,5,6,7]
 ; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
 ; AVX512VL-NEXT:    vpmullw %ymm4, %ymm0, %ymm0
 ; AVX512VL-NEXT:    vpor %ymm2, %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
index 72537b87f5238..4315189d4133a 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -1184,9 +1184,9 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
 ; AVX2-LABEL: constant_shift_v16i16:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmulhw {{.*}}(%rip), %ymm0, %ymm1
-; AVX2-NEXT:    vpblendw {{.*#+}} ymm2 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX2-NEXT:    vpblendw {{.*#+}} xmm2 = xmm0[0],xmm1[1,2,3,4,5,6,7]
 ; AVX2-NEXT:    vpsraw $1, %xmm0, %xmm0
-; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2,3,4,5,6,7,8],ymm0[9],ymm2[10,11,12,13,14,15]
+; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2,3,4,5,6,7]
 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; AVX2-NEXT:    retq
 ;
@@ -1247,9 +1247,9 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
 ; X32-AVX2-LABEL: constant_shift_v16i16:
 ; X32-AVX2:       # %bb.0:
 ; X32-AVX2-NEXT:    vpmulhw {{\.LCPI.*}}, %ymm0, %ymm1
-; X32-AVX2-NEXT:    vpblendw {{.*#+}} ymm2 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; X32-AVX2-NEXT:    vpblendw {{.*#+}} xmm2 = xmm0[0],xmm1[1,2,3,4,5,6,7]
 ; X32-AVX2-NEXT:    vpsraw $1, %xmm0, %xmm0
-; X32-AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2,3,4,5,6,7,8],ymm0[9],ymm2[10,11,12,13,14,15]
+; X32-AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2,3,4,5,6,7]
 ; X32-AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; X32-AVX2-NEXT:    retl
   %shift = ashr <16 x i16> %a,
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
index 24395c9169f04..2e7bc7a0207cc 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -971,7 +971,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
 ; AVX2-LABEL: constant_shift_v16i16:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmulhuw {{.*}}(%rip), %ymm0, %ymm1
-; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; AVX2-NEXT:    retq
 ;
@@ -986,14 +986,14 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
 ; XOPAVX2-LABEL: constant_shift_v16i16:
 ; XOPAVX2:       # %bb.0:
 ; XOPAVX2-NEXT:    vpmulhuw {{.*}}(%rip), %ymm0, %ymm1
-; XOPAVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; XOPAVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
 ; XOPAVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; XOPAVX2-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: constant_shift_v16i16:
 ; AVX512DQ:       # %bb.0:
 ; AVX512DQ-NEXT:    vpmulhuw {{.*}}(%rip), %ymm0, %ymm1
-; AVX512DQ-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX512DQ-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
 ; AVX512DQ-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; AVX512DQ-NEXT:    retq
 ;
@@ -1008,7 +1008,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
 ; AVX512DQVL-LABEL: constant_shift_v16i16:
 ; AVX512DQVL:       # %bb.0:
 ; AVX512DQVL-NEXT:    vpmulhuw {{.*}}(%rip), %ymm0, %ymm1
-; AVX512DQVL-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX512DQVL-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
 ; AVX512DQVL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; AVX512DQVL-NEXT:    retq
 ;
@@ -1029,7 +1029,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
 ; X32-AVX2-LABEL: constant_shift_v16i16:
 ; X32-AVX2:       # %bb.0:
 ; X32-AVX2-NEXT:    vpmulhuw {{\.LCPI.*}}, %ymm0, %ymm1
-; X32-AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; X32-AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
 ; X32-AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; X32-AVX2-NEXT:    retl
   %shift = lshr <16 x i16> %a,
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
index 81ab84315ea85..78c33b2d19486 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
@@ -209,10 +209,10 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
 ; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 =
 ; AVX512DQ-NEXT:    vpmulhuw %ymm2, %ymm1, %ymm3
-; AVX512DQ-NEXT:    vpblendw {{.*#+}} ymm1 = ymm1[0],ymm3[1,2,3,4,5,6,7],ymm1[8],ymm3[9,10,11,12,13,14,15]
+; AVX512DQ-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3,4,5,6,7]
 ; AVX512DQ-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7]
 ; AVX512DQ-NEXT:    vpmulhuw %ymm2, %ymm0, %ymm2
-; AVX512DQ-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7],ymm0[8],ymm2[9,10,11,12,13,14,15]
+; AVX512DQ-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3,4,5,6,7]
 ; AVX512DQ-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7]
 ; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
index 0208bc3c4ff00..4cdddb4391e4c 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
@@ -440,7 +440,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_0
 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
 ; AVX2-NEXT:    retq
@@ -463,7 +463,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_0
 ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
 ; XOPAVX2:       # %bb.0:
 ; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; XOPAVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; XOPAVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
 ; XOPAVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
 ; XOPAVX2-NEXT:    retq
@@ -1490,7 +1490,7 @@ define <16 x i16> @shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_1
 ;
 ; AVX2-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
+; AVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
 ; AVX2-NEXT:    retq
 ;
@@ -1507,7 +1507,7 @@ define <16 x i16> @shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_1
 ;
 ; XOPAVX2-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
 ; XOPAVX2:       # %bb.0:
-; XOPAVX2-NEXT:    vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
+; XOPAVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
 ; XOPAVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
 ; XOPAVX2-NEXT:    retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> 
@@ -7277,9 +7277,9 @@ define <16 x i16> @PR34369(<16 x i16> %vec, <16 x i16> %mask) {
 ; AVX2-LABEL: PR34369:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
-; AVX2-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[8,9,10,11,4,5,10,11,8,9,10,11,4,5,4,5]
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u,10,11,u,u,u,u,u,u,4,5]
 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[6,7,0,1,0,1,u,u,10,11,4,5,4,5,u,u,30,31,16,17,28,29,16,17,18,19,20,21,24,25,24,25]
-; AVX2-NEXT:    vpblendw {{.*#+}} ymm2 = ymm0[0,1,2],ymm2[3],ymm0[4,5,6],ymm2[7],ymm0[8,9,10],ymm2[11],ymm0[12,13,14],ymm2[15]
+; AVX2-NEXT:    vpblendw {{.*#+}} xmm2 = xmm0[0,1,2],xmm2[3],xmm0[4,5,6],xmm2[7]
 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm1, %ymm1
@@ -7310,9 +7310,8 @@ define <16 x i16> @PR34369(<16 x i16> %vec, <16 x i16> %mask) {
 ; XOPAVX2-LABEL: PR34369:
 ; XOPAVX2:       # %bb.0:
 ; XOPAVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
-; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[8,9,10,11,4,5,10,11,8,9,10,11,4,5,4,5]
 ; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[6,7,0,1,0,1,u,u,10,11,4,5,4,5,u,u,30,31,16,17,28,29,16,17,18,19,20,21,24,25,24,25]
-; XOPAVX2-NEXT:    vpblendw {{.*#+}} ymm2 = ymm0[0,1,2],ymm2[3],ymm0[4,5,6],ymm2[7],ymm0[8,9,10],ymm2[11],ymm0[12,13,14],ymm2[15]
+; XOPAVX2-NEXT:    vpperm {{.*#+}} xmm2 = xmm0[0,1,2,3,4,5],xmm2[10,11],xmm0[8,9,10,11,12,13],xmm2[4,5]
 ; XOPAVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
 ; XOPAVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; XOPAVX2-NEXT:    vpcmpeqw %ymm2, %ymm1, %ymm1
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
index e9fc09668367c..1e1bf04258cf6 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -916,7 +916,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_
 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
 ; AVX2-NEXT:    retq
@@ -946,7 +946,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_
 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
 ; XOPAVX2:       # %bb.0:
 ; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; XOPAVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; XOPAVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
 ; XOPAVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
 ; XOPAVX2-NEXT:    retq
@@ -968,7 +968,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_
 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
 ; AVX2-NEXT:    retq
@@ -998,7 +998,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_
 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
 ; XOPAVX2:       # %bb.0:
 ; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; XOPAVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; XOPAVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
 ; XOPAVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
 ; XOPAVX2-NEXT:    retq
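
Reviewer note (editorial addition, not part of the patch): the BLENDI narrowing in the X86ISelLowering.cpp hunk is sound for these tests because vpblendw's 8-bit immediate selects per 16-bit element and is reused for each 128-bit lane, so when only the low 128 bits of a 256-bit blend are demanded, blending the low xmm halves with the same immediate yields exactly the demanded elements. Below is a minimal standalone C++ model of that equivalence (the helper name blendw is hypothetical; no LLVM dependency):

#include <cassert>
#include <cstddef>
#include <cstdint>

// Scalar model of vpblendw: element i is taken from b when bit (i % 8) of
// the immediate is set, otherwise from a. The 8-bit immediate repeats for
// every 128-bit lane (8 x i16), matching the instruction encoding.
static void blendw(const uint16_t *a, const uint16_t *b, uint16_t *out,
                   size_t elts, uint8_t imm) {
  for (size_t i = 0; i != elts; ++i)
    out[i] = ((imm >> (i % 8)) & 1) ? b[i] : a[i];
}

int main() {
  uint16_t a[16], b[16], wide[16], narrow[8];
  for (int i = 0; i != 16; ++i) {
    a[i] = static_cast<uint16_t>(i);
    b[i] = static_cast<uint16_t>(100 + i);
  }
  const uint8_t imm = 0x41; // per lane, take elements 0 and 6 from b
  blendw(a, b, wide, 16, imm);  // models a 256-bit (ymm) vpblendw
  blendw(a, b, narrow, 8, imm); // models a 128-bit (xmm) vpblendw of the low halves
  // The low 128 bits agree, which is what lets SimplifyDemandedVectorElts
  // replace the wide blend when only those elements are demanded.
  for (int i = 0; i != 8; ++i)
    assert(wide[i] == narrow[i]);
  return 0;
}

This mirrors the updated checks throughout the tests: the blend itself is now done as vpblendw on xmm registers, and a following vpblendd merges the result with the upper ymm half.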