diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 02b20b3ae5301..931a10b700c87 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -13783,10 +13783,12 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef Mask, // so prevents folding a load into this instruction or making a copy. const int UnpackLoMask[] = {0, 0, 1, 1}; const int UnpackHiMask[] = {2, 2, 3, 3}; - if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2)) - Mask = UnpackLoMask; - else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2)) - Mask = UnpackHiMask; + if (!isSingleElementRepeatedMask(Mask)) { + if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2)) + Mask = UnpackLoMask; + else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2)) + Mask = UnpackHiMask; + } return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1, getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); diff --git a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll index dec829fed3535..44cf4e897d626 100644 --- a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll +++ b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll @@ -911,7 +911,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in. ; SSE2-NEXT: paddb (%rsi), %xmm0 ; SSE2-NEXT: paddb 16(%rsi), %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: paddb (%rdx), %xmm0 ; SSE2-NEXT: movdqa %xmm0, (%rcx) @@ -1898,7 +1898,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in. ; SSE2-NEXT: paddb (%rsi), %xmm0 ; SSE2-NEXT: paddb 32(%rsi), %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE2-NEXT: movdqa %xmm0, %xmm3 ; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3] @@ -4155,7 +4155,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in. ; SSE2-NEXT: paddb (%rsi), %xmm0 ; SSE2-NEXT: paddb 48(%rsi), %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,0,0] ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] ; SSE2-NEXT: paddb (%rdx), %xmm2 diff --git a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll index 3d4cddbb94c7b..89b5c33e3f27b 100644 --- a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll +++ b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll @@ -769,7 +769,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in. ; SSE2-LABEL: vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0] ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: paddb (%rsi), %xmm1 ; SSE2-NEXT: movdqa %xmm1, (%rdx) @@ -1522,7 +1522,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in. ; SSE2-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3] @@ -3335,7 +3335,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in. ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa (%rdi), %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,0,0] ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] ; SSE2-NEXT: paddb (%rsi), %xmm2 diff --git a/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll b/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll index ecd9435c84857..1766b4d1fbb6e 100644 --- a/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll +++ b/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll @@ -58,7 +58,7 @@ define <8 x float> @foo8(<8 x float> %v, ptr%p) nounwind { define <4 x i32> @undef_splatmask(<4 x i32> %v) nounwind { ; AVX2-LABEL: undef_splatmask: ; AVX2: # %bb.0: -; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3] +; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,2,2] ; AVX2-NEXT: retq %res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> %res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> @@ -68,7 +68,7 @@ define <4 x i32> @undef_splatmask(<4 x i32> %v) nounwind { define <4 x i32> @undef_splatmask2(<4 x i32> %v) nounwind { ; AVX2-LABEL: undef_splatmask2: ; AVX2: # %bb.0: -; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3] +; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,2,2] ; AVX2-NEXT: retq %res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> %res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> @@ -78,7 +78,7 @@ define <4 x i32> @undef_splatmask2(<4 x i32> %v) nounwind { define <4 x i32> @undef_splatmask3(<4 x i32> %v) nounwind { ; AVX2-LABEL: undef_splatmask3: ; AVX2: # %bb.0: -; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3] +; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,2,2] ; AVX2-NEXT: retq %res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> %res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> @@ -88,7 +88,7 @@ define <4 x i32> @undef_splatmask3(<4 x i32> %v) nounwind { define <4 x i32> @undef_splatmask4(<4 x i32> %v, ptr %p) nounwind { ; AVX2-LABEL: undef_splatmask4: ; AVX2: # %bb.0: -; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,2,3,3] +; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,2,2,2] ; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3] ; AVX2-NEXT: vmovaps %xmm0, (%rdi) ; AVX2-NEXT: vmovaps %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll b/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll index 209d6a5a67100..93a692cb002e0 100644 --- a/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll +++ b/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll @@ -1911,13 +1911,13 @@ define <2 x i64> @test_v2f64_ogt_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, ; SSE-32-NEXT: movl $0, %edx ; SSE-32-NEXT: cmoval %ecx, %edx ; SSE-32-NEXT: movd %edx, %xmm3 -; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-32-NEXT: ucomisd %xmm4, %xmm2 ; SSE-32-NEXT: cmoval %ecx, %eax ; SSE-32-NEXT: movd %eax, %xmm2 -; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] ; SSE-32-NEXT: pand %xmm3, %xmm0 ; SSE-32-NEXT: pandn %xmm1, %xmm3 @@ -2031,13 +2031,13 @@ define <2 x i64> @test_v2f64_oge_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, ; SSE-32-NEXT: movl $0, %edx ; SSE-32-NEXT: cmovael %ecx, %edx ; SSE-32-NEXT: movd %edx, %xmm3 -; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-32-NEXT: ucomisd %xmm4, %xmm2 ; SSE-32-NEXT: cmovael %ecx, %eax ; SSE-32-NEXT: movd %eax, %xmm2 -; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] ; SSE-32-NEXT: pand %xmm3, %xmm0 ; SSE-32-NEXT: pandn %xmm1, %xmm3 @@ -2151,13 +2151,13 @@ define <2 x i64> @test_v2f64_olt_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, ; SSE-32-NEXT: movl $0, %edx ; SSE-32-NEXT: cmoval %ecx, %edx ; SSE-32-NEXT: movd %edx, %xmm3 -; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-32-NEXT: ucomisd %xmm2, %xmm4 ; SSE-32-NEXT: cmoval %ecx, %eax ; SSE-32-NEXT: movd %eax, %xmm2 -; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] ; SSE-32-NEXT: pand %xmm3, %xmm0 ; SSE-32-NEXT: pandn %xmm1, %xmm3 @@ -2269,13 +2269,13 @@ define <2 x i64> @test_v2f64_ole_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, ; SSE-32-NEXT: movl $0, %edx ; SSE-32-NEXT: cmovael %ecx, %edx ; SSE-32-NEXT: movd %edx, %xmm3 -; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-32-NEXT: ucomisd %xmm2, %xmm4 ; SSE-32-NEXT: cmovael %ecx, %eax ; SSE-32-NEXT: movd %eax, %xmm2 -; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] ; SSE-32-NEXT: pand %xmm3, %xmm0 ; SSE-32-NEXT: pandn %xmm1, %xmm3 @@ -2680,13 +2680,13 @@ define <2 x i64> @test_v2f64_ugt_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, ; SSE-32-NEXT: movl $0, %edx ; SSE-32-NEXT: cmovbl %ecx, %edx ; SSE-32-NEXT: movd %edx, %xmm3 -; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-32-NEXT: ucomisd %xmm2, %xmm4 ; SSE-32-NEXT: cmovbl %ecx, %eax ; SSE-32-NEXT: movd %eax, %xmm2 -; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] ; SSE-32-NEXT: pand %xmm3, %xmm0 ; SSE-32-NEXT: pandn %xmm1, %xmm3 @@ -2798,13 +2798,13 @@ define <2 x i64> @test_v2f64_uge_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, ; SSE-32-NEXT: movl $0, %edx ; SSE-32-NEXT: cmovbel %ecx, %edx ; SSE-32-NEXT: movd %edx, %xmm3 -; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-32-NEXT: ucomisd %xmm2, %xmm4 ; SSE-32-NEXT: cmovbel %ecx, %eax ; SSE-32-NEXT: movd %eax, %xmm2 -; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] ; SSE-32-NEXT: pand %xmm3, %xmm0 ; SSE-32-NEXT: pandn %xmm1, %xmm3 @@ -2916,13 +2916,13 @@ define <2 x i64> @test_v2f64_ult_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, ; SSE-32-NEXT: movl $0, %edx ; SSE-32-NEXT: cmovbl %ecx, %edx ; SSE-32-NEXT: movd %edx, %xmm3 -; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-32-NEXT: ucomisd %xmm4, %xmm2 ; SSE-32-NEXT: cmovbl %ecx, %eax ; SSE-32-NEXT: movd %eax, %xmm2 -; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] ; SSE-32-NEXT: pand %xmm3, %xmm0 ; SSE-32-NEXT: pandn %xmm1, %xmm3 @@ -3036,13 +3036,13 @@ define <2 x i64> @test_v2f64_ule_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, ; SSE-32-NEXT: movl $0, %edx ; SSE-32-NEXT: cmovbel %ecx, %edx ; SSE-32-NEXT: movd %edx, %xmm3 -; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-32-NEXT: ucomisd %xmm4, %xmm2 ; SSE-32-NEXT: cmovbel %ecx, %eax ; SSE-32-NEXT: movd %eax, %xmm2 -; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] ; SSE-32-NEXT: pand %xmm3, %xmm0 ; SSE-32-NEXT: pandn %xmm1, %xmm3 diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll index 9ecc6296a844a..b378dce2b52fd 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll @@ -162,7 +162,7 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind { define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v2i32: ; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; SSE2-NEXT: pslld $23, %xmm1 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 ; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 @@ -182,7 +182,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind ; ; SSE41-LABEL: splatvar_funnnel_v2i32: ; SSE41: # %bb.0: -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; SSE41-NEXT: pslld $23, %xmm1 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 @@ -200,7 +200,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind ; ; AVX1-LABEL: splatvar_funnnel_v2i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; AVX1-NEXT: vpslld $23, %xmm1, %xmm1 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 @@ -277,7 +277,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind ; ; XOPAVX1-LABEL: splatvar_funnnel_v2i32: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: retq ; @@ -289,7 +289,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind ; ; X86-SSE2-LABEL: splatvar_funnnel_v2i32: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; X86-SSE2-NEXT: pslld $23, %xmm1 ; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 ; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 diff --git a/llvm/test/CodeGen/X86/vector-fshl-sub128.ll b/llvm/test/CodeGen/X86/vector-fshl-sub128.ll index 322ebe22671e6..06ff7e77753a0 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-sub128.ll @@ -250,7 +250,7 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt) define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v2i32: ; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0] ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31] ; SSE2-NEXT: movdqa %xmm3, %xmm5 ; SSE2-NEXT: pandn %xmm4, %xmm5 @@ -286,7 +286,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> % ; ; SSE41-LABEL: splatvar_funnnel_v2i32: ; SSE41: # %bb.0: -; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm3 = [31,31,31,31] ; SSE41-NEXT: movdqa %xmm2, %xmm4 ; SSE41-NEXT: pandn %xmm3, %xmm4 @@ -316,7 +316,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> % ; ; AVX1-LABEL: splatvar_funnnel_v2i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31] ; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm5 = xmm4[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -423,7 +423,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> % ; ; XOPAVX1-LABEL: splatvar_funnnel_v2i32: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31] ; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4 ; XOPAVX1-NEXT: vpshld %xmm4, %xmm0, %xmm0 @@ -450,7 +450,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> % ; ; X86-SSE2-LABEL: splatvar_funnnel_v2i32: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1] +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0] ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31] ; X86-SSE2-NEXT: movdqa %xmm3, %xmm5 ; X86-SSE2-NEXT: pandn %xmm4, %xmm5 diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll index 178c02f384f9b..ef5ffe4959b9c 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll @@ -172,7 +172,7 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind { define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v2i32: ; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: psubd %xmm1, %xmm2 ; SSE2-NEXT: pslld $23, %xmm2 @@ -194,7 +194,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind ; ; SSE41-LABEL: splatvar_funnnel_v2i32: ; SSE41: # %bb.0: -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; SSE41-NEXT: pxor %xmm2, %xmm2 ; SSE41-NEXT: psubd %xmm1, %xmm2 ; SSE41-NEXT: pslld $23, %xmm2 @@ -214,7 +214,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind ; ; AVX1-LABEL: splatvar_funnnel_v2i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vpslld $23, %xmm1, %xmm1 @@ -293,7 +293,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind ; ; XOPAVX1-LABEL: splatvar_funnnel_v2i32: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1 ; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0 @@ -309,7 +309,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind ; ; X86-SSE2-LABEL: splatvar_funnnel_v2i32: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; X86-SSE2-NEXT: pxor %xmm2, %xmm2 ; X86-SSE2-NEXT: psubd %xmm1, %xmm2 ; X86-SSE2-NEXT: pslld $23, %xmm2 diff --git a/llvm/test/CodeGen/X86/vector-fshr-sub128.ll b/llvm/test/CodeGen/X86/vector-fshr-sub128.ll index 372deb05e550c..2d8670a6d3f23 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-sub128.ll @@ -251,7 +251,7 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt) define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v2i32: ; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0] ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31] ; SSE2-NEXT: movdqa %xmm3, %xmm5 ; SSE2-NEXT: pand %xmm4, %xmm5 @@ -287,7 +287,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> % ; ; SSE41-LABEL: splatvar_funnnel_v2i32: ; SSE41: # %bb.0: -; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm3 = [31,31,31,31] ; SSE41-NEXT: movdqa %xmm2, %xmm4 ; SSE41-NEXT: pand %xmm3, %xmm4 @@ -317,7 +317,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> % ; ; AVX1-LABEL: splatvar_funnnel_v2i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31] ; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm5 = xmm4[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -425,7 +425,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> % ; ; XOPAVX1-LABEL: splatvar_funnnel_v2i32: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31] ; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4 ; XOPAVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 @@ -452,7 +452,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> % ; ; X86-SSE2-LABEL: splatvar_funnnel_v2i32: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1] +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0] ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31] ; X86-SSE2-NEXT: movdqa %xmm3, %xmm5 ; X86-SSE2-NEXT: pand %xmm4, %xmm5 diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll index f57efb40bf0e3..1e11ea97396da 100644 --- a/llvm/test/CodeGen/X86/vector-sext.ll +++ b/llvm/test/CodeGen/X86/vector-sext.ll @@ -1409,11 +1409,11 @@ define <2 x i64> @load_sext_2i1_to_2i64(ptr%ptr) { ; X86-SSE2-NEXT: movzbl %al, %eax ; X86-SSE2-NEXT: negl %eax ; X86-SSE2-NEXT: movd %eax, %xmm0 -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1] +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0] ; X86-SSE2-NEXT: andl $1, %ecx ; X86-SSE2-NEXT: negl %ecx ; X86-SSE2-NEXT: movd %ecx, %xmm0 -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X86-SSE2-NEXT: retl ; diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll index bd1a48ba5d6ec..7b0f1c9f8a660 100644 --- a/llvm/test/CodeGen/X86/vector-zext.ll +++ b/llvm/test/CodeGen/X86/vector-zext.ll @@ -2555,7 +2555,7 @@ entry: define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) { ; SSE2-LABEL: splatshuf_zext_v4i64: ; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: movdqa %xmm0, %xmm1 @@ -2563,7 +2563,7 @@ define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) { ; ; SSSE3-LABEL: splatshuf_zext_v4i64: ; SSSE3: # %bb.0: -; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSSE3-NEXT: pxor %xmm1, %xmm1 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSSE3-NEXT: movdqa %xmm0, %xmm1 @@ -2571,7 +2571,7 @@ define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) { ; ; SSE41-LABEL: splatshuf_zext_v4i64: ; SSE41: # %bb.0: -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: retq diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll index 19a31a6eca9bd..31ed74535b125 100644 --- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll +++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll @@ -911,7 +911,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in. ; SSE2-NEXT: paddb (%rsi), %xmm0 ; SSE2-NEXT: paddb 16(%rsi), %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: paddb (%rdx), %xmm0 ; SSE2-NEXT: movdqa %xmm0, (%rcx) @@ -1898,7 +1898,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in. ; SSE2-NEXT: paddb (%rsi), %xmm0 ; SSE2-NEXT: paddb 32(%rsi), %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE2-NEXT: movdqa %xmm0, %xmm3 ; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3] @@ -4610,7 +4610,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in. ; SSE2-NEXT: paddb (%rsi), %xmm0 ; SSE2-NEXT: paddb 48(%rsi), %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; SSE2-NEXT: pxor %xmm1, %xmm1 @@ -6544,7 +6544,7 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in. ; SSE2-NEXT: movdqa (%rdi), %xmm0 ; SSE2-NEXT: paddb (%rsi), %xmm0 ; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: movdqa 16(%rdx), %xmm1 ; SSE2-NEXT: paddb %xmm0, %xmm1 diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll index 239472c5cd1c1..5b4cdd2feca06 100644 --- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll +++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll @@ -769,7 +769,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in. ; SSE2-LABEL: vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0] ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: paddb (%rsi), %xmm1 ; SSE2-NEXT: movdqa %xmm1, (%rdx) @@ -1522,7 +1522,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in. ; SSE2-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3] @@ -3660,7 +3660,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in. ; SSE2-LABEL: vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] ; SSE2-NEXT: pxor %xmm0, %xmm0 @@ -5250,7 +5250,7 @@ define void @vec512_i16_widen_to_i256_factor16_broadcast_to_v2i256_factor2(ptr % define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in.elt.ptr, ptr %out.vec.bias.ptr, ptr %out.vec.ptr) nounwind { ; SSE2-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8: ; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[0,0,0,0] ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: movdqa 16(%rsi), %xmm1