From cffbaa93b72b307904935c380f90d49d00c7ecdc Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 2 Jul 2019 17:51:02 +0000 Subject: [PATCH] [X86] Add patterns to select (scalar_to_vector (loadf32)) as (V)MOVSSrm instead of COPY_TO_REGCLASS + (V)MOVSSrm_alt. Similar for (V)MOVSD. Ultimately, I'd like to see about folding scalar_to_vector+load to vzload. Which would select as (V)MOVSSrm so this is closer to that. llvm-svn: 364948 --- llvm/lib/Target/X86/X86InstrAVX512.td | 4 + llvm/lib/Target/X86/X86InstrSSE.td | 29 +++++--- llvm/test/CodeGen/X86/avx2-masked-gather.ll | 6 +- llvm/test/CodeGen/X86/build-vector-512.ll | 8 +- llvm/test/CodeGen/X86/buildvec-insertvec.ll | 12 +-- .../X86/copysign-constant-magnitude.ll | 12 +-- llvm/test/CodeGen/X86/fp128-cast.ll | 2 +- llvm/test/CodeGen/X86/gather-addresses.ll | 30 ++++---- llvm/test/CodeGen/X86/half.ll | 14 ++-- .../X86/insert-into-constant-vector.ll | 4 +- llvm/test/CodeGen/X86/masked_expandload.ll | 2 +- llvm/test/CodeGen/X86/masked_load.ll | 8 +- .../X86/merge-consecutive-loads-128.ll | 18 ++--- llvm/test/CodeGen/X86/mmx-build-vector.ll | 4 +- llvm/test/CodeGen/X86/pr2656.ll | 13 ++-- llvm/test/CodeGen/X86/pr30430.ll | 74 +++++++------------ .../CodeGen/X86/select-of-fp-constants.ll | 2 +- llvm/test/CodeGen/X86/sse-fcopysign.ll | 20 ++--- llvm/test/CodeGen/X86/var-permute-128.ll | 10 +-- .../X86/vector-shuffle-variable-128.ll | 36 ++++----- .../X86/vector-shuffle-variable-256.ll | 40 +++++----- 21 files changed, 173 insertions(+), 175 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 2cdcb1e1f0e5d..b2dfc5129327a 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -4312,6 +4312,10 @@ let Predicates = [HasAVX512, OptForSpeed] in { } let Predicates = [HasAVX512] in { + def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), + (VMOVSSZrm addr:$src)>; + def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), + (VMOVSDZrm addr:$src)>; // Represent the same patterns above but in the form they appear for // 256-bit types diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index c79cf7ade8872..56974c44b4d2b 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -263,6 +263,11 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in { // Patterns let Predicates = [UseAVX] in { + def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), + (VMOVSSrm addr:$src)>; + def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), + (VMOVSDrm addr:$src)>; + // Represent the same patterns above but in the form they appear for // 256-bit types def : Pat<(v8f32 (X86vzload addr:$src)), @@ -290,17 +295,23 @@ let Predicates = [UseAVX, OptForSize] in { (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)))), sub_xmm)>; } -let Predicates = [UseSSE1] in { - let Predicates = [UseSSE1, NoSSE41_Or_OptForSize] in { - // Move scalar to XMM zero-extended, zeroing a VR128 then do a - // MOVSS to the lower bits. - def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), - (MOVSSrr (v4f32 (V_SET0)), VR128:$src)>; - def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), - (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>; - } +let Predicates = [UseSSE1, NoSSE41_Or_OptForSize] in { +// Move scalar to XMM zero-extended, zeroing a VR128 then do a +// MOVSS to the lower bits. 
+def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), + (MOVSSrr (v4f32 (V_SET0)), VR128:$src)>; +def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), + (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>; } +let Predicates = [UseSSE2] in +def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), + (MOVSDrm addr:$src)>; + +let Predicates = [UseSSE1] in +def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), + (MOVSSrm addr:$src)>; + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Move Aligned/Unaligned FP Instructions //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/X86/avx2-masked-gather.ll b/llvm/test/CodeGen/X86/avx2-masked-gather.ll index 63b90aebe93e4..62a9105993fb3 100644 --- a/llvm/test/CodeGen/X86/avx2-masked-gather.ll +++ b/llvm/test/CodeGen/X86/avx2-masked-gather.ll @@ -494,9 +494,9 @@ define <8 x float> @masked_gather_v8float(<8 x float*>* %ptr, <8 x i1> %masks, < ; NOGATHER-NEXT: je .LBB7_10 ; NOGATHER-NEXT: # %bb.9: # %cond.load10 ; NOGATHER-NEXT: vmovq %xmm2, %rax -; NOGATHER-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero -; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm4 -; NOGATHER-NEXT: vblendps {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3] +; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3 +; NOGATHER-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero +; NOGATHER-NEXT: vblendps {{.*#+}} xmm3 = xmm4[0],xmm3[1,2,3] ; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 ; NOGATHER-NEXT: .LBB7_10: # %else11 ; NOGATHER-NEXT: vpextrb $10, %xmm0, %eax diff --git a/llvm/test/CodeGen/X86/build-vector-512.ll b/llvm/test/CodeGen/X86/build-vector-512.ll index f4c2065e1cbc5..aba8b13db967b 100644 --- a/llvm/test/CodeGen/X86/build-vector-512.ll +++ b/llvm/test/CodeGen/X86/build-vector-512.ll @@ -39,8 +39,6 @@ define <16 x float> @test_buildvector_v16f32(float %a0, float %a1, float %a2, fl ; ; AVX-64-LABEL: test_buildvector_v16f32: ; AVX-64: # %bb.0: -; AVX-64-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero -; AVX-64-NEXT: vmovss {{.*#+}} xmm9 = mem[0],zero,zero,zero ; AVX-64-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3] ; AVX-64-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3] ; AVX-64-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm7[0] @@ -48,10 +46,12 @@ define <16 x float> @test_buildvector_v16f32(float %a0, float %a1, float %a2, fl ; AVX-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3] ; AVX-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0] ; AVX-64-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 -; AVX-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm9[0],mem[0],xmm9[2,3] +; AVX-64-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3] ; AVX-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3] ; AVX-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0] -; AVX-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm8[0],mem[0],xmm8[2,3] +; AVX-64-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; AVX-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3] ; AVX-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3] ; AVX-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0] ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 diff --git a/llvm/test/CodeGen/X86/buildvec-insertvec.ll b/llvm/test/CodeGen/X86/buildvec-insertvec.ll index 65b80175f106a..d39e60149b445 100644 --- a/llvm/test/CodeGen/X86/buildvec-insertvec.ll +++ b/llvm/test/CodeGen/X86/buildvec-insertvec.ll @@ -39,10 +39,10 @@ define <4 x float> 
@test_negative_zero_1(<4 x float> %A) { ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] +; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; SSE2-NEXT: xorps %xmm2, %xmm2 ; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3] -; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE2-NEXT: retq ; @@ -107,10 +107,10 @@ define <4 x float> @test_buildvector_v4f32_load(float* %p0, float* %p1, float* % ; SSE2: # %bb.0: ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; @@ -136,8 +136,8 @@ define <4 x float> @test_buildvector_v4f32_partial_load(float %f0, float %f1, fl ; SSE2-LABEL: test_buildvector_v4f32_partial_load: ; SSE2: # %bb.0: ; SSE2-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE2-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/copysign-constant-magnitude.ll b/llvm/test/CodeGen/X86/copysign-constant-magnitude.ll index aea7b35808791..2e39fb976c752 100644 --- a/llvm/test/CodeGen/X86/copysign-constant-magnitude.ll +++ b/llvm/test/CodeGen/X86/copysign-constant-magnitude.ll @@ -33,17 +33,17 @@ define double @mag_neg0_double(double %x) nounwind { ret double %y } -; CHECK: [[ONE3:L.+]]: -; CHECK-NEXT: .quad 4607182418800017408 ## double 1 ; CHECK: [[SIGNMASK3:L.+]]: ; CHECK-NEXT: .quad -9223372036854775808 ## double -0 ; CHECK-NEXT: .quad -9223372036854775808 ## double -0 +; CHECK: [[ONE3:L.+]]: +; CHECK-NEXT: .quad 4607182418800017408 ## double 1 define double @mag_pos1_double(double %x) nounwind { ; CHECK-LABEL: mag_pos1_double: ; CHECK: ## %bb.0: -; CHECK-NEXT: movsd [[ONE3]](%rip), %xmm1 ; CHECK-NEXT: andps [[SIGNMASK3]](%rip), %xmm0 +; CHECK-NEXT: movsd [[ONE3]](%rip), %xmm1 ; CHECK-NEXT: orps %xmm1, %xmm0 ; CHECK-NEXT: retq ; @@ -99,19 +99,19 @@ define float @mag_neg0_float(float %x) nounwind { ret float %y } -; CHECK: [[ONE7:L.+]]: -; CHECK-NEXT: .long 1065353216 ## float 1 ; CHECK: [[SIGNMASK7:L.+]]: ; CHECK-NEXT: .long 2147483648 ## float -0 ; CHECK-NEXT: .long 2147483648 ## float -0 ; CHECK-NEXT: .long 2147483648 ## float -0 ; CHECK-NEXT: .long 2147483648 ## float -0 +; CHECK: [[ONE7:L.+]]: +; CHECK-NEXT: .long 1065353216 ## float 1 define float @mag_pos1_float(float %x) nounwind { ; CHECK-LABEL: mag_pos1_float: ; CHECK: ## %bb.0: -; CHECK-NEXT: movss [[ONE7]](%rip), %xmm1 ; CHECK-NEXT: andps [[SIGNMASK7]](%rip), %xmm0 +; CHECK-NEXT: movss [[ONE7]](%rip), %xmm1 ; CHECK-NEXT: orps %xmm1, %xmm0 ; CHECK-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/fp128-cast.ll 
b/llvm/test/CodeGen/X86/fp128-cast.ll index d299d5337c0b0..f58bee3e189e4 100644 --- a/llvm/test/CodeGen/X86/fp128-cast.ll +++ b/llvm/test/CodeGen/X86/fp128-cast.ll @@ -844,8 +844,8 @@ define fp128 @TestTruncCopysign(fp128 %x, i32 %n) nounwind { ; X64-NEXT: # %bb.1: # %if.then ; X64-NEXT: pushq %rax ; X64-NEXT: callq __trunctfdf2 -; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; X64-NEXT: andps {{.*}}(%rip), %xmm0 +; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; X64-NEXT: orps %xmm1, %xmm0 ; X64-NEXT: callq __extenddftf2 ; X64-NEXT: addq $8, %rsp diff --git a/llvm/test/CodeGen/X86/gather-addresses.ll b/llvm/test/CodeGen/X86/gather-addresses.ll index 6468523b3c4fe..d76027d402eeb 100644 --- a/llvm/test/CodeGen/X86/gather-addresses.ll +++ b/llvm/test/CodeGen/X86/gather-addresses.ll @@ -26,8 +26,8 @@ define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind { ; LIN-SSE2-NEXT: movslq %edx, %rdx ; LIN-SSE2-NEXT: movslq %esi, %rsi ; LIN-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; LIN-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; LIN-SSE2-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; LIN-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; LIN-SSE2-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] ; LIN-SSE2-NEXT: retq ; @@ -42,10 +42,10 @@ define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind { ; LIN-SSE4-NEXT: cltq ; LIN-SSE4-NEXT: movslq %ecx, %rcx ; LIN-SSE4-NEXT: movslq %edx, %rdx -; LIN-SSE4-NEXT: movslq %esi, %rsi ; LIN-SSE4-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; LIN-SSE4-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; LIN-SSE4-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; LIN-SSE4-NEXT: movslq %esi, %rax +; LIN-SSE4-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; LIN-SSE4-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] ; LIN-SSE4-NEXT: retq ; @@ -60,13 +60,13 @@ define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind { ; WIN-SSE2-NEXT: movd %xmm1, %r10d ; WIN-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] ; WIN-SSE2-NEXT: movd %xmm0, %edx -; WIN-SSE2-NEXT: movslq %r8d, %r11 +; WIN-SSE2-NEXT: movslq %r8d, %rax ; WIN-SSE2-NEXT: movslq %r9d, %r8 -; WIN-SSE2-NEXT: movslq %r10d, %rax +; WIN-SSE2-NEXT: movslq %r10d, %r9 ; WIN-SSE2-NEXT: movslq %edx, %rdx ; WIN-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; WIN-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; WIN-SSE2-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; WIN-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; WIN-SSE2-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] ; WIN-SSE2-NEXT: retq ; @@ -79,12 +79,12 @@ define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind { ; WIN-SSE4-NEXT: pextrd $2, %xmm0, %r8d ; WIN-SSE4-NEXT: pextrd $3, %xmm0, %r9d ; WIN-SSE4-NEXT: cltq -; WIN-SSE4-NEXT: movslq %edx, %r10 -; WIN-SSE4-NEXT: movslq %r8d, %rdx -; WIN-SSE4-NEXT: movslq %r9d, %r8 +; WIN-SSE4-NEXT: movslq %edx, %rdx +; WIN-SSE4-NEXT: movslq %r8d, %r8 ; WIN-SSE4-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; WIN-SSE4-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; WIN-SSE4-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; WIN-SSE4-NEXT: movslq %r9d, %rax +; WIN-SSE4-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; WIN-SSE4-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] ; WIN-SSE4-NEXT: retq ; @@ -97,13 +97,13 @@ define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind { ; LIN32-NEXT: movl {{[0-9]+}}(%esp), %edx ; LIN32-NEXT: movdqa (%edx), %xmm0 ; LIN32-NEXT: pand (%ecx), %xmm0 -; LIN32-NEXT: movd %xmm0, %ecx -; LIN32-NEXT: pextrd $1, %xmm0, %edx -; LIN32-NEXT: pextrd $2, %xmm0, %esi -; LIN32-NEXT: 
pextrd $3, %xmm0, %edi +; LIN32-NEXT: pextrd $1, %xmm0, %ecx +; LIN32-NEXT: pextrd $2, %xmm0, %edx +; LIN32-NEXT: pextrd $3, %xmm0, %esi +; LIN32-NEXT: movd %xmm0, %edi ; LIN32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; LIN32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; LIN32-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; LIN32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; LIN32-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] ; LIN32-NEXT: popl %esi ; LIN32-NEXT: popl %edi diff --git a/llvm/test/CodeGen/X86/half.ll b/llvm/test/CodeGen/X86/half.ll index f180bef412040..095dfa2b04a6c 100644 --- a/llvm/test/CodeGen/X86/half.ll +++ b/llvm/test/CodeGen/X86/half.ll @@ -431,18 +431,18 @@ define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 { ; CHECK-I686-NEXT: pushl %esi ; CHECK-I686-NEXT: subl $56, %esp ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK-I686-NEXT: movzwl 4(%esi), %eax +; CHECK-I686-NEXT: movzwl 2(%esi), %eax ; CHECK-I686-NEXT: movl %eax, (%esp) ; CHECK-I686-NEXT: calll __gnu_h2f_ieee ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill -; CHECK-I686-NEXT: movzwl 2(%esi), %eax +; CHECK-I686-NEXT: movzwl 4(%esi), %eax ; CHECK-I686-NEXT: movl %eax, (%esp) ; CHECK-I686-NEXT: calll __gnu_h2f_ieee ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill -; CHECK-I686-NEXT: movzwl (%esi), %eax +; CHECK-I686-NEXT: movzwl 6(%esi), %eax ; CHECK-I686-NEXT: movl %eax, (%esp) ; CHECK-I686-NEXT: calll __gnu_h2f_ieee -; CHECK-I686-NEXT: movzwl 6(%esi), %eax +; CHECK-I686-NEXT: movzwl (%esi), %eax ; CHECK-I686-NEXT: movl %eax, (%esp) ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload @@ -453,10 +453,10 @@ define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 { ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-I686-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; CHECK-I686-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; CHECK-I686-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; CHECK-I686-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-I686-NEXT: addl $56, %esp ; CHECK-I686-NEXT: popl %esi diff --git a/llvm/test/CodeGen/X86/insert-into-constant-vector.ll b/llvm/test/CodeGen/X86/insert-into-constant-vector.ll index 9d33a8bc44709..579d2aa7d4860 100644 --- a/llvm/test/CodeGen/X86/insert-into-constant-vector.ll +++ b/llvm/test/CodeGen/X86/insert-into-constant-vector.ll @@ -168,8 +168,8 @@ define <2 x i64> @elt0_v2i64(i64 %x) { define <4 x float> @elt1_v4f32(float %x) { ; X32SSE2-LABEL: elt1_v4f32: ; X32SSE2: # %bb.0: -; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32SSE2-NEXT: movaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0> +; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] ; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3] ; X32SSE2-NEXT: retl @@ -305,8 +305,8 @@ define <8 x i32> @elt7_v8i32(i32 %x) { define <8 x float> @elt6_v8f32(float %x) { ; X32SSE2-LABEL: elt6_v8f32: ; X32SSE2: # %bb.0: -; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; 
X32SSE2-NEXT: movaps {{.*#+}} xmm1 = <4.0E+0,5.0E+0,u,7.0E+0> +; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0] ; X32SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2] ; X32SSE2-NEXT: movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0] diff --git a/llvm/test/CodeGen/X86/masked_expandload.ll b/llvm/test/CodeGen/X86/masked_expandload.ll index 0284329eaf069..4eb2f73e57289 100644 --- a/llvm/test/CodeGen/X86/masked_expandload.ll +++ b/llvm/test/CodeGen/X86/masked_expandload.ll @@ -1473,8 +1473,8 @@ define <16 x float> @expandload_v16f32_const(float* %base, <16 x float> %src0) { ; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm2 = xmm1[0],mem[0],xmm1[2,3] ; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3] ; AVX1OR2-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm1[4,5,6,7] -; AVX1OR2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero ; AVX1OR2-NEXT: vextractf128 $1, %ymm1, %xmm1 +; AVX1OR2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero ; AVX1OR2-NEXT: vblendps {{.*#+}} xmm1 = xmm3[0],xmm1[1,2,3] ; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3] ; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3] diff --git a/llvm/test/CodeGen/X86/masked_load.ll b/llvm/test/CodeGen/X86/masked_load.ll index d28b783d4ea56..4fa837ce37201 100644 --- a/llvm/test/CodeGen/X86/masked_load.ll +++ b/llvm/test/CodeGen/X86/masked_load.ll @@ -6606,12 +6606,12 @@ define <4 x float> @mload_constmask_v4f32(<4 x float>* %addr, <4 x float> %dst) ; SSE2: ## %bb.0: ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; SSE2-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero ; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2] -; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[2,0] -; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,0] +; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,0],xmm1[2,0] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,0] ; SSE2-NEXT: retq ; ; SSE42-LABEL: mload_constmask_v4f32: @@ -7069,8 +7069,8 @@ define <8 x double> @mload_constmask_v8f64(<8 x double>* %addr, <8 x double> %ds define <4 x double> @mload_constmask_v4f64_undef_passthrough(<4 x double>* %addr) { ; SSE-LABEL: mload_constmask_v4f64_undef_passthrough: ; SSE: ## %bb.0: -; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; SSE-NEXT: movups (%rdi), %xmm0 +; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; SSE-NEXT: retq ; ; AVX1OR2-LABEL: mload_constmask_v4f64_undef_passthrough: diff --git a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll index 679b57569b20f..32a8b7fd02d7d 100644 --- a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll +++ b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll @@ -286,10 +286,10 @@ define <4 x float> @merge_4f32_f32_012u(float* %ptr) nounwind uwtable noinline s ; X32-SSE1-LABEL: merge_4f32_f32_012u: ; X32-SSE1: # %bb.0: ; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X32-SSE1-NEXT: movss {{.*#+}} xmm2 = 
mem[0],zero,zero,zero +; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; X32-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-SSE1-NEXT: retl ; @@ -335,10 +335,10 @@ define <4 x float> @merge_4f32_f32_019u(float* %ptr) nounwind uwtable noinline s ; X32-SSE1-LABEL: merge_4f32_f32_019u: ; X32-SSE1: # %bb.0: ; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X32-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; X32-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-SSE1-NEXT: retl ; @@ -1197,10 +1197,10 @@ define <4 x float> @merge_4f32_f32_2345_volatile(float* %ptr) nounwind uwtable n ; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; X32-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-SSE1-NEXT: retl ; diff --git a/llvm/test/CodeGen/X86/mmx-build-vector.ll b/llvm/test/CodeGen/X86/mmx-build-vector.ll index f94dd050e59dc..dace3cdc7b23d 100644 --- a/llvm/test/CodeGen/X86/mmx-build-vector.ll +++ b/llvm/test/CodeGen/X86/mmx-build-vector.ll @@ -651,8 +651,8 @@ define void @build_v2f32_01(x86_mmx *%p0, float %a0, float %a1) nounwind { ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movdq2q %xmm1, %mm0 +; X86-SSE-NEXT: movdq2q %xmm0, %mm0 +; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-SSE-NEXT: movdq2q %xmm0, %mm1 ; X86-SSE-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0] ; X86-SSE-NEXT: paddd %mm1, %mm1 diff --git a/llvm/test/CodeGen/X86/pr2656.ll b/llvm/test/CodeGen/X86/pr2656.ll index 53d1ea79f486d..e86f55b1521c2 100644 --- a/llvm/test/CodeGen/X86/pr2656.ll +++ b/llvm/test/CodeGen/X86/pr2656.ll @@ -17,15 +17,16 @@ define void @foo(%struct.anon* byval %p) nounwind { ; CHECK-LABEL: foo: ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: subl $28, %esp -; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: movaps {{.*#+}} xmm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] ; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: movaps {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] -; CHECK-NEXT: xorps %xmm2, %xmm0 -; CHECK-NEXT: cvtss2sd %xmm0, %xmm0 -; CHECK-NEXT: xorps %xmm2, %xmm1 +; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: xorps %xmm0, %xmm1 ; CHECK-NEXT: cvtss2sd %xmm1, %xmm1 -; CHECK-NEXT: movsd %xmm1, {{[0-9]+}}(%esp) +; CHECK-NEXT: xorps %xmm0, %xmm2 +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: cvtss2sd %xmm2, %xmm0 ; CHECK-NEXT: 
movsd %xmm0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movsd %xmm1, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $_.str, (%esp) ; CHECK-NEXT: calll _printf ; CHECK-NEXT: addl $28, %esp diff --git a/llvm/test/CodeGen/X86/pr30430.ll b/llvm/test/CodeGen/X86/pr30430.ll index 4422d408db4f9..4d40aa09eeab1 100644 --- a/llvm/test/CodeGen/X86/pr30430.ll +++ b/llvm/test/CodeGen/X86/pr30430.ll @@ -10,7 +10,7 @@ define <16 x float> @makefloat(float %f1, float %f2, float %f3, float %f4, float ; CHECK-NEXT: movq %rsp, %rbp ; CHECK-NEXT: .cfi_def_cfa_register %rbp ; CHECK-NEXT: andq $-64, %rsp -; CHECK-NEXT: subq $320, %rsp # imm = 0x140 +; CHECK-NEXT: subq $256, %rsp # imm = 0x100 ; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero ; CHECK-NEXT: vmovss {{.*#+}} xmm9 = mem[0],zero,zero,zero ; CHECK-NEXT: vmovss {{.*#+}} xmm10 = mem[0],zero,zero,zero @@ -61,59 +61,41 @@ define <16 x float> @makefloat(float %f1, float %f2, float %f3, float %f4, float ; CHECK-NEXT: vmovss %xmm23, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm7 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm16 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm17 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm18 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm19 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm20 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm21 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm22 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm23 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: vmovaps %zmm21, %zmm0 -; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-NEXT: vmovaps %zmm20, %zmm0 -; CHECK-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload -; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; CHECK-NEXT: vmovaps %zmm22, %zmm1 +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] +; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] -; CHECK-NEXT: vmovaps %zmm23, %zmm1 +; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] -; CHECK-NEXT: vmovaps %zmm17, %zmm1 -; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-NEXT: vmovaps %zmm16, %zmm0 -; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; CHECK-NEXT: vmovaps %zmm18, %zmm1 +; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] +; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] +; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] +; CHECK-NEXT: # implicit-def: $ymm2 +; CHECK-NEXT: vmovaps %xmm1, %xmm2 +; CHECK-NEXT: vinsertf128 $1, %xmm0, 
%ymm2, %ymm2 +; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] +; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] -; CHECK-NEXT: vmovaps %zmm19, %zmm1 +; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] -; CHECK-NEXT: # implicit-def: $ymm1 -; CHECK-NEXT: vmovaps %xmm0, %xmm1 -; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 -; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm4[0],xmm5[0],xmm4[2,3] -; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm6[0],xmm0[3] -; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm7[0] -; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 4-byte Reload -; CHECK-NEXT: # xmm4 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm5 # 4-byte Reload -; CHECK-NEXT: # xmm5 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm4 = xmm5[0],xmm4[0],xmm5[2,3] -; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm4[0,1],xmm2[0],xmm4[3] -; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0] +; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[2,3] +; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3] +; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm3[0] ; CHECK-NEXT: # implicit-def: $ymm3 -; CHECK-NEXT: vmovaps %xmm2, %xmm3 +; CHECK-NEXT: vmovaps %xmm1, %xmm3 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm3 ; CHECK-NEXT: # implicit-def: $zmm24 ; CHECK-NEXT: vmovaps %zmm3, %zmm24 -; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm24, %zmm24 +; CHECK-NEXT: vinsertf64x4 $1, %ymm2, %zmm24, %zmm24 ; CHECK-NEXT: vmovaps %zmm24, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm0 ; CHECK-NEXT: movq %rbp, %rsp diff --git a/llvm/test/CodeGen/X86/select-of-fp-constants.ll b/llvm/test/CodeGen/X86/select-of-fp-constants.ll index 9ab12bc89b1a9..cc9ac9e56cda7 100644 --- a/llvm/test/CodeGen/X86/select-of-fp-constants.ll +++ b/llvm/test/CodeGen/X86/select-of-fp-constants.ll @@ -76,9 +76,9 @@ define float @fcmp_select_fp_constants(float %x) nounwind readnone { ; ; X64_AVX2-LABEL: fcmp_select_fp_constants: ; X64_AVX2: # %bb.0: +; X64_AVX2-NEXT: vcmpneqss {{.*}}(%rip), %xmm0, %xmm0 ; X64_AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X64_AVX2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; X64_AVX2-NEXT: vcmpneqss {{.*}}(%rip), %xmm0, %xmm0 ; X64_AVX2-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 ; X64_AVX2-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/sse-fcopysign.ll b/llvm/test/CodeGen/X86/sse-fcopysign.ll index 492c8735a2745..883fb5290f0e8 100644 --- a/llvm/test/CodeGen/X86/sse-fcopysign.ll +++ b/llvm/test/CodeGen/X86/sse-fcopysign.ll @@ -65,11 +65,11 @@ define float @int1(float %a, float %b) nounwind { ; X32: # %bb.0: ; X32-NEXT: pushl %eax ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X32-NEXT: andps {{\.LCPI.*}}, %xmm0 ; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X32-NEXT: andps {{\.LCPI.*}}, %xmm1 -; X32-NEXT: andps {{\.LCPI.*}}, %xmm0 -; X32-NEXT: orps %xmm1, %xmm0 -; X32-NEXT: movss %xmm0, (%esp) +; 
X32-NEXT: orps %xmm0, %xmm1 +; X32-NEXT: movss %xmm1, (%esp) ; X32-NEXT: flds (%esp) ; X32-NEXT: popl %eax ; X32-NEXT: retl @@ -91,14 +91,14 @@ define double @int2(double %a, float %b, float %c) nounwind { ; X32-NEXT: movl %esp, %ebp ; X32-NEXT: andl $-8, %esp ; X32-NEXT: subl $8, %esp -; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X32-NEXT: addss 20(%ebp), %xmm1 -; X32-NEXT: andps {{\.LCPI.*}}, %xmm0 -; X32-NEXT: cvtss2sd %xmm1, %xmm1 +; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X32-NEXT: addss 20(%ebp), %xmm0 +; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; X32-NEXT: andps {{\.LCPI.*}}, %xmm1 -; X32-NEXT: orps %xmm0, %xmm1 -; X32-NEXT: movlps %xmm1, (%esp) +; X32-NEXT: cvtss2sd %xmm0, %xmm0 +; X32-NEXT: andps {{\.LCPI.*}}, %xmm0 +; X32-NEXT: orps %xmm1, %xmm0 +; X32-NEXT: movlps %xmm0, (%esp) ; X32-NEXT: fldl (%esp) ; X32-NEXT: movl %ebp, %esp ; X32-NEXT: popl %ebp diff --git a/llvm/test/CodeGen/X86/var-permute-128.ll b/llvm/test/CodeGen/X86/var-permute-128.ll index 3a1214edfc50a..a81ad74f8f506 100644 --- a/llvm/test/CodeGen/X86/var-permute-128.ll +++ b/llvm/test/CodeGen/X86/var-permute-128.ll @@ -436,15 +436,15 @@ define <4 x float> @var_shuffle_v4f32(<4 x float> %v, <4 x i32> %indices) nounwi ; SSE3-NEXT: movd %xmm1, %esi ; SSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; SSE3-NEXT: andl $3, %eax -; SSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE3-NEXT: andl $3, %ecx -; SSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE3-NEXT: andl $3, %edx -; SSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE3-NEXT: andl $3, %esi +; SSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; SSE3-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE3-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; SSE3-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE3-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-shuffle-variable-128.ll b/llvm/test/CodeGen/X86/vector-shuffle-variable-128.ll index 1243768099929..18b97b195f74c 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-variable-128.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-variable-128.ll @@ -73,16 +73,16 @@ define <4 x float> @var_shuffle_v4f32_v4f32_xxxx_i32(<4 x float> %x, i32 %i0, i3 ; SSE2-NEXT: # kill: def $esi killed $esi def $rsi ; SSE2-NEXT: # kill: def $edi killed $edi def $rdi ; SSE2-NEXT: andl $3, %edi -; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: andl $3, %esi -; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: andl $3, %edx -; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; SSE2-NEXT: andl $3, %ecx +; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: 
unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; @@ -93,16 +93,16 @@ define <4 x float> @var_shuffle_v4f32_v4f32_xxxx_i32(<4 x float> %x, i32 %i0, i3 ; SSSE3-NEXT: # kill: def $esi killed $esi def $rsi ; SSSE3-NEXT: # kill: def $edi killed $edi def $rdi ; SSSE3-NEXT: andl $3, %edi -; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSSE3-NEXT: andl $3, %esi -; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSSE3-NEXT: andl $3, %edx -; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; SSSE3-NEXT: andl $3, %ecx +; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; SSSE3-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSSE3-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; SSSE3-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSSE3-NEXT: retq ; @@ -113,11 +113,11 @@ define <4 x float> @var_shuffle_v4f32_v4f32_xxxx_i32(<4 x float> %x, i32 %i0, i3 ; SSE41-NEXT: # kill: def $esi killed $esi def $rsi ; SSE41-NEXT: # kill: def $edi killed $edi def $rdi ; SSE41-NEXT: andl $3, %edi -; SSE41-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE41-NEXT: andl $3, %esi ; SSE41-NEXT: andl $3, %edx +; SSE41-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; SSE41-NEXT: andl $3, %ecx +; SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] @@ -130,11 +130,11 @@ define <4 x float> @var_shuffle_v4f32_v4f32_xxxx_i32(<4 x float> %x, i32 %i0, i3 ; AVX-NEXT: # kill: def $esi killed $esi def $rsi ; AVX-NEXT: # kill: def $edi killed $edi def $rdi ; AVX-NEXT: andl $3, %edi -; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) -; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-NEXT: andl $3, %esi ; AVX-NEXT: andl $3, %edx +; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; AVX-NEXT: andl $3, %ecx +; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] @@ -1222,10 +1222,10 @@ define <4 x float> @var_shuffle_v4f32_v4f32_x0yx_i32(<4 x float> %x, <4 x float> ; SSE-NEXT: andl $3, %edi ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) ; SSE-NEXT: andl $3, %edx -; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; SSE-NEXT: andl $3, %ecx ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] @@ -1239,10 +1239,10 @@ define <4 x float> @var_shuffle_v4f32_v4f32_x0yx_i32(<4 x float> %x, <4 x float> ; AVX-NEXT: andl $3, %edi ; AVX-NEXT: vmovaps %xmm1, -{{[0-9]+}}(%rsp) ; AVX-NEXT: andl $3, %edx -; 
AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; AVX-NEXT: andl $3, %ecx ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; AVX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] diff --git a/llvm/test/CodeGen/X86/vector-shuffle-variable-256.ll b/llvm/test/CodeGen/X86/vector-shuffle-variable-256.ll index 2c1b146fcebda..6a159ab3aea36 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-variable-256.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-variable-256.ll @@ -14,15 +14,15 @@ define <4 x double> @var_shuffle_v4f64_v4f64_xxxx_i64(<4 x double> %x, i64 %i0, ; ALL-NEXT: andq $-32, %rsp ; ALL-NEXT: subq $64, %rsp ; ALL-NEXT: andl $3, %esi +; ALL-NEXT: andl $3, %edi ; ALL-NEXT: andl $3, %ecx ; ALL-NEXT: andl $3, %edx -; ALL-NEXT: andl $3, %edi ; ALL-NEXT: vmovaps %ymm0, (%rsp) ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; ALL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] -; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] -; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; ALL-NEXT: movq %rbp, %rsp ; ALL-NEXT: popq %rbp ; ALL-NEXT: retq @@ -68,15 +68,15 @@ define <4 x double> @var_shuffle_v4f64_v2f64_xxxx_i64(<2 x double> %x, i64 %i0, ; ALL-LABEL: var_shuffle_v4f64_v2f64_xxxx_i64: ; ALL: # %bb.0: ; ALL-NEXT: andl $1, %esi +; ALL-NEXT: andl $1, %edi ; ALL-NEXT: andl $1, %ecx ; ALL-NEXT: andl $1, %edx -; ALL-NEXT: andl $1, %edi ; ALL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; ALL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] -; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] -; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; ALL-NEXT: retq %x0 = extractelement <2 x double> %x, i64 %i0 %x1 = extractelement <2 x double> %x, i64 %i1 @@ -194,21 +194,21 @@ define <8 x float> @var_shuffle_v8f32_v8f32_xxxxxxxx_i32(<8 x float> %x, i32 %i0 ; ALL-NEXT: movl 16(%rbp), %eax ; ALL-NEXT: andl $7, %eax ; ALL-NEXT: andl $7, %edi -; ALL-NEXT: vmovaps %ymm0, (%rsp) -; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ALL-NEXT: andl $7, %esi ; ALL-NEXT: andl $7, %edx ; ALL-NEXT: andl $7, %ecx ; ALL-NEXT: andl $7, %r8d -; ALL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; ALL-NEXT: vmovaps %ymm0, (%rsp) ; ALL-NEXT: andl $7, %r9d -; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3] -; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3] -; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0] +; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] ; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] ; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] -; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; ALL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3] +; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3] +; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0] +; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; ALL-NEXT: movq %rbp, %rsp ; ALL-NEXT: popq %rbp 
; ALL-NEXT: retq @@ -245,21 +245,21 @@ define <8 x float> @var_shuffle_v8f32_v4f32_xxxxxxxx_i32(<4 x float> %x, i32 %i0 ; ALL-NEXT: movl {{[0-9]+}}(%rsp), %eax ; ALL-NEXT: andl $3, %eax ; ALL-NEXT: andl $3, %edi -; ALL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) -; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ALL-NEXT: andl $3, %esi ; ALL-NEXT: andl $3, %edx ; ALL-NEXT: andl $3, %ecx ; ALL-NEXT: andl $3, %r8d -; ALL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; ALL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; ALL-NEXT: andl $3, %r9d -; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3] -; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3] -; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0] +; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] ; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] ; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] -; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; ALL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3] +; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3] +; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0] +; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; ALL-NEXT: retq %x0 = extractelement <4 x float> %x, i32 %i0 %x1 = extractelement <4 x float> %x, i32 %i1
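
For illustration, a minimal IR sketch of the shape the new patterns match (the function name is hypothetical, and the asm shown is the expected selection, not output copied from the commit). An insertelement of a loaded scalar into lane 0 of an undef vector becomes (scalar_to_vector (loadf32)) in the SelectionDAG, which previously selected as COPY_TO_REGCLASS + (V)MOVSSrm_alt and with these patterns selects (V)MOVSSrm directly:

define <4 x float> @load_scalar_to_v4f32(float* %p) {
  %f = load float, float* %p
  %v = insertelement <4 x float> undef, float %f, i32 0
  ret <4 x float> %v
}

; Expected x86-64 asm with AVX after this patch (illustrative):
;   vmovss (%rdi), %xmm0          # xmm0 = mem[0],zero,zero,zero
;   retq

The f64 case is analogous: (v2f64 (scalar_to_vector (loadf64 addr:$src))) selects (V)MOVSDrm, guarded by UseSSE2, UseAVX, or HasAVX512 as in the hunks above.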