[X86] Add patterns to select (scalar_to_vector (loadf32)) as (V)MOVSSrm instead of COPY_TO_REGCLASS + (V)MOVSSrm_alt.

Similarly for (V)MOVSD. Ultimately, I'd like to see about folding
scalar_to_vector+load to vzload, which would select as (V)MOVSSrm,
so this is a step closer to that.

llvm-svn: 364948
topperc committed Jul 2, 2019
1 parent 36face4 commit cffbaa9
Showing 21 changed files with 173 additions and 175 deletions.
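
For context, here is a minimal sketch of the kind of IR that produces the (scalar_to_vector (loadf32)) node these patterns match. The function and value names are invented for illustration; they are not taken from the patch:

; A scalar float load inserted into lane 0 of an otherwise-undef vector
; becomes (v4f32 (scalar_to_vector (loadf32 addr))) in the SelectionDAG.
define <4 x float> @load_into_lane0(float* %p) {
  %f = load float, float* %p
  %v = insertelement <4 x float> undef, float %f, i32 0
  ret <4 x float> %v
}

Previously this selected to (V)MOVSSrm_alt, the FR32-destination form of the load, wrapped in a COPY_TO_REGCLASS back to VR128; with the new patterns it selects (V)MOVSSrm directly. Running this through llc (e.g. llc -mtriple=x86_64-unknown-unknown) should emit the same movss/vmovss load either way, which is why the test churn below is mostly register allocation and scheduling differences.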
4 changes: 4 additions & 0 deletions llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4312,6 +4312,10 @@ let Predicates = [HasAVX512, OptForSpeed] in {
}

let Predicates = [HasAVX512] in {
def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
(VMOVSSZrm addr:$src)>;
def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
(VMOVSDZrm addr:$src)>;

// Represent the same patterns above but in the form they appear for
// 256-bit types
29 changes: 20 additions & 9 deletions llvm/lib/Target/X86/X86InstrSSE.td
@@ -263,6 +263,11 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {

// Patterns
let Predicates = [UseAVX] in {
def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
(VMOVSSrm addr:$src)>;
def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
(VMOVSDrm addr:$src)>;

// Represent the same patterns above but in the form they appear for
// 256-bit types
def : Pat<(v8f32 (X86vzload addr:$src)),
@@ -290,17 +295,23 @@ let Predicates = [UseAVX, OptForSize] in {
(v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)))), sub_xmm)>;
}

let Predicates = [UseSSE1] in {
let Predicates = [UseSSE1, NoSSE41_Or_OptForSize] in {
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
// MOVSS to the lower bits.
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
(MOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
(MOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
}

let Predicates = [UseSSE2] in
def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
(MOVSDrm addr:$src)>;

let Predicates = [UseSSE1] in
def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
(MOVSSrm addr:$src)>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
//===----------------------------------------------------------------------===//
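A matching double-precision sketch (again invented for illustration) that the new UseSSE2/UseAVX patterns select directly to (V)MOVSDrm; the test updates that follow show the effect:

; (v2f64 (scalar_to_vector (loadf64 addr))) now selects to (V)MOVSDrm
; instead of COPY_TO_REGCLASS + (V)MOVSDrm_alt.
define <2 x double> @load_into_lane0_f64(double* %p) {
  %d = load double, double* %p
  %v = insertelement <2 x double> undef, double %d, i32 0
  ret <2 x double> %v
}
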
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/avx2-masked-gather.ll
@@ -494,9 +494,9 @@ define <8 x float> @masked_gather_v8float(<8 x float*>* %ptr, <8 x i1> %masks, <
; NOGATHER-NEXT: je .LBB7_10
; NOGATHER-NEXT: # %bb.9: # %cond.load10
; NOGATHER-NEXT: vmovq %xmm2, %rax
; NOGATHER-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm4
; NOGATHER-NEXT: vblendps {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3]
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
; NOGATHER-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
; NOGATHER-NEXT: vblendps {{.*#+}} xmm3 = xmm4[0],xmm3[1,2,3]
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB7_10: # %else11
; NOGATHER-NEXT: vpextrb $10, %xmm0, %eax
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/build-vector-512.ll
@@ -39,19 +39,19 @@ define <16 x float> @test_buildvector_v16f32(float %a0, float %a1, float %a2, fl
;
; AVX-64-LABEL: test_buildvector_v16f32:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; AVX-64-NEXT: vmovss {{.*#+}} xmm9 = mem[0],zero,zero,zero
; AVX-64-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3]
; AVX-64-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3]
; AVX-64-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm7[0]
; AVX-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; AVX-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; AVX-64-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
; AVX-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm9[0],mem[0],xmm9[2,3]
; AVX-64-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; AVX-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; AVX-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
; AVX-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm8[0],mem[0],xmm8[2,3]
; AVX-64-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; AVX-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; AVX-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/buildvec-insertvec.ll
@@ -39,10 +39,10 @@ define <4 x float> @test_negative_zero_1(<4 x float> %A) {
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: xorps %xmm2, %xmm2
; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: retq
;
@@ -107,10 +107,10 @@ define <4 x float> @test_buildvector_v4f32_load(float* %p0, float* %p1, float* %
; SSE2: # %bb.0:
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
@@ -136,8 +136,8 @@ define <4 x float> @test_buildvector_v4f32_partial_load(float %f0, float %f1, fl
; SSE2-LABEL: test_buildvector_v4f32_partial_load:
; SSE2: # %bb.0:
; SSE2-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: retq
;
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/copysign-constant-magnitude.ll
@@ -33,17 +33,17 @@ define double @mag_neg0_double(double %x) nounwind {
ret double %y
}

; CHECK: [[ONE3:L.+]]:
; CHECK-NEXT: .quad 4607182418800017408 ## double 1
; CHECK: [[SIGNMASK3:L.+]]:
; CHECK-NEXT: .quad -9223372036854775808 ## double -0
; CHECK-NEXT: .quad -9223372036854775808 ## double -0
; CHECK: [[ONE3:L.+]]:
; CHECK-NEXT: .quad 4607182418800017408 ## double 1

define double @mag_pos1_double(double %x) nounwind {
; CHECK-LABEL: mag_pos1_double:
; CHECK: ## %bb.0:
; CHECK-NEXT: movsd [[ONE3]](%rip), %xmm1
; CHECK-NEXT: andps [[SIGNMASK3]](%rip), %xmm0
; CHECK-NEXT: movsd [[ONE3]](%rip), %xmm1
; CHECK-NEXT: orps %xmm1, %xmm0
; CHECK-NEXT: retq
;
@@ -99,19 +99,19 @@ define float @mag_neg0_float(float %x) nounwind {
ret float %y
}

; CHECK: [[ONE7:L.+]]:
; CHECK-NEXT: .long 1065353216 ## float 1
; CHECK: [[SIGNMASK7:L.+]]:
; CHECK-NEXT: .long 2147483648 ## float -0
; CHECK-NEXT: .long 2147483648 ## float -0
; CHECK-NEXT: .long 2147483648 ## float -0
; CHECK-NEXT: .long 2147483648 ## float -0
; CHECK: [[ONE7:L.+]]:
; CHECK-NEXT: .long 1065353216 ## float 1

define float @mag_pos1_float(float %x) nounwind {
; CHECK-LABEL: mag_pos1_float:
; CHECK: ## %bb.0:
; CHECK-NEXT: movss [[ONE7]](%rip), %xmm1
; CHECK-NEXT: andps [[SIGNMASK7]](%rip), %xmm0
; CHECK-NEXT: movss [[ONE7]](%rip), %xmm1
; CHECK-NEXT: orps %xmm1, %xmm0
; CHECK-NEXT: retq
;
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/fp128-cast.ll
@@ -844,8 +844,8 @@ define fp128 @TestTruncCopysign(fp128 %x, i32 %n) nounwind {
; X64-NEXT: # %bb.1: # %if.then
; X64-NEXT: pushq %rax
; X64-NEXT: callq __trunctfdf2
; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: andps {{.*}}(%rip), %xmm0
; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: orps %xmm1, %xmm0
; X64-NEXT: callq __extenddftf2
; X64-NEXT: addq $8, %rsp
30 changes: 15 additions & 15 deletions llvm/test/CodeGen/X86/gather-addresses.ll
@@ -26,8 +26,8 @@ define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
; LIN-SSE2-NEXT: movslq %edx, %rdx
; LIN-SSE2-NEXT: movslq %esi, %rsi
; LIN-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; LIN-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; LIN-SSE2-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; LIN-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; LIN-SSE2-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; LIN-SSE2-NEXT: retq
;
@@ -42,10 +42,10 @@ define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
; LIN-SSE4-NEXT: cltq
; LIN-SSE4-NEXT: movslq %ecx, %rcx
; LIN-SSE4-NEXT: movslq %edx, %rdx
; LIN-SSE4-NEXT: movslq %esi, %rsi
; LIN-SSE4-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; LIN-SSE4-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; LIN-SSE4-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; LIN-SSE4-NEXT: movslq %esi, %rax
; LIN-SSE4-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; LIN-SSE4-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; LIN-SSE4-NEXT: retq
;
@@ -60,13 +60,13 @@ define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
; WIN-SSE2-NEXT: movd %xmm1, %r10d
; WIN-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; WIN-SSE2-NEXT: movd %xmm0, %edx
; WIN-SSE2-NEXT: movslq %r8d, %r11
; WIN-SSE2-NEXT: movslq %r8d, %rax
; WIN-SSE2-NEXT: movslq %r9d, %r8
; WIN-SSE2-NEXT: movslq %r10d, %rax
; WIN-SSE2-NEXT: movslq %r10d, %r9
; WIN-SSE2-NEXT: movslq %edx, %rdx
; WIN-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; WIN-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; WIN-SSE2-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; WIN-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; WIN-SSE2-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; WIN-SSE2-NEXT: retq
;
@@ -79,12 +79,12 @@ define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
; WIN-SSE4-NEXT: pextrd $2, %xmm0, %r8d
; WIN-SSE4-NEXT: pextrd $3, %xmm0, %r9d
; WIN-SSE4-NEXT: cltq
; WIN-SSE4-NEXT: movslq %edx, %r10
; WIN-SSE4-NEXT: movslq %r8d, %rdx
; WIN-SSE4-NEXT: movslq %r9d, %r8
; WIN-SSE4-NEXT: movslq %edx, %rdx
; WIN-SSE4-NEXT: movslq %r8d, %r8
; WIN-SSE4-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; WIN-SSE4-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; WIN-SSE4-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; WIN-SSE4-NEXT: movslq %r9d, %rax
; WIN-SSE4-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; WIN-SSE4-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; WIN-SSE4-NEXT: retq
;
@@ -97,13 +97,13 @@ define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
; LIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
; LIN32-NEXT: movdqa (%edx), %xmm0
; LIN32-NEXT: pand (%ecx), %xmm0
; LIN32-NEXT: movd %xmm0, %ecx
; LIN32-NEXT: pextrd $1, %xmm0, %edx
; LIN32-NEXT: pextrd $2, %xmm0, %esi
; LIN32-NEXT: pextrd $3, %xmm0, %edi
; LIN32-NEXT: pextrd $1, %xmm0, %ecx
; LIN32-NEXT: pextrd $2, %xmm0, %edx
; LIN32-NEXT: pextrd $3, %xmm0, %esi
; LIN32-NEXT: movd %xmm0, %edi
; LIN32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; LIN32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; LIN32-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; LIN32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; LIN32-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; LIN32-NEXT: popl %esi
; LIN32-NEXT: popl %edi
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/X86/half.ll
@@ -431,18 +431,18 @@ define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 {
; CHECK-I686-NEXT: pushl %esi
; CHECK-I686-NEXT: subl $56, %esp
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-I686-NEXT: movzwl 4(%esi), %eax
; CHECK-I686-NEXT: movzwl 2(%esi), %eax
; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-I686-NEXT: movzwl 2(%esi), %eax
; CHECK-I686-NEXT: movzwl 4(%esi), %eax
; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-I686-NEXT: movzwl (%esi), %eax
; CHECK-I686-NEXT: movzwl 6(%esi), %eax
; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
; CHECK-I686-NEXT: movzwl 6(%esi), %eax
; CHECK-I686-NEXT: movzwl (%esi), %eax
; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
@@ -453,10 +453,10 @@ define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 {
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-I686-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-I686-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-I686-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-I686-NEXT: addl $56, %esp
; CHECK-I686-NEXT: popl %esi
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/insert-into-constant-vector.ll
@@ -168,8 +168,8 @@ define <2 x i64> @elt0_v2i64(i64 %x) {
define <4 x float> @elt1_v4f32(float %x) {
; X32SSE2-LABEL: elt1_v4f32:
; X32SSE2: # %bb.0:
; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32SSE2-NEXT: movaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0>
; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; X32SSE2-NEXT: retl
@@ -305,8 +305,8 @@ define <8 x i32> @elt7_v8i32(i32 %x) {
define <8 x float> @elt6_v8f32(float %x) {
; X32SSE2-LABEL: elt6_v8f32:
; X32SSE2: # %bb.0:
; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32SSE2-NEXT: movaps {{.*#+}} xmm1 = <4.0E+0,5.0E+0,u,7.0E+0>
; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
; X32SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
; X32SSE2-NEXT: movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/masked_expandload.ll
@@ -1473,8 +1473,8 @@ define <16 x float> @expandload_v16f32_const(float* %base, <16 x float> %src0) {
; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm2 = xmm1[0],mem[0],xmm1[2,3]
; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; AVX1OR2-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1OR2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; AVX1OR2-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1OR2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; AVX1OR2-NEXT: vblendps {{.*#+}} xmm1 = xmm3[0],xmm1[1,2,3]
; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/masked_load.ll
@@ -6606,12 +6606,12 @@ define <4 x float> @mload_constmask_v4f32(<4 x float>* %addr, <4 x float> %dst)
; SSE2: ## %bb.0:
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[2,0]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,0]
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,0],xmm1[2,0]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,0]
; SSE2-NEXT: retq
;
; SSE42-LABEL: mload_constmask_v4f32:
@@ -7069,8 +7069,8 @@ define <8 x double> @mload_constmask_v8f64(<8 x double>* %addr, <8 x double> %ds
define <4 x double> @mload_constmask_v4f64_undef_passthrough(<4 x double>* %addr) {
; SSE-LABEL: mload_constmask_v4f64_undef_passthrough:
; SSE: ## %bb.0:
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: movups (%rdi), %xmm0
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: retq
;
; AVX1OR2-LABEL: mload_constmask_v4f64_undef_passthrough:
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
@@ -286,10 +286,10 @@ define <4 x float> @merge_4f32_f32_012u(float* %ptr) nounwind uwtable noinline s
; X32-SSE1-LABEL: merge_4f32_f32_012u:
; X32-SSE1: # %bb.0:
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-SSE1-NEXT: retl
;
@@ -335,10 +335,10 @@ define <4 x float> @merge_4f32_f32_019u(float* %ptr) nounwind uwtable noinline s
; X32-SSE1-LABEL: merge_4f32_f32_019u:
; X32-SSE1: # %bb.0:
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-SSE1-NEXT: retl
;
@@ -1197,10 +1197,10 @@ define <4 x float> @merge_4f32_f32_2345_volatile(float* %ptr) nounwind uwtable n
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-SSE1-NEXT: retl
;
